├── .gitignore
├── LICENSE
├── README.md
├── README_ch.md
├── assert
    ├── es.sql
    ├── es_search_chunk.sql
    └── install.sh
├── doc
    ├── LLM.md
    ├── balance.md
    ├── chunk.md
    ├── db.md
    ├── design.md
    ├── embedding.md
    ├── install.md
    ├── parser.md
    ├── pipeline.md
    └── search.md
├── pom.xml
├── src
    ├── main
    │   └── java
    │   │   └── org
    │   │       ├── agent
    │   │           ├── Agent.java
    │   │           ├── EmergentOrganization.java
    │   │           ├── Environment.java
    │   │           ├── HierarchicalOrganization.java
    │   │           ├── Interaction.java
    │   │           ├── MASExample.java
    │   │           └── Organization.java
    │   │       ├── chunk
    │   │           ├── FixedSizeSplitter.java
    │   │           ├── ParagraphSplitter.java
    │   │           ├── RecursiveSplitter.java
    │   │           ├── SemanticBlockSplitter.java
    │   │           ├── SentenceSplitter.java
    │   │           └── TextSplitter.java
    │   │       ├── constant
    │   │           └── Config.java
    │   │       ├── controler
    │   │           ├── ChatController.java
    │   │           └── SearchController.java
    │   │       ├── demo
    │   │           ├── Debate.java
    │   │           └── SimulatorDebate.java
    │   │       ├── entity
    │   │           ├── Document.java
    │   │           ├── File.java
    │   │           ├── KnowledgeBase.java
    │   │           ├── SearchInput.java
    │   │           ├── SearchOutput.java
    │   │           └── User.java
    │   │       ├── parser
    │   │           ├── ExcelParser.java
    │   │           ├── FileParser.java
    │   │           ├── FileParserFactory.java
    │   │           ├── HTMLParser.java
    │   │           ├── PDFParser.java
    │   │           ├── PPTParser.java
    │   │           ├── PureTextParser.java
    │   │           └── WordParser.java
    │   │       ├── rag
    │   │           ├── AdvancedRAG.java
    │   │           ├── ModularRAG.java
    │   │           └── NaiveRAG.java
    │   │       ├── search
    │   │           ├── Pipeline.java
    │   │           ├── RecallStrategy.java
    │   │           ├── RerankStrategy.java
    │   │           └── SortStrategy.java
    │   │       ├── service
    │   │           ├── LLM
    │   │           │   ├── OllamaChatService.java
    │   │           │   └── OpenAIChatService.java
    │   │           ├── Main.java
    │   │           ├── balance
    │   │           │   ├── LoadBalancer.java
    │   │           │   ├── Main.java
    │   │           │   ├── NacosLoadBalancingClient.java
    │   │           │   ├── RandomLoadBalancer.java
    │   │           │   ├── RoundRobinLoadBalancer.java
    │   │           │   └── WeightedRandomLoadBalancer.java
    │   │           ├── db
    │   │           │   ├── ESClient.java
    │   │           │   ├── MinIOClient.java
    │   │           │   ├── MysqlClient.java
    │   │           │   └── RedisClient.java
    │   │           └── embedding
    │   │           │   ├── BaichuanEmbeddingService.java
    │   │           │   ├── EmbeddingService.java
    │   │           │   ├── JinaEmbeddingRerankService.java
    │   │           │   └── JinaEmbeddingService.java
    │   │       ├── utils
    │   │           ├── DistanceUtils.java
    │   │           ├── HttpClientUtil.java
    │   │           ├── SnowflakeIdGenerator.java
    │   │           └── TrustAllCerts.java
    │   │       └── web
    │   │           ├── KeywordToMarkdownCrawler.java
    │   │           ├── SearchEngine.java
    │   │           └── UrlToMarkdownConverter.java
    └── test
    │   └── java
    │       └── org
    │           ├── chat
    │               └── NaiveRAGTest.java
    │           └── db
    │               ├── ESClientTest.java
    │               ├── ElasticsearchConnectionTest.java
    │               └── OpenAIChatServiceTest.java
└── webapp
    ├── resources
        ├── biglog.png
        └── ezgif-81180eba7adb9d.gif
    └── views
        ├── chat.html
        ├── knowledge_base.html
        ├── login.html
        ├── main.html
        ├── model_management.html
        ├── register.html
        └── search.html


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Build and Release Folders
 2 | bin-debug/
 3 | bin-release/
 4 | [Oo]bj/
 5 | [Bb]in/
 6 | 
 7 | # Other files and folders
 8 | .settings/
 9 | 
10 | # Executables
11 | *.swf
12 | *.air
13 | *.ipa
14 | *.apk
15 | 
16 | # Project files, i.e. `.project`, `.actionScriptProperties` and `.flexProperties`
17 | # should NOT be excluded as they contain compiler settings and other important
18 | # information for Eclipse / Flash Builder.
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | <a href="">
  3 | <img src="webapp/resources/biglog.png" alt="JAVA-RAG logo">
  4 | </a>
  5 | </div>
  6 | 
  7 | 
  8 | [English](README.md) | [简体中文](README_ch.md) 
  9 | # JAVA-RAG
 10 | 
 11 | ### Introduction
 12 | A RAG (Retrieval-Augmented Generation) project implemented in pure Java, without relying on frameworks such as JFinal or Spring Boot. It provides a RAG pipeline and an Agent pattern, which makes it easier to adapt to enterprise-level environments and better suited to secondary development.
 13 | ### Quick Start
 14 | ```java
 15 |     public void demoNaiveRAG() {
 16 |         NaiveRAG naiveRAG = new NaiveRAG(
 17 |                 new Document("./202X Enterprise Plan.pdf"),
 18 |                 "Briefly summarize this article");
 19 |         try {
 20 |             naiveRAG
 21 |                     // Parsing
 22 |                    .parsering()
 23 |                     // Chunking
 24 |                    .chunking()
 25 |                     // Vectorization
 26 |                    .embedding()
 27 |                     // Sorting
 28 |                    .sorting()
 29 |                     // LLM response
 30 |                    .LLMChat();
 31 |         } catch (Exception e) {
 32 |             e.printStackTrace();
 33 |             assert false : "error stack trace";
 34 |         }
 35 |         System.out.println(naiveRAG.getResponse());
 36 |     }
 37 | ```
 38 | 
 39 | ### Usage Tutorial
 40 | 
 41 | #### 💽 [Database Storage](doc/db.md)
 42 | - Read and write for multi-turn conversations in Redis
 43 | - File storage in MinIO
 44 | - Search engine with Elastic Search
 45 | 
 46 | #### 🧠 [LLM Conversations](doc/LLM.md)
 47 | - OpenAI chat interface
 48 | - Ollama chat interface
 49 | - Chat with multi-turn conversations
 50 | 
 51 | #### 📚 [Document Parsing](doc/parser.md)
 52 | - Word
 53 | - PPT
 54 | - PDF
 55 | - EXCEL
 56 | - Plain text
 57 | - Markdown, HTML
 58 | 
 59 | #### ✂️ [Chunking](doc/chunk.md)
 60 | - Fixed size
 61 | - Sentence splitting
 62 | - Recursive splitting
 63 | - Semantic chunking
 64 | 
 65 | #### 📊 [Vectorization Models](doc/embedding.md)
 66 | - Jina-ColBERT
 67 | - Baichuan
 68 | 
 69 | #### 🔎 [Search](doc/search.md)
 70 | - Recall
 71 | - Sorting
 72 | - Re-ranking
 73 | 
 74 | #### 🎁 [More Pipelines](doc/pipeline.md)
 75 | - Advanced RAG
 76 | - Modular RAG
 77 | #### 🦾 [Agent](src/main/java/org/agent)
 78 | - MASExample.java
 79 | 
 80 | #### 🎰 [Load Balancing](doc/balance.md)
 81 | - RoundRobinLoadBalancer
 82 | - WeightedRandomLoadBalancer
 83 | 
 84 | ### Project Structure
 85 | Package layout under `src/main/java/org`:
 86 | ```shell
 87 | ├── agent
 88 | ├── chunk
 89 | ├── constant
 90 | ├── controler
 91 | ├── demo
 92 | ├── entity
 93 | ├── parser
 94 | ├── rag
 95 | ├── search
 96 | ├── service
 97 | │   ├── LLM
 98 | │   ├── balance
 99 | │   ├── db
100 | │   └── embedding
101 | ├── utils
102 | └── web
103 | ```
104 | 
105 | ### 🧒 Concise Installation Tutorial
106 | 
107 | 1. Clone the code
108 | ```shell
109 | git clone https://github.com/ChinaYiqun/java-rag.git
110 | ```
111 | 
112 | 2. Enter the project directory
113 | ```shell
114 | cd java-rag
115 | ```
116 | 
117 | 3. Configure Maven dependencies
118 | ```shell
119 | mvn clean install
120 | ```
121 | 
122 | 4. Create relevant databases
123 | ```shell
124 | sysctl -w vm.max_map_count=262144
125 | # Create a docker network
126 | docker network create elastic
127 | # Pull Elasticsearch
128 | docker pull docker.elastic.co/elasticsearch/elasticsearch:8.11.4
129 | # Run Elasticsearch
130 | docker run --name es01 --net elastic -p 9200:9200 -it -m 2GB docker.elastic.co/elasticsearch/elasticsearch:8.11.4
131 | # Reset password and enrollment token
132 | docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
133 | docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
134 | # Install and run MinIO
135 | mkdir -p ~/minio/data
136 | docker run -p 9000:9000 -p 9090:9090 --name minio -v ~/minio/data:/data -e "MINIO_ROOT_USER=ROOTNAME" -e "MINIO_ROOT_PASSWORD=CHANGEME123" quay.io/minio/minio server /data --console-address ":9090"
137 | ```
138 | 
139 | ### 🥸 Detailed Installation Tutorial
140 | - See [Link](doc/install.md) for details.
141 | 
142 | ### Features
143 | 
144 | - OpenAI-style LLM/Embedding interfaces
145 | - Very simple dependency management with pom.xml (Maven)
146 | - Support for multi-user and multi-knowledge base management
147 | - Freely composable search strategies: multi-channel recall, rough sorting, fine sorting, re-ranking
148 | - Freely composable file chunking: fixed size, sentence splitting, recursive splitting, semantic chunking
149 | - Support for mainstream file parsing with Apache POI
150 | - Integration of mainstream databases: Elastic Search, Redis, Mysql, MinIO
151 | - Highly customizable configuration with Nacos
152 | 
153 | ### Preview
154 | ![ezgif-81180eba7adb9d.gif](webapp/resources/ezgif-81180eba7adb9d.gif)
155 | 
156 | ### References
157 | 
158 | - [llm-apps-java-spring-ai](https://github.com/ThomasVitale/llm-apps-java-spring-ai/tree/main)
159 | - [ragflow](https://github.com/infiniflow/ragflow)
160 | - [ollama](https://github.com/ollama/ollama)
161 | - [langchain](https://github.com/langchain-ai/langchain)


--------------------------------------------------------------------------------
/README_ch.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | <a href="">
  3 | <img src="webapp/resources/biglog.png" alt="JAVA-RAG logo">
  4 | </a>
  5 | </div>
  6 | 
  7 | [English](README.md) | [简体中文](README_ch.md)
  8 | # JAVA-RAG
  9 | 
 10 | ### 介绍
 11 | RAG (Retrieval-Augmented Generation)项目,pure Java 实现,不依赖JFinal,spring-boot等。提供 RAG pipeline 和 Agent 模式,更便于依托企业级环境进行改造,更利于二次开发
 12 | ### 快速入门
 13 | ```java
 14 |     public void demoNaiveRAG() {
 15 |         NaiveRAG naiveRAG = new NaiveRAG(
 16 |                 new Document("./202X企业规划.pdf"),
 17 |                 "简要总结这篇文章");
 18 |         try {
 19 |             naiveRAG
 20 |                     // 解析
 21 |                     .parsering()
 22 |                     // 分块
 23 |                     .chunking()
 24 |                     // 向量化
 25 |                     .embedding()
 26 |                     // 排序
 27 |                     .sorting()
 28 |                     // 大模型回复
 29 |                     .LLMChat();
 30 |         } catch (Exception e) {
 31 |             e.printStackTrace();
 32 |             assert false : "error stack trace";
 33 |         }
 34 |         System.out.println(naiveRAG.getResponse());
 35 |     }
 36 | ```
 37 | 
 38 | ### 用法教程
 39 | 
 40 | #### 💽 [数据库存储](doc/db.md)
 41 | - Redis 多轮对话读写
 42 | - MinIO 文件存储
 43 | - Elastic Search 搜索引擎
 44 | #### 🧠 [LLM 对话](doc/LLM.md)
 45 | - OpenAI 聊天接口
 46 | - Ollama 聊天接口
 47 | - 带有多轮对话的聊天
 48 | #### 📚 [文档解析](doc/parser.md)
 49 | - Word
 50 | - PPT
 51 | - PDF
 52 | - EXCEL
 53 | - 纯文本
 54 | - Markdown, HTML
 55 | #### ✂️ [分块](doc/chunk.md)
 56 | - 固定大小
 57 | - 句子分割
 58 | - 递归分割
 59 | - 语义分块
 60 | #### 📊 [向量化模型](doc/embedding.md)
 61 | - Jina-ColBERT
 62 | - Baichuan
 63 | #### 🔎 [搜索](doc/search.md)
 64 | - 召回
 65 | - 排序
 66 | - 重排序
 67 | #### 🎁 [更多 pipeline](doc/pipeline.md)
 68 | - Advanced RAG
 69 | - Modular RAG
 70 | #### 🦾 [Agent](src/main/java/org/agent)
 71 | - MASExample.java
 72 | #### 🎰 [负载均衡](doc/balance.md)
 73 | - 轮询
 74 | - 权重随机
 75 | ### 项目结构
 76 | 说明
 77 | ```shell
 78 | 
 79 | ├── pom.xml
 80 | ├── src
 81 | │   ├── main
 82 | │   │   ├── java
 83 | │   │   │   └── org
 84 | │   │   │       ├── chunk
 85 | │   │   │       ├── constant
 86 | │   │   │       ├── entity
 87 | │   │   │       ├── parser
 88 | │   │   │       ├── rag
 89 | │   │   │       ├── search
 90 | │   │   │       ├── service
 91 | │   │   │       │   ├── LLM
 92 | │   │   │       │   ├── db
 93 | │   │   │       │   └── embedding
 94 | │   │   │       └── utils
 95 | │   │   └── resources
 96 | │   └── test
 97 | │       └── java
 98 | │           └── org
 99 | │               ├── chat
100 | │               └── db
101 | 
102 | 
103 | ```
104 | 
105 | ### 🧒 简明安装教程
106 | 
107 | 1.  clone 代码
108 | ```shell
109 | git clone https://gitee.com/ChinaYiqun/java-rag.git
110 | ```    
111 | 2. 进入项目目录
112 | ```shell
113 | cd java-rag
114 | ```
115 | 3. 配置 Maven 依赖
116 | ```shell
117 | mvn clean install
118 | ```
119 | 
120 | 4. 创建相关数据库
121 | 
122 | ```shell
123 | sysctl -w vm.max_map_count=262144
124 | #  创建 docker network
125 | docker network create elastic
126 | #  拉 ES
127 | docker pull docker.elastic.co/elasticsearch/elasticsearch:8.11.4
128 | # 运行 ES
129 | docker run --name es01 --net elastic -p 9200:9200 -it -m 2GB docker.elastic.co/elasticsearch/elasticsearch:8.11.4
130 | # 重置 password and enrollment token
131 | docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
132 | docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
133 | # 安装minio脚本
134 | mkdir -p ~/minio/data
135 | docker run -p 9000:9000 -p 9090:9090 --name minio -v ~/minio/data:/data -e "MINIO_ROOT_USER=ROOTNAME" -e "MINIO_ROOT_PASSWORD=CHANGEME123" quay.io/minio/minio server /data --console-address ":9090"
136 | ```
137 | ### 🥸 详细安装教程
138 | - 详见 [链接](doc/install.md)
139 | 
140 | ### 功能点
141 | 
142 | - OpenAI式 LLM/Embedding 接口
143 | - 十分简洁的依赖项管理 , pom.xml(Maven)
144 | - 支持多用户、多知识库 管理
145 | - 搜索策略自由编排:多路召回/粗排/精排/重排
146 | - 文件分块自由编排：固定大小/句子分割/递归分割/语义分块
147 | - 主流文件解析支持 Apache POI
148 | - 主流数据库集成 Elastic Search/Redis/Mysql/MinIO
149 | - 配置灵活度高度定制 Nacos
150 | 
151 | 
152 | ### 页面
153 | ![ezgif-81180eba7adb9d.gif](webapp/resources/ezgif-81180eba7adb9d.gif)
154 | 
155 | 
156 | ### 参考
157 | 
158 | - [llm-apps-java-spring-ai](https://github.com/ThomasVitale/llm-apps-java-spring-ai/tree/main)
159 | - [ragflow](https://github.com/infiniflow/ragflow)
160 | - [ollama](https://github.com/ollama/ollama)
161 | - [langchain](https://github.com/langchain-ai/langchain)


--------------------------------------------------------------------------------
/assert/es.sql:
--------------------------------------------------------------------------------
  1 | - 建表语句
  2 | PUT /documents
  3 | - 字段Mapping
  4 | POST /documents/_mapping
  5 | {
  6 | 
  7 |   "properties": {
  8 | 
  9 |     "user_id": {
 10 | 
 11 |       "type": "keyword"
 12 | 
 13 |     },
 14 | 
 15 |     "file_id": {
 16 | 
 17 |       "type": "keyword"
 18 | 
 19 |     },
 20 | 
 21 |     "kb_id": {
 22 | 
 23 |       "type": "keyword"
 24 | 
 25 |     },
 26 | 
 27 |     "chunk_id": {
 28 | 
 29 |       "type": "integer"
 30 | 
 31 |     },
 32 | 
 33 |     "chunk_size": {
 34 | 
 35 |       "type": "integer"
 36 | 
 37 |     },
 38 | 
 39 |     "chunk_text": {
 40 | 
 41 |       "type": "text",
 42 | 
 43 |       "analyzer": "ik_max_word",
 44 | 
 45 |       "search_analyzer": "ik_smart"
 46 | 
 47 |     },
 48 | 
 49 |     "text_emb": {
 50 | 
 51 |       "type": "dense_vector",
 52 | 
 53 |       "dims": 512
 54 | 
 55 |     },
 56 |     "clip_emb":{
 57 |       "type": "dense_vector",
 58 | 
 59 |       "dims": 512
 60 | 
 61 |     },
 62 |     "doc_type": {
 63 | 
 64 |       "type": "keyword"
 65 | 
 66 |     },
 67 | 
 68 |     "version": {
 69 | 
 70 |       "type": "keyword"
 71 | 
 72 |     },
 73 | 
 74 |     "author": {
 75 | 
 76 |       "type": "keyword"
 77 | 
 78 |     },
 79 | 
 80 |     "created_time": {
 81 | 
 82 |       "type": "integer"
 83 | 
 84 |     },
 85 | 
 86 |     "modified_time": {
 87 | 
 88 |       "type": "integer"
 89 | 
 90 |     },
 91 | 
 92 |     "file_name": {
 93 | 
 94 |       "type": "text",
 95 | 
 96 |       "analyzer": "ik_max_word",
 97 | 
 98 |       "search_analyzer": "ik_smart"
 99 | 
100 |     },
101 | 
102 |     "storage_path": {
103 | 
104 |       "type": "keyword"
105 | 
106 |     }
107 | 
108 |   }
109 | 
110 | }


--------------------------------------------------------------------------------
/assert/es_search_chunk.sql:
--------------------------------------------------------------------------------
 1 | {
 2 |     "query": {
 3 |         "bool": {
 4 |             "should": [
 5 |                 {"match": {"chunk_text": {"query": "%s", "boost": %f}}}
 6 |             ],
 7 |             "filter": [
 8 |                 {"term": {"user_id": "%s"}}
 9 |             ]
10 |         }
11 |     },
12 |     "highlight": {
13 |         "fields": {
14 |             "chunk_text": {}
15 |         }
16 |     },
17 |     "size": %d
18 | }


--------------------------------------------------------------------------------
/assert/install.sh:
--------------------------------------------------------------------------------
 1 | # 安装es脚本
 2 | # 看一下端口占用情况
 3 | netstat -tuln
 4 | # 看一下内存情况,再决定 ES 内存分配情况
 5 | free -h
 6 | # 设置 虚拟内存区域的最大映射计数
 7 | sysctl -w vm.max_map_count=262144
 8 | #  创建 docker network
 9 | docker network create elastic
10 | #  拉 ES
11 | docker pull docker.elastic.co/elasticsearch/elasticsearch:8.11.4
12 | # 运行 ES
13 | docker run --name es01 --net elastic -p 9200:9200 -it -m 2GB docker.elastic.co/elasticsearch/elasticsearch:8.11.4
14 | # 重置 password and enrollment token
15 | docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
16 | docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
17 | # 进入ES 环境
18 | docker exec -it es01 /bin/bash
19 | # 安装ik中文分词器
20 | bin/elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-ik/8.11.4
21 | # 重启 ES配置生效
22 | docker restart es01
23 | # 拉取 kibana
24 | docker pull docker.elastic.co/kibana/kibana:8.11.4
25 | # 运行 kibana
26 | docker run --name kib01 --net elastic -p 5601:5601 docker.elastic.co/kibana/kibana:8.11.4
27 | 
28 | # 安装minio脚本
29 | mkdir -p ~/minio/data
30 | docker run -p 9000:9000 -p 9090:9090 --name minio -v ~/minio/data:/data -e "MINIO_ROOT_USER=ROOTNAME" -e "MINIO_ROOT_PASSWORD=CHANGEME123" quay.io/minio/minio server /data --console-address ":9090"
31 | 
32 | 
33 | # 安装mysql 脚本
34 | sudo docker run --name some-mysql \
35 |   -e MYSQL_ROOT_PASSWORD=my-secret-pw \
36 |   -p 3306:3306 \
37 |   -d mysql/mysql-server
38 | 
39 | sudo docker exec -it some-mysql /bin/bash
40 | mysql -u root -p
41 | CREATE DATABASE pkb CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
42 | CREATE USER 'some-mysql'@'%' IDENTIFIED WITH mysql_native_password BY 'my-secret-pw';
43 | GRANT ALL PRIVILEGES ON *.* TO 'some-mysql'@'%' WITH GRANT OPTION;
44 | FLUSH PRIVILEGES;
45 | 
46 | 


--------------------------------------------------------------------------------
/doc/LLM.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### `OpenAIChatService` 接口文档
 3 | 
 4 | #### 类概述
 5 | `OpenAIChatService` 类用于与 OpenAI 的 API 进行交互，发送请求并获取响应。它提供了两个方法来生成文本，一个方法不涉及聊天历史记录，另一个方法会处理聊天历史记录并将新消息添加到 Redis 中。
 6 | 
 7 | #### 构造方法
 8 | - `OpenAIChatService(String apiKey)`
 9 |     - **描述**：构造一个 `OpenAIChatService` 实例。
10 |     - **参数**：
11 |         - `apiKey`：OpenAI API 的密钥，用于身份验证。
12 |     - **返回值**：无
13 | 
14 | #### 方法
15 | - `public String generateText(String url, JSONObject params) throws IOException`
16 |     - **描述**：发送请求到指定的 API 并获取响应。
17 |     - **参数**：
18 |         - `url`：API 的 URL。
19 |         - `params`：请求参数，以 `JSONObject` 形式表示。
20 |     - **返回值**：生成的文本。
21 |     - **异常**：
22 |         - `IOException`：如果请求失败，抛出该异常。
23 | 
24 | - `public String generateText(String url, String chatId, JSONObject newMessage) throws IOException`
25 |     - **描述**：发送请求到指定的 API 并获取响应，同时处理聊天历史记录。
26 |     - **参数**：
27 |         - `url`：API 的 URL。
28 |         - `chatId`：对话的唯一标识，用于在 Redis 中存储和获取聊天历史记录。
29 |         - `newMessage`：新的消息内容，以 `JSONObject` 形式表示。
30 |     - **返回值**：生成的文本。
31 |     - **异常**：
32 |         - `IOException`：如果请求失败，抛出该异常。
33 | 
34 | ### `OllamaChatService` 接口文档
35 | 
36 | #### 类概述
37 | `OllamaChatService` 类用于与 Ollama 的 API 进行交互，发送聊天请求并获取响应。
38 | 
39 | #### 构造方法
40 | - `OllamaChatService()`
41 |     - **描述**：构造一个 `OllamaChatService` 实例。
42 |     - **参数**：无
43 |     - **返回值**：无
44 | 
45 | #### 方法
46 | - `public String generateChatCompletion(String model, String message) throws Exception`
47 |     - **描述**：发送聊天请求到指定的 API 并获取响应。
48 |     - **参数**：
49 |         - `model`：使用的模型名称。
50 |         - `message`：用户发送的消息内容。
51 |     - **返回值**：API 返回的响应体字符串。
52 |     - **异常**：
53 |         - `Exception`：如果请求失败，抛出该异常。


--------------------------------------------------------------------------------
/doc/balance.md:
--------------------------------------------------------------------------------
  1 | ### 负载均衡接口文档
  2 | 
  3 | #### 1. `LoadBalancer` 接口
  4 | - **功能描述**：该接口定义了负载均衡器的基本行为，即从一组服务实例中选择一个实例。
  5 | - **接口方法**：
  6 | ```java
  7 | /**
  8 |  * 从给定的服务实例列表中选择一个实例。
  9 |  * 
 10 |  * @param instances 服务实例列表，可能为空。
 11 |  * @return 选中的服务实例，如果列表为空则返回 null。
 12 |  */
 13 | Instance select(List<Instance> instances);
 14 | ```
 15 | - **使用方法**：其他具体的负载均衡器类需要实现这个接口，并在 `select` 方法中实现具体的选择逻辑。
 16 | 
 17 | #### 2. `RoundRobinLoadBalancer` 类
 18 | - **功能描述**：实现了轮询负载均衡策略，按照顺序依次选择服务实例。
 19 | - **类属性**：
 20 | ```java
 21 | private final AtomicInteger index = new AtomicInteger(0);
 22 | ```
 23 | - `index`：用于记录当前选择的实例索引，使用 `AtomicInteger` 保证线程安全。
 24 | - **方法实现**：
 25 | ```java
 26 | /**
 27 |  * 从给定的服务实例列表中使用轮询策略选择一个实例。
 28 |  * 
 29 |  * @param instances 服务实例列表，可能为空。
 30 |  * @return 选中的服务实例，如果列表为空则返回 null。
 31 |  */
 32 | @Override
 33 | public Instance select(List<Instance> instances) {
 34 |     if (instances == null || instances.isEmpty()) {
 35 |         return null;
 36 |     }
 37 |     int currentIndex = index.getAndIncrement() % instances.size();
 38 |     return instances.get(currentIndex);
 39 | }
 40 | ```
 41 | - **使用方法**：创建 `RoundRobinLoadBalancer` 实例，然后将其作为参数传递给 `NacosLoadBalancingClient` 类，即可使用轮询策略进行负载均衡。
 42 | ```java
 43 | LoadBalancer roundRobinLoadBalancer = new RoundRobinLoadBalancer();
 44 | NacosLoadBalancingClient client = new NacosLoadBalancingClient("your-service-name", roundRobinLoadBalancer);
 45 | Instance instance = client.getNextInstance();
 46 | ```
 47 | 
 48 | #### 3. `RandomLoadBalancer` 类
 49 | - **功能描述**：实现了随机负载均衡策略，随机选择一个服务实例。
 50 | - **类属性**：
 51 | ```java
 52 | private final Random random = new Random();
 53 | ```
 54 | - `random`：用于生成随机数。
 55 | - **方法实现**：
 56 | ```java
 57 | /**
 58 |  * 从给定的服务实例列表中使用随机策略选择一个实例。
 59 |  * 
 60 |  * @param instances 服务实例列表，可能为空。
 61 |  * @return 选中的服务实例，如果列表为空则返回 null。
 62 |  */
 63 | @Override
 64 | public Instance select(List<Instance> instances) {
 65 |     if (instances == null || instances.isEmpty()) {
 66 |         return null;
 67 |     }
 68 |     return instances.get(random.nextInt(instances.size()));
 69 | }
 70 | ```
 71 | - **使用方法**：创建 `RandomLoadBalancer` 实例，然后将其作为参数传递给 `NacosLoadBalancingClient` 类，即可使用随机策略进行负载均衡。
 72 | ```java
 73 | LoadBalancer randomLoadBalancer = new RandomLoadBalancer();
 74 | NacosLoadBalancingClient client = new NacosLoadBalancingClient("your-service-name", randomLoadBalancer);
 75 | Instance instance = client.getNextInstance();
 76 | ```
 77 | 
 78 | #### 4. `WeightedRandomLoadBalancer` 类
 79 | - **功能描述**：实现了根据服务实例权重进行随机选择的负载均衡策略。
 80 | - **类属性**：
 81 | ```java
 82 | private final Random random = new Random();
 83 | ```
 84 | - `random`：用于生成随机数。
 85 | - **方法实现**：
 86 | ```java
 87 | /**
 88 |  * 从给定的服务实例列表中使用加权随机策略选择一个实例。
 89 |  * 
 90 |  * @param instances 服务实例列表，可能为空。
 91 |  * @return 选中的服务实例，如果列表为空则返回 null。
 92 |  */
 93 | @Override
 94 | public Instance select(List<Instance> instances) {
 95 |     if (instances == null || instances.isEmpty()) {
 96 |         return null;
 97 |     }
 98 |     double totalWeight = 0;
 99 |     for (Instance instance : instances) {
100 |         totalWeight += instance.getWeight();
101 |     }
102 |     double randomWeight = random.nextDouble() * totalWeight;
103 |     double currentWeight = 0;
104 |     for (Instance instance : instances) {
105 |         currentWeight += instance.getWeight();
106 |         if (currentWeight >= randomWeight) {
107 |             return instance;
108 |         }
109 |     }
110 |     return instances.get(0);
111 | }
112 | ```
113 | - **使用方法**：创建 `WeightedRandomLoadBalancer` 实例，然后将其作为参数传递给 `NacosLoadBalancingClient` 类，即可使用加权随机策略进行负载均衡。
114 | ```java
115 | LoadBalancer weightedRandomLoadBalancer = new WeightedRandomLoadBalancer();
116 | NacosLoadBalancingClient client = new NacosLoadBalancingClient("your-service-name", weightedRandomLoadBalancer);
117 | Instance instance = client.getNextInstance();
118 | ```
119 | 
120 | #### 5. `NacosLoadBalancingClient` 类
121 | - **功能描述**：负责从 Nacos 服务中获取服务实例，并处理实例的动态变化，同时使用指定的负载均衡策略选择实例。
122 | - **类属性**：
123 | ```java
124 | private static final String SERVER_ADDRESSES = "http://124.223.85.176:8848";
125 | private static final String NAMESPACE = "public";
126 | private final NamingService namingService;
127 | private final List<Instance> instances = new ArrayList<>();
128 | private final LoadBalancer loadBalancer;
129 | ```
130 | - `SERVER_ADDRESSES`：Nacos 服务器地址。
131 | - `NAMESPACE`：Nacos 命名空间。
132 | - `namingService`：Nacos 命名服务实例。
133 | - `instances`：存储当前可用的服务实例列表。
134 | - `loadBalancer`：使用的负载均衡策略实例。
135 | - **构造方法**：
136 | ```java
137 | /**
138 |  * 构造方法，初始化 Nacos 命名服务，并订阅指定服务的实例变化事件。
139 |  * 
140 |  * @param serviceName 要发现的服务名称。
141 |  * @param loadBalancer 使用的负载均衡策略实例。
142 |  * @throws NacosException 如果与 Nacos 服务通信出现异常。
143 |  */
144 | public NacosLoadBalancingClient(String serviceName, LoadBalancer loadBalancer) throws NacosException {
145 |     this.loadBalancer = loadBalancer;
146 |     Properties properties = new Properties();
147 |     properties.put("serverAddr", SERVER_ADDRESSES);
148 |     properties.put("namespace", NAMESPACE);
149 |     namingService = NacosFactory.createNamingService(properties);
150 |     namingService.subscribe(serviceName, new EventListener() {
151 |         @Override
152 |         public void onEvent(Event event) {
153 |             if (event instanceof NamingEvent) {
154 |                 NamingEvent namingEvent = (NamingEvent) event;
155 |                 instances.clear();
156 |                 instances.addAll(namingEvent.getInstances());
157 |                 System.out.println("Service instances updated: " + instances);
158 |             }
159 |         }
160 |     });
161 |     instances.addAll(namingService.getAllInstances(serviceName));
162 | }
163 | ```
164 | - **公共方法**：
165 | ```java
166 | /**
167 |  * 使用指定的负载均衡策略选择下一个服务实例。
168 |  * 
169 |  * @return 选中的服务实例，如果没有可用实例则返回 null。
170 |  */
171 | public Instance getNextInstance() {
172 |     return loadBalancer.select(instances);
173 | }
174 | ```
175 | - **使用方法**：创建 `NacosLoadBalancingClient` 实例，传入服务名称和负载均衡策略实例，然后调用 `getNextInstance` 方法获取下一个服务实例。
176 | ```java
177 | try {
178 |     LoadBalancer loadBalancer = new RoundRobinLoadBalancer();
179 |     NacosLoadBalancingClient client = new NacosLoadBalancingClient("your-service-name", loadBalancer);
180 |     Instance instance = client.getNextInstance();
181 |     System.out.println("Selected instance: " + instance);
182 | } catch (NacosException e) {
183 |     e.printStackTrace();
184 | }
185 | ```
186 | 
187 | #### 6. `Main` 类
188 | - **功能描述**：演示如何使用不同的负载均衡策略从 Nacos 服务中选择服务实例。
189 | - **使用方法**：运行 `Main` 类的 `main` 方法，即可看到使用轮询、随机和加权随机三种策略选择服务实例的结果。
190 | ```java
191 | public static void main(String[] args) throws NacosException {
192 |     String serviceName = "your-service-name";
193 | 
194 |     // 使用轮询策略
195 |     LoadBalancer roundRobinLoadBalancer = new RoundRobinLoadBalancer();
196 |     NacosLoadBalancingClient roundRobinClient = new NacosLoadBalancingClient(serviceName, roundRobinLoadBalancer);
197 |     Instance roundRobinInstance = roundRobinClient.getNextInstance();
198 |     System.out.println("Round Robin selected instance: " + roundRobinInstance);
199 | 
200 |     // 使用随机策略
201 |     LoadBalancer randomLoadBalancer = new RandomLoadBalancer();
202 |     NacosLoadBalancingClient randomClient = new NacosLoadBalancingClient(serviceName, randomLoadBalancer);
203 |     Instance randomInstance = randomClient.getNextInstance();
204 |     System.out.println("Random selected instance: " + randomInstance);
205 | 
206 |     // 使用加权随机策略
207 |     LoadBalancer weightedRandomLoadBalancer = new WeightedRandomLoadBalancer();
208 |     NacosLoadBalancingClient weightedRandomClient = new NacosLoadBalancingClient(serviceName, weightedRandomLoadBalancer);
209 |     Instance weightedRandomInstance = weightedRandomClient.getNextInstance();
210 |     System.out.println("Weighted Random selected instance: " + weightedRandomInstance);
211 | }
212 | ```
213 | 


--------------------------------------------------------------------------------
/doc/chunk.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### 1. `TextSplitter` 接口
 3 | 
 4 | #### 接口描述
 5 | `TextSplitter` 接口定义了一个文本分割器的基本行为，任何实现该接口的类都需要提供一个 `split` 方法，用于将输入的文本分割成字符串列表。
 6 | 
 7 | #### 方法列表
 8 | | 方法名 | 描述 | 参数 | 返回值 |
 9 | | ---- | ---- | ---- | ---- |
10 | | `split(String text)` | 将输入的文本分割成字符串列表。 | `text`：需要分割的文本。 | 分割后的字符串列表。 |
11 | 
12 | ### 2. `SemanticBlockSplitter` 类
13 | 
14 | #### 类描述
15 | `SemanticBlockSplitter` 类实现了 `TextSplitter` 接口，用于将文本按照段落（假设每个段落都是一个语义块）进行分割。
16 | 
17 | #### 方法列表
18 | | 方法名 | 描述 | 参数 | 返回值 |
19 | | ---- | ---- | ---- | ---- |
20 | | `split(String text)` | 实现 `TextSplitter` 接口的 `split` 方法，将输入文本按段落分割。 | `text`：需要分割的文本。 | 分割后的段落列表。 |
21 | 
22 | ### 3. `RecursiveSplitter` 类
23 | 
24 | #### 类描述
25 | `RecursiveSplitter` 类实现了 `TextSplitter` 接口，通过递归的方式将文本分割成多个部分。
26 | 
27 | #### 构造方法
28 | | 方法名 | 描述 | 参数 |
29 | | ---- | ---- | ---- |
30 | | `RecursiveSplitter(int depth)` | 创建一个 `RecursiveSplitter` 实例，指定递归的深度。 | `depth`：递归的深度。 |
31 | 
32 | #### 方法列表
33 | | 方法名 | 描述 | 参数 | 返回值 |
34 | | ---- | ---- | ---- | ---- |
35 | | `split(String text)` | 实现 `TextSplitter` 接口的 `split` 方法，调用 `recursiveSplit` 方法进行递归分割。 | `text`：需要分割的文本。 | 递归分割后的字符串列表。 |
36 | | `recursiveSplit(String text, int currentDepth)` | 递归地将文本分割成两部分，直到达到指定的递归深度。 | `text`：需要分割的文本；`currentDepth`：当前递归的深度。 | 递归分割后的字符串列表。 |
37 | 
38 | ### 4. `SentenceSplitter` 类
39 | 
40 | #### 类描述
41 | `SentenceSplitter` 类实现了 `TextSplitter` 接口，用于将文本按照句子进行分割。
42 | 
43 | #### 方法列表
44 | | 方法名 | 描述 | 参数 | 返回值 |
45 | | ---- | ---- | ---- | ---- |
46 | | `split(String text)` | 实现 `TextSplitter` 接口的 `split` 方法，将输入文本按句子分割。 | `text`：需要分割的文本。 | 分割后的句子列表。 |
47 | 
48 | ### 5. `FixedSizeSplitter` 类
49 | 
50 | #### 类描述
51 | `FixedSizeSplitter` 类实现了 `TextSplitter` 接口，用于将文本按照固定的大小进行分割。
52 | 
53 | #### 构造方法
54 | | 方法名 | 描述 | 参数 |
55 | | ---- | ---- | ---- |
56 | | `FixedSizeSplitter(int size)` | 创建一个 `FixedSizeSplitter` 实例，指定分割的固定大小。 | `size`：分割的固定大小。 |
57 | 
58 | #### 方法列表
59 | | 方法名 | 描述 | 参数 | 返回值 |
60 | | ---- | ---- | ---- | ---- |
61 | | `split(String text)` | 实现 `TextSplitter` 接口的 `split` 方法，将输入文本按固定大小分割。 | `text`：需要分割的文本。 | 分割后的字符串列表。 |


--------------------------------------------------------------------------------
/doc/db.md:
--------------------------------------------------------------------------------
  1 | 
  2 | ### ESClient 类
  3 | 
  4 | #### 类概述
  5 | `ESClient` 类用于与 Elasticsearch 进行交互，包括测试连接、添加文档块、搜索文档块等操作。
  6 | 
  7 | #### 构造方法
  8 | - `public ESClient(String esUrl, String username, String password)`
  9 |     - **参数**：
 10 |         - `esUrl`：Elasticsearch 的 URL 地址。
 11 |         - `username`：Elasticsearch 的用户名。
 12 |         - `password`：Elasticsearch 的密码。
 13 |     - **描述**：初始化 `ESClient` 实例，创建 `OkHttpClient` 对象用于后续的 HTTP 请求。
 14 | 
 15 | #### 静态方法
 16 | - `public static ESClient getInstance()`
 17 |     - **返回值**：返回 `ESClient` 的单例实例。
 18 |     - **描述**：获取 `ESClient` 的单例实例。
 19 | 
 20 | #### 实例方法
 21 | - `public void testConnection()`
 22 |     - **描述**：测试与 Elasticsearch 的连接。如果连接成功，打印“连接到 Elasticsearch 成功！”；如果连接失败，打印失败状态码。
 23 | - `public boolean addChunk(Document document)`
 24 |     - **参数**：
 25 |         - `document`：要添加的文档对象。
 26 |     - **返回值**：`boolean` 类型，表示文档是否成功添加。
 27 |     - **描述**：向 Elasticsearch 添加一个新的文档块。
 28 | - `public List<Document> searchChunk(Document document, float boost, int size)`
 29 |     - **参数**：
 30 |         - `document`：包含搜索信息的文档对象。
 31 |         - `boost`：提升值。
 32 |         - `size`：结果大小。
 33 |     - **返回值**：`List<Document>` 类型，表示搜索到的文档列表。
 34 |     - **描述**：根据给定的文档信息、提升值和结果大小，在 Elasticsearch 中搜索文档块。
 35 | 
 36 | ### MysqlClient 类
 37 | 
 38 | #### 类概述
 39 | `MysqlClient` 类用于与 MySQL 数据库进行交互，包括创建连接、初始化用户表等操作。
 40 | 
 41 | #### 构造方法
 42 | - `public MysqlClient(String host, String user, String password, String dbName, int port)`
 43 |     - **参数**：
 44 |         - `host`：MySQL 数据库的主机地址。
 45 |         - `user`：MySQL 数据库的用户名。
 46 |         - `password`：MySQL 数据库的密码。
 47 |         - `dbName`：数据库名称。
 48 |         - `port`：MySQL 数据库的端口号。
 49 |     - **描述**：初始化 `MysqlClient` 实例，创建与 MySQL 数据库的连接。
 50 | 
 51 | #### 实例方法
 52 | - `public void initUserTable()`
 53 |     - **描述**：初始化用户表。如果表不存在，则创建名为 `user` 的表，包含 `user_id`、`username` 和 `password` 字段。
 54 | - `private void executeUpdate(String sql)`
 55 |     - **参数**：
 56 |         - `sql`：要执行的 SQL 语句。
 57 |     - **描述**：执行给定的 SQL 更新语句。
 58 | 
 59 | ### MinIOClient 类
 60 | 
 61 | #### 类概述
 62 | `MinIOClient` 类用于与 MinIO 对象存储服务进行交互，包括上传文件、下载文件等操作。
 63 | 
 64 | #### 构造方法
 65 | - `private MinIOClient(String endpoint, String accessKey, String secretKey)`
 66 |     - **参数**：
 67 |         - `endpoint`：MinIO 服务的端点地址。
 68 |         - `accessKey`：访问 MinIO 服务的访问密钥。
 69 |         - `secretKey`：访问 MinIO 服务的秘密密钥。
 70 |     - **描述**：初始化 `MinIOClient` 实例，创建 `MinioClient` 对象用于后续的文件操作。
 71 | 
 72 | #### 静态方法
 73 | - `public static synchronized MinIOClient getInstance(String endpoint, String accessKey, String secretKey)`
 74 |     - **参数**：
 75 |         - `endpoint`：MinIO 服务的端点地址。
 76 |         - `accessKey`：访问 MinIO 服务的访问密钥。
 77 |         - `secretKey`：访问 MinIO 服务的秘密密钥。
 78 |     - **返回值**：返回 `MinIOClient` 的单例实例。
 79 |     - **描述**：获取 `MinIOClient` 的单例实例。
 80 | 
 81 | #### 实例方法
 82 | - `public boolean uploadFile(String bucketName, String objectName, String filePath)`
 83 |     - **参数**：
 84 |         - `bucketName`：MinIO 存储桶名称。
 85 |         - `objectName`：要上传的对象名称。
 86 |         - `filePath`：要上传的文件的本地路径。
 87 |     - **返回值**：`boolean` 类型，表示文件是否上传成功。
 88 |     - **描述**：将指定路径的文件上传到 MinIO 存储桶中。
 89 | - `public boolean downloadFile(String bucketName, String objectName, String downloadPath)`
 90 |     - **参数**：
 91 |         - `bucketName`：MinIO 存储桶名称。
 92 |         - `objectName`：要下载的对象名称。
 93 |         - `downloadPath`：下载文件的本地路径。
 94 |     - **返回值**：`boolean` 类型，表示文件是否下载成功。
 95 |     - **描述**：从 MinIO 存储桶中下载指定的文件到本地路径。
 96 | 
 97 | ### RedisClient 类
 98 | 
 99 | #### 类概述
100 | `RedisClient` 类用于与 Redis 数据库进行交互，包括添加元素到列表、获取列表元素、删除键等操作。
101 | 
102 | #### 静态方法
103 | - `public static synchronized RedisClient getInstance()`
104 |     - **返回值**：返回 `RedisClient` 的单例实例。
105 |     - **描述**：获取 `RedisClient` 的单例实例。
106 | - `public static long lpush(String key, String element, Integer expireSeconds)`
107 |     - **参数**：
108 |         - `key`：Redis 中的键名。
109 |         - `element`：要添加到列表头部的元素。
110 |         - `expireSeconds`：键的过期时间（可选）。
111 |     - **返回值**：`long` 类型，表示列表的长度。
112 |     - **描述**：将元素添加到 Redis 列表的头部，并设置过期时间（如果需要）。
113 | - `public static List<String> lrange(String key, long start, long end)`
114 |     - **参数**：
115 |         - `key`：Redis 中的键名。
116 |         - `start`：列表的起始索引。
117 |         - `end`：列表的结束索引。
118 |     - **返回值**：`List<String>` 类型，表示列表中指定范围内的元素。
119 |     - **描述**：获取 Redis 列表中指定范围内的元素。
120 | - `public static boolean delete(String key)`
121 |     - **参数**：
122 |         - `key`：Redis 中的键名。
123 |     - **返回值**：`boolean` 类型，表示键是否删除成功。
124 |     - **描述**：删除 Redis 中指定的键。
125 | - `public static void close()`
126 |     - **描述**：关闭 Jedis 连接。


--------------------------------------------------------------------------------
/doc/design.md:
--------------------------------------------------------------------------------
  1 | # 面向于企业RAG的设计与实现
  2 | 
  3 | ## 1. 为算法工程师补齐工程开发相关知识
  4 | 
  5 | ## 2. 为开发工程师补齐算法、模型相关知识点
  6 | 
  7 | ---
  8 | 
  9 | ### 技术栈
 10 | 
 11 | #### 1. LLM
 12 | 
 13 | ##### 1.1 API 和 参数
 14 | 
 15 | ###### 1.1.1 API
 16 | 
 17 | ###### 1.1.2 参数
 18 | 
 19 | 在 ChatGPT 中，温度（Temperature）、Top-P 和 Top-K 是三个重要的参数，它们用于控制模型生成文本的随机性和多样性。下面是每个参数的含义和作用：
 20 | 
 21 | 1. 温度（Temperature）：温度参数影响模型预测字符的概率分布。较高的温度值会使输出更加随机和多样化，而较低的温度值会使输出更加确定和集中。温度参数的范围通常是 0 到 2，其中 0 表示模型总是选择概率最高的下一个词，而 2 表示模型的选择更加随机。
 22 | 
 23 | 2. Top-P（Nucleus Sampling）：Top-P 采样是一种解码策略，它根据候选项的可能性得分之和达到阈值 P 来选择候选项的个数。这种方法可以有效地避免选择长尾中的低概率 token 作为候选项。Top-P 值通常设置得较高，例如 0.75 或 0.9，以保持一定的随机性，同时避免过于随机的结果。
 24 | 
 25 | 3. Top-K：Top-K 采样是另一种解码策略，它选择概率最高的前 K 个 token 作为候选项。这种方法可以在一定程度上保证全局最优，但选择 K 的值是一个挑战，因为它需要平衡探索和利用之间的关系。如果同时使用 Top-K 和 Top-P，Top-P 将在 Top-K 之后起作用。
 26 | 
 27 | 这些参数可以根据具体的应用场景和需求进行调整，以获得最佳的生成效果。例如，对于需要高度创造性和多样性的任务，可能会选择较高的温度和 Top-P 值；而对于需要准确性和一致性的任务，则可能选择较低的温度值。
 28 | 
 29 | 详见 [https://www.chatgpt.com](https://www.chatgpt.com) 或者 [https://platform.baichuan-ai.com/docs/api](https://platform.baichuan-ai.com/docs/api)
 30 | 
 31 | ##### 1.2 微调 和 强化学习
 32 | 
 33 | ###### 1.2.1 微调
 34 | 
 35 | - ptuning & ptuning 路由
 36 | - lora & lora 路由
 37 | 
 38 | ###### 1.2.2 强化学习
 39 | 
 40 | ###### 1.2.3 LLaMA Factory
 41 | 
 42 | ##### 1.3 加速 & 部署
 43 | 
 44 | ###### 1.3.1 加速
 45 | 
 46 | - 数据级别优化
 47 | 
 48 |     - 输入压缩：减少输入长度，如提示词裁剪、摘要生成等。
 49 |     - 输出组织：优化输出内容的结构，如思维骨架、子问题分解等。
 50 | 
 51 |     - PromptCompressor：一个用于生成压缩提示词的工具，以减少大模型的输入长度。这种方法可以帮助减少模型推理时的计算量和内存占用，从而加快推理速度。
 52 |       GitHub链接：[https://github.com/example/PromptCompressor](https://github.com/example/PromptCompressor)
 53 | 
 54 | - 模型级别优化
 55 | 
 56 |     - 结构优化：设计更高效的模型结构，如混合专家网络、稀疏注意力等。
 57 |     - 参数优化：模型压缩技术，如量化、稀疏化、知识蒸馏等。
 58 | 
 59 |     - SparseGPT：IST Austria 的研究者提出的一次性（one-shot）剪枝方法，用于加速大模型推理。通过对模型权重进行稀疏化，SparseGPT可以在基本保持模型性能的同时减少计算量。
 60 |       GitHub链接：[https://github.com/IST-DASLab/sparsegpt](https://github.com/IST-DASLab/sparsegpt)
 61 |     - LoRA：微软研究院提出的LoRA技术，通过微调少量参数来适应大模型。LoRA允许模型在不重新训练整个模型的情况下适应新的任务或数据，从而节省时间和资源。
 62 |       GitHub链接：[https://github.com/microsoft/LoRA](https://github.com/microsoft/LoRA)
 63 | 
 64 | - 系统级别优化
 65 | 
 66 |     - 推理引擎优化：优化模型推理过程中的关键算子，如注意力计算、线性计算等。
 67 |     - 服务系统优化：提高异步请求处理的效率，如内存管理、批处理、调度策略等。
 68 | 
 69 |     - vLLM：一个开源的大模型推理加速框架，通过PagedAttention高效地管理attention中缓存的张量。vLLM优化了内存管理和计算调度，以提高大模型推理的效率。
 70 |       GitHub链接：[https://github.com/vllm-project/vllm](https://github.com/vllm-project/vllm)
 71 |     - FlashAttention：由 Tri Dao 等人提出的注意力计算优化算法及其开源实现，用于加速大模型的训练与推理。FlashAttention通过优化注意力计算的算法和实现，减少了计算时间和内存占用。
 72 |       GitHub链接：[https://github.com/Dao-AILab/flash-attention](https://github.com/Dao-AILab/flash-attention)
 73 |     - PagedAttention：一种优化注意力计算的技术，通过分页管理注意力缓存来提高效率。这种方法可以有效地减少内存占用和提高计算速度。
 74 |       GitHub链接：[https://github.com/example/PageAttention](https://github.com/example/PageAttention)（示例链接，请替换为实际链接）
 75 | 
 76 | - 硬件加速
 77 | 
 78 |     - ALLO：一个基于FPGA的LLM推理加速项目，旨在提升大模型在边缘设备上的推理性能。ALLO通过定制硬件逻辑来加速模型推理，特别适合于资源受限的环境。
 79 |       GitHub链接：[https://github.com/example/ALLO](https://github.com/example/ALLO)
 80 |     - NVIDIA TensorRT：一个高性能深度学习推理优化器和运行时库，用于加速深度学习模型的推理。TensorRT可以优化模型的计算图，减少计算量，并利用GPU的并行计算能力来加速推理。
 81 |       GitHub链接：[https://github.com/NVIDIA/TensorRT](https://github.com/NVIDIA/TensorRT)
 82 | 
 83 | [https://arxiv.org/abs/2404.14294](https://arxiv.org/abs/2404.14294)
 84 | [https://www.bentoml.com/blog/benchmarking-llm-inference-backends](https://www.bentoml.com/blog/benchmarking-llm-inference-backends)
 85 | 
 86 | #### 2. 数据搜索
 87 | 
 88 | 
 89 | ##### 2.1 数据库分类
 90 | 
 91 | - 对象存储数据库
 92 | 
 93 |   - 分布式文件系统
 94 |   https://www.cnblogs.com/crazymakercircle/p/15408581.html
 95 |   - minio
 96 |   https://www.minio.org.cn/docs/minio/linux/operations/concepts.html
 97 | 
 98 | 
 99 | - K-V 数据库
100 |   - Redis
101 | 
102 | ##### 2.2 向量数据库
103 | 
104 | ##### 2.3 搜索方法论
105 | 
106 | #### 3. 微服务
107 | 
108 | ##### 3.1 容器管理
109 | 
110 | ##### 3.2 服务管理
111 | 
112 | ##### 3.3 集群管理和资源调度
113 | 
114 | #### 4. 其他重要组件
115 | 
116 | ##### 4.1 日志系统
117 | 
118 | ##### 4.2 监控系统
119 | 
120 | ##### 4.3 文档解析
121 | 
122 | #### 5. 下一代RAG
123 | 
124 | ##### 5.1 Agent workflow
125 | 
126 | ##### 5.2 Multi-Agent 编排
127 | 
128 | ---


--------------------------------------------------------------------------------
/doc/embedding.md:
--------------------------------------------------------------------------------
 1 | ### `EmbeddingService` 接口
 2 | 
 3 | **接口描述**：
 4 | `EmbeddingService` 接口定义了获取嵌入向量的方法，用于与不同的嵌入服务进行交互。
 5 | 
 6 | **方法列表**：
 7 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
 8 | | --- | --- | --- | --- | --- |
 9 | | `getEmbedding(String url, String input)` | 获取单个输入文本的嵌入向量 | `url`：嵌入服务的 API URL；`input`：需要获取嵌入向量的输入文本 | `double[]`：表示输入文本的嵌入向量 | `IOException`：如果请求过程中发生 I/O 错误 |
10 | | `getEmbeddings(String url, String[] inputs)` | 获取多个输入文本的嵌入向量 | `url`：嵌入服务的 API URL；`inputs`：需要获取嵌入向量的输入文本数组 | `double[][]`：表示多个输入文本的嵌入向量，每个输入文本对应一个 `double[]` 数组 | `IOException`：如果请求过程中发生 I/O 错误 |
11 | 
12 | ### `JinaEmbeddingService` 类
13 | 
14 | **类描述**：
15 | `JinaEmbeddingService` 类实现了 `EmbeddingService` 接口，用于与 Jina 嵌入服务进行交互，获取文本的嵌入向量。
16 | 
17 | **构造方法**：
18 | | 方法名 | 描述 | 参数 |
19 | | --- | --- | --- |
20 | | `JinaEmbeddingService(String apiKey)` | 构造一个 `JinaEmbeddingService` 实例 | `apiKey`：Jina 服务的 API 密钥 |
21 | 
22 | **方法列表**：
23 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
24 | | --- | --- | --- | --- | --- |
25 | | `getEmbedding(String url, String input)` | 获取单个输入文本的 Jina 嵌入向量 | `url`：Jina 嵌入服务的 API URL；`input`：需要获取嵌入向量的输入文本 | `double[]`：表示输入文本的嵌入向量 | `IOException`：如果请求过程中发生 I/O 错误 |
26 | | `getEmbeddings(String url, String[] inputs)` | 获取多个输入文本的 Jina 嵌入向量 | `url`：Jina 嵌入服务的 API URL；`inputs`：需要获取嵌入向量的输入文本数组 | `double[][]`：表示多个输入文本的嵌入向量，每个输入文本对应一个 `double[]` 数组 | `IOException`：如果请求过程中发生 I/O 错误 |
27 | 
28 | ### `BaichuanEmbeddingService` 类
29 | 
30 | **类描述**：
31 | `BaichuanEmbeddingService` 类实现了 `EmbeddingService` 接口，用于与百川嵌入服务进行交互，获取文本的嵌入向量。
32 | 
33 | **构造方法**：
34 | | 方法名 | 描述 | 参数 |
35 | | --- | --- | --- |
36 | | `BaichuanEmbeddingService(String apiKey)` | 构造一个 `BaichuanEmbeddingService` 实例 | `apiKey`：百川服务的 API 密钥 |
37 | 
38 | **方法列表**：
39 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
40 | | --- | --- | --- | --- | --- |
41 | | `getEmbedding(String url, String input)` | 获取单个输入文本的百川嵌入向量 | `url`：百川嵌入服务的 API URL；`input`：需要获取嵌入向量的输入文本 | `double[]`：表示输入文本的嵌入向量 | `IOException`：如果请求过程中发生 I/O 错误 |
42 | | `getEmbeddings(String url, String[] inputs)` | 获取多个输入文本的百川嵌入向量 | `url`：百川嵌入服务的 API URL；`inputs`：需要获取嵌入向量的输入文本数组 | `double[][]`：表示多个输入文本的嵌入向量，每个输入文本对应一个 `double[]` 数组 | `IOException`：如果请求过程中发生 I/O 错误 |
43 | 
44 | ### `JinaEmbeddingRerankService` 类
45 | 
46 | **类描述**：
47 | `JinaEmbeddingRerankService` 类实现了 `EmbeddingService` 接口，用于与 Jina 多向量嵌入服务进行交互，获取文本的嵌入向量，同时提供了重排序相关的功能。
48 | 
49 | **构造方法**：
50 | | 方法名 | 描述 | 参数 |
51 | | --- | --- | --- |
52 | | `JinaEmbeddingRerankService(String apiKey)` | 构造一个 `JinaEmbeddingRerankService` 实例 | `apiKey`：Jina 服务的 API 密钥 |
53 | 
54 | **静态方法**：
55 | | 方法名 | 描述 | 参数 | 返回值 |
56 | | --- | --- | --- | --- |
57 | | `getInstance()` | 获取 `JinaEmbeddingRerankService` 的单例实例 | 无 | `JinaEmbeddingRerankService`：`JinaEmbeddingRerankService` 的单例实例 |
58 | 
59 | **方法列表**：
60 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
61 | | --- | --- | --- | --- | --- |
62 | | `getMultiVectorEmbeddingJSONArray(String url, String[] inputs)` | 获取多个输入文本的 Jina 多向量嵌入的 JSON 数组 | `url`：Jina 多向量嵌入服务的 API URL；`inputs`：需要获取嵌入向量的输入文本数组 | `JSONArray`：表示多个输入文本的多向量嵌入的 JSON 数组 | `IOException`：如果请求过程中发生 I/O 错误 |
63 | | `getEmbeddings(String url, String[] inputs)` | 获取多个输入文本的 Jina 多向量嵌入向量 | `url`：Jina 多向量嵌入服务的 API URL；`inputs`：需要获取嵌入向量的输入文本数组 | `double[][]`：表示多个输入文本的嵌入向量，每个输入文本对应一个 `double[]` 数组 | `IOException`：如果请求过程中发生 I/O 错误 |
64 | | `getEmbedding(String url, String input)` | 获取单个输入文本的 Jina 多向量嵌入向量 | `url`：Jina 多向量嵌入服务的 API URL；`input`：需要获取嵌入向量的输入文本 | `double[]`：表示输入文本的嵌入向量 | `IOException`：如果请求过程中发生 I/O 错误 |


--------------------------------------------------------------------------------
/doc/install.md:
--------------------------------------------------------------------------------
  1 | ### 安装 Elasticsearch（ES）教程
  2 | 
  3 | #### 1. 环境检查
  4 | 在安装 Elasticsearch 之前，需要对系统环境进行一些检查，确保安装过程顺利进行。
  5 | - **查看端口占用情况**：
  6 |   端口占用情况会影响 Elasticsearch 服务的正常启动。可以使用以下命令查看当前系统的端口占用情况：
  7 |     ```bash
  8 |     netstat -tuln
  9 |     ```
 10 |   此命令将显示所有监听的 TCP 和 UDP 端口，你需要确保 9200 端口未被占用，因为 Elasticsearch 默认使用该端口提供服务。
 11 | - **查看内存情况**：
 12 |   Elasticsearch 对内存要求较高，因此需要根据系统内存情况合理分配 ES 的内存。使用以下命令查看系统内存信息：
 13 |     ```bash
 14 |     free -h
 15 |     ```
 16 |   根据输出结果，你可以了解系统的可用内存，以便后续为 ES 分配合适的内存。
 17 | 
 18 | #### 2. 设置虚拟内存区域的最大映射计数
 19 | Elasticsearch 在运行时需要足够的虚拟内存映射，因此需要设置虚拟内存区域的最大映射计数。执行以下命令：
 20 | ```bash
 21 | sysctl -w vm.max_map_count=262144
 22 | ```
 23 | 该命令将 `vm.max_map_count` 设置为 262144，以满足 Elasticsearch 的运行需求。
 24 | 
 25 | #### 3. 创建 Docker 网络
 26 | 为了让 Elasticsearch 和 Kibana 能够相互通信，需要创建一个 Docker 网络。使用以下命令创建名为 `elastic` 的 Docker 网络：
 27 | ```bash
 28 | docker network create elastic
 29 | ```
 30 | 
 31 | #### 4. 拉取 Elasticsearch 镜像
 32 | 使用 Docker 拉取 Elasticsearch 8.11.4 版本的镜像，执行以下命令：
 33 | ```bash
 34 | docker pull docker.elastic.co/elasticsearch/elasticsearch:8.11.4
 35 | ```
 36 | 此命令将从 Elastic 官方镜像仓库（docker.elastic.co）下载 Elasticsearch 8.11.4 版本的镜像到本地。
 37 | 
 38 | #### 5. 运行 Elasticsearch 容器
 39 | 下载完成镜像后，使用以下命令运行 Elasticsearch 容器：
 40 | ```bash
 41 | docker run --name es01 --net elastic -p 9200:9200 -it -m 2GB docker.elastic.co/elasticsearch/elasticsearch:8.11.4
 42 | ```
 43 | 参数说明：
 44 | - `--name es01`：为容器指定名称为 `es01`。
 45 | - `--net elastic`：将容器加入到之前创建的 `elastic` 网络中。
 46 | - `-p 9200:9200`：将容器的 9200 端口映射到主机的 9200 端口。
 47 | - `-it`：以交互模式运行容器。
 48 | - `-m 2GB`：为容器分配 2GB 的内存。
 49 | 
 50 | #### 6. 重置密码和生成 Kibana 注册令牌
 51 | 容器启动后，需要重置 Elasticsearch 的默认密码，并生成 Kibana 的注册令牌。执行以下两个命令：
 52 | ```bash
 53 | docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
 54 | docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
 55 | ```
 56 | 第一个命令用于重置 `elastic` 用户的密码，第二个命令用于生成 Kibana 的注册令牌。
 57 | 
 58 | #### 7. 进入 Elasticsearch 容器环境
 59 | 如果需要在容器内执行一些操作，可以使用以下命令进入 Elasticsearch 容器的环境：
 60 | ```bash
 61 | docker exec -it es01 /bin/bash
 62 | ```
 63 | 
 64 | #### 8. 安装 IK 中文分词器
 65 | 为了支持中文分词，需要安装 IK 中文分词器。在容器内执行以下命令：
 66 | ```bash
 67 | bin/elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-ik/8.11.4
 68 | ```
 69 | 
 70 | #### 9. 重启 Elasticsearch 容器
 71 | 安装完分词器后，需要重启 Elasticsearch 容器使配置生效，执行以下命令：
 72 | ```bash
 73 | docker restart es01
 74 | ```
 75 | 
 76 | #### 10. 拉取和运行 Kibana
 77 | Kibana 是 Elasticsearch 的可视化工具，方便对 Elasticsearch 进行管理和查询。
 78 | - **拉取 Kibana 镜像**：
 79 |     ```bash
 80 |     docker pull docker.elastic.co/kibana/kibana:8.11.4
 81 |     ```
 82 | - **运行 Kibana 容器**：
 83 |     ```bash
 84 |     docker run --name kib01 --net elastic -p 5601:5601 docker.elastic.co/kibana/kibana:8.11.4
 85 |     ```
 86 |   参数说明：
 87 |     - `--name kib01`：为容器指定名称为 `kib01`。
 88 |     - `--net elastic`：将容器加入到 `elastic` 网络中。
 89 |     - `-p 5601:5601`：将容器的 5601 端口映射到主机的 5601 端口，通过浏览器访问 `http://localhost:5601` 即可打开 Kibana 界面。
 90 | 
 91 | ### 安装 MySQL 教程
 92 | 
 93 | #### 1. 运行 MySQL 容器
 94 | 使用 Docker 运行 MySQL 容器，执行以下命令：
 95 | ```bash
 96 | sudo docker run --name some-mysql \
 97 |   -e MYSQL_ROOT_PASSWORD=my-secret-pw \
 98 |   -p 3306:3306 \
 99 |   -d mysql/mysql-server
100 | ```
101 | 参数说明：
102 | - `--name some-mysql`：为容器指定名称为 `some-mysql`。
103 | - `-e MYSQL_ROOT_PASSWORD=my-secret-pw`：设置 MySQL 的 `root` 用户密码为 `my-secret-pw`。
104 | - `-p 3306:3306`：将容器的 3306 端口映射到主机的 3306 端口。
105 | - `-d`：以守护进程模式运行容器。
106 | 
107 | #### 2. 进入 MySQL 容器环境
108 | 使用以下命令进入 MySQL 容器的环境：
109 | ```bash
110 | sudo docker exec -it some-mysql /bin/bash
111 | ```
112 | 
113 | #### 3. 登录 MySQL 并创建数据库和用户
114 | 在容器内执行以下命令登录 MySQL：
115 | ```bash
116 | mysql -u root -p
117 | ```
118 | 输入之前设置的 `root` 用户密码后，即可登录 MySQL。登录成功后，执行以下 SQL 语句创建数据库和用户：
119 | ```sql
120 | CREATE DATABASE pkb CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
121 | CREATE USER 'some-mysql'@'%' IDENTIFIED WITH mysql_native_password BY 'my-secret-pw';
122 | GRANT ALL PRIVILEGES ON *.* TO 'some-mysql'@'%' WITH GRANT OPTION;
123 | FLUSH PRIVILEGES;
124 | ```
125 | - `CREATE DATABASE pkb ...`：创建名为 `pkb` 的数据库，使用 `utf8mb4` 字符集和 `utf8mb4_unicode_ci` 排序规则。
126 | - `CREATE USER ...`：创建名为 `some-mysql` 的用户，允许从任何主机连接，密码为 `my-secret-pw`。
127 | - `GRANT ALL PRIVILEGES ...`：授予 `some-mysql` 用户所有数据库和表的全部权限。
128 | - `FLUSH PRIVILEGES`：刷新权限使设置生效。
129 | 
130 | ### 安装 MinIO 教程
131 | 
132 | #### 1. 创建数据存储目录
133 | 为 MinIO 容器创建数据存储目录，执行以下命令：
134 | ```bash
135 | mkdir -p ~/minio/data
136 | ```
137 | 该命令将在用户主目录下创建 `minio/data` 目录，用于存储 MinIO 的数据。
138 | 
139 | #### 2. 运行 MinIO 容器
140 | 使用以下命令运行 MinIO 容器：
141 | ```bash
142 | docker run -p 9000:9000 -p 9090:9090 --name minio -v ~/minio/data:/data -e "MINIO_ROOT_USER=ROOTNAME" -e "MINIO_ROOT_PASSWORD=CHANGEME123" quay.io/minio/minio server /data --console-address ":9090"
143 | ```
144 | 参数说明：
145 | - `-p 9000:9000`：将容器的 9000 端口映射到主机的 9000 端口，用于 MinIO 的 API 服务。
146 | - `-p 9090:9090`：将容器的 9090 端口映射到主机的 9090 端口，用于 MinIO 的控制台服务。
147 | - `--name minio`：为容器指定名称为 `minio`。
148 | - `-v ~/minio/data:/data`：将主机的 `~/minio/data` 目录挂载到容器的 `/data` 目录，实现数据持久化。
149 | - `-e "MINIO_ROOT_USER=ROOTNAME"`：设置 MinIO 的根用户名为 `ROOTNAME`。
150 | - `-e "MINIO_ROOT_PASSWORD=CHANGEME123"`：设置 MinIO 的根用户密码为 `CHANGEME123`。
151 | - `quay.io/minio/minio server /data --console-address ":9090"`：启动 MinIO 服务，指定数据存储目录为 `/data`，控制台地址为 `:9090`。
152 | 
153 | 启动成功后，通过浏览器访问 `http://localhost:9090` 即可打开 MinIO 的控制台界面，使用设置的用户名和密码登录。 


--------------------------------------------------------------------------------
/doc/parser.md:
--------------------------------------------------------------------------------
 1 | ### `FileParser` 接口
 2 | 
 3 | #### 概述
 4 | `FileParser` 接口定义了文件解析的基本方法，用于解析文件并返回其文本内容。
 5 | 
 6 | #### 方法列表
 7 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
 8 | | ---- | ---- | ---- | ---- | ---- |
 9 | | `parse(File file)` | 解析给定的文件并返回其文本内容 | `file`：要解析的文件对象 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
10 | | `parse(String filePath)` | 根据文件路径解析文件并返回其文本内容 | `filePath`：要解析的文件的路径 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
11 | 
12 | ### `WordParser` 类
13 | 
14 | #### 概述
15 | `WordParser` 类实现了 `FileParser` 接口，用于解析 Word 文件（.doc 和 .docx）并返回其文本内容。
16 | 
17 | #### 方法列表
18 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
19 | | ---- | ---- | ---- | ---- | ---- |
20 | | `parse(File file)` | 解析给定的 Word 文件并返回其文本内容 | `file`：要解析的 Word 文件对象 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
21 | | `parse(String filePath)` | 根据文件路径解析 Word 文件并返回其文本内容 | `filePath`：要解析的 Word 文件的路径 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
22 | 
23 | ### `FileParserFactory` 类
24 | 
25 | #### 概述
26 | `FileParserFactory` 类是一个工厂类，用于根据文件类型或文件路径获取相应的文件解析器。
27 | 
28 | #### 方法列表
29 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
30 | | ---- | ---- | ---- | ---- | ---- |
31 | | `getFileParser(String fileType)` | 根据文件类型获取相应的文件解析器 | `fileType`：文件类型（如 "html", "pdf" 等） | 对应的文件解析器对象 | `IllegalArgumentException`：如果文件类型不支持 |
32 | | `getFileParserByPath(String filePath)` | 根据文件路径获取相应的文件解析器 | `filePath`：文件的路径 | 对应的文件解析器对象 | `IllegalArgumentException`：如果文件路径无效 |
33 | | `easyParse(String filePath)` | 简单解析文件，根据文件路径获取解析器并解析文件 | `filePath`：文件的路径 | 解析后的字符串 | `RuntimeException`：如果发生 I/O 错误 |
34 | | `testParseFile(String filePath)` | 测试解析文件的方法，打印文件内容 | `filePath`：文件的路径 | 无 | `IOException`：如果发生 I/O 错误 |
35 | 
36 | ### `HTMLParser` 类
37 | 
38 | #### 概述
39 | `HTMLParser` 类实现了 `FileParser` 接口，用于解析 HTML 文件并返回其文本内容，还支持解析 URL 指向的 HTML 页面。
40 | 
41 | #### 方法列表
42 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
43 | | ---- | ---- | ---- | ---- | ---- |
44 | | `parse(File file)` | 解析给定的 HTML 文件并返回其文本内容 | `file`：要解析的 HTML 文件对象 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
45 | | `parse(String filePath)` | 根据文件路径解析 HTML 文件并返回其文本内容 | `filePath`：要解析的 HTML 文件的路径 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
46 | | `parseHTMLFromURL(String url)` | 解析 URL 指向的 HTML 页面并返回其文本内容 | `url`：URL 字符串 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
47 | 
48 | ### `PDFParser` 类
49 | 
50 | #### 概述
51 | `PDFParser` 类实现了 `FileParser` 接口，用于解析 PDF 文件并返回其文本内容。
52 | 
53 | #### 方法列表
54 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
55 | | ---- | ---- | ---- | ---- | ---- |
56 | | `parse(File file)` | 解析给定的 PDF 文件并返回其文本内容 | `file`：要解析的 PDF 文件对象 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
57 | | `parse(String filePath)` | 根据文件路径解析 PDF 文件并返回其文本内容 | `filePath`：要解析的 PDF 文件的路径 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
58 | 
59 | ### `PPTParser` 类
60 | 
61 | #### 概述
62 | `PPTParser` 类实现了 `FileParser` 接口，用于解析 PowerPoint 文件（.ppt 和 .pptx）并返回其文本内容。
63 | 
64 | #### 方法列表
65 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
66 | | ---- | ---- | ---- | ---- | ---- |
67 | | `parse(File file)` | 解析给定的 PowerPoint 文件并返回其文本内容 | `file`：要解析的 PowerPoint 文件对象 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
68 | | `parse(String filePath)` | 根据文件路径解析 PowerPoint 文件并返回其文本内容 | `filePath`：要解析的 PowerPoint 文件的路径 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
69 | 
70 | ### `ExcelParser` 类
71 | 
72 | #### 概述
73 | `ExcelParser` 类实现了 `FileParser` 接口，用于解析 Excel 文件（.xls 和 .xlsx）并返回其文本内容。
74 | 
75 | #### 方法列表
76 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
77 | | ---- | ---- | ---- | ---- | ---- |
78 | | `parse(File file)` | 解析给定的 Excel 文件并返回其文本内容 | `file`：要解析的 Excel 文件对象 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
79 | | `parse(String filePath)` | 根据文件路径解析 Excel 文件并返回其文本内容 | `filePath`：要解析的 Excel 文件的路径 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
80 | 
81 | ### `PureTextParser` 类
82 | 
83 | #### 概述
84 | `PureTextParser` 类实现了 `FileParser` 接口，用于解析纯文本文件（.txt, .md, .py, .java 等）并返回其文本内容。
85 | 
86 | #### 方法列表
87 | | 方法名 | 描述 | 参数 | 返回值 | 异常 |
88 | | ---- | ---- | ---- | ---- | ---- |
89 | | `parse(File file)` | 解析给定的纯文本文件并返回其文本内容 | `file`：要解析的纯文本文件对象 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |
90 | | `parse(String filePath)` | 根据文件路径解析纯文本文件并返回其文本内容 | `filePath`：要解析的纯文本文件的路径 | 解析后的字符串 | `IOException`：如果发生 I/O 错误 |


--------------------------------------------------------------------------------
/doc/pipeline.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## 类介绍
 3 | 
 4 | ### 1. `Document` 类
 5 | `Document` 类用于表示一个文档对象，包含文档的存储路径、分块后的文本以及嵌入向量等信息。在 `AdvancedRAG` 和 `ModularRAG` 类中作为主要的数据载体使用。
 6 | 
 7 | ### 2. `AdvancedRAG` 类
 8 | `AdvancedRAG` 类实现了一个基本的 RAG 流程，通过链式调用的方式依次完成文档解析、分块、嵌入、排序、高级筛选和大模型聊天等步骤。
 9 | 
10 | #### 使用示例
11 | ```java
12 | public static void main(String[] args) {
13 |     AdvancedRAG advancedRAG = new AdvancedRAG(
14 |             new Document("./202X企业规划.pdf"),
15 |             "简要总结这篇文章");
16 |     try {
17 |         advancedRAG
18 |                 // 解析文档
19 |                 .parsing()
20 |                 // 分块处理
21 |                 .chunking()
22 |                 // 嵌入处理
23 |                 .embedding()
24 |                 // 排序处理
25 |                 .sorting()
26 |                 // 高级筛选
27 |                 .advancedFiltering()
28 |                 // 大模型聊天
29 |                 .LLMChat();
30 |     } catch (IOException e) {
31 |         throw new RuntimeException(e);
32 |     }
33 |     System.out.println(advancedRAG.getResponse());
34 | }
35 | ```
36 | 
37 | #### 设计思路
38 | - **链式调用**：每个处理步骤的方法都返回当前对象 `this`，方便进行链式调用，使得代码更加简洁和易读。
39 | - **模块化设计**：将 RAG 流程拆分为多个独立的方法，每个方法负责一个特定的任务，提高了代码的可维护性和可扩展性。
40 | - **建造者模式（Builder Pattern）**：通过链式调用的方式逐步构建 RAG 处理流程，类似于建造者模式中的构建步骤。
41 | 
42 | ### 3. `ModularRAG` 类
43 | `ModularRAG` 类在 `AdvancedRAG` 类的基础上，增加了更多的分块策略和筛选条件，以及对生成的回复进行后处理的功能。
44 | 
45 | #### 使用示例
46 | ```java
47 | public static void main(String[] args) {
48 |     ModularRAG modularRAG = new ModularRAG(
49 |             new Document("./202X企业规划.pdf"),
50 |             "简要总结这篇文章");
51 |     try {
52 |         modularRAG
53 |                 // 解析文档
54 |                 .parsing()
55 |                 // 分块处理
56 |                 .chunking()
57 |                 // 嵌入处理
58 |                 .embedding()
59 |                 // 排序处理
60 |                 .sorting()
61 |                 // 高级筛选
62 |                 .advancedFiltering()
63 |                 // 对筛选后的块重新排序
64 |                 .reSortingFilteredChunks()
65 |                 // 大模型聊天
66 |                 .LLMChat()
67 |                 // 后处理
68 |                 .postProcessing();
69 |     } catch (IOException e) {
70 |         throw new RuntimeException(e);
71 |     }
72 |     System.out.println(modularRAG.getResponse());
73 | }
74 | ```
75 | 
76 | #### 设计思路
77 | - **模块化扩展**：在 `AdvancedRAG` 类的基础上，增加了更多的分块策略和筛选条件，以及对生成的回复进行后处理的功能，提高了代码的灵活性和可扩展性。
78 | - **复用性**：复用了 `AdvancedRAG` 类中的部分方法，减少了代码的重复。
79 | - **装饰者模式（Decorator Pattern）**：在 `AdvancedRAG` 类的基础上，通过增加新的方法和功能，对其进行了扩展和装饰。
80 | 
81 | 


--------------------------------------------------------------------------------
/doc/search.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### RecallStrategy 类接口文档
 3 | 
 4 | **类名**：`RecallStrategy`
 5 | 
 6 | **包名**：`org.search`
 7 | 
 8 | **描述**：该类提供了一个静态方法用于从 Elasticsearch 中召回文档。
 9 | 
10 | **方法列表**：
11 | 
12 | | 方法名 | 描述 | 参数 | 返回值 |
13 | | --- | --- | --- | --- |
14 | | `esRecall(SearchInput searchInput, SearchOutput searchOutput)` | 从 Elasticsearch 中召回文档并将结果存储在 `SearchOutput` 对象中。 | `searchInput`：搜索输入对象，包含查询文档信息。<br>`searchOutput`：搜索输出对象，用于存储召回的文档结果。 | `void` |
15 | 
16 | ### RerankStrategy 类接口文档
17 | 
18 | **类名**：`RerankStrategy`
19 | 
20 | **包名**：`org.search`
21 | 
22 | **描述**：该类提供了一个静态方法用于对召回的文档进行重排序。
23 | 
24 | **方法列表**：
25 | 
26 | | 方法名 | 描述 | 参数 | 返回值 |
27 | | --- | --- | --- | --- |
28 | | `JinaCobertRerank(SearchInput searchInput, SearchOutput searchOutput)` | 使用 Jina Embedding 服务对召回的文档进行重排序，根据文档与输入文档的归一化平方误差距离进行排序。 | `searchInput`：搜索输入对象，包含查询文档信息。<br>`searchOutput`：搜索输出对象，包含召回的文档列表，重排序后更新该列表。 | `void` |
29 | 
30 | ### SortStrategy 类接口文档
31 | 
32 | **类名**：`SortStrategy`
33 | 
34 | **包名**：`org.search`
35 | 
36 | **描述**：该类提供了一个静态方法用于对文档列表按 `score` 字段进行降序排序。
37 | 
38 | **方法列表**：
39 | 
40 | | 方法名 | 描述 | 参数 | 返回值 |
41 | | --- | --- | --- | --- |
42 | | `dummySort(SearchInput searchInput, SearchOutput searchOutput)` | 对 `SearchOutput` 中的文档列表按 `score` 字段进行降序排序。 | `searchInput`：搜索输入对象（在此方法中未使用）。<br>`searchOutput`：搜索输出对象，包含文档列表，排序后更新该列表。 | `void` |
43 | 
44 | ### Pipeline 类接口文档
45 | 
46 | **类名**：`Pipeline`
47 | 
48 | **包名**：`org.search`
49 | 
50 | **描述**：该类封装了搜索流程，包括召回、排序和重排序操作。
51 | 
52 | **属性列表**：
53 | 
54 | | 属性名 | 类型 | 描述 |
55 | | --- | --- | --- |
56 | | `searchInput` | `SearchInput` | 搜索输入对象，用于存储查询信息。 |
57 | | `searchOutput` | `SearchOutput` | 搜索输出对象，用于存储搜索结果。 |
58 | 
59 | **方法列表**：
60 | 
61 | | 方法名 | 描述 | 参数 | 返回值 |
62 | | --- | --- | --- | --- |
63 | | `recall(SearchInput searchInput, SearchOutput searchOutput)` | 调用 `RecallStrategy.esRecall` 方法进行文档召回。 | `searchInput`：搜索输入对象，包含查询文档信息。<br>`searchOutput`：搜索输出对象，用于存储召回的文档结果。 | `void` |
64 | | `recall()` | 调用 `RecallStrategy.esRecall` 方法使用类内的 `searchInput` 和 `searchOutput` 对象进行文档召回。 | 无 | `void` |
65 | | `sort()` | 调用 `SortStrategy.dummySort` 方法对文档列表按 `score` 字段进行降序排序。 | 无 | `void` |
66 | | `rerank(SearchInput searchInput, SearchOutput searchOutput)` | 调用 `RerankStrategy.JinaCobertRerank` 方法对文档进行重排序。 | `searchInput`：搜索输入对象，包含查询文档信息。<br>`searchOutput`：搜索输出对象，包含召回的文档列表，重排序后更新该列表。 | `void` |
67 | | `rerank()` | 调用 `RerankStrategy.JinaCobertRerank` 方法使用类内的 `searchInput` 和 `searchOutput` 对象对文档进行重排序。 | 无 | `void` |
68 | | `getDefaultResult()` | 按顺序执行召回、排序和重排序操作，并返回最终的搜索结果。 | 无 | `SearchOutput` |


--------------------------------------------------------------------------------
/src/main/java/org/agent/Agent.java:
--------------------------------------------------------------------------------
  1 | package org.agent;
  2 | 
  3 | import org.constant.Config;
  4 | import org.json.JSONObject;
  5 | import org.service.LLM.OpenAIChatService;
  6 | 
  7 | import java.io.IOException;
  8 | 
  9 | /**
 10 |  * An agent described by a role, a list of abilities and a knowledge model.
 11 |  * Besides placeholder lifecycle hooks it can send prompts to an LLM service.
 12 |  * The service URL, model name and API key are read from Config.
 13 |  */
 14 | public class Agent {
 15 |     private final String role; // role name; used in status lines and prompts
 16 |     private String[] abilities; // never null; replaced by a longer copy in addAbility
 17 |     private final String knowledgeModel; // stored only; nothing in this class reads it
 18 | 
 19 |     /**
 20 |      * @param role           role name of the agent
 21 |      * @param abilities      initial abilities; null is treated as an empty list
 22 |      * @param knowledgeModel knowledge model label
 23 |      */
 24 |     public Agent(String role, String[] abilities, String knowledgeModel) {
 25 |         this.role = role;
 26 |         // Defensive copy: later writes to the caller's array cannot leak in,
 27 |         // and a null argument no longer makes addAbility throw NullPointerException.
 28 |         this.abilities = abilities == null ? new String[0] : abilities.clone();
 29 |         this.knowledgeModel = knowledgeModel;
 30 |     }
 31 | 
 32 |     /** Placeholder for learning logic; currently only prints a status line. */
 33 |     public void learn() {
 34 |         System.out.println(role + " 正在学习...");
 35 |     }
 36 | 
 37 |     /** Placeholder for planning logic; currently only prints a status line. */
 38 |     public void plan() {
 39 |         System.out.println(role + " 正在规划...");
 40 |     }
 41 | 
 42 |     /** Placeholder for reasoning logic; currently only prints a status line. */
 43 |     public void reason() {
 44 |         System.out.println(role + " 正在推理...");
 45 |     }
 46 | 
 47 |     /** Placeholder for decision logic; currently only prints a status line. */
 48 |     public void decide() {
 49 |         System.out.println(role + " 正在决策...");
 50 |     }
 51 | 
 52 |     /**
 53 |      * Appends one ability and prints a confirmation line.
 54 |      *
 55 |      * @param ability ability to add
 56 |      */
 57 |     public void addAbility(String ability) {
 58 |         // Arrays are fixed-size, so grow by copying into a one-longer array.
 59 |         String[] grown = new String[abilities.length + 1];
 60 |         System.arraycopy(abilities, 0, grown, 0, abilities.length);
 61 |         grown[abilities.length] = ability;
 62 |         abilities = grown;
 63 |         System.out.println(role + " 已添加能力: " + ability);
 64 |     }
 65 | 
 66 |     /**
 67 |      * Sends a fixed placeholder question to the LLM service.
 68 |      *
 69 |      * @return the service's reply, or null if the request failed with an IOException
 70 |      */
 71 |     public String callLLM() {
 72 |         return chat("一些需要询问的内容");
 73 |     }
 74 | 
 75 |     /**
 76 |      * Asks the LLM service for this agent's position on a debate topic.
 77 |      *
 78 |      * @param topic debate topic inserted into the prompt together with the role
 79 |      * @return the service's reply, or null if the request failed with an IOException
 80 |      */
 81 |     public String generateDebatePoint(String topic) {
 82 |         return chat("针对辩论赛主题：" + topic + "，作为 " + role + " 发表观点");
 83 |     }
 84 | 
 85 |     /**
 86 |      * Builds a one-message chat request and passes it to
 87 |      * OpenAIChatService.generateText; shared by callLLM and generateDebatePoint.
 88 |      *
 89 |      * @param prompt content of the single "user" message
 90 |      * @return the string returned by generateText, or null after an IOException
 91 |      */
 92 |     private String chat(String prompt) {
 93 |         OpenAIChatService openAIChatService = new OpenAIChatService(Config.API_KEY);
 94 |         try {
 95 |             // One user message, temperature 0.3, streaming disabled.
 96 |             JSONObject params = new JSONObject()
 97 |                     .put("model", Config.LLM_MODEL)
 98 |                     .put("messages", new JSONObject[] {
 99 |                             new JSONObject().put("role", "user").put("content", prompt)
100 |                     })
101 |                     .put("temperature", 0.3)
102 |                     .put("stream", false);
103 |             return openAIChatService.generateText(Config.LLM_URL, params);
104 |         } catch (IOException e) {
105 |             // Best-effort contract kept from the original: log and hand back null.
106 |             e.printStackTrace();
107 |             return null;
108 |         }
109 |     }
110 | }
111 | 


--------------------------------------------------------------------------------
/src/main/java/org/agent/EmergentOrganization.java:
--------------------------------------------------------------------------------
1 | package org.agent;
2 | 
3 | // EmergentOrganization 类，继承自 Organization 类，实现了基于涌现行为的组织方式
4 | public class EmergentOrganization extends Organization {
5 |     @Override
6 |     public void organizeAgents() {
7 |         System.out.println("正在进行基于涌现行为的组织方式...");
8 |     }
9 | }


--------------------------------------------------------------------------------
/src/main/java/org/agent/Environment.java:
--------------------------------------------------------------------------------
 1 | package org.agent;
 2 | 
 3 | // Environment 类，包含环境类型属性
 4 | public class Environment {
 5 |     private String environmentType; // 环境类型
 6 | 
 7 |     // 构造函数
 8 |     public Environment(String environmentType) {
 9 |         this.environmentType = environmentType;
10 |     }
11 | 
12 |     // 智能体感知环境方法
13 |     public void perceive() {
14 |         System.out.println("智能体正在感知 " + environmentType + " 环境...");
15 |     }
16 | 
17 |     // 智能体影响环境方法
18 |     public void affect() {
19 |         System.out.println("智能体正在影响 " + environmentType + " 环境...");
20 |     }
21 | }


--------------------------------------------------------------------------------
/src/main/java/org/agent/HierarchicalOrganization.java:
--------------------------------------------------------------------------------
1 | package org.agent;
2 | 
3 | // HierarchicalOrganization 类，继承自 Organization 类，实现了分层控制的组织方式
4 | public class HierarchicalOrganization extends Organization {
5 |     @Override
6 |     public void organizeAgents() {
7 |         System.out.println("正在进行分层控制的组织方式...");
8 |     }
9 | }


--------------------------------------------------------------------------------
/src/main/java/org/agent/Interaction.java:
--------------------------------------------------------------------------------
 1 | package org.agent;
 2 | 
 3 | // Interaction 类，包含交互类型属性
 4 | public class Interaction {
 5 |     private String interactionType; // 交互类型
 6 | 
 7 |     // 构造函数
 8 |     public Interaction(String interactionType) {
 9 |         this.interactionType = interactionType;
10 |     }
11 | 
12 |     // 智能体之间进行交互的方法
13 |     public void interact() {
14 |         System.out.println("智能体正在进行 " + interactionType + " 交互...");
15 |     }
16 | }


--------------------------------------------------------------------------------
/src/main/java/org/agent/MASExample.java:
--------------------------------------------------------------------------------
 1 | package org.agent;
 2 | 
 3 | // MASExample 类，用于演示多智能体系统的使用
 4 | public class MASExample {
 5 |     public static void main(String[] args) {
 6 |         // 创建 Agent 实例
 7 |         Agent agent = new Agent("分析员", new String[]{"数据分析", "逻辑推理"}, "知识图谱模型");
 8 |         agent.learn();
 9 |         agent.plan();
10 |         agent.reason();
11 |         agent.decide();
12 |         agent.addAbility("机器学习");
13 |         String llmResponse = agent.callLLM();
14 |         System.out.println("大模型回复: " + llmResponse);
15 | 
16 |         // 创建 Environment 实例
17 |         Environment environment = new Environment("复杂环境");
18 |         environment.perceive();
19 |         environment.affect();
20 | 
21 |         // 创建 Interaction 实例
22 |         Interaction interaction = new Interaction("合作");
23 |         interaction.interact();
24 | 
25 |         // 创建 HierarchicalOrganization 实例
26 |         HierarchicalOrganization hierarchicalOrganization = new HierarchicalOrganization();
27 |         hierarchicalOrganization.organizeAgents();
28 | 
29 |         // 创建 EmergentOrganization 实例
30 |         EmergentOrganization emergentOrganization = new EmergentOrganization();
31 |         emergentOrganization.organizeAgents();
32 |     }
33 | }


--------------------------------------------------------------------------------
/src/main/java/org/agent/Organization.java:
--------------------------------------------------------------------------------
1 | package org.agent;
2 | 
// Organization: abstract base class that declares the hook for organizing agents.
public abstract class Organization {
    // Abstract method for organizing agents; each concrete subclass supplies the behavior.
    public abstract void organizeAgents();
}


--------------------------------------------------------------------------------
/src/main/java/org/chunk/FixedSizeSplitter.java:
--------------------------------------------------------------------------------
 1 | package org.chunk;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.List;
 5 | 
 6 | public class FixedSizeSplitter implements TextSplitter {
 7 |     private final int size;
 8 | 
 9 |     public FixedSizeSplitter(int size) {
10 |         this.size = size;
11 |     }
12 | 
13 |     @Override
14 |     public List<String> split(String text) {
15 |         List<String> result = new ArrayList<>();
16 |         for (int i = 0; i < text.length(); i += size) {
17 |             int end = Math.min(text.length(), i + size);
18 |             result.add(text.substring(i, end));
19 |         }
20 |         return result;
21 |     }
22 | }


--------------------------------------------------------------------------------
/src/main/java/org/chunk/ParagraphSplitter.java:
--------------------------------------------------------------------------------
 1 | package org.chunk;
 2 | 
 3 | 
 4 | 
 5 | import java.util.ArrayList;
 6 | import java.util.Arrays;
 7 | import java.util.List;
 8 | 
 9 | // 定义 ParagraphSplitter 类，实现 TextSplitter 接口
10 | public class ParagraphSplitter  implements TextSplitter {
11 | 
12 |     // 实现 split 方法，用于将文本按段落分割
13 |    public List<String> split(String text) {
14 |         // 以换行符作为段落分隔符，将文本分割成段落数组
15 |         String[] paragraphs = text.split("\\n\\n");
16 |         // 将段落数组转换为 List 并返回
17 |         return new ArrayList<>(Arrays.asList(paragraphs));
18 |     }
19 | }


--------------------------------------------------------------------------------
/src/main/java/org/chunk/RecursiveSplitter.java:
--------------------------------------------------------------------------------
 1 | package org.chunk;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.Arrays;
 5 | import java.util.List;
 6 | 
 7 | public class RecursiveSplitter implements TextSplitter {
 8 |     private final int depth;
 9 | 
10 |     public RecursiveSplitter(int depth) {
11 |         this.depth = depth;
12 |     }
13 | 
14 |     @Override
15 |     public List<String> split(String text) {
16 |         return recursiveSplit(text, 0);
17 |     }
18 | 
19 |     private List<String> recursiveSplit(String text, int currentDepth) {
20 |         if (currentDepth >= depth) {
21 |             return new ArrayList<>(Arrays.asList(text));
22 |         }
23 | 
24 |         List<String> result = new ArrayList<>();
25 |         int midIndex = text.length() / 2;
26 |         String left = text.substring(0, midIndex);
27 |         String right = text.substring(midIndex);
28 | 
29 |         result.addAll(recursiveSplit(left, currentDepth + 1));
30 |         result.addAll(recursiveSplit(right, currentDepth + 1));
31 | 
32 |         return result;
33 |     }
34 | }


--------------------------------------------------------------------------------
/src/main/java/org/chunk/SemanticBlockSplitter.java:
--------------------------------------------------------------------------------
 1 | package org.chunk;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.Arrays;
 5 | import java.util.List;
 6 | 
 7 | public class SemanticBlockSplitter implements TextSplitter {
 8 |     // todo 这里暂时简化处理，假设每个段落都是一个语义块
 9 |     @Override
10 |     public List<String> split(String text) {
11 |         String[] blocks = text.trim().split("\\n\\n");
12 |         return new ArrayList<>(Arrays.asList(blocks));
13 |     }
14 | }


--------------------------------------------------------------------------------
/src/main/java/org/chunk/SentenceSplitter.java:
--------------------------------------------------------------------------------
 1 | package org.chunk;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.Arrays;
 5 | import java.util.List;
 6 | import java.util.regex.Pattern;
 7 | 
 8 | public class SentenceSplitter implements TextSplitter {
 9 |     private static final Pattern SENTENCE_DELIMITER = Pattern.compile("[.!?] +");
10 | 
11 |     @Override
12 |     public List<String> split(String text) {
13 |         String[] sentences = SENTENCE_DELIMITER.split(text.trim());
14 |         return new ArrayList<>(Arrays.asList(sentences));
15 |     }
16 | }


--------------------------------------------------------------------------------
/src/main/java/org/chunk/TextSplitter.java:
--------------------------------------------------------------------------------
1 | package org.chunk;
2 | 
3 | import java.util.List;
4 | 
// Strategy interface for chunking: an implementation turns one text into a list of pieces.
public interface TextSplitter {
    // Splits the given text and returns the resulting pieces as a list.
    List<String> split(String text);
}


--------------------------------------------------------------------------------
/src/main/java/org/constant/Config.java:
--------------------------------------------------------------------------------
 1 | package org.constant;
 2 | 
 3 | public class Config {
 4 |     // API密钥
 5 |     public static final String API_KEY = "sk-fec928b1e67db9d34e092c9599e1ce3a";
 6 |     public static final String EMBEDDING_API_URL = "https://api.baichuan-ai.com/v1/embeddings";
 7 |     // 使用的模型
 8 |     public static final String LLM_MODEL = "Baichuan3-Turbo";
 9 |     // API的URL
10 |     public static final String LLM_URL = "https://api.baichuan-ai.com/v1/chat/completions";
11 | 
12 | 
13 |     // Redis 连接配置
14 |     public static final String REDIS_HOST = "124.223.85.176";
15 |     public static final int REDIS_PORT = 6379;
16 |     public static final String REDIS_PASSWORD = "123456";
17 | 
18 |     // Redis 过期时间
19 |     public static int REDIS_EXPIRE_SECONDS = 180;
20 | 
21 |     // ES 连接配置
22 |     public static final String esUrl = "https://124.223.85.176:9200";
23 |     public static final String esUserName = "elastic";
24 |     public static final String esPassWord = "8hbdbMHjAsx9bfDJFh9U";
25 | 
26 |     // Jina API 密钥
27 |     public static final String Jina_API_KEY = "jina_852794709b6d4a858b87ef1361ce0112hHKiLuPFe9FkYssD6RcVG9kiJD8D";
28 |     public static final String Jina_multi_vector ="https://api.jina.ai/v1/multi-vector";
29 | 
30 | 
31 |     // Serp API 密钥
32 |     public static final String SerpAPI = "1af00627e582c9238b8c947d2300dd13331a9817523811a83dc16245ed98d444";
33 | }


--------------------------------------------------------------------------------
/src/main/java/org/controler/SearchController.java:
--------------------------------------------------------------------------------
  1 | package org.controler;
  2 | 
  3 | import com.google.gson.JsonArray;
  4 | import com.google.gson.JsonObject;
  5 | import fi.iki.elonen.NanoHTTPD;
  6 | import org.json.JSONObject;
  7 | import org.service.LLM.OpenAIChatService;
  8 | import org.constant.Config;
  9 | import org.web.SearchEngine;
 10 | 
 11 | import java.io.IOException;
 12 | import java.util.HashMap;
 13 | import java.util.Map;
 14 | 
 15 | public class SearchController extends NanoHTTPD {
 16 | 
 17 |     public SearchController(int port) throws IOException {
 18 |         super(port);
 19 |         start(NanoHTTPD.SOCKET_READ_TIMEOUT, false);
 20 |         System.out.println("Search Server started on port " + port);
 21 |     }
 22 | 
 23 |     @Override
 24 |     public Response serve(IHTTPSession session) {
 25 |         String uri = session.getUri();
 26 |         Method method = session.getMethod();
 27 | 
 28 |         try {
 29 |             // 添加 CORS 头信息，允许所有来源的请求
 30 |             Response response;
 31 |             if (Method.OPTIONS.equals(method)) {
 32 |                 response = newFixedLengthResponse(Response.Status.OK, NanoHTTPD.MIME_PLAINTEXT, "");
 33 |                 response.addHeader("Access-Control-Allow-Origin", "*");
 34 |                 response.addHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
 35 |                 response.addHeader("Access-Control-Allow-Headers", "Content-Type");
 36 |                 return response;
 37 |             }
 38 | 
 39 |             if (Method.POST.equals(method) && "/search".equals(uri)) {
 40 |                 // 处理 POST 请求，获取搜索关键词并调用 SearchEngine 进行搜索
 41 |                 try {
 42 |                     session.parseBody(new HashMap<>());
 43 |                 } catch (ResponseException e) {
 44 |                     throw new RuntimeException(e);
 45 |                 }
 46 |                 Map<String, String> params = session.getParms();
 47 |                 String keyword = params.get("keyword");
 48 | 
 49 |                 if (keyword != null && !keyword.isEmpty()) {
 50 |                     Map<String, String> searchParams = new HashMap<>();
 51 |                     searchParams.put("engine", "baidu");
 52 |                     searchParams.put("q", keyword);
 53 |                     searchParams.put("api_key", Config.SerpAPI);
 54 | 
 55 | 
 56 |                     JsonObject searchResult = SearchEngine.getResult(searchParams);
 57 |                     if (searchResult != null) {
 58 |                         // 可以在这里对搜索结果进行处理，比如转换为 HTML 格式返回给前端
 59 |                         StringBuilder html = new StringBuilder();
 60 |                         html.append(parseResultsHtml(searchResult));
 61 |                         response = newFixedLengthResponse(html.toString());
 62 |                     } else {
 63 |                         response = newFixedLengthResponse(Response.Status.INTERNAL_ERROR, NanoHTTPD.MIME_PLAINTEXT, "搜索失败");
 64 |                     }
 65 |                 } else {
 66 |                     response = newFixedLengthResponse(Response.Status.BAD_REQUEST, NanoHTTPD.MIME_PLAINTEXT, "关键词不能为空");
 67 |                 }
 68 |             } else {
 69 |                 response = newFixedLengthResponse(Response.Status.NOT_FOUND, NanoHTTPD.MIME_PLAINTEXT, "未找到该页面");
 70 |             }
 71 | 
 72 |             response.addHeader("Access-Control-Allow-Origin", "*");
 73 |             return response;
 74 |         } catch (IOException | RuntimeException e) {
 75 |             return newFixedLengthResponse(Response.Status.INTERNAL_ERROR, NanoHTTPD.MIME_PLAINTEXT, "服务器内部错误: " + e.getMessage());
 76 |         }
 77 |     }
 78 | 
 79 |     // 新增方法：将搜索结果转换为 HTML 内容
 80 |     private String parseResultsHtml(JsonObject results) {
 81 |         StringBuilder html = new StringBuilder();
 82 |         if (results != null) {
 83 |             // 解析 search_information 中的 query_displayed 字段
 84 |             JsonObject searchInformation = results.getAsJsonObject("search_information");
 85 |             if (searchInformation != null) {
 86 |                 String queryDisplayed = searchInformation.get("query_displayed").getAsString();
 87 |                 html.append("<p>查询关键词: ").append(queryDisplayed).append("</p>");
 88 |             }
 89 | 
 90 |             // 解析 organic_results 数组中的 title、link 和 snippet 字段
 91 |             JsonArray organicResults = results.getAsJsonArray("organic_results");
 92 |             if (organicResults != null) {
 93 |                 for (int i = 0; i < organicResults.size(); i++) {
 94 |                     JsonObject result = organicResults.get(i).getAsJsonObject();
 95 |                     String title = result.get("title").getAsString();
 96 |                     String link = result.get("link").getAsString();
 97 |                     String snippet = "";
 98 |                     if (result.has("snippet") &&!result.get("snippet").isJsonNull()) {
 99 |                         snippet = result.get("snippet").getAsString();
100 |                     }
101 |                     html.append("<li>");
102 |                     html.append("<h3><a href='").append(link).append("' target='_blank'>").append(title).append("</a></h3>");
103 |                     html.append("<p>").append(snippet).append("</p>");
104 |                     html.append("</li>");
105 |                 }
106 |             }
107 |         }
108 |         return html.toString();
109 |     }
110 | 
111 |     public static void main(String[] args) {
112 |         try {
113 |             new SearchController(8080);
114 |         } catch (IOException e) {
115 |             System.err.println("Could not start server: " + e.getMessage());
116 |         }
117 |     }
118 | }


--------------------------------------------------------------------------------
/src/main/java/org/demo/Debate.java:
--------------------------------------------------------------------------------
 1 | package org.demo;
 2 | 
 3 | 
 4 | import org.agent.Agent;
 5 | 
 6 | // Debate 类，用于模拟辩论赛
 7 | public class Debate {
 8 |     private Agent[] agents;
 9 |     private String topic;
10 | 
11 |     public Debate(Agent[] agents, String topic) {
12 |         this.agents = agents;
13 |         this.topic = topic;
14 |     }
15 | 
16 |     public void startDebate() {
17 |         System.out.println("辩论赛开始，主题是：" + topic);
18 |         for (Agent agent : agents) {
19 |             String debatePoint = agent.generateDebatePoint(topic);
20 |             System.out.println(agent + " 的观点：" + debatePoint);
21 |         }
22 |         System.out.println("辩论赛结束");
23 |     }
24 | }


--------------------------------------------------------------------------------
/src/main/java/org/demo/SimulatorDebate.java:
--------------------------------------------------------------------------------
 1 | package org.demo;
 2 | 
 3 | import org.agent.Agent;
 4 | 
 5 | public class SimulatorDebate {
 6 |     public static void main(String[] args) {
 7 |         // 创建 Agent 实例
 8 |         Agent agent1 = new Agent("正方", new String[]{"逻辑推理", "语言表达"}, "知识图谱模型");
 9 |         Agent agent2 = new Agent("反方", new String[]{"批判性思维", "数据分析"}, "知识图谱模型");
10 |         // 创建 Debate 实例
11 |         Debate debate = new Debate(new Agent[]{agent1, agent2}, "人工智能是否会取代人类工作");
12 |         // 开始辩论赛
13 |         debate.startDebate();
14 |     }
15 | }
16 | 


--------------------------------------------------------------------------------
/src/main/java/org/entity/Document.java:
--------------------------------------------------------------------------------
 1 | package org.entity;
 2 | 
 3 | import com.alibaba.fastjson.annotation.JSONField;
 4 | import lombok.Data;
 5 | 
// One indexed chunk of a file together with its metadata.
// Lombok's @Data generates the accessors; each @JSONField maps the camelCase
// Java field to the snake_case key used when (de)serializing with fastjson.
@Data
public class Document {
    // Identifiers (kept as strings here).
    @JSONField(name = "user_id")
    String userId;
    @JSONField(name = "file_id")
    String fileId;
    @JSONField(name = "kb_id")
    String kbId;
    // Chunk position, size and text.
    @JSONField(name = "chunk_id")
    Integer chunkId;
    @JSONField(name = "chunk_size")
    Integer chunkSize;
    @JSONField(name = "chunk_text")
    String chunkText;
    // Embedding vectors. NOTE(review): presumably a text embedding and a CLIP embedding; confirm.
    @JSONField(name = "text_emb")
    double[] textEmb;
    @JSONField(name = "clip_emb")
    double[] clipEmb;
    // File-level metadata.
    @JSONField(name = "doc_type")
    String docType;
    @JSONField(name = "version")
    String version;
    @JSONField(name = "author")
    String author;
    // NOTE(review): the time unit (seconds vs. milliseconds) is not visible here; confirm.
    @JSONField(name = "created_time")
    Long createdTime;
    @JSONField(name = "modified_time")
    Long modifiedTime;
    @JSONField(name = "file_name")
    String fileName;
    @JSONField(name = "storage_path")
    String storagePath;
    // NOTE(review): presumably the relevance score returned by the search engine; confirm.
    @JSONField(name = "_score")
    Float score;

    // No-argument constructor.
    public Document() {
    }

    // Convenience constructor that sets only the storage path.
    public Document(String storagePath) {
        this.storagePath = storagePath;
    }
}


--------------------------------------------------------------------------------
/src/main/java/org/entity/File.java:
--------------------------------------------------------------------------------
 1 | package org.entity;
 2 | 
 3 | public class File {
 4 |     private Integer userId;
 5 |     private Integer fileId;
 6 |     private Integer kbId;
 7 |     private String docType;
 8 |     private String fileName;
 9 |     private String storagePath;
10 | 
11 |     // 构造器、getter和setter省略
12 | }


--------------------------------------------------------------------------------
/src/main/java/org/entity/KnowledgeBase.java:
--------------------------------------------------------------------------------
1 | package org.entity;
2 | 
/**
 * A knowledge base: its id, its name and the id of the owning user.
 * Plain mutable bean with a no-argument constructor, an all-fields constructor
 * and a getter/setter pair per field.
 */
public class KnowledgeBase {
    private Integer kbId;
    private String kbName;
    private Integer userId;

    /** Creates an instance with every field {@code null}. */
    public KnowledgeBase() {
    }

    /** Creates an instance with every field set. */
    public KnowledgeBase(Integer kbId, String kbName, Integer userId) {
        this.kbId = kbId;
        this.kbName = kbName;
        this.userId = userId;
    }

    public Integer getKbId() {
        return kbId;
    }

    public void setKbId(Integer kbId) {
        this.kbId = kbId;
    }

    public String getKbName() {
        return kbName;
    }

    public void setKbName(String kbName) {
        this.kbName = kbName;
    }

    public Integer getUserId() {
        return userId;
    }

    public void setUserId(Integer userId) {
        this.userId = userId;
    }
}


--------------------------------------------------------------------------------
/src/main/java/org/entity/SearchInput.java:
--------------------------------------------------------------------------------
 1 | package org.entity;
 2 | 
 3 | import lombok.Data;
 4 | 
// Input of a search request: bundles a user, a knowledge base and a document.
// The fields are public, and Lombok's @Data additionally generates accessors for them.
@Data
public class SearchInput {
    public User user;
    public KnowledgeBase knowledgeBase;
    public Document document;
}
11 | 


--------------------------------------------------------------------------------
/src/main/java/org/entity/SearchOutput.java:
--------------------------------------------------------------------------------
 1 | package org.entity;
 2 | 
 3 | import lombok.Data;
 4 | 
 5 | import java.util.List;
// Output of a search request: the matching documents plus a status code and message.
// The fields are public, and Lombok's @Data additionally generates accessors for them.
@Data
public class SearchOutput {
    public List<Document> documents;
    // NOTE(review): the meaning of individual code values is not defined in this class; confirm with callers.
    public Integer code;
    public String msg;
}
12 | 


--------------------------------------------------------------------------------
/src/main/java/org/entity/User.java:
--------------------------------------------------------------------------------
1 | package org.entity;
2 | 
/**
 * A user account: id, user name and password.
 * Plain mutable bean with a no-argument constructor, an all-fields constructor
 * and a getter/setter pair per field.
 */
public class User {
    private Integer userId;
    private String username;
    private String password;

    /** Creates an instance with every field {@code null}. */
    public User() {
    }

    /** Creates an instance with every field set. */
    public User(Integer userId, String username, String password) {
        this.userId = userId;
        this.username = username;
        this.password = password;
    }

    public Integer getUserId() {
        return userId;
    }

    public void setUserId(Integer userId) {
        this.userId = userId;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public String getPassword() {
        return password;
    }

    public void setPassword(String password) {
        this.password = password;
    }
}


--------------------------------------------------------------------------------
/src/main/java/org/parser/ExcelParser.java:
--------------------------------------------------------------------------------
 1 | package org.parser;
 2 | 
 3 | import org.apache.poi.ss.usermodel.*;
 4 | import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 5 | 
 6 | import java.io.File;
 7 | import java.io.FileInputStream;
 8 | import java.io.IOException;
 9 | 
10 | public class ExcelParser implements FileParser{
11 | 
12 |     /**
13 |      * 解析Excel文件并返回其文本内容
14 |      *
15 |      * @param file Excel文件
16 |      * @return 解析后的字符串
17 |      * @throws IOException 如果发生I/O错误
18 |      */
19 |     public String parse(File file) throws IOException {
20 |         try (FileInputStream fis = new FileInputStream(file);
21 |              Workbook workbook = new XSSFWorkbook(fis)) {
22 | 
23 |             StringBuilder text = new StringBuilder();
24 |             for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
25 |                 Sheet sheet = workbook.getSheetAt(i);
26 |                 for (Row row : sheet) {
27 |                     for (Cell cell : row) {
28 |                         text.append(cell.getStringCellValue()).append("\t");
29 |                     }
30 |                     text.append("\n");
31 |                 }
32 |                 text.append("\n");
33 |             }
34 |             return text.toString();
35 |         }
36 |     }
37 | 
38 |     /**
39 |      * 根据文件路径解析Excel文件并返回其文本内容
40 |      *
41 |      * @param filePath Excel文件的路径
42 |      * @return 解析后的字符串
43 |      * @throws IOException 如果发生I/O错误
44 |      */
45 |     public  String parse(String filePath) throws IOException {
46 |         File file = new File(filePath);
47 |         return parse(file);
48 |     }
49 | 
50 |     // 测试方法
51 |     public static void main(String[] args) {
52 |         try {
53 |             ExcelParser excelParser = new ExcelParser();
54 |             String content = excelParser.parse("path/to/your/excel_file.xlsx");
55 |             System.out.println(content);
56 |         } catch (IOException e) {
57 |             e.printStackTrace();
58 |         }
59 |     }
60 | }


--------------------------------------------------------------------------------
/src/main/java/org/parser/FileParser.java:
--------------------------------------------------------------------------------
1 | package org.parser;
2 | 
3 | import java.io.File;
4 | import java.io.IOException;
5 | 
6 | public interface FileParser {
7 |     public String parse(File file) throws IOException;
8 |     public String parse(String filePath) throws IOException;
9 | }


--------------------------------------------------------------------------------
/src/main/java/org/parser/FileParserFactory.java:
--------------------------------------------------------------------------------
 1 | package org.parser;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | public class FileParserFactory {
 6 | 
 7 |     public static FileParser getFileParser(String fileType) {
 8 |         switch (fileType.toLowerCase()) {
 9 |             case "html":
10 |                 return new HTMLParser();
11 |             case "pdf":
12 |                 return new PDFParser();
13 |             case "txt":
14 |             case "md":
15 |             case "py":
16 |             case "java":
17 |                 return new PureTextParser();
18 |             case "doc":
19 |             case "docx":
20 |                 return new WordParser();
21 |             case "ppt":
22 |             case "pptx":
23 |                 return new PPTParser();
24 |             case "xls":
25 |             case "xlsx":
26 |                 return new ExcelParser();
27 |             default:
28 |                 throw new IllegalArgumentException("Unsupported file type: " + fileType);
29 |         }
30 |     }
31 | 
32 |     public static FileParser getFileParserByOriginalName(String originalFileName) {
33 |         int lastIndex = originalFileName.lastIndexOf('.');
34 |         if (lastIndex == -1) {
35 |             throw new IllegalArgumentException("Invalid original file name: " + originalFileName);
36 |         }
37 |         String fileType = originalFileName.substring(lastIndex + 1);
38 |         return getFileParser(fileType);
39 |     }
40 | 
41 |     public static String easyParse(String filePath, String originalFileName) {
42 |         FileParser parser = FileParserFactory.getFileParserByOriginalName(originalFileName);
43 |         String content = null;
44 |         try {
45 |             content = parser.parse(filePath);
46 |         } catch (IOException e) {
47 |             throw new RuntimeException(e);
48 |         }
49 |         return content;
50 |     }
51 | 
52 |     public static String easyParse(String filePath) {
53 |         FileParser parser = FileParserFactory.getFileParser(filePath);
54 |         String content = null;
55 |         try {
56 |             content = parser.parse(filePath);
57 |         } catch (IOException e) {
58 |             throw new RuntimeException(e);
59 |         }
60 |         return content;
61 |     }
62 | 
63 |     public static void main(String[] args) {
64 |         try {
65 |             // 解析PDF文件
66 |             testParseFile("C:\\Users\\19664\\Desktop\\2311.12351v2.pdf");
67 |         } catch (IOException e) {
68 |             e.printStackTrace();
69 |         }
70 |     }
71 | 
72 |     private static void testParseFile(String filePath) throws IOException {
73 | //        FileParser parser = FileParserFactory.getFileParserByPath(filePath);
74 | //        String content = parser.parse(filePath);
75 | //        System.out.println("Content of " + filePath + ":\n" + content);
76 | //        System.out.println("----------------------------------------\n");
77 |     }
78 | }


--------------------------------------------------------------------------------
/src/main/java/org/parser/HTMLParser.java:
--------------------------------------------------------------------------------
 1 | package org.parser;
 2 | import org.jsoup.Jsoup;
 3 | import org.jsoup.nodes.Document;
 4 | 
 5 | import java.io.File;
 6 | import java.io.IOException;
 7 | 
 8 | public class HTMLParser implements FileParser {
 9 | 
10 |     /**
11 |      * 解析HTML文件并返回其文本内容
12 |      *
13 |      * @param file HTML文件
14 |      * @return 解析后的字符串
15 |      * @throws IOException 如果发生I/O错误
16 |      */
17 |     public  String parse(File file) throws IOException {
18 |         Document doc = Jsoup.parse(file, "UTF-8");
19 |         return doc.text();
20 |     }
21 | 
22 |     /**
23 |      * 根据文件路径解析HTML文件并返回其文本内容
24 |      *
25 |      * @param filePath HTML文件的路径
26 |      * @return 解析后的字符串
27 |      * @throws IOException 如果发生I/O错误
28 |      */
29 |     public  String parse(String filePath) throws IOException {
30 |         File file = new File(filePath);
31 |         return parse(file);
32 |     }
33 | 
34 |     /**
35 |      * 解析URL指向的HTML页面并返回其文本内容
36 |      *
37 |      * @param url URL字符串
38 |      * @return 解析后的字符串
39 |      * @throws IOException 如果发生I/O错误
40 |      */
41 |     public static String parseHTMLFromURL(String url) throws IOException {
42 |         Document doc = Jsoup.connect(url).get();
43 |         return doc.text();
44 |     }
45 | 
46 |     // 测试方法
47 |     public static void main(String[] args) {
48 |         try {
49 |             HTMLParser htmlParser = new HTMLParser();
50 | 
51 |             // 解析本地HTML文件
52 |             String localContent = htmlParser.parse("path/to/your/html_file.html");
53 |             System.out.println(localContent);
54 | 
55 |             // 解析在线HTML页面
56 |             String onlineContent = parseHTMLFromURL("https://example.com");
57 |             System.out.println(onlineContent);
58 |         } catch (IOException e) {
59 |             e.printStackTrace();
60 |         }
61 |     }
62 | }


--------------------------------------------------------------------------------
/src/main/java/org/parser/PDFParser.java:
--------------------------------------------------------------------------------
 1 | package org.parser;
 2 | 
 3 | import org.apache.pdfbox.pdmodel.PDDocument;
 4 | import org.apache.pdfbox.text.PDFTextStripper;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | 
 9 | public class PDFParser implements FileParser{
10 | 
11 |     /**
12 |      * 解析PDF文件并返回其文本内容
13 |      *
14 |      * @param file PDF文件
15 |      * @return 解析后的字符串
16 |      * @throws IOException 如果发生I/O错误
17 |      */
18 |     public  String parse(File file) throws IOException {
19 |         try (PDDocument document = PDDocument.load(file)) {
20 |             PDFTextStripper pdfTextStripper = new PDFTextStripper();
21 |             return pdfTextStripper.getText(document);
22 |         }
23 |     }
24 | 
25 |     /**
26 |      * 根据文件路径解析PDF文件并返回其文本内容
27 |      *
28 |      * @param filePath PDF文件的路径
29 |      * @return 解析后的字符串
30 |      * @throws IOException 如果发生I/O错误
31 |      */
32 |     public  String parse(String filePath) throws IOException {
33 |         File file = new File(filePath);
34 |         return parse(file);
35 |     }
36 | 
37 |     // 测试方法
38 |     public static void main(String[] args) {
39 |         try {
40 |             PDFParser parser = new PDFParser();
41 |             String content = parser.parse("C:\\Users\\19664\\Desktop\\2311.12351v2.pdf");
42 |             System.out.println(content);
43 |         } catch (IOException e) {
44 |             e.printStackTrace();
45 |         }
46 |     }
47 | }


--------------------------------------------------------------------------------
/src/main/java/org/parser/PPTParser.java:
--------------------------------------------------------------------------------
 1 | package org.parser;
 2 | 
 3 | import org.apache.poi.xslf.usermodel.*;
 4 | 
 5 | import java.io.File;
 6 | import java.io.FileInputStream;
 7 | import java.io.IOException;
 8 | 
 9 | public class PPTParser implements FileParser{
10 | 
11 |     /**
12 |      * Parses a PowerPoint file (opened with {@link XMLSlideShow}) and returns its text content.
13 |      *
14 |      * <p>Only shapes that are {@link XSLFTextShape} instances in a slide's shape list are read.
15 |      * Each shape's text is followed by a newline and each slide by one extra newline.
16 |      *
17 |      * @param file the PowerPoint file
18 |      * @return the concatenated text of all slides
19 |      * @throws IOException if an I/O error occurs
20 |      */
21 |     public  String parse(File file) throws IOException {
22 |         try (FileInputStream fis = new FileInputStream(file);
23 |              XMLSlideShow ppt = new XMLSlideShow(fis)) {
24 | 
25 |             StringBuilder text = new StringBuilder();
26 |             for (XSLFSlide slide : ppt.getSlides()) {
27 |                 for (XSLFShape shape : slide.getShapes()) {
28 |                     // Shapes that are not text shapes are skipped.
29 |                     if (shape instanceof XSLFTextShape) {
30 |                         XSLFTextShape textShape = (XSLFTextShape) shape;
31 |                         text.append(textShape.getText()).append("\n");
32 |                     }
33 |                 }
34 |                 // Blank line between slides.
35 |                 text.append("\n");
36 |             }
37 |             return text.toString();
38 |         }
39 |     }
40 | 
41 |     /**
42 |      * Parses the PowerPoint file at the given path and returns its text content.
43 |      *
44 |      * @param filePath path of the PowerPoint file
45 |      * @return the concatenated text of all slides
46 |      * @throws IOException if an I/O error occurs
47 |      */
48 |     public  String parse(String filePath) throws IOException {
49 |         File file = new File(filePath);
50 |         return parse(file);
51 |     }
52 | 
53 |     /**
54 |      * Manual smoke test: prints the text of a sample presentation to standard output.
55 |      */
56 |     public static void main(String[] args) {
57 |         try {
58 |             // Must be a PPTParser: a PDF parser cannot read a .pptx file.
59 |             PPTParser parser = new PPTParser();
60 |             String content = parser.parse("path/to/your/ppt_file.pptx");
61 |             System.out.println(content);
62 |         } catch (IOException e) {
63 |             e.printStackTrace();
64 |         }
65 |     }
66 | }


--------------------------------------------------------------------------------
/src/main/java/org/parser/PureTextParser.java:
--------------------------------------------------------------------------------
 1 | package org.parser;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.File;
 5 | import java.io.FileReader;
 6 | import java.io.IOException;
 7 | 
 8 | public class PureTextParser implements FileParser{
 9 | 
10 |     /**
11 |      * Parses a plain-text file and returns its content.
12 |      *
13 |      * <p>The file is decoded as UTF-8 so the result does not depend on the platform default
14 |      * charset. Lines are read one by one and each is terminated with {@code "\n"} in the
15 |      * result, so a non-empty result always ends with a newline.
16 |      *
17 |      * @param file the plain-text file
18 |      * @return the file content with {@code "\n"} line endings
19 |      * @throws IOException if an I/O error occurs
20 |      */
21 |     public  String parse(File file) throws IOException {
22 |         // Fully-qualified names keep this change local to the class body.
23 |         // Closing the BufferedReader also closes the wrapped reader and stream.
24 |         try (BufferedReader reader = new BufferedReader(
25 |                 new java.io.InputStreamReader(
26 |                         new java.io.FileInputStream(file),
27 |                         java.nio.charset.StandardCharsets.UTF_8))) {
28 |             StringBuilder text = new StringBuilder();
29 |             String line;
30 |             while ((line = reader.readLine()) != null) {
31 |                 text.append(line).append("\n");
32 |             }
33 |             return text.toString();
34 |         }
35 |     }
36 | 
37 |     /**
38 |      * Parses the plain-text file at the given path and returns its content.
39 |      *
40 |      * @param filePath path of the plain-text file
41 |      * @return the file content with {@code "\n"} line endings
42 |      * @throws IOException if an I/O error occurs
43 |      */
44 |     public  String parse(String filePath) throws IOException {
45 |         File file = new File(filePath);
46 |         return parse(file);
47 |     }
48 | 
49 |     // Placeholder for a manual test; intentionally empty.
50 |     public static void main(String[] args) {
51 | 
52 |     }
53 | }


--------------------------------------------------------------------------------
/src/main/java/org/parser/WordParser.java:
--------------------------------------------------------------------------------
 1 | package org.parser;
 2 | 
 3 | import org.apache.poi.xwpf.usermodel.XWPFDocument;
 4 | import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 5 | 
 6 | import java.io.File;
 7 | import java.io.FileInputStream;
 8 | import java.io.IOException;
 9 | import java.util.List;
10 | 
11 | public class WordParser implements FileParser{
12 | 
13 |     /**
14 |      * Reads a Word file (opened with {@link XWPFDocument}) and returns the text of its paragraphs.
15 |      *
16 |      * <p>Only the entries of {@code getParagraphs()} are read; each paragraph's text is
17 |      * followed by {@code "\n"}.
18 |      *
19 |      * @param file the Word file
20 |      * @return the concatenated paragraph text
21 |      * @throws IOException if an I/O error occurs
22 |      */
23 |     public  String parse(File file) throws IOException {
24 |         try (FileInputStream input = new FileInputStream(file);
25 |              XWPFDocument doc = new XWPFDocument(input)) {
26 |             StringBuilder out = new StringBuilder();
27 |             doc.getParagraphs().forEach(p -> out.append(p.getText()).append("\n"));
28 |             return out.toString();
29 |         }
30 |     }
31 | 
32 |     /**
33 |      * Path-based convenience overload of {@link #parse(File)}.
34 |      *
35 |      * @param filePath path of the Word file
36 |      * @return the concatenated paragraph text
37 |      * @throws IOException if an I/O error occurs
38 |      */
39 |     public  String parse(String filePath) throws IOException {
40 |         return parse(new File(filePath));
41 |     }
42 | 
43 |     // Placeholder for a manual test; intentionally empty.
44 |     public static void main(String[] args) {
45 |     }
46 | }


--------------------------------------------------------------------------------
/src/main/java/org/rag/AdvancedRAG.java:
--------------------------------------------------------------------------------
  1 | package org.rag;
  2 | 
  3 | import org.apache.commons.lang3.StringUtils;
  4 | import org.chunk.FixedSizeSplitter;
  5 | import org.constant.Config;
  6 | import org.entity.Document;
  7 | import org.json.JSONObject;
  8 | import org.parser.FileParserFactory;
  9 | import org.service.LLM.OpenAIChatService;
 10 | import org.service.embedding.BaichuanEmbeddingService;
 11 | import org.utils.DistanceUtils;
 12 | 
 13 | import java.io.IOException;
 14 | import java.util.ArrayList;
 15 | import java.util.Comparator;
 16 | import java.util.List;
 17 | import java.util.stream.Collectors;
 18 | 
 19 | public class AdvancedRAG {
 20 |     private Document document;
 21 |     private List<Document> chunks;
 22 |     private String query;
 23 |     private String response;
 24 |     // Embedding of the query; filled by embedding() (or lazily by sorting()) so it is requested only once.
 25 |     private double[] queryEmbedding;
 26 | 
 27 |     public String getResponse() {
 28 |         return response;
 29 |     }
 30 | 
 31 |     public void setResponse(String response) {
 32 |         this.response = response;
 33 |     }
 34 | 
 35 |     public AdvancedRAG() {
 36 |     }
 37 | 
 38 |     public AdvancedRAG(Document document, String query) {
 39 |         this.document = document;
 40 |         this.query = query;
 41 |     }
 42 | 
 43 |     public Document getDocument() {
 44 |         return document;
 45 |     }
 46 | 
 47 |     public void setDocument(Document document) {
 48 |         this.document = document;
 49 |     }
 50 | 
 51 |     /**
 52 |      * Parses the file at the document's storage path and stores the text on the document.
 53 |      *
 54 |      * @return this instance, for call chaining
 55 |      * @throws IllegalArgumentException if the storage path is blank
 56 |      */
 57 |     public AdvancedRAG parsing() {
 58 |         String filePath = document.getStoragePath();
 59 |         if (StringUtils.isBlank(filePath)) {
 60 |             throw new IllegalArgumentException("Document storage path cannot be blank");
 61 |         }
 62 |         String chunkText = FileParserFactory.easyParse(filePath);
 63 |         document.setChunkText(chunkText);
 64 |         return this;
 65 |     }
 66 | 
 67 |     /**
 68 |      * Splits the parsed text into chunks, creating one {@link Document} per chunk.
 69 |      *
 70 |      * @return this instance, for call chaining
 71 |      */
 72 |     public AdvancedRAG chunking() {
 73 |         // A more elaborate strategy could be plugged in here; this example uses fixed-size chunks.
 74 |         FixedSizeSplitter fixedSizeSplitter = new FixedSizeSplitter(512);
 75 |         List<String> stringList = fixedSizeSplitter.split(document.getChunkText());
 76 |         chunks = new ArrayList<>();
 77 |         for (String chunkText : stringList) {
 78 |             Document chunkDoc = new Document();
 79 |             chunkDoc.setChunkText(chunkText);
 80 |             chunks.add(chunkDoc);
 81 |         }
 82 |         return this;
 83 |     }
 84 | 
 85 |     /**
 86 |      * Computes the embedding of the query and of every chunk.
 87 |      *
 88 |      * @return this instance, for call chaining
 89 |      * @throws IOException if an embedding request fails
 90 |      */
 91 |     public AdvancedRAG embedding() throws IOException {
 92 |         BaichuanEmbeddingService embeddingService = new BaichuanEmbeddingService(Config.API_KEY);
 93 |         // Keep the query embedding so sorting() can reuse it instead of requesting it again.
 94 |         queryEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, query);
 95 | 
 96 |         // Generate an embedding for every chunk.
 97 |         for (Document chunk : chunks) {
 98 |             double[] chunkEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, chunk.getChunkText());
 99 |             chunk.setTextEmb(chunkEmbedding);
100 |         }
101 |         return this;
102 |     }
103 | 
104 |     /**
105 |      * Sorts the chunks by ascending squared-error distance between their embedding and the query embedding.
106 |      *
107 |      * @return this instance, for call chaining
108 |      * @throws IOException if the query embedding still has to be requested and that request fails
109 |      */
110 |     public AdvancedRAG sorting() throws IOException {
111 |         final double[] target = resolveQueryEmbedding();
112 |         chunks.sort(Comparator.comparingDouble(chunk -> DistanceUtils.squaredErrorDistance(target, chunk.getTextEmb())));
113 |         return this;
114 |     }
115 | 
116 |     /**
117 |      * Returns the cached query embedding, requesting it first when embedding() has not run yet.
118 |      */
119 |     private double[] resolveQueryEmbedding() throws IOException {
120 |         if (queryEmbedding == null) {
121 |             BaichuanEmbeddingService embeddingService = new BaichuanEmbeddingService(Config.API_KEY);
122 |             queryEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, query);
123 |         }
124 |         return queryEmbedding;
125 |     }
126 | 
127 |     /**
128 |      * Advanced filtering step that removes chunks considered irrelevant.
129 |      *
130 |      * @return this instance, for call chaining
131 |      */
132 |     public AdvancedRAG advancedFiltering() {
133 |         // Simple example: keep only chunks whose text is longer than 10 characters.
134 |         chunks = chunks.stream()
135 |                 .filter(chunk -> chunk.getChunkText().length() > 10)
136 |                 .collect(Collectors.toList());
137 |         return this;
138 |     }
139 | 
140 |     /**
141 |      * Sends the query to the chat model and stores the reply in {@code response}.
142 |      *
143 |      * <p>NOTE(review): only {@code query} is sent; the sorted/filtered chunks are not added
144 |      * to the prompt, so the model answers without the retrieved context.
145 |      *
146 |      * @return this instance, for call chaining
147 |      */
148 |     public AdvancedRAG LLMChat() {
149 |         // API key, model name and endpoint all come from Config.
150 |         String apiKey = Config.API_KEY;
151 |         String model = Config.LLM_MODEL;
152 |         String url = Config.LLM_URL;
153 | 
154 |         OpenAIChatService openAIChatService = new OpenAIChatService(apiKey);
155 | 
156 |         try {
157 |             // Build the request parameters.
158 |             JSONObject params = new JSONObject()
159 |                     .put("model", model)
160 |                     .put("messages", new JSONObject[] {
161 |                             new JSONObject().put("role", "user").put("content", query)
162 |                     })
163 |                     .put("temperature", 0.3)
164 |                     .put("stream", false);
165 | 
166 |             // Ask the model for a reply.
167 |             response = openAIChatService.generateText(url, params);
168 |         } catch (IOException e) {
169 |             // Best effort: the failure is printed and response keeps its previous value.
170 |             e.printStackTrace();
171 |         }
172 |         return this;
173 |     }
174 | 
175 |     public static void main(String[] args) {
176 |         AdvancedRAG advancedRAG = new AdvancedRAG(
177 |                 new Document("./202X企业规划.pdf"),
178 |                 "简要总结这篇文章");
179 |         try {
180 |             advancedRAG
181 |                     // parse
182 |                     .parsing()
183 |                     // chunk
184 |                     .chunking()
185 |                     // embed
186 |                     .embedding()
187 |                     // sort
188 |                     .sorting()
189 |                     // advanced filtering
190 |                     .advancedFiltering()
191 |                     // chat-model reply
192 |                     .LLMChat();
193 |         } catch (IOException e) {
194 |             throw new RuntimeException(e);
195 |         }
196 |         System.out.println(advancedRAG.response);
197 |     }
198 | }


--------------------------------------------------------------------------------
/src/main/java/org/rag/ModularRAG.java:
--------------------------------------------------------------------------------
  1 | package org.rag;
  2 | 
  3 | import org.chunk.FixedSizeSplitter;
  4 | import org.chunk.ParagraphSplitter;
  5 | import org.constant.Config;
  6 | import org.entity.Document;
  7 | import org.json.JSONObject;
  8 | import org.parser.FileParserFactory;
  9 | import org.service.LLM.OpenAIChatService;
 10 | import org.service.embedding.BaichuanEmbeddingService;
 11 | import org.utils.DistanceUtils;
 12 | 
 13 | import java.io.IOException;
 14 | import java.util.ArrayList;
 15 | import java.util.Comparator;
 16 | import java.util.List;
 17 | import java.util.stream.Collectors;
 18 | 
 19 | public class ModularRAG {
 20 |     private Document document;
 21 |     private List<Document> chunks;
 22 |     private String query;
 23 |     private String response;
 24 |     private List<Document> filteredChunks;
 25 | 
 26 |     public String getResponse() {
 27 |         return response;
 28 |     }
 29 | 
 30 |     public void setResponse(String response) {
 31 |         this.response = response;
 32 |     }
 33 | 
 34 |     public ModularRAG() {
 35 |     }
 36 | 
 37 |     public ModularRAG(Document document, String query) {
 38 |         this.document = document;
 39 |         this.query = query;
 40 |     }
 41 | 
 42 |     public Document getDocument() {
 43 |         return document;
 44 |     }
 45 | 
 46 |     public void setDocument(Document document) {
 47 |         this.document = document;
 48 |     }
 49 | 
 50 |     /**
 51 |      * 解析文档
 52 |      * @return 当前对象，方便链式调用
 53 |      */
 54 |     public ModularRAG parsing() {
 55 |         String filePath = document.getStoragePath();
 56 |         if (filePath == null || filePath.isEmpty()) {
 57 |             throw new IllegalArgumentException("Document storage path cannot be blank");
 58 |         }
 59 |         String chunkText = FileParserFactory.easyParse(filePath);
 60 |         document.setChunkText(chunkText);
 61 |         return this;
 62 |     }
 63 | 
 64 |     /**
 65 |      * 分块处理，使用多种分块策略
 66 |      * @return 当前对象，方便链式调用
 67 |      */
 68 |     public ModularRAG chunking() {
 69 |         // 可以使用更复杂的分块策略，这里简单示例使用固定大小分块
 70 |         FixedSizeSplitter fixedSizeSplitter = new FixedSizeSplitter(512);
 71 |         List<String> stringList = fixedSizeSplitter.split(document.getChunkText());
 72 |         chunks = new ArrayList<>();
 73 |         for (String chunkText : stringList) {
 74 |             Document chunkDoc = new Document();
 75 |             chunkDoc.setChunkText(chunkText);
 76 |             chunks.add(chunkDoc);
 77 |         }
 78 | 
 79 |         // 额外的分块策略：按段落分块
 80 |         ParagraphSplitter paragraphSplitter = new ParagraphSplitter();
 81 |         List<String> paragraphChunks = paragraphSplitter.split(document.getChunkText());
 82 |         for (String paragraphChunk : paragraphChunks) {
 83 |             Document paragraphDoc = new Document();
 84 |             paragraphDoc.setChunkText(paragraphChunk);
 85 |             chunks.add(paragraphDoc);
 86 |         }
 87 | 
 88 |         return this;
 89 |     }
 90 | 
 91 |     /**
 92 |      * 嵌入处理
 93 |      * @return 当前对象，方便链式调用
 94 |      * @throws IOException 可能的IO异常
 95 |      */
 96 |     public ModularRAG embedding() throws IOException {
 97 |         BaichuanEmbeddingService embeddingService = new BaichuanEmbeddingService(Config.API_KEY);
 98 |         // 获取查询的嵌入向量
 99 |         double[] queryEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, query);
100 | 
101 |         // 为每个文档块生成嵌入向量
102 |         for (Document chunk : chunks) {
103 |             double[] chunkEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, chunk.getChunkText());
104 |             chunk.setTextEmb(chunkEmbedding);
105 |         }
106 |         return this;
107 |     }
108 | 
109 |     /**
110 |      * 排序处理
111 |      * @return 当前对象，方便链式调用
112 |      * @throws IOException 可能的IO异常
113 |      */
114 |     public ModularRAG sorting() throws IOException {
115 |         BaichuanEmbeddingService embeddingService = new BaichuanEmbeddingService(Config.API_KEY);
116 |         // 获取查询的嵌入向量
117 |         double[] queryEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, query);
118 |         // 根据嵌入向量与查询的嵌入向量之间的距离对文档块进行排序
119 |         chunks.sort(Comparator.comparingDouble(chunk -> DistanceUtils.squaredErrorDistance(queryEmbedding, chunk.getTextEmb())));
120 |         return this;
121 |     }
122 | 
123 |     /**
124 |      * 高级筛选，例如过滤掉一些不相关的块
125 |      * @return 当前对象，方便链式调用
126 |      */
127 |     public ModularRAG advancedFiltering() {
128 |         // 简单示例：过滤掉长度小于10的块
129 |         filteredChunks = chunks.stream()
130 |                 .filter(chunk -> chunk.getChunkText().length() > 10)
131 |                 .collect(Collectors.toList());
132 | 
133 |         // 额外的筛选：过滤掉包含特定关键词的块
134 |         String keywordToExclude = "无关内容";
135 |         filteredChunks = filteredChunks.stream()
136 |                 .filter(chunk -> !chunk.getChunkText().contains(keywordToExclude))
137 |                 .collect(Collectors.toList());
138 | 
139 |         return this;
140 |     }
141 | 
142 |     /**
143 |      * 对筛选后的块进行重新排序
144 |      * @return 当前对象，方便链式调用
145 |      * @throws IOException 可能的IO异常
146 |      */
147 |     public ModularRAG reSortingFilteredChunks() throws IOException {
148 |         BaichuanEmbeddingService embeddingService = new BaichuanEmbeddingService(Config.API_KEY);
149 |         // 获取查询的嵌入向量
150 |         double[] queryEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, query);
151 |         // 根据嵌入向量与查询的嵌入向量之间的距离对筛选后的文档块进行排序
152 |         filteredChunks.sort(Comparator.comparingDouble(chunk -> DistanceUtils.squaredErrorDistance(queryEmbedding, chunk.getTextEmb())));
153 |         return this;
154 |     }
155 | 
156 |     /**
157 |      * 大模型聊天
158 |      * @return 当前对象，方便链式调用
159 |      */
160 |     public ModularRAG LLMChat() {
161 |         // 替换为您的API密钥
162 |         String apiKey = Config.API_KEY;
163 |         // 使用百川Baichuan3-Turbo模型
164 |         String model = Config.LLM_MODEL;
165 |         // API的URL
166 |         String url = Config.LLM_URL;
167 | 
168 |         OpenAIChatService openAIChatService = new OpenAIChatService(apiKey);
169 | 
170 |         try {
171 |             // 构建请求参数
172 |             JSONObject params = new JSONObject()
173 |                     .put("model", model)
174 |                     .put("messages", new JSONObject[] {
175 |                             new JSONObject().put("role", "user").put("content", query)
176 |                     })
177 |                     .put("temperature", 0.3)
178 |                     .put("stream", false);
179 | 
180 |             // 调用大模型生成回复
181 |             response = openAIChatService.generateText(url, params);
182 |         } catch (IOException e) {
183 |             e.printStackTrace();
184 |         }
185 |         return this;
186 |     }
187 | 
188 |     /**
189 |      * 对生成的回复进行后处理
190 |      * @return 当前对象，方便链式调用
191 |      */
192 |     public ModularRAG postProcessing() {
193 |         // 简单示例：去除回复中的多余空格
194 |         if (response != null) {
195 |             response = response.replaceAll("\\s+", " ");
196 |         }
197 |         return this;
198 |     }
199 | 
200 |     public static void main(String[] args) {
201 |         ModularRAG modularRAG = new ModularRAG(
202 |                 new Document("./202X企业规划.pdf"),
203 |                 "简要总结这篇文章");
204 |         try {
205 |             modularRAG
206 |                     // 解析
207 |                     .parsing()
208 |                     // 分块
209 |                     .chunking()
210 |                     // 向量化
211 |                     .embedding()
212 |                     // 排序
213 |                     .sorting()
214 |                     // 高级筛选
215 |                     .advancedFiltering()
216 |                     // 对筛选后的块重新排序
217 |                     .reSortingFilteredChunks()
218 |                     // 大模型回复
219 |                     .LLMChat()
220 |                     // 后处理
221 |                     .postProcessing();
222 |         } catch (IOException e) {
223 |             throw new RuntimeException(e);
224 |         }
225 |         System.out.println(modularRAG.response);
226 |     }
227 | }


--------------------------------------------------------------------------------
/src/main/java/org/rag/NaiveRAG.java:
--------------------------------------------------------------------------------
  1 | package org.rag;
  2 | 
  3 | 
  4 | import org.chunk.FixedSizeSplitter;
  5 | import org.constant.Config;
  6 | import org.entity.Document;
  7 | import org.json.JSONObject;
  8 | import org.parser.FileParserFactory;
  9 | import org.parser.WordParser;
 10 | import org.service.LLM.OpenAIChatService;
 11 | import org.service.embedding.BaichuanEmbeddingService;
 12 | import org.service.embedding.EmbeddingService;
 13 | import org.utils.DistanceUtils;
 14 | 
 15 | import java.io.IOException;
 16 | import java.util.Comparator;
 17 | import java.util.List;
 18 | import java.util.Optional;
 19 | import java.util.stream.Collectors;
 20 | import java.util.stream.IntStream;
 21 | 
 22 | public class NaiveRAG {
 23 |     Document document;
 24 |     List<Document> chunks;
 25 |     String query;
 26 |     String response;
 27 | 
 28 |     public String getResponse() {
 29 |         return response;
 30 |     }
 31 | 
 32 |     public void setResponse(String response) {
 33 |         this.response = response;
 34 |     }
 35 | 
 36 |     public NaiveRAG() {
 37 |     }
 38 | 
 39 |     public NaiveRAG(Document document, String query) {
 40 |         this.document = document;
 41 |         this.query = query;
 42 |     }
 43 | 
 44 |     public Document getDocument() {
 45 |         return document;
 46 |     }
 47 | 
 48 |     public void setDocument(Document document) {
 49 |         this.document = document;
 50 |     }
 51 | 
 52 |     public NaiveRAG parsering(){
 53 |         String filePath = document.getStoragePath();
 54 |         String chunkText = FileParserFactory.easyParse(filePath);
 55 |         document.setChunkText(chunkText);
 56 |         return this;
 57 |     }
 58 | 
 59 |     public NaiveRAG chunking(){
 60 |         FixedSizeSplitter fixedSizeSplitter = new FixedSizeSplitter(512);
 61 |         List<String> stringList =  fixedSizeSplitter.split(document.getChunkText());
 62 |         chunks = stringList.stream()
 63 |                 .map(chunkText -> {
 64 |                     Document chunkDoc = new Document();
 65 |                     chunkDoc.setChunkText(chunkText);
 66 |                     return chunkDoc;
 67 |                 })
 68 |                 .collect(Collectors.toList());
 69 |         return this;
 70 |     }
 71 | 
 72 |     public NaiveRAG embedding() throws IOException {
 73 | 
 74 |         BaichuanEmbeddingService embeddingService = new BaichuanEmbeddingService(Config.API_KEY);
 75 |         // 获取查询的嵌入向量
 76 |         double[] queryEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, query);
 77 | 
 78 |         // 为每个文档块生成嵌入向量
 79 |         for (Document chunk : chunks) {
 80 |             double[] chunkEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, chunk.getChunkText());
 81 |             chunk.setTextEmb(chunkEmbedding);
 82 |         }
 83 | 
 84 |         return this;
 85 |     }
 86 | 
 87 |     public NaiveRAG sorting() throws IOException {
 88 | 
 89 |         BaichuanEmbeddingService embeddingService = new BaichuanEmbeddingService(Config.API_KEY);
 90 |         // 获取查询的嵌入向量
 91 |         double[] queryEmbedding = embeddingService.getEmbedding(Config.EMBEDDING_API_URL, query);
 92 |         // 根据嵌入向量与查询的嵌入向量之间的距离对文档块进行排序
 93 |         chunks.sort(Comparator.comparingDouble(chunk -> DistanceUtils.squaredErrorDistance(queryEmbedding, chunk.getTextEmb())));
 94 |         return this;
 95 |     }
 96 | 
 97 |     public NaiveRAG LLMChat(){
 98 |         // 替换为您的API密钥
 99 |         String apiKey = Config.API_KEY;
100 |         // 使用百川Baichuan3-Turbo模型
101 |         String model = Config.LLM_MODEL;
102 |         // API的URL
103 |         String url = Config.LLM_URL;
104 | 
105 |         OpenAIChatService openAIChatService = new OpenAIChatService(apiKey);
106 | 
107 |         try {
108 |             // 构建请求参数
109 |             JSONObject params = new JSONObject()
110 |                     .put("model", model)
111 |                     .put("messages", new JSONObject[] {
112 |                             new JSONObject().put("role", "user").put("content", "1+1 = ?")
113 |                     })
114 |                     .put("temperature", 0.3)
115 |                     .put("stream", false);
116 | 
117 |             // 这里可以替换为您想要询问的问题
118 |             response = openAIChatService.generateText(url, params);
119 |         } catch (IOException e) {
120 |             e.printStackTrace();
121 |         }
122 |         return this;
123 |     }
124 | 
125 |     public static void main(String[] args) {
126 |         NaiveRAG naiveRAG = new NaiveRAG(
127 |                 new Document("./202X企业规划.pdf"),
128 |                 "简要总结这篇文章");
129 |         try {
130 |             naiveRAG
131 |                     // 解析
132 |                     .parsering()
133 |                     // 分块
134 |                     .chunking()
135 |                     // 向量化
136 |                     .embedding()
137 |                     // 排序
138 |                     .sorting()
139 |                     // 大模型回复
140 |                     .LLMChat();
141 |         } catch (IOException e) {
142 |             throw new RuntimeException(e);
143 |         }
144 |         System.out.println(naiveRAG.response);
145 |     }
146 | 
147 | }
148 | 


--------------------------------------------------------------------------------
/src/main/java/org/search/Pipeline.java:
--------------------------------------------------------------------------------
 1 | package org.search;
 2 | 
 3 | import com.alibaba.fastjson.JSON;
 4 | import com.alibaba.fastjson.parser.ParserConfig;
 5 | import com.alibaba.fastjson.serializer.SerializerFeature;
 6 | import org.entity.Document;
 7 | import org.entity.SearchInput;
 8 | import org.entity.SearchOutput;
 9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 | 
12 | public class Pipeline {
13 |     private final static Logger logger = LoggerFactory.getLogger(Pipeline.class);
14 |     static {
15 |         try{
16 |             // Global fastjson settings: enable safe mode, write non-string keys as strings,
17 |             // and disable circular-reference detection.
18 |             ParserConfig.getGlobalInstance().setSafeMode(true);
19 |             JSON.DEFAULT_GENERATE_FEATURE |= SerializerFeature.WriteNonStringKeyAsString.getMask();
20 |             JSON.DEFAULT_GENERATE_FEATURE |= SerializerFeature.DisableCircularReferenceDetect.getMask();
21 |         }catch (Throwable t){
22 |             // A configuration failure must not stop the class from loading, but it deserves a warning.
23 |             logger.warn(t.getMessage(), t);
24 |         }
25 |     }
26 |     private SearchInput searchInput;
27 |     private SearchOutput searchOutput;
28 | 
29 |     /** Creates an unbound pipeline; use the two-argument stage methods or set the state first. */
30 |     public Pipeline() {
31 |     }
32 | 
33 |     /**
34 |      * Creates a pipeline bound to one request/result pair, enabling the no-argument stage methods.
35 |      *
36 |      * @param searchInput  the search request
37 |      * @param searchOutput the result holder that the stages fill and reorder
38 |      */
39 |     public Pipeline(SearchInput searchInput, SearchOutput searchOutput) {
40 |         this.searchInput = searchInput;
41 |         this.searchOutput = searchOutput;
42 |     }
43 | 
44 |     public void setSearchInput(SearchInput searchInput) {
45 |         this.searchInput = searchInput;
46 |     }
47 | 
48 |     public void setSearchOutput(SearchOutput searchOutput) {
49 |         this.searchOutput = searchOutput;
50 |     }
51 | 
52 |     /** Recall stage on the given pair; the pipeline's own state is not touched. */
53 |     public void recall(SearchInput searchInput,SearchOutput searchOutput){
54 |         RecallStrategy.esRecall(searchInput,searchOutput);
55 |     }
56 | 
57 |     /** Recall stage on the bound pair. */
58 |     public void recall(){
59 |         requireBoundState();
60 |         RecallStrategy.esRecall(searchInput,searchOutput);
61 |     }
62 | 
63 |     /** Sort stage on the given pair; the pipeline's own state is not touched. */
64 |     public void sort(SearchInput searchInput,SearchOutput searchOutput){
65 |         SortStrategy.dummySort(searchInput,searchOutput);
66 |     }
67 | 
68 |     /** Sort stage on the bound pair. */
69 |     public void sort(){
70 |         requireBoundState();
71 |         SortStrategy.dummySort(searchInput,searchOutput);
72 |     }
73 | 
74 |     /** Rerank stage on the given pair; the pipeline's own state is not touched. */
75 |     public void rerank(SearchInput searchInput,SearchOutput searchOutput){
76 |         RerankStrategy.JinaCobertRerank(searchInput,searchOutput);
77 |     }
78 | 
79 |     /** Rerank stage on the bound pair. */
80 |     public void rerank(){
81 |         requireBoundState();
82 |         RerankStrategy.JinaCobertRerank(searchInput,searchOutput);
83 |     }
84 | 
85 |     /**
86 |      * Runs recall, sort and rerank in that order on the bound pair.
87 |      *
88 |      * @return the bound search output after all three stages
89 |      * @throws IllegalStateException if the input or output has not been set
90 |      */
91 |     public SearchOutput getDefaultResult(){
92 |         // recall
93 |         recall();
94 |         // sort
95 |         sort();
96 |         // rerank
97 |         rerank();
98 |         return searchOutput;
99 |     }
100 | 
101 |     /** Fails fast with a clear message instead of a NullPointerException inside a strategy. */
102 |     private void requireBoundState() {
103 |         if (searchInput == null || searchOutput == null) {
104 |             throw new IllegalStateException("searchInput and searchOutput must be set before running the pipeline");
105 |         }
106 |     }
107 | }
108 | 


--------------------------------------------------------------------------------
/src/main/java/org/search/RecallStrategy.java:
--------------------------------------------------------------------------------
 1 | package org.search;
 2 | 
 3 | import org.entity.Document;
 4 | import org.entity.SearchInput;
 5 | import org.entity.SearchOutput;
 6 | import org.service.db.ESClient;
 7 | 
 8 | import java.util.List;
 9 | 
10 | public class RecallStrategy {
11 | 
12 |     /**
13 |      * Recall stage backed by {@link ESClient}: searches chunks for the input's document and
14 |      * stores the returned list on the output.
15 |      *
16 |      * @param searchInput  supplies the query document
17 |      * @param searchOutput receives the recalled documents
18 |      */
19 |     public static void esRecall(SearchInput searchInput,SearchOutput searchOutput){
20 |         // NOTE(review): 1 and 100 look like page number and page size — confirm against ESClient.searchChunk.
21 |         List<Document> recalled = ESClient.getInstance().searchChunk(searchInput.getDocument(), 1, 100);
22 |         searchOutput.setDocuments(recalled);
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/src/main/java/org/search/RerankStrategy.java:
--------------------------------------------------------------------------------
 1 | package org.search;
 2 | 
 3 | import org.constant.Config;
 4 | import org.entity.SearchInput;
 5 | import org.entity.SearchOutput;
 6 | import org.entity.Document;
 7 | import org.service.embedding.JinaEmbeddingRerankService;
 8 | import org.utils.DistanceUtils;
 9 | 
10 | import java.util.Comparator;
11 | import java.io.IOException;
12 | import java.util.List;
13 | import java.util.stream.Collectors;
14 | import java.util.stream.IntStream;
15 | 
16 | public class RerankStrategy {
17 |     /**
18 |      * Reorders the output documents by ascending squared-error distance between the embedding
19 |      * of each document's chunk text and the embedding of the query document's chunk text.
20 |      *
21 |      * <p>Does nothing when the output has no documents, so no embedding request is made.
22 |      *
23 |      * @param searchInput  supplies the query document
24 |      * @param searchOutput holds the candidates; its document list is replaced by the reranked list
25 |      * @throws IllegalStateException if fewer embeddings than documents are returned
26 |      * @throws RuntimeException      wrapping the {@link IOException} of a failed embedding request
27 |      */
28 |     public static void JinaCobertRerank(SearchInput searchInput, SearchOutput searchOutput){
29 |         List<Document> candidates = searchOutput.getDocuments();
30 |         if (candidates == null || candidates.isEmpty()) {
31 |             // Nothing to rerank; skip the embedding requests.
32 |             return;
33 |         }
34 |         JinaEmbeddingRerankService service = JinaEmbeddingRerankService.instance;
35 |         try {
36 |             double[] input = service.getEmbedding(Config.Jina_multi_vector, searchInput.getDocument().getChunkText());
37 |             double[][] outputs = service.getEmbeddings(Config.Jina_multi_vector, candidates.stream()
38 |                     .map(Document::getChunkText)
39 |                     .toArray(String[]::new));
40 | 
41 |             // Every candidate needs its own embedding; report a short result explicitly.
42 |             if (outputs.length < candidates.size()) {
43 |                 throw new IllegalStateException("Expected " + candidates.size()
44 |                         + " embeddings but received " + outputs.length);
45 |             }
46 | 
47 |             // Pair each document with its distance to the query embedding.
48 |             List<DocumentWithDistance> documentsWithDistance = IntStream.range(0, candidates.size())
49 |                     .mapToObj(i -> new DocumentWithDistance(candidates.get(i),
50 |                             DistanceUtils.squaredErrorDistance(input, outputs[i])))
51 |                     .collect(Collectors.toList());
52 | 
53 |             // Smallest distance first.
54 |             documentsWithDistance.sort(Comparator.comparingDouble(DocumentWithDistance::getDistance));
55 | 
56 |             // Store the documents back in their new order.
57 |             searchOutput.setDocuments(documentsWithDistance.stream()
58 |                     .map(DocumentWithDistance::getDocument)
59 |                     .collect(Collectors.toList()));
60 |         } catch (IOException e) {
61 |             throw new RuntimeException(e);
62 |         }
63 |     }
64 | 
65 |     /** Immutable pair of a document and its distance to the query embedding. */
66 |     private static class DocumentWithDistance {
67 |         private final Document document;
68 |         private final double distance;
69 | 
70 |         public DocumentWithDistance(Document document, double distance) {
71 |             this.document = document;
72 |             this.distance = distance;
73 |         }
74 | 
75 |         public Document getDocument() {
76 |             return document;
77 |         }
78 | 
79 |         public double getDistance() {
80 |             return distance;
81 |         }
82 |     }
83 | }


--------------------------------------------------------------------------------
/src/main/java/org/search/SortStrategy.java:
--------------------------------------------------------------------------------
 1 | package org.search;
 2 | 
 3 | import org.entity.Document;
 4 | import org.entity.SearchInput;
 5 | import org.entity.SearchOutput;
 6 | 
 7 | import java.util.Collections;
 8 | import java.util.Comparator;
 9 | import java.util.List;
10 | 
11 | /** Ordering helpers applied to search results. */
12 | public class SortStrategy {
13 |     /**
14 |      * Sorts the list returned by {@code searchOutput.getDocuments()} so the highest score comes first.
15 |      *
16 |      * @param searchInput  the originating request; not read by this strategy
17 |      * @param searchOutput result holder whose document list is sorted
18 |      */
19 |     public static void dummySort(SearchInput searchInput, SearchOutput searchOutput) {
20 |         Comparator<Document> byScoreDescending = Comparator.comparingDouble(Document::getScore).reversed();
21 |         searchOutput.getDocuments().sort(byScoreDescending); // descending by score
22 |     }
23 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/LLM/OllamaChatService.java:
--------------------------------------------------------------------------------
 1 | package org.service.LLM;
 2 | 
 3 | import okhttp3.OkHttpClient;
 4 | import okhttp3.MediaType;
 5 | import okhttp3.Request;
 6 | import okhttp3.RequestBody;
 7 | import okhttp3.Response;
 8 | import org.json.JSONObject;
 9 | 
10 | import java.io.IOException;
11 | 
12 | /**
13 |  * Minimal client for the chat endpoint of an Ollama server listening on localhost:11434.
14 |  */
15 | public class OllamaChatService {
16 | 
17 |     private OkHttpClient client = new OkHttpClient();
18 | 
19 |     /**
20 |      * Posts a single user message to the chat endpoint and returns the raw response body.
21 |      *
22 |      * @param model   model name placed in the "model" field of the request
23 |      * @param message text sent as the content of one "user" message
24 |      * @return the response body exactly as received; it is not parsed here
25 |      * @throws Exception if the call fails; a non-successful status is raised as an IOException
26 |      */
27 |     public String generateChatCompletion(String model, String message) throws Exception {
28 |         // Single-turn conversation: exactly one message carrying the "user" role.
29 |         JSONObject userTurn = new JSONObject()
30 |                 .put("role", "user")
31 |                 .put("content", message);
32 |         JSONObject payload = new JSONObject()
33 |                 .put("model", model)
34 |                 .put("messages", new JSONObject[]{userTurn});
35 | 
36 |         // Serialize the payload and wrap it as a JSON request body.
37 |         MediaType jsonType = MediaType.get("application/json; charset=utf-8");
38 |         Request request = new Request.Builder()
39 |                 .url("http://localhost:11434/api/chat")
40 |                 .post(RequestBody.create(payload.toString(), jsonType))
41 |                 .build();
42 | 
43 |         // Execute the call; the response is closed when the try block exits.
44 |         try (Response response = client.newCall(request).execute()) {
45 |             if (response.isSuccessful()) {
46 |                 return response.body().string();
47 |             }
48 |             throw new IOException("Unexpected code " + response);
49 |         }
50 |     }
51 | 
52 |     /** Manual smoke test: asks one question and prints the raw answer. */
53 |     public static void main(String[] args) {
54 |         try {
55 |             OllamaChatService service = new OllamaChatService();
56 |             System.out.println(service.generateChatCompletion("llama3.2", "why is the sky blue?"));
57 |         } catch (Exception e) {
58 |             e.printStackTrace();
59 |         }
60 |     }
61 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/LLM/OpenAIChatService.java:
--------------------------------------------------------------------------------
  1 | package org.service.LLM;
  2 | 
  3 | import okhttp3.*;
  4 | import org.constant.Config;
  5 | import org.json.JSONObject;
  6 | import org.service.db.RedisClient;
  7 | 
  8 | import java.io.IOException;
  9 | import java.util.List;
 10 | 
 11 | /**
 12 |  * Client for an OpenAI-compatible chat-completion endpoint, with optional Redis-backed chat history.
 13 |  */
 14 | public class OpenAIChatService {
 15 | 
 16 |     private final String apiKey;
 17 |     private final OkHttpClient client;
 18 | 
 19 |     /**
 20 |      * @param apiKey key sent as a Bearer token in the Authorization header of every request
 21 |      */
 22 |     public OpenAIChatService(String apiKey) {
 23 |         this.apiKey = apiKey;
 24 |         this.client = new OkHttpClient();
 25 |     }
 26 | 
 27 |     /**
 28 |      * Posts {@code params} as JSON to {@code url} and extracts the content of the first choice's message.
 29 |      *
 30 |      * @param url    URL of the API
 31 |      * @param params complete request payload
 32 |      * @return the generated text
 33 |      * @throws IOException if the call fails or the server answers with a non-successful status
 34 |      */
 35 |     public String generateText(String url, JSONObject params) throws IOException {
 36 |         RequestBody body = RequestBody.create(params.toString(), MediaType.get("application/json; charset=utf-8"));
 37 | 
 38 |         Request request = new Request.Builder()
 39 |                 .url(url)
 40 |                 .post(body)
 41 |                 .header("Content-Type", "application/json")
 42 |                 .header("Authorization", "Bearer " + apiKey)
 43 |                 .build();
 44 | 
 45 |         try (Response response = client.newCall(request).execute()) {
 46 |             if (!response.isSuccessful()) throw new IOException("Unexpected code " + response);
 47 | 
 48 |             String responseBody = response.body().string();
 49 |             JSONObject jsonObject = new JSONObject(responseBody);
 50 |             return jsonObject.getJSONArray("choices").getJSONObject(0).getJSONObject("message").getString("content");
 51 |         }
 52 |     }
 53 | 
 54 |     /**
 55 |      * Sends {@code newMessage} together with the history stored under {@code chatId}, then records
 56 |      * both the new message and the reply in that history.
 57 |      *
 58 |      * @param url        URL of the API
 59 |      * @param chatId     unique identifier of the conversation; used as the Redis list key
 60 |      * @param newMessage the new message to send
 61 |      * @return the generated text
 62 |      * @throws IOException if the request fails
 63 |      */
 64 |     public String generateText(String url, String chatId, JSONObject newMessage) throws IOException {
 65 |         RedisClient redisClient = RedisClient.getInstance();
 66 |         // History is written with lpush (see the end of this method), so the list comes back newest-first.
 67 |         List<String> newestFirst = redisClient.lrange(chatId, 0, -1);
 68 |         int historySize = newestFirst.size();
 69 |         JSONObject[] messageArray = new JSONObject[historySize + 1];
 70 |         // Walk the stored list backwards so the turns are sent oldest-first, followed by the new message.
 71 |         for (int i = 0; i < historySize; i++) {
 72 |             messageArray[i] = new JSONObject(newestFirst.get(historySize - 1 - i));
 73 |         }
 74 |         messageArray[historySize] = newMessage;
 75 | 
 76 |         // Build the request parameters.
 77 |         JSONObject params = new JSONObject()
 78 |                 .put("model", Config.LLM_MODEL)
 79 |                 .put("messages", messageArray)
 80 |                 .put("temperature", 0.3)
 81 |                 .put("stream", false);
 82 | 
 83 |         // Request and response handling is shared with the stateless overload.
 84 |         String generatedText = generateText(url, params);
 85 | 
 86 |         // Record the user turn first and the assistant turn second; history is only touched after a successful call.
 87 |         redisClient.lpush(chatId, newMessage.toString(), Config.REDIS_EXPIRE_SECONDS);
 88 |         redisClient.lpush(chatId, new JSONObject().put("role", "assistant").put("content", generatedText).toString(), Config.REDIS_EXPIRE_SECONDS);
 89 | 
 90 |         return generatedText;
 91 |     }
 92 | 
 93 |     /** Manual smoke test exercising both overloads against the endpoint configured in Config. */
 94 |     public static void main(String[] args) {
 95 |         // API key taken from the configuration.
 96 |         String apiKey = Config.API_KEY;
 97 |         // Model name taken from the configuration.
 98 |         String model = Config.LLM_MODEL;
 99 |         // URL of the API.
100 |         String url = Config.LLM_URL;
101 | 
102 |         OpenAIChatService openAIChatService = new OpenAIChatService(apiKey);
103 | 
104 |         try {
105 |             // Build the request parameters.
106 |             JSONObject params = new JSONObject()
107 |                     .put("model", model)
108 |                     .put("messages", new JSONObject[] {
109 |                             new JSONObject().put("role", "user").put("content", "1+1 = ?")
110 |                     })
111 |                     .put("temperature", 0.3)
112 |                     .put("stream", false);
113 | 
114 |             // Stateless call; replace the question above as needed.
115 |             String generatedText = openAIChatService.generateText(url, params);
116 |             System.out.println(generatedText);
117 | 
118 | 
119 |             // History-aware call using a chat id and a new message.
120 |             String chatId = "chat123";
121 |             JSONObject newMessage = new JSONObject().put("role", "user").put("content", "What my last question?");
122 |             String generatedTextWithChatId = openAIChatService.generateText(url, chatId, newMessage);
123 |             System.out.println("Generated Text (With Chat ID): " + generatedTextWithChatId);
124 |         } catch (IOException e) {
125 |             e.printStackTrace();
126 |         }
127 |     }
128 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/Main.java:
--------------------------------------------------------------------------------
1 | package org.service;
2 | 
3 | public class Main { // Placeholder entry point for the org.service package.
4 |     public static void main(String[] args) { // args is not read
5 |         System.out.println("Hello world!"); // prints a fixed greeting to standard output
6 |     }
7 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/balance/LoadBalancer.java:
--------------------------------------------------------------------------------
1 | package org.service.balance;
2 | 
3 | import com.alibaba.nacos.api.naming.pojo.Instance;
4 | import java.util.List;
5 | 
6 | // Load balancer interface: a strategy that picks one instance from a list of candidates.
7 | public interface LoadBalancer {
8 |     Instance select(List<Instance> instances); // the implementations in this package return null for a null or empty list
9 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/balance/Main.java:
--------------------------------------------------------------------------------
 1 | package org.service.balance;
 2 | 
 3 | import com.alibaba.nacos.api.exception.NacosException;
 4 | import com.alibaba.nacos.api.naming.pojo.Instance;
 5 | 
 6 | // Demo entry point showing how to use the load-balancing client with each strategy.
 7 | public class Main {
 8 |     public static void main(String[] args) throws NacosException {
 9 |         String serviceName = "your-service-name";
10 | 
11 |         // Round-robin strategy
12 |         report("Round Robin selected instance: ", serviceName, new RoundRobinLoadBalancer());
13 | 
14 |         // Random strategy
15 |         report("Random selected instance: ", serviceName, new RandomLoadBalancer());
16 | 
17 |         // Weighted-random strategy
18 |         report("Weighted Random selected instance: ", serviceName, new WeightedRandomLoadBalancer());
19 |     }
20 | 
21 |     // Builds a client for the given strategy, asks it for one instance and prints it after the prefix.
22 |     private static void report(String prefix, String serviceName, LoadBalancer strategy) throws NacosException {
23 |         NacosLoadBalancingClient client = new NacosLoadBalancingClient(serviceName, strategy);
24 |         Instance chosen = client.getNextInstance();
25 |         System.out.println(prefix + chosen);
26 |     }
27 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/balance/NacosLoadBalancingClient.java:
--------------------------------------------------------------------------------
 1 | package org.service.balance;
 2 | 
 3 | import com.alibaba.nacos.api.NacosFactory;
 4 | import com.alibaba.nacos.api.exception.NacosException;
 5 | import com.alibaba.nacos.api.naming.NamingService;
 6 | import com.alibaba.nacos.api.naming.listener.Event;
 7 | import com.alibaba.nacos.api.naming.listener.EventListener;
 8 | import com.alibaba.nacos.api.naming.listener.NamingEvent;
 9 | import com.alibaba.nacos.api.naming.pojo.Instance;
10 | import java.util.ArrayList;
11 | import java.util.List;
12 | import java.util.Properties;
13 | 
14 | /**
15 |  * Service-discovery and load-balancing client: tracks the instances Nacos reports for one
16 |  * service and delegates the choice of an instance to a {@link LoadBalancer}.
17 |  */
18 | public class NacosLoadBalancingClient {
19 |     private static final String SERVER_ADDRESSES = "http://124.223.85.176:8848";
20 |     private static final String NAMESPACE = "public";
21 |     private final NamingService namingService;
22 |     private final LoadBalancer loadBalancer;
23 |     // Never mutated in place: each update publishes a fresh list, so a reader always sees one
24 |     // complete snapshot even when the listener callback runs on another thread.
25 |     private volatile List<Instance> instances;
26 | 
27 |     /**
28 |      * @param serviceName  name of the service whose instances are tracked
29 |      * @param loadBalancer strategy used by {@link #getNextInstance()}
30 |      * @throws NacosException if the naming service cannot be created, queried or subscribed to
31 |      */
32 |     public NacosLoadBalancingClient(String serviceName, LoadBalancer loadBalancer) throws NacosException {
33 |         this.loadBalancer = loadBalancer;
34 |         Properties properties = new Properties();
35 |         properties.put("serverAddr", SERVER_ADDRESSES);
36 |         properties.put("namespace", NAMESPACE);
37 |         namingService = NacosFactory.createNamingService(properties);
38 |         // Seed the snapshot before subscribing so an early event replaces it instead of being appended to.
39 |         instances = new ArrayList<>(namingService.getAllInstances(serviceName));
40 |         namingService.subscribe(serviceName, new EventListener() {
41 |             @Override
42 |             public void onEvent(Event event) {
43 |                 if (event instanceof NamingEvent) {
44 |                     List<Instance> latest = new ArrayList<>(((NamingEvent) event).getInstances());
45 |                     instances = latest;
46 |                     System.out.println("Service instances updated: " + latest);
47 |                 }
48 |             }
49 |         });
50 |     }
51 | 
52 |     /** @return the instance chosen by the configured strategy from the current snapshot */
53 |     public Instance getNextInstance() {
54 |         return loadBalancer.select(instances);
55 |     }
56 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/balance/RandomLoadBalancer.java:
--------------------------------------------------------------------------------
 1 | package org.service.balance;
 2 | 
 3 | import com.alibaba.nacos.api.naming.pojo.Instance;
 4 | import java.util.List;
 5 | import java.util.Random;
 6 | 
 7 | // 随机负载均衡器实现
 8 | public class RandomLoadBalancer implements LoadBalancer {
 9 |     private final Random random = new Random();
10 | 
11 |     @Override
12 |     public Instance select(List<Instance> instances) {
13 |         if (instances == null || instances.isEmpty()) {
14 |             return null;
15 |         }
16 |         return instances.get(random.nextInt(instances.size()));
17 |     }
18 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/balance/RoundRobinLoadBalancer.java:
--------------------------------------------------------------------------------
 1 | package org.service.balance;
 2 | 
 3 | import com.alibaba.nacos.api.naming.pojo.Instance;
 4 | import java.util.List;
 5 | import java.util.concurrent.atomic.AtomicInteger;
 6 | 
 7 | /** Load balancer that cycles through the candidates in order, one per call. */
 8 | public class RoundRobinLoadBalancer implements LoadBalancer {
 9 |     private final AtomicInteger index = new AtomicInteger(0);
10 | 
11 |     /**
12 |      * @param instances candidates to choose from
13 |      * @return the next candidate in rotation, or {@code null} when the list is null or empty
14 |      */
15 |     @Override
16 |     public Instance select(List<Instance> instances) {
17 |         if (instances == null || instances.isEmpty()) {
18 |             return null;
19 |         }
20 |         // floorMod keeps the index non-negative after the counter wraps past Integer.MAX_VALUE;
21 |         // the % operator would produce a negative index there and List.get would throw.
22 |         int currentIndex = Math.floorMod(index.getAndIncrement(), instances.size());
23 |         return instances.get(currentIndex);
24 |     }
25 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/balance/WeightedRandomLoadBalancer.java:
--------------------------------------------------------------------------------
 1 | package org.service.balance;
 2 | import com.alibaba.nacos.api.naming.pojo.Instance;
 3 | import java.util.List;
 4 | import java.util.Random;
 5 | 
 6 | /** Load balancer that picks an instance at random with probability proportional to its weight. */
 7 | public class WeightedRandomLoadBalancer implements LoadBalancer {
 8 |     private final Random random = new Random();
 9 | 
10 |     /**
11 |      * @param instances candidates to choose from
12 |      * @return the chosen candidate; the first candidate when no weight is positive;
13 |      *         {@code null} when the list is null or empty
14 |      */
15 |     @Override
16 |     public Instance select(List<Instance> instances) {
17 |         if (instances == null || instances.isEmpty()) {
18 |             return null;
19 |         }
20 |         double totalWeight = 0;
21 |         for (Instance instance : instances) {
22 |             totalWeight += instance.getWeight();
23 |         }
24 |         // Draw a point in [0, totalWeight) and return the instance whose cumulative range contains it.
25 |         double randomWeight = random.nextDouble() * totalWeight;
26 |         double currentWeight = 0;
27 |         for (Instance instance : instances) {
28 |             currentWeight += instance.getWeight();
29 |             // Strict comparison: a zero-weight instance covers an empty range and must never win,
30 |             // not even when the draw is exactly 0.0.
31 |             if (currentWeight > randomWeight) {
32 |                 return instance;
33 |             }
34 |         }
35 |         // Only reached when the total weight is not positive.
36 |         return instances.get(0);
37 |     }
38 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/db/ESClient.java:
--------------------------------------------------------------------------------
  1 | package org.service.db;
  2 | 
  3 | import com.alibaba.fastjson.JSON;
  4 | 
  5 | import com.alibaba.fastjson.JSONArray;
  6 | import com.alibaba.fastjson.JSONObject;
  7 | import okhttp3.*;
  8 | import org.constant.Config;
  9 | import org.entity.Document;
 10 | import org.utils.HttpClientUtil;
 11 | import org.utils.SnowflakeIdGenerator;
 12 | 
 13 | import java.io.BufferedReader;
 14 | import java.io.FileReader;
 15 | import java.io.IOException;
 16 | import java.util.ArrayList;
 17 | import java.util.List;
 18 | 
 19 | 
 20 | /**
 21 |  * Small HTTP client for the Elasticsearch "documents" index: health check, chunk insert and chunk search.
 22 |  */
 23 | public class ESClient {
 24 |     private String esUrl;
 25 |     private String username;
 26 |     private String password;
 27 |     private OkHttpClient client;
 28 |     private static final String QUERY_TEMPLATE_PATH = "assert/es_search_chunk.sql";
 29 |     private static final ESClient instance = new ESClient(Config.esUrl, Config.esUserName, Config.esPassWord);
 30 | 
 31 |     public ESClient(String esUrl, String username, String password) {
 32 |         this.esUrl = esUrl;
 33 |         this.username = username;
 34 |         this.password = password;
 35 |         this.client = HttpClientUtil.createHttpClient(username, password);
 36 |     }
 37 | 
 38 |     /** Returns the shared instance built from the values in Config. */
 39 |     public static ESClient getInstance() {
 40 |         return instance;
 41 |     }
 42 | 
 43 |     /** Calls the {@code _cat/health} endpoint and prints whether the request succeeded. */
 44 |     public void testConnection() {
 45 |         Request request = new Request.Builder()
 46 |                 .url(esUrl + "/_cat/health")
 47 |                 .build();
 48 | 
 49 |         try (Response response = client.newCall(request).execute()) {
 50 |             if (response.isSuccessful()) {
 51 |                 System.out.println("连接到Elasticsearch成功！");
 52 |             } else {
 53 |                 System.out.println("连接到Elasticsearch失败，状态码：" + response.code());
 54 |             }
 55 |         } catch (IOException e) {
 56 |             e.printStackTrace();
 57 |         }
 58 |     }
 59 | 
 60 |     /**
 61 |      * Adds a new document chunk to Elasticsearch under a freshly generated id.
 62 |      *
 63 |      * @param document the document to add
 64 |      * @return {@code true} only when the server answered 201; {@code false} otherwise or on I/O failure
 65 |      */
 66 |     public boolean addChunk(Document document) {
 67 |         // Unique id produced by the Snowflake generator.
 68 |         String uniqueId = SnowflakeIdGenerator.generateUniqueID();
 69 | 
 70 |         // Serialize the document to JSON.
 71 |         String jsonString = JSON.toJSONString(document);
 72 | 
 73 |         // POST it to the create endpoint.
 74 |         RequestBody body = RequestBody.create(jsonString, MediaType.get("application/json; charset=utf-8"));
 75 |         Request request = new Request.Builder()
 76 |                 .url(esUrl + "/documents/_create/" + uniqueId)
 77 |                 .post(body)
 78 |                 .build();
 79 | 
 80 |         try (Response response = client.newCall(request).execute()) {
 81 |             // 201 means the document was created.
 82 |             if (response.isSuccessful() && response.code() == 201) {
 83 |                 System.out.println("embedding 添加成功！");
 84 |                 return true;
 85 |             } else {
 86 |                 System.out.println("embedding 添加失败，状态码：" + response.code());
 87 |                 return false;
 88 |             }
 89 |         } catch (IOException e) {
 90 |             e.printStackTrace();
 91 |             return false;
 92 |         }
 93 |     }
 94 | 
 95 |     /**
 96 |      * Reads the query template from disk and fills in its four placeholders.
 97 |      *
 98 |      * @param chunkText text to search for
 99 |      * @param boost     boost value
100 |      * @param userId    user id
101 |      * @param size      maximum number of results
102 |      * @return the formatted query, or {@code null} when the template cannot be read
103 |      */
104 |     private String getFormattedQuery(String chunkText, float boost, String userId, int size) {
105 |         StringBuilder queryTemplate = new StringBuilder();
106 |         try (BufferedReader br = new BufferedReader(new FileReader(QUERY_TEMPLATE_PATH))) {
107 |             String line;
108 |             while ((line = br.readLine()) != null) {
109 |                 queryTemplate.append(line);
110 |             }
111 |         } catch (IOException e) {
112 |             e.printStackTrace();
113 |             return null;
114 |         }
115 | 
116 |         // Locale.ROOT keeps numeric placeholders in plain "1.5" form; a comma-decimal default
117 |         // locale would otherwise produce invalid JSON for the boost.
118 |         // NOTE(review): escaping assumes the template wraps the text placeholders in double
119 |         // quotes - confirm against assert/es_search_chunk.sql.
120 |         return String.format(java.util.Locale.ROOT, queryTemplate.toString(),
121 |                 escapeForJsonString(chunkText), boost, escapeForJsonString(userId), size);
122 |     }
123 | 
124 |     /** Escapes quotes, backslashes and control characters so the value can sit inside a JSON string literal. */
125 |     private static String escapeForJsonString(String raw) {
126 |         if (raw == null) {
127 |             return null;
128 |         }
129 |         String quoted = JSON.toJSONString(raw);
130 |         // Drop the surrounding quotes added by the serializer; the template supplies its own.
131 |         return quoted.substring(1, quoted.length() - 1);
132 |     }
133 | 
134 |     /**
135 |      * Searches the "documents" index using the chunk text and user id of {@code document}.
136 |      *
137 |      * @param document carrier of the query text and user id
138 |      * @param boost    boost value passed to the query template
139 |      * @param size     maximum number of results
140 |      * @return the matching documents with their scores; an empty list on any failure
141 |      */
142 |     public List<Document> searchChunk(Document document, float boost, int size) {
143 |         String queryJson = getFormattedQuery(document.getChunkText(), boost, document.getUserId(), size);
144 | 
145 |         if (queryJson == null) {
146 |             System.out.println("Failed to read query template.");
147 |             return new ArrayList<>();
148 |         }
149 | 
150 |         RequestBody body = RequestBody.create(queryJson, MediaType.get("application/json; charset=utf-8"));
151 |         Request request = new Request.Builder()
152 |                 .url(esUrl + "/documents/_search")
153 |                 .post(body)
154 |                 .build();
155 | 
156 |         try (Response response = client.newCall(request).execute()) {
157 |             if (response.isSuccessful()) {
158 |                 String responseBody = response.body().string();
159 |                 return parseSearchResults(responseBody);
160 |             } else {
161 |                 System.out.println("搜索失败，状态码：" + response.code());
162 |                 return new ArrayList<>();
163 |             }
164 |         } catch (IOException e) {
165 |             e.printStackTrace();
166 |             return new ArrayList<>();
167 |         }
168 |     }
169 | 
170 |     /** Converts a search response into documents; missing "hits", "_source" or "_score" parts are tolerated. */
171 |     private List<Document> parseSearchResults(String responseBody) {
172 |         List<Document> documents = new ArrayList<>();
173 |         JSONObject responseJson = JSON.parseObject(responseBody);
174 |         if (responseJson == null) {
175 |             return documents;
176 |         }
177 | 
178 |         // The matches live in hits.hits.
179 |         JSONObject hits = responseJson.getJSONObject("hits");
180 |         JSONArray hitsArray = hits == null ? null : hits.getJSONArray("hits");
181 |         if (hitsArray == null) {
182 |             return documents;
183 |         }
184 | 
185 |         for (int i = 0; i < hitsArray.size(); i++) {
186 |             JSONObject hit = hitsArray.getJSONObject(i);
187 |             JSONObject source = hit.getJSONObject("_source");
188 |             if (source == null) {
189 |                 continue;
190 |             }
191 | 
192 |             // Deserialize _source straight into a Document.
193 |             Document document = JSON.toJavaObject(source, Document.class);
194 |             Float score = hit.getFloat("_score");
195 |             if (score != null) {
196 |                 document.setScore(score);
197 |             }
198 |             documents.add(document);
199 |         }
200 | 
201 |         return documents;
202 |     }
203 | 
204 |     /** Manual smoke test: inserts one chunk, then searches for the same text. */
205 |     public static void main(String[] args) {
206 |         ESClient esClient = ESClient.getInstance();
207 |         Document d = new Document();
208 |         d.setUserId("200");
209 |         d.setChunkText("哈利波特");
210 |         esClient.addChunk(d);
211 |         Document d_s = new Document();
212 |         d_s.setUserId("200");
213 |         d_s.setChunkText("哈利波特");
214 | 
215 |         List<Document> documents = esClient.searchChunk(d_s,1,10);
216 |         System.out.println(documents);
217 | 
218 |     }
219 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/db/MinIOClient.java:
--------------------------------------------------------------------------------
  1 | package org.service.db;
  2 | 
  3 | import io.minio.*;
  4 | import io.minio.errors.*;
  5 | import java.io.*;
  6 | import java.security.InvalidKeyException;
  7 | import java.security.NoSuchAlgorithmException;
  8 | 
  9 | /**
 10 |  * Process-wide wrapper around a MinIO client offering file upload and download.
 11 |  */
 12 | public class MinIOClient {
 13 |     private static MinIOClient instance;
 14 |     private MinioClient minioClient;
 15 | 
 16 |     private MinIOClient(String endpoint, String accessKey, String secretKey) {
 17 |         this.minioClient = MinioClient.builder()
 18 |                 .endpoint(endpoint)
 19 |                 .credentials(accessKey, secretKey)
 20 |                 .build();
 21 |     }
 22 | 
 23 |     /**
 24 |      * Returns the shared instance, creating it on the first call.
 25 |      * The arguments are used by that first call only; later calls return the existing instance as is.
 26 |      */
 27 |     public static synchronized MinIOClient getInstance(String endpoint, String accessKey, String secretKey) {
 28 |         if (instance == null) {
 29 |             instance = new MinIOClient(endpoint, accessKey, secretKey);
 30 |         }
 31 |         return instance;
 32 |     }
 33 | 
 34 |     /**
 35 |      * Uploads the local file at {@code filePath} as {@code objectName} in {@code bucketName}.
 36 |      *
 37 |      * @return {@code true} on success; {@code false} (after printing the message) on any failure
 38 |      */
 39 |     public boolean uploadFile(String bucketName, String objectName, String filePath) {
 40 |         try {
 41 |             File file = new File(filePath);
 42 |             // try-with-resources releases the file handle even when the upload throws.
 43 |             try (InputStream fileStream = new FileInputStream(file)) {
 44 |                 minioClient.putObject(PutObjectArgs.builder()
 45 |                         .bucket(bucketName)
 46 |                         .object(objectName)
 47 |                         .stream(fileStream, file.length(), -1)
 48 |                         .build());
 49 |             }
 50 |             return true;
 51 |         } catch (Exception e) {
 52 |             System.out.println("Error uploading file: " + e.getMessage());
 53 |             return false;
 54 |         }
 55 |     }
 56 | 
 57 |     /**
 58 |      * Downloads {@code objectName} from {@code bucketName} into the directory {@code downloadPath},
 59 |      * keeping the object name as the file name.
 60 |      *
 61 |      * @return {@code true} on success; {@code false} (after printing the message) on MinIO or I/O errors
 62 |      * @throws RuntimeException wrapping NoSuchAlgorithmException or InvalidKeyException
 63 |      */
 64 |     public boolean downloadFile(String bucketName, String objectName, String downloadPath) {
 65 |         try {
 66 |             // Target file inside the download directory.
 67 |             File outputFile = new File(downloadPath + File.separator + objectName);
 68 | 
 69 |             // Fetch the object first so a failed lookup does not leave an empty file behind;
 70 |             // both streams are closed on every exit path.
 71 |             try (InputStream inputStream = minioClient.getObject(GetObjectArgs.builder()
 72 |                          .bucket(bucketName)
 73 |                          .object(objectName)
 74 |                          .build());
 75 |                  OutputStream outputStream = new FileOutputStream(outputFile)) {
 76 |                 // Copy the object to the file.
 77 |                 byte[] buffer = new byte[8192];
 78 |                 int bytesRead;
 79 |                 while ((bytesRead = inputStream.read(buffer)) != -1) {
 80 |                     outputStream.write(buffer, 0, bytesRead);
 81 |                 }
 82 |             }
 83 | 
 84 |             return true;
 85 |         } catch (MinioException | IOException e) {
 86 |             System.out.println("Error downloading file: " + e.getMessage());
 87 |             return false;
 88 |         } catch (NoSuchAlgorithmException | InvalidKeyException e) {
 89 |             throw new RuntimeException(e);
 90 |         }
 91 |     }
 92 | 
 93 |     /** Manual smoke test: uploads one local file, then downloads it again. */
 94 |     public static void main(String[] args) {
 95 |         String minioEndpoint = "http://124.223.85.176:9000";
 96 |         String minioAccessKey = "ROOTNAME";
 97 |         String minioSecretKey = "CHANGEME123";
 98 | 
 99 |         MinIOClient minioClient = MinIOClient.getInstance(minioEndpoint, minioAccessKey, minioSecretKey);
100 | 
101 |         String file_path = "C:\\Users\\19664\\Desktop\\demo.png";
102 |         String file_out_path = "C:\\Users\\19664\\Desktop\\tmp";
103 |         String bucket_name = "documents";
104 |         String object_name = "demo.png";
105 | 
106 |         // Upload the file.
107 |         boolean uploadSuccess = minioClient.uploadFile(bucket_name, object_name, file_path);
108 |         if (uploadSuccess) {
109 |             System.out.println("File uploaded successfully.");
110 |         } else {
111 |             System.out.println("Failed to upload file.");
112 |         }
113 | 
114 |         // Download the file.
115 |         boolean downloadSuccess = minioClient.downloadFile(bucket_name, object_name, file_out_path);
116 |         if (downloadSuccess) {
117 |             System.out.println("File downloaded successfully.");
118 |         } else {
119 |             System.out.println("Failed to download file.");
120 |         }
121 |     }
122 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/db/MysqlClient.java:
--------------------------------------------------------------------------------
 1 | package org.service.db;
 2 | import java.sql.Connection;
 3 | import java.sql.DriverManager;
 4 | import java.sql.SQLException;
 5 | import java.sql.Statement;
 6 | 
 7 | public class MysqlClient {
 8 |     private Connection conn;
 9 | 
10 |     public MysqlClient(String host, String user, String password, String dbName, int port) {
11 |         this.conn = createConnection(host, user, password, dbName, port);
12 |     }
13 | 
14 |     private Connection createConnection(String host, String user, String password, String dbName, int port) {
15 |         try {
16 |             String url = "jdbc:mysql://" + host + ":" + port + "/" + dbName + "?useUnicode=true&characterEncoding=UTF-8";
17 |             return DriverManager.getConnection(url, user, password);
18 |         } catch (SQLException e) {
19 |             System.out.println("Error connecting to MySQL: " + e.getMessage());
20 |             return null;
21 |         }
22 |     }
23 | 
24 |     public void initUserTable() {
25 |         String createTableSql = "CREATE TABLE IF NOT EXISTS user (" +
26 |                 "user_id INT AUTO_INCREMENT PRIMARY KEY," +
27 |                 "username VARCHAR(255) NOT NULL," +
28 |                 "password VARCHAR(255) NOT NULL" +
29 |                 ");";
30 |         executeUpdate(createTableSql);
31 |     }
32 | 
33 |     // 其他方法...
34 | 
35 |     private void executeUpdate(String sql) {
36 |         try (Statement stmt = conn.createStatement()) {
37 |             stmt.executeUpdate(sql);
38 |         } catch (SQLException e) {
39 |             System.out.println("SQL execution error: " + e.getMessage());
40 |         }
41 |     }
42 | 
43 |     // 为了简洁起见，其他方法如 find_by_user_id, add_user 等将被省略
44 |     // 但它们的实现逻辑将与Python代码类似，使用PreparedStatement来执行SQL查询和更新
45 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/db/RedisClient.java:
--------------------------------------------------------------------------------
 1 | package org.service.db;
 2 | 
 3 | import org.constant.Config;
 4 | import redis.clients.jedis.Jedis;
 5 | import redis.clients.jedis.exceptions.JedisException;
 6 | 
 7 | import java.util.Collections;
 8 | import java.util.List;
 9 | 
10 | /** Static helper around one shared Jedis connection: list push, list range, key delete. */
11 | public class RedisClient {
12 |     private static Jedis jedis;
13 |     private static RedisClient instance;
14 |     static {
15 |         // Open the shared connection when the class is loaded; authenticate if a password is configured.
16 |         jedis = new Jedis(Config.REDIS_HOST, Config.REDIS_PORT);
17 |         if (Config.REDIS_PASSWORD != null && !Config.REDIS_PASSWORD.isEmpty()) {
18 |             jedis.auth(Config.REDIS_PASSWORD);
19 |         }
20 |     }
21 | 
22 |     /** Returns the lazily created singleton; all operations are static, so it only serves as a handle. */
23 |     public static synchronized RedisClient getInstance() {
24 |         if (instance == null) {
25 |             instance = new RedisClient();
26 |         }
27 |         return instance;
28 |     }
29 | 
30 |     /**
31 |      * Pushes {@code element} onto the head of the list stored at {@code key}.
32 |      *
33 |      * @param expireSeconds TTL applied when this push creates the key; {@code null} for no TTL
34 |      * @return the list length after the push, or -1 when Jedis reports an error
35 |      */
36 |     public static long lpush(String key, String element, Integer expireSeconds) {
37 |         try {
38 |             // Existence must be sampled before pushing: once the push has run the key always exists.
39 |             boolean createsKey = expireSeconds != null && !jedis.exists(key);
40 |             long length = jedis.lpush(key, element); // LPUSH already reports the new length
41 |             if (createsKey) {
42 |                 jedis.expire(key, expireSeconds);
43 |             }
44 |             return length;
45 |         } catch (JedisException e) {
46 |             e.printStackTrace();
47 |             return -1;
48 |         }
49 |     }
50 | 
51 |     /** Returns the list elements between {@code start} and {@code end}; an empty list on error. */
52 |     public static List<String> lrange(String key, long start, long end) {
53 |         try {
54 |             return jedis.lrange(key, start, end);
55 |         } catch (JedisException e) {
56 |             e.printStackTrace();
57 |             return Collections.emptyList();
58 |         }
59 |     }
60 | 
61 |     /** Deletes {@code key}; returns {@code true} only when a key was actually removed. */
62 |     public static boolean delete(String key) {
63 |         try {
64 |             return jedis.del(key) > 0; // DEL reports how many keys it removed
65 |         } catch (JedisException e) {
66 |             e.printStackTrace();
67 |             return false;
68 |         }
69 |     }
70 | 
71 |     /** Closes the shared Jedis connection. */
72 |     public static void close() {
73 |         if (jedis != null) {
74 |             jedis.close();
75 |         }
76 |     }
77 | 
78 |     /** Demonstrates the helper: push with a TTL, read back, delete, close. */
79 |     public static void main(String[] args) {
80 |         RedisClient redisClient = new RedisClient();
81 |         // Push one element and request a 60-second TTL.
82 |         long result = lpush("myListKey", "myElement", 60);
83 |         System.out.println("List length after lpush: " + result);
84 | 
85 |         // Read every element of the list.
86 |         List<String> elements = lrange("myListKey", 0, -1);
87 |         System.out.println("List elements: " + elements);
88 | 
89 |         // Delete the list.
90 |         boolean isDeleted = delete("myListKey");
91 |         System.out.println("Is list deleted? " + isDeleted);
92 | 
93 |         // Close the Redis connection.
94 |         close();
95 |     }
96 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/embedding/BaichuanEmbeddingService.java:
--------------------------------------------------------------------------------
  1 | package org.service.embedding;
  2 | 
  3 | import okhttp3.*;
  4 | import org.constant.Config;
  5 | import org.json.JSONArray;
  6 | import org.json.JSONObject;
  7 | 
  8 | import java.io.IOException;
  9 | 
 10 | /**
 11 |  * {@link EmbeddingService} implementation that calls the Baichuan text-embedding HTTP API.
 12 |  */
 13 | public class BaichuanEmbeddingService implements EmbeddingService{
 14 | 
 15 |     private static final String MODEL = "Baichuan-Text-Embedding";
 16 |     private static final MediaType JSON = MediaType.get("application/json; charset=utf-8");
 17 | 
 18 |     private final String apiKey;
 19 |     private final OkHttpClient client;
 20 | 
 21 |     public BaichuanEmbeddingService(String apiKey) {
 22 |         this.apiKey = apiKey;
 23 |         this.client = new OkHttpClient();
 24 |     }
 25 | 
 26 |     /**
 27 |      * Requests the embedding of a single text.
 28 |      *
 29 |      * @param url   URL of the embeddings endpoint
 30 |      * @param input text to embed
 31 |      * @return the vector of the first entry in the response's "data" array
 32 |      * @throws IOException if the call fails, is answered with an error status or has no body
 33 |      */
 34 |     @Override
 35 |     public double[] getEmbedding(String url, String input) throws IOException {
 36 |         JSONArray data = requestEmbeddings(url, input);
 37 |         return toVector(data.getJSONObject(0).getJSONArray("embedding"));
 38 |     }
 39 | 
 40 |     /**
 41 |      * Requests the embeddings of several texts in one call.
 42 |      *
 43 |      * @param url    URL of the embeddings endpoint
 44 |      * @param inputs texts to embed
 45 |      * @return one vector per entry of the response's "data" array, in response order
 46 |      * @throws IOException if the call fails, is answered with an error status or has no body
 47 |      */
 48 |     @Override
 49 |     public double[][] getEmbeddings(String url, String[] inputs) throws IOException {
 50 |         JSONArray inputArray = new JSONArray();
 51 |         for (String input : inputs) {
 52 |             inputArray.put(input);
 53 |         }
 54 | 
 55 |         JSONArray data = requestEmbeddings(url, inputArray);
 56 |         double[][] embeddings = new double[data.length()][];
 57 |         for (int i = 0; i < embeddings.length; i++) {
 58 |             embeddings[i] = toVector(data.getJSONObject(i).getJSONArray("embedding"));
 59 |         }
 60 |         return embeddings;
 61 |     }
 62 | 
 63 |     /**
 64 |      * Posts one embeddings request and returns the "data" array of the JSON response.
 65 |      *
 66 |      * @param input a single String or a JSONArray of strings. Batch requests use the same
 67 |      *              "input" key as single-text requests (not "inputs").
 68 |      */
 69 |     private JSONArray requestEmbeddings(String url, Object input) throws IOException {
 70 |         String payload = new JSONObject()
 71 |                 .put("model", MODEL)
 72 |                 .put("input", input)
 73 |                 .toString();
 74 | 
 75 |         Request request = new Request.Builder()
 76 |                 .url(url)
 77 |                 .post(RequestBody.create(payload, JSON))
 78 |                 .header("Content-Type", "application/json")
 79 |                 .header("Authorization", "Bearer " + apiKey)
 80 |                 .build();
 81 | 
 82 |         try (Response response = client.newCall(request).execute()) {
 83 |             if (!response.isSuccessful()) throw new IOException("Unexpected code " + response);
 84 | 
 85 |             ResponseBody responseBody = response.body();
 86 |             if (responseBody == null) throw new IOException("Empty response body from " + url);
 87 |             return new JSONObject(responseBody.string()).getJSONArray("data");
 88 |         }
 89 |     }
 90 | 
 91 |     /** Copies a JSON number array into a primitive double array. */
 92 |     private static double[] toVector(JSONArray values) {
 93 |         double[] vector = new double[values.length()];
 94 |         for (int i = 0; i < vector.length; i++) {
 95 |             vector[i] = values.getDouble(i);
 96 |         }
 97 |         return vector;
 98 |     }
 99 | 
100 |     /** Demo: embeds one sample sentence and prints every component of the vector. */
101 |     public static void main(String[] args) {
102 |         String apiKey = Config.API_KEY;
103 |         BaichuanEmbeddingService embeddingService = new BaichuanEmbeddingService(apiKey);
104 | 
105 |         try {
106 |             String url = "https://api.baichuan-ai.com/v1/embeddings";
107 |             String input = "百川大模型";
108 |             double[] embedding = embeddingService.getEmbedding(url, input);
109 |             for (double value : embedding) {
110 |                 System.out.println(value);
111 |             }
112 |         } catch (IOException e) {
113 |             e.printStackTrace();
114 |         }
115 |     }
116 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/embedding/EmbeddingService.java:
--------------------------------------------------------------------------------
1 | package org.service.embedding;
2 | 
3 | import java.io.IOException;
4 | 
5 | /** Contract for services that obtain embedding vectors for text from the endpoint at {@code url}. */
6 | public interface EmbeddingService {
7 |     double[] getEmbedding(String url, String input) throws IOException;       // single text
8 |     double[][] getEmbeddings(String url, String[] inputs) throws IOException; // batch of texts
9 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/embedding/JinaEmbeddingRerankService.java:
--------------------------------------------------------------------------------
  1 | package org.service.embedding;
  2 | 
  3 | import okhttp3.MediaType;
  4 | import org.constant.Config;
  5 | 
  6 | import java.io.IOException;
  7 | import okhttp3.*;
  8 | import org.json.JSONArray;
  9 | import org.json.JSONObject;
 10 | import org.service.db.ESClient;
 11 | 
 12 | /**
 13 |  * {@link EmbeddingService} implementation that calls Jina's multi-vector endpoint with the
 14 |  * jina-colbert-v2 model. The response holds an array of vectors per input; the
 15 |  * EmbeddingService methods keep only the first vector of each input.
 16 |  */
 17 | public class JinaEmbeddingRerankService implements EmbeddingService{
 18 | 
 19 |     private final String apiKey;
 20 |     private final OkHttpClient client;
 21 |     public static final JinaEmbeddingRerankService instance = new JinaEmbeddingRerankService(Config.Jina_API_KEY) ;
 22 |     public static JinaEmbeddingRerankService getInstance() {
 23 |         return instance;
 24 |     }
 25 | 
 26 |     /**
 27 |      * @param apiKey Jina API key; {@code null} falls back to {@code Config.Jina_API_KEY}
 28 |      */
 29 |     public JinaEmbeddingRerankService(String apiKey) {
 30 |         // Honour the argument: it used to be ignored in favour of the configured key.
 31 |         this.apiKey = apiKey != null ? apiKey : Config.Jina_API_KEY;
 32 |         this.client = new OkHttpClient();
 33 |     }
 34 | 
 35 |     /**
 36 |      * Posts {@code inputs} to the multi-vector endpoint.
 37 |      *
 38 |      * @param url    URL of the multi-vector endpoint
 39 |      * @param inputs texts to embed
 40 |      * @return the "data" array of the JSON response
 41 |      * @throws IOException if the call fails, is answered with an error status or has no body
 42 |      */
 43 |     public JSONArray getMultiVectorEmbeddingJSONArray(String url, String[] inputs) throws IOException {
 44 |         JSONArray inputArray = new JSONArray();
 45 |         for (String input : inputs) {
 46 |             inputArray.put(input);
 47 |         }
 48 | 
 49 |         JSONObject requestBody = new JSONObject()
 50 |                 .put("model", "jina-colbert-v2")
 51 |                 .put("dimensions", 128)
 52 |                 .put("input_type", "query")
 53 |                 .put("embedding_type", "float")
 54 |                 .put("input", inputArray);
 55 | 
 56 |         RequestBody body = RequestBody.create(
 57 |                 requestBody.toString(),
 58 |                 MediaType.get("application/json; charset=utf-8")
 59 |         );
 60 | 
 61 |         Request request = new Request.Builder()
 62 |                 .url(url)
 63 |                 .post(body)
 64 |                 .header("Content-Type", "application/json")
 65 |                 .header("Authorization", "Bearer " + apiKey)
 66 |                 .build();
 67 | 
 68 |         try (Response response = client.newCall(request).execute()) {
 69 |             if (!response.isSuccessful()) throw new IOException("Unexpected code " + response);
 70 | 
 71 |             ResponseBody responseBody = response.body();
 72 |             if (responseBody == null) throw new IOException("Empty response body from " + url);
 73 |             return new JSONObject(responseBody.string()).getJSONArray("data");
 74 |         }
 75 |     }
 76 | 
 77 |     /**
 78 |      * Returns, for every entry of the response, the first vector of its "embeddings" array.
 79 |      *
 80 |      * @param url    URL of the multi-vector endpoint
 81 |      * @param inputs texts to embed
 82 |      * @return one vector per response entry; an empty array when the response has no entries
 83 |      * @throws IOException if the underlying request fails
 84 |      */
 85 |     @Override
 86 |     public double[][] getEmbeddings(String url, String[] inputs) throws IOException {
 87 |         JSONArray data = this.getMultiVectorEmbeddingJSONArray(url, inputs);
 88 | 
 89 |         // Each row is sized from its own vector, so an empty "data" array or rows of
 90 |         // different lengths no longer fail on the dimensions of entry 0.
 91 |         double[][] embeddings = new double[data.length()][];
 92 |         for (int i = 0; i < embeddings.length; i++) {
 93 |             // "embeddings" is an array of vectors; element 0 is the one that is kept.
 94 |             JSONArray first = data.getJSONObject(i).getJSONArray("embeddings").getJSONArray(0);
 95 |             double[] vector = new double[first.length()];
 96 |             for (int j = 0; j < vector.length; j++) {
 97 |                 vector[j] = first.getDouble(j);
 98 |             }
 99 |             embeddings[i] = vector;
100 |         }
101 |         return embeddings;
102 |     }
103 | 
104 |     /**
105 |      * Single-text variant of {@link #getEmbeddings(String, String[])}.
106 |      *
107 |      * @throws IOException if the request fails or the response contains no entry
108 |      */
109 |     @Override
110 |     public double[] getEmbedding(String url, String input) throws IOException {
111 |         double[][] vectors = getEmbeddings(url, new String[]{input});
112 |         if (vectors.length == 0) {
113 |             throw new IOException("No embedding returned for input");
114 |         }
115 |         return vectors[0];
116 |     }
117 | 
118 |     /** Demo: embeds two sample strings and prints the first component of the first vector. */
119 |     public static void main(String[] args) {
120 |         String apiKey = Config.Jina_API_KEY;
121 |         JinaEmbeddingRerankService service = new JinaEmbeddingRerankService(apiKey);
122 | 
123 |         try {
124 |             String url = "https://api.jina.ai/v1/multi-vector";
125 |             String[] inputs = {
126 |                     "hello",
127 |                     "你好"
128 |             };
129 |             double[][] embeddings = service.getEmbeddings(url, inputs);
130 |             System.out.println(embeddings[0][0]);
131 |         } catch (IOException e) {
132 |             e.printStackTrace();
133 |         }
134 |     }
135 | }


--------------------------------------------------------------------------------
/src/main/java/org/service/embedding/JinaEmbeddingService.java:
--------------------------------------------------------------------------------
  1 | package org.service.embedding;
  2 | 
  3 | import okhttp3.MediaType;
  4 | import org.constant.Config;
  5 | 
  6 | import java.io.IOException;
  7 | import okhttp3.*;
  8 | import org.json.JSONArray;
  9 | import org.json.JSONObject;
 10 | /**
 11 |  * {@link EmbeddingService} implementation that calls the Jina embeddings HTTP API with the
 12 |  * jina-embeddings-v2-base-zh model.
 13 |  */
 14 | public class JinaEmbeddingService implements EmbeddingService {
 15 | 
 16 |     private static final String MODEL = "jina-embeddings-v2-base-zh";
 17 |     private static final MediaType JSON = MediaType.get("application/json; charset=utf-8");
 18 | 
 19 |     private final String apiKey;
 20 |     private final OkHttpClient client;
 21 | 
 22 |     public JinaEmbeddingService(String apiKey) {
 23 |         this.apiKey = apiKey;
 24 |         this.client = new OkHttpClient();
 25 |     }
 26 | 
 27 |     /**
 28 |      * Requests the embedding of a single text.
 29 |      *
 30 |      * @param url   URL of the embeddings endpoint
 31 |      * @param input text to embed
 32 |      * @return the vector of the first entry in the response's "data" array
 33 |      * @throws IOException if the call fails, is answered with an error status or has no body
 34 |      */
 35 |     @Override
 36 |     public double[] getEmbedding(String url, String input) throws IOException {
 37 |         JSONArray data = requestEmbeddings(url, input);
 38 |         return toVector(data.getJSONObject(0).getJSONArray("embedding"));
 39 |     }
 40 | 
 41 |     /**
 42 |      * Requests the embeddings of several texts in one call.
 43 |      *
 44 |      * @param url    URL of the embeddings endpoint
 45 |      * @param inputs texts to embed
 46 |      * @return one vector per entry of the response's "data" array, in response order
 47 |      * @throws IOException if the call fails, is answered with an error status or has no body
 48 |      */
 49 |     @Override
 50 |     public double[][] getEmbeddings(String url, String[] inputs) throws IOException {
 51 |         JSONArray inputArray = new JSONArray();
 52 |         for (String input : inputs) {
 53 |             inputArray.put(input);
 54 |         }
 55 | 
 56 |         JSONArray data = requestEmbeddings(url, inputArray);
 57 |         double[][] embeddings = new double[data.length()][];
 58 |         for (int i = 0; i < embeddings.length; i++) {
 59 |             embeddings[i] = toVector(data.getJSONObject(i).getJSONArray("embedding"));
 60 |         }
 61 |         return embeddings;
 62 |     }
 63 | 
 64 |     /**
 65 |      * Posts one embeddings request and returns the "data" array of the JSON response.
 66 |      *
 67 |      * @param input a single String or a JSONArray of strings. Batch requests use the same
 68 |      *              "input" key as single-text requests (not "inputs").
 69 |      */
 70 |     private JSONArray requestEmbeddings(String url, Object input) throws IOException {
 71 |         String payload = new JSONObject()
 72 |                 .put("model", MODEL)
 73 |                 .put("normalized", true)
 74 |                 .put("embedding_type", "float")
 75 |                 .put("input", input)
 76 |                 .toString();
 77 | 
 78 |         Request request = new Request.Builder()
 79 |                 .url(url)
 80 |                 .post(RequestBody.create(payload, JSON))
 81 |                 .header("Content-Type", "application/json")
 82 |                 .header("Authorization", "Bearer " + apiKey)
 83 |                 .build();
 84 | 
 85 |         try (Response response = client.newCall(request).execute()) {
 86 |             if (!response.isSuccessful()) throw new IOException("Unexpected code " + response);
 87 | 
 88 |             ResponseBody responseBody = response.body();
 89 |             if (responseBody == null) throw new IOException("Empty response body from " + url);
 90 |             return new JSONObject(responseBody.string()).getJSONArray("data");
 91 |         }
 92 |     }
 93 | 
 94 |     /** Copies a JSON number array into a primitive double array. */
 95 |     private static double[] toVector(JSONArray values) {
 96 |         double[] vector = new double[values.length()];
 97 |         for (int i = 0; i < vector.length; i++) {
 98 |             vector[i] = values.getDouble(i);
 99 |         }
100 |         return vector;
101 |     }
102 | 
103 |     /** Demo: embeds one sample sentence and prints every component of the vector. */
104 |     public static void main(String[] args) {
105 |         String apiKey = Config.Jina_API_KEY;
106 |         JinaEmbeddingService embeddingService = new JinaEmbeddingService(apiKey);
107 | 
108 |         try {
109 |             String url = "https://api.jina.ai/v1/embeddings";
110 |             String input = "您的查询可以是中文";
111 |             double[] embedding = embeddingService.getEmbedding(url, input);
112 |             for (double value : embedding) {
113 |                 System.out.println(value);
114 |             }
115 |         } catch (IOException e) {
116 |             e.printStackTrace();
117 |         }
118 |     }
119 | }


--------------------------------------------------------------------------------
/src/main/java/org/utils/DistanceUtils.java:
--------------------------------------------------------------------------------
 1 | package org.utils;
 2 | 
 3 | import java.util.Arrays;
 4 | import java.util.stream.IntStream;
 5 | 
 6 | /**
 7 |  * Distance and dissimilarity measures between two vectors of equal length.
 8 |  */
 9 | public class DistanceUtils {
10 | 
11 |     /**
12 |      * Sum of the squared element-wise differences.
13 |      *
14 |      * @param v1 first vector
15 |      * @param v2 second vector
16 |      * @return the squared-error distance
17 |      * @throws IllegalArgumentException if the vectors differ in length
18 |      */
19 |     public static double squaredErrorDistance(double[] v1, double[] v2) {
20 |         requireSameLength(v1, v2);
21 |         return IntStream.range(0, v1.length)
22 |                 .mapToDouble(i -> {
23 |                     double diff = v1[i] - v2[i];
24 |                     return diff * diff; // plain multiply; no need for Math.pow(diff, 2)
25 |                 })
26 |                 .sum();
27 |     }
28 | 
29 |     /**
30 |      * Sum of the absolute element-wise differences.
31 |      *
32 |      * @param v1 first vector
33 |      * @param v2 second vector
34 |      * @return the absolute-error distance
35 |      * @throws IllegalArgumentException if the vectors differ in length
36 |      */
37 |     public static double absoluteErrorDistance(double[] v1, double[] v2) {
38 |         requireSameLength(v1, v2);
39 |         return IntStream.range(0, v1.length)
40 |                 .mapToDouble(i -> Math.abs(v1[i] - v2[i]))
41 |                 .sum();
42 |     }
43 | 
44 |     /**
45 |      * One minus the cosine similarity of the two vectors.
46 |      *
47 |      * @param v1 first vector
48 |      * @param v2 second vector
49 |      * @return the cosine error; NaN when either vector has zero magnitude (0/0)
50 |      * @throws IllegalArgumentException if the vectors differ in length
51 |      */
52 |     public static double cosineError(double[] v1, double[] v2) {
53 |         requireSameLength(v1, v2);
54 |         double dotProduct = IntStream.range(0, v1.length)
55 |                 .mapToDouble(i -> v1[i] * v2[i])
56 |                 .sum();
57 |         double cosineSimilarity = dotProduct / (magnitude(v1) * magnitude(v2));
58 |         return 1 - cosineSimilarity;
59 |     }
60 | 
61 |     /** Euclidean norm of {@code v}. */
62 |     private static double magnitude(double[] v) {
63 |         return Math.sqrt(IntStream.range(0, v.length)
64 |                 .mapToDouble(i -> v[i] * v[i])
65 |                 .sum());
66 |     }
67 | 
68 |     /**
69 |      * Rejects vectors of different lengths. Without this check a longer v2 was silently
70 |      * truncated and a longer v1 failed with an ArrayIndexOutOfBoundsException.
71 |      */
72 |     private static void requireSameLength(double[] v1, double[] v2) {
73 |         if (v1.length != v2.length) {
74 |             throw new IllegalArgumentException(
75 |                     "Vector lengths differ: " + v1.length + " vs " + v2.length);
76 |         }
77 |     }
78 | }


--------------------------------------------------------------------------------
/src/main/java/org/utils/HttpClientUtil.java:
--------------------------------------------------------------------------------
 1 | package org.utils;
 2 | 
 3 | import okhttp3.Credentials;
 4 | import okhttp3.OkHttpClient;
 5 | import okhttp3.OkHttpClient.Builder;
 6 | 
 7 | import javax.net.ssl.HostnameVerifier;
 8 | import javax.net.ssl.SSLContext;
 9 | import javax.net.ssl.SSLSession;
10 | import javax.net.ssl.TrustManager;
11 | import javax.net.ssl.X509TrustManager;
12 | import java.security.KeyManagementException;
13 | import java.security.NoSuchAlgorithmException;
14 | import java.security.SecureRandom;
15 | import java.security.cert.X509Certificate;
16 | 
17 | /**
18 |  * Factory for OkHttp clients that skip TLS certificate and hostname verification.
19 |  * SECURITY: such a client accepts any server certificate, so connections can be intercepted.
20 |  * NOTE(review): presumably intended for self-signed internal endpoints only - confirm.
21 |  */
22 | public class HttpClientUtil {
23 | 
24 |     /**
25 |      * Builds a client with certificate checks disabled and optional HTTP basic authentication.
26 |      *
27 |      * @param username basic-auth user name; no authenticator is set if this or password is null
28 |      * @param password basic-auth password
29 |      * @return the configured client
30 |      */
31 |     public static OkHttpClient createHttpClient(String username, String password) {
32 |         Builder builder = new OkHttpClient.Builder();
33 | 
34 |         if (username != null && password != null) {
35 |             // The header value never changes, so build it once rather than on every challenge.
36 |             final String credential = Credentials.basic(username, password);
37 |             builder.authenticator((route, response) -> {
38 |                 // These exact credentials were already sent and rejected: returning null stops
39 |                 // the retries instead of resending the same header over and over.
40 |                 if (credential.equals(response.request().header("Authorization"))) {
41 |                     return null;
42 |                 }
43 |                 return response.request().newBuilder()
44 |                         .header("Authorization", credential)
45 |                         .build();
46 |             });
47 |         }
48 | 
49 |         return disableSslVerification(builder).build();
50 |     }
51 | 
52 |     /**
53 |      * Installs a trust-everything trust manager and hostname verifier on {@code clientBuilder}.
54 |      *
55 |      * @return the same builder, for chaining
56 |      * @throws RuntimeException if the TLS context cannot be created or initialised
57 |      */
58 |     private static Builder disableSslVerification(Builder clientBuilder) {
59 |         // Accepts every certificate chain without inspecting it.
60 |         X509TrustManager trustEverything = new X509TrustManager() {
61 |             @Override
62 |             public void checkClientTrusted(X509Certificate[] chain, String authType) {
63 |             }
64 | 
65 |             @Override
66 |             public void checkServerTrusted(X509Certificate[] chain, String authType) {
67 |             }
68 | 
69 |             @Override
70 |             public X509Certificate[] getAcceptedIssuers() {
71 |                 return new X509Certificate[0];
72 |             }
73 |         };
74 | 
75 |         try {
76 |             // "TLS" replaces the legacy "SSL" protocol name.
77 |             SSLContext sslContext = SSLContext.getInstance("TLS");
78 |             sslContext.init(null, new TrustManager[]{trustEverything}, new SecureRandom());
79 | 
80 |             clientBuilder.sslSocketFactory(sslContext.getSocketFactory(), trustEverything);
81 |             clientBuilder.hostnameVerifier((hostname, session) -> true);
82 |             return clientBuilder;
83 |         } catch (NoSuchAlgorithmException | KeyManagementException e) {
84 |             throw new RuntimeException(e);
85 |         }
86 |     }
87 | }


--------------------------------------------------------------------------------
/src/main/java/org/utils/SnowflakeIdGenerator.java:
--------------------------------------------------------------------------------
  1 | package org.utils;
  2 | 
  3 | /**
  4 |  * Snowflake-style 64-bit ID generator. From high to low bits an ID holds the milliseconds
  5 |  * elapsed since START_TIMESTAMP, the datacenter id, the machine id and a sequence number.
  6 |  */
  7 | public class SnowflakeIdGenerator {
  8 | 
  9 |     // Custom epoch subtracted from the current time before it is packed into an ID
 10 |     private final static long START_TIMESTAMP = 1288834974657L;
 11 | 
 12 |     // Bits reserved for each part
 13 |     private final static long SEQUENCE_BIT = 12; // sequence number
 14 |     private final static long MACHINE_BIT = 5;   // machine id
 15 |     private final static long DATACENTER_BIT = 5;// datacenter id
 16 | 
 17 |     // Largest value that fits in each part
 18 |     private final static long MAX_DATACENTER_NUM = -1L ^ (-1L << DATACENTER_BIT);
 19 |     private final static long MAX_MACHINE_NUM = -1L ^ (-1L << MACHINE_BIT);
 20 |     private final static long MAX_SEQUENCE = -1L ^ (-1L << SEQUENCE_BIT);
 21 | 
 22 |     // Left shift applied to each part
 23 |     private final static long MACHINE_LEFT = SEQUENCE_BIT;
 24 |     private final static long DATACENTER_LEFT = SEQUENCE_BIT + MACHINE_BIT;
 25 |     private final static long TIMESTAMP_LEFT = DATACENTER_LEFT + DATACENTER_BIT;
 26 | 
 27 |     // Generator shared by generateUniqueID(). It must outlive a single call so that the
 28 |     // sequence counter can tell apart IDs requested within the same millisecond.
 29 |     private static final SnowflakeIdGenerator SHARED = new SnowflakeIdGenerator(1, 1);
 30 | 
 31 |     private final long datacenterId;  // datacenter id
 32 |     private final long machineId;     // machine id
 33 |     private long sequence = 0L;       // sequence within the current millisecond
 34 |     private long lastTimestamp = -1L; // timestamp of the most recently issued ID
 35 | 
 36 |     /**
 37 |      * @param datacenterId datacenter id, 0..MAX_DATACENTER_NUM
 38 |      * @param machineId    machine id, 0..MAX_MACHINE_NUM
 39 |      * @throws IllegalArgumentException if either id is out of range
 40 |      */
 41 |     public SnowflakeIdGenerator(long datacenterId, long machineId) {
 42 |         if (datacenterId > MAX_DATACENTER_NUM || datacenterId < 0) {
 43 |             throw new IllegalArgumentException("datacenterId can't be greater than MAX_DATACENTER_NUM or less than 0");
 44 |         }
 45 |         if (machineId > MAX_MACHINE_NUM || machineId < 0) {
 46 |             throw new IllegalArgumentException("machineId can't be greater than MAX_MACHINE_NUM or less than 0");
 47 |         }
 48 |         this.datacenterId = datacenterId;
 49 |         this.machineId = machineId;
 50 |     }
 51 | 
 52 |     /**
 53 |      * Generates the next ID of this generator.
 54 |      *
 55 |      * @return the next ID
 56 |      * @throws RuntimeException if the system clock is behind the last issued timestamp
 57 |      */
 58 |     public synchronized long nextId() {
 59 |         long timestamp = timeGen();
 60 | 
 61 |         // The clock went backwards: refuse to issue an ID rather than risk a duplicate.
 62 |         if (timestamp < lastTimestamp) {
 63 |             throw new RuntimeException(
 64 |                     String.format("Clock moved backwards. Refusing to generate id for %d milliseconds", lastTimestamp - timestamp));
 65 |         }
 66 | 
 67 |         if (lastTimestamp == timestamp) {
 68 |             // Same millisecond: advance the sequence, wrapping at MAX_SEQUENCE.
 69 |             sequence = (sequence + 1) & MAX_SEQUENCE;
 70 |             if (sequence == 0) {
 71 |                 // Sequence exhausted for this millisecond: spin until the next one.
 72 |                 timestamp = tilNextMillis(lastTimestamp);
 73 |             }
 74 |         } else {
 75 |             // New millisecond: restart the sequence.
 76 |             sequence = 0L;
 77 |         }
 78 | 
 79 |         lastTimestamp = timestamp;
 80 | 
 81 |         // Pack the four parts into one long.
 82 |         return ((timestamp - START_TIMESTAMP) << TIMESTAMP_LEFT)
 83 |                 | (datacenterId << DATACENTER_LEFT)
 84 |                 | (machineId << MACHINE_LEFT)
 85 |                 | sequence;
 86 |     }
 87 | 
 88 |     /**
 89 |      * Busy-waits until the clock has moved past {@code lastTimestamp}.
 90 |      *
 91 |      * @param lastTimestamp the timestamp to get past
 92 |      * @return the first observed timestamp greater than {@code lastTimestamp}
 93 |      */
 94 |     private long tilNextMillis(long lastTimestamp) {
 95 |         long timestamp = timeGen();
 96 |         while (timestamp <= lastTimestamp) {
 97 |             timestamp = timeGen();
 98 |         }
 99 |         return timestamp;
100 |     }
101 | 
102 |     /**
103 |      * @return the current time in milliseconds
104 |      */
105 |     private long timeGen() {
106 |         return System.currentTimeMillis();
107 |     }
108 | 
109 |     /**
110 |      * Generates a unique ID and returns it as a decimal string.
111 |      * Uses one shared generator (datacenter 1, machine 1); creating a new generator per call
112 |      * reset the sequence to 0 each time, so calls within the same millisecond collided.
113 |      *
114 |      * @return the ID as a string
115 |      */
116 |     public static String generateUniqueID() {
117 |         return String.valueOf(SHARED.nextId());
118 |     }
119 | }


--------------------------------------------------------------------------------
/src/main/java/org/utils/TrustAllCerts.java:
--------------------------------------------------------------------------------
 1 | package org.utils;
 2 | 
 3 | import javax.net.ssl.*;
 4 | import java.security.KeyManagementException;
 5 | import java.security.NoSuchAlgorithmException;
 6 | import java.security.cert.X509Certificate;
 7 | 
 8 | /**
 9 |  * Trust manager that accepts every certificate chain, plus a helper that installs it (and an
10 |  * accept-all hostname verifier) as the defaults of HttpsURLConnection.
11 |  * SECURITY: this turns off TLS certificate and hostname validation.
12 |  */
13 | public class TrustAllCerts implements X509TrustManager {
14 | 
15 |     /** Makes HttpsURLConnection default to accepting any certificate and any hostname. */
16 |     public static void disableSSLCertificateChecking() {
17 |         try {
18 |             SSLContext permissiveContext = SSLContext.getInstance("TLS");
19 |             permissiveContext.init(null, new TrustManager[]{new TrustAllCerts()},
20 |                     new java.security.SecureRandom());
21 |             HttpsURLConnection.setDefaultSSLSocketFactory(permissiveContext.getSocketFactory());
22 |             HttpsURLConnection.setDefaultHostnameVerifier((hostname, session) -> true);
23 |         } catch (NoSuchAlgorithmException | KeyManagementException e) {
24 |             // Setup failed before any default was replaced; only the trace is printed.
25 |             e.printStackTrace();
26 |         }
27 |     }
28 | 
29 |     /** No-op: every client certificate chain is accepted. */
30 |     @Override
31 |     public void checkClientTrusted(X509Certificate[] chain, String authType) {
32 |     }
33 | 
34 |     /** No-op: every server certificate chain is accepted. */
35 |     @Override
36 |     public void checkServerTrusted(X509Certificate[] chain, String authType) {
37 |     }
38 | 
39 |     /** Returns an empty issuer list. */
40 |     @Override
41 |     public X509Certificate[] getAcceptedIssuers() {
42 |         return new X509Certificate[0];
43 |     }
44 | }
45 | 


--------------------------------------------------------------------------------
/src/main/java/org/web/KeywordToMarkdownCrawler.java:
--------------------------------------------------------------------------------
 1 | package org.web;
 2 | 
 3 | import com.google.gson.JsonArray;
 4 | import com.google.gson.JsonObject;
 5 | 
 6 | import java.io.IOException;
 7 | import java.util.ArrayList;
 8 | import java.util.HashMap;
 9 | import java.util.List;
10 | import java.util.Map;
11 | import java.util.concurrent.*;
12 | 
13 | public class KeywordToMarkdownCrawler {
14 | 
15 |     private static final int TIMEOUT_SECONDS = 1; // 设置超时时间
16 | 
17 |     public static List<String> getMarkdownsForKeyword(String keyword) {
18 |         List<String> markdowns = new ArrayList<>();
19 | 
20 |         // 构建请求参数
21 |         Map<String, String> parameter = new HashMap<>();
22 |         parameter.put("engine", "bing");
23 |         parameter.put("q", keyword);
24 |         parameter.put("api_key", "1af00627e582c9238b8c947d2300dd13331a9817523811a83dc16245ed98d444"); // 替换为你的 API 密钥
25 |         parameter.put("count","50");
26 |         // 获取搜索结果
27 |         JsonObject results = SearchEngine.getResult(parameter);
28 |         if (results != null) {
29 |             // 解析搜索结果中的 URL
30 |             JsonArray organicResults = results.getAsJsonArray("organic_results");
31 |             if (organicResults != null) {
32 |                 ExecutorService executor = Executors.newFixedThreadPool(100); // 创建固定线程池
33 |                 List<Future<String>> futures = new ArrayList<>();
34 | 
35 |                 for (int i = 0; i < organicResults.size(); i++) {
36 |                     JsonObject result = organicResults.get(i).getAsJsonObject();
37 |                     String link = result.get("link").getAsString();
38 | 
39 |                     // 创建 FutureTask
40 |                     Callable<String> callable = () -> {
41 |                         try {
42 |                             return UrlToMarkdownConverter.convertUrlToMarkdown(link);
43 |                         } catch (IOException e) {
44 |                             throw new RuntimeException("获取 Markdown 内容时出现错误: " + e.getMessage(), e);
45 |                         }
46 |                     };
47 | 
48 |                     Future<String> future = executor.submit(callable);
49 |                     futures.add(future);
50 |                 }
51 | 
52 |                 for (Future<String> future : futures) {
53 |                     try {
54 |                         String markdown = future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
55 |                         markdowns.add(markdown);
56 |                     } catch (TimeoutException e) {
57 |                         future.cancel(true); // 取消超时的任务
58 |                         System.out.println("任务超时并已取消: " + e.getMessage());
59 |                     } catch (InterruptedException | ExecutionException e) {
60 |                         System.out.println("任务执行过程中出现错误: " + e.getMessage());
61 |                     }
62 |                 }
63 | 
64 |                 executor.shutdown(); // 关闭线程池
65 |             }
66 |         }
67 | 
68 |         return markdowns;
69 |     }
70 | 
71 |     /**
72 |      * Demo entry point: fetches Markdown for a sample keyword, then prints each page, the page count and the elapsed whole seconds.
73 |      */
74 |     public static void main(String[] args) {
75 |         final long begunAt = System.currentTimeMillis();
76 |         final List<String> pages = getMarkdownsForKeyword("哪吒二");
77 | 
78 |         // Print every Markdown document that was fetched.
79 |         pages.forEach(System.out::println);
80 |         // The clock stops after printing, so output time is included; integer division truncates to seconds.
81 |         final long elapsedSeconds = (System.currentTimeMillis() - begunAt) / 1000;
82 |         System.out.println(pages.size());
83 |         System.out.println("程序运行时间为：" + elapsedSeconds + " 秒");
84 |     }
85 | }


--------------------------------------------------------------------------------
/src/main/java/org/web/SearchEngine.java:
--------------------------------------------------------------------------------
  1 | package org.web;
  2 | 
  3 | import com.google.gson.JsonArray;
  4 | import com.google.gson.JsonObject;
  5 | import com.google.gson.JsonParser;
  6 | import okhttp3.*;
  7 | 
  8 | import java.io.IOException;
  9 | import java.util.HashMap;
 10 | import java.util.Map;
 11 | 
 12 | /**
 13 |  * Minimal SerpApi client: runs a search request and renders the "organic_results"
 14 |  * array of the response as console text or as an HTML table.
 15 |  */
 16 | public class SearchEngine {
 17 | 
 18 |     /** SerpApi JSON search endpoint that every query is sent to. */
 19 |     private static final String SEARCH_ENDPOINT = "https://serpapi.com/search.json";
 20 | 
 21 |     /** Environment variable the demo main() reads the SerpApi key from. */
 22 |     private static final String API_KEY_ENV = "SERPAPI_API_KEY";
 23 | 
 24 |     /** One shared client, so connection and thread pools are reused instead of rebuilt per call. */
 25 |     private static final OkHttpClient HTTP_CLIENT = new OkHttpClient();
 26 | 
 27 |     /**
 28 |      * Sends a GET request to SerpApi and returns the parsed JSON response.
 29 |      *
 30 |      * @param parameter query parameters (e.g. engine, q, api_key). Names and values are
 31 |      *                  percent-encoded here, so pass them raw. May be null or empty.
 32 |      * @return the response as a JSON object, or null when the HTTP status is not successful,
 33 |      *         the request fails with an I/O error, or the body is not a JSON object
 34 |      */
 35 |     public static JsonObject getResult(Map<String, String> parameter) {
 36 |         // HttpUrl.Builder encodes every name and value, so spaces, '&', '=' or '#' in a
 37 |         // query can no longer corrupt the URL the way raw string concatenation did.
 38 |         HttpUrl.Builder urlBuilder = HttpUrl.parse(SEARCH_ENDPOINT).newBuilder();
 39 |         if (parameter != null) {
 40 |             for (Map.Entry<String, String> entry : parameter.entrySet()) {
 41 |                 if (entry.getKey() != null) {
 42 |                     urlBuilder.addQueryParameter(entry.getKey(), entry.getValue());
 43 |                 }
 44 |             }
 45 |         }
 46 |         Request request = new Request.Builder()
 47 |                 .url(urlBuilder.build())
 48 |                 .build();
 49 | 
 50 |         try (Response response = HTTP_CLIENT.newCall(request).execute()) {
 51 |             if (response.isSuccessful()) {
 52 |                 String responseBody = response.body().string();
 53 |                 return JsonParser.parseString(responseBody).getAsJsonObject();
 54 |             } else {
 55 |                 System.out.println("请求失败，状态码: " + response.code());
 56 |             }
 57 |         } catch (IOException e) {
 58 |             System.out.println("请求过程中出现异常: " + e.getMessage());
 59 |         } catch (com.google.gson.JsonParseException | IllegalStateException e) {
 60 |             // Malformed JSON or a non-object payload: report it like any other failed request
 61 |             // instead of letting an unchecked exception escape to the caller.
 62 |             System.out.println("请求过程中出现异常: " + e.getMessage());
 63 |         }
 64 |         return null;
 65 |     }
 66 | 
 67 |     /**
 68 |      * Prints title, link and snippet of every "organic_results" entry to standard output.
 69 |      * Does nothing when results is null or contains no "organic_results" array.
 70 |      *
 71 |      * @param results JSON result object, may be null
 72 |      */
 73 |     public static void parseResults(JsonObject results) {
 74 |         if (results == null) {
 75 |             return;
 76 |         }
 77 |         JsonArray organicResults = results.getAsJsonArray("organic_results");
 78 |         if (organicResults == null) {
 79 |             return;
 80 |         }
 81 |         for (int i = 0; i < organicResults.size(); i++) {
 82 |             JsonObject result = organicResults.get(i).getAsJsonObject();
 83 |             System.out.println("第 " + (i + 1) + " 条结果:");
 84 |             System.out.println("标题: " + stringField(result, "title"));
 85 |             System.out.println("链接: " + stringField(result, "link"));
 86 |             System.out.println("摘要: " + stringField(result, "snippet"));
 87 |             System.out.println("-----------------");
 88 |         }
 89 |     }
 90 | 
 91 |     /**
 92 |      * Renders the "organic_results" entries as an HTML table. All text taken from the
 93 |      * response is HTML-escaped before it is placed into the markup.
 94 |      *
 95 |      * @param results JSON result object, may be null
 96 |      * @return HTML table string; only the header row when there are no results
 97 |      */
 98 |     public static String parseResultsHtml(JsonObject results) {
 99 |         StringBuilder html = new StringBuilder();
100 |         html.append("<table border='1'>");
101 |         html.append("<tr><th>序号</th><th>标题</th><th>链接</th><th>摘要</th></tr>");
102 | 
103 |         JsonArray organicResults = results == null ? null : results.getAsJsonArray("organic_results");
104 |         if (organicResults != null) {
105 |             for (int i = 0; i < organicResults.size(); i++) {
106 |                 JsonObject result = organicResults.get(i).getAsJsonObject();
107 |                 // Titles, links and snippets come from arbitrary web pages, so escape them;
108 |                 // the link sits inside a single-quoted attribute, hence quotes are escaped too.
109 |                 String title = escapeHtml(stringField(result, "title"));
110 |                 String link = escapeHtml(stringField(result, "link"));
111 |                 String snippet = escapeHtml(stringField(result, "snippet"));
112 |                 html.append("<tr>");
113 |                 html.append("<td>").append(i + 1).append("</td>");
114 |                 html.append("<td>").append(title).append("</td>");
115 |                 html.append("<td><a href='").append(link).append("'>").append(link).append("</a></td>");
116 |                 html.append("<td>").append(snippet).append("</td>");
117 |                 html.append("</tr>");
118 |             }
119 |         }
120 |         html.append("</table>");
121 |         return html.toString();
122 |     }
123 | 
124 |     /** Returns the named member as a string, or "" when it is absent or JSON null. */
125 |     private static String stringField(JsonObject object, String name) {
126 |         if (object.has(name) && !object.get(name).isJsonNull()) {
127 |             return object.get(name).getAsString();
128 |         }
129 |         return "";
130 |     }
131 | 
132 |     /** Escapes the five characters that are significant in HTML text and attribute values. */
133 |     private static String escapeHtml(String text) {
134 |         StringBuilder escaped = new StringBuilder(text.length() + 16);
135 |         for (int i = 0; i < text.length(); i++) {
136 |             char c = text.charAt(i);
137 |             switch (c) {
138 |                 case '&': escaped.append("&amp;"); break;
139 |                 case '<': escaped.append("&lt;"); break;
140 |                 case '>': escaped.append("&gt;"); break;
141 |                 case '"': escaped.append("&quot;"); break;
142 |                 case '\'': escaped.append("&#39;"); break;
143 |                 default: escaped.append(c);
144 |             }
145 |         }
146 |         return escaped.toString();
147 |     }
148 | 
149 |     /**
150 |      * Demo: runs a Bing search through SerpApi and prints the organic results.
151 |      * The API key is read from the SERPAPI_API_KEY environment variable so that no
152 |      * credential is stored in source control.
153 |      */
154 |     public static void main(String[] args) {
155 |         String apiKey = System.getenv(API_KEY_ENV);
156 |         if (apiKey == null || apiKey.isEmpty()) {
157 |             System.out.println("未设置环境变量 " + API_KEY_ENV + "，无法发起搜索请求");
158 |             return;
159 |         }
160 | 
161 |         Map<String, String> parameter = new HashMap<>();
162 |         parameter.put("engine", "bing");
163 |         parameter.put("q", "哪吒二");
164 |         parameter.put("api_key", apiKey);
165 | 
166 |         JsonObject results = getResult(parameter);
167 |         if (results != null) {
168 |             parseResults(results);
169 |             // To print the HTML table instead: System.out.println(parseResultsHtml(results));
170 |         }
171 |     }
172 | }


--------------------------------------------------------------------------------
/src/main/java/org/web/UrlToMarkdownConverter.java:
--------------------------------------------------------------------------------
 1 | package org.web;
 2 | 
 3 | import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
 4 | import com.vladsch.flexmark.util.data.MutableDataSet;
 5 | import org.jsoup.Jsoup;
 6 | import org.jsoup.nodes.Document;
 7 | 
 8 | import java.io.IOException;
 9 | 
10 | /**
11 |  * Utility that downloads a web page with jsoup and converts its HTML to Markdown with flexmark.
12 |  */
13 | public class UrlToMarkdownConverter {
14 | 
15 |     /** Browser-like User-Agent header sent with every page request. */
16 |     private static final String USER_AGENT =
17 |             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3";
18 | 
19 |     /**
20 |      * Downloads the page at the given URL and returns its HTML.
21 |      *
22 |      * @param url address of the page
23 |      * @return the HTML of the document that jsoup fetched and parsed
24 |      * @throws IOException if the request fails
25 |      */
26 |     public static String getHtmlFromUrl(String url) throws IOException {
27 |         return Jsoup.connect(url).userAgent(USER_AGENT).get().html();
28 |     }
29 | 
30 |     /**
31 |      * Converts an HTML string to Markdown using a flexmark converter built from an empty option set.
32 |      *
33 |      * @param html HTML text
34 |      * @return the Markdown text
35 |      */
36 |     public static String convertHtmlToMarkdown(String html) {
37 |         return FlexmarkHtmlConverter.builder(new MutableDataSet()).build().convert(html);
38 |     }
39 | 
40 |     /**
41 |      * Fetches a page and converts it to Markdown in one step.
42 |      *
43 |      * @param url address of the page
44 |      * @return the page content as Markdown
45 |      * @throws IOException if the request fails
46 |      */
47 |     public static String convertUrlToMarkdown(String url) throws IOException {
48 |         return convertHtmlToMarkdown(getHtmlFromUrl(url));
49 |     }
50 | 
51 |     /** Demo: converts one sample page, prints the Markdown (or the error) and the elapsed whole seconds. */
52 |     public static void main(String[] args) {
53 |         final long begunAt = System.currentTimeMillis();
54 |         try {
55 |             System.out.println(convertUrlToMarkdown("https://zh.wikipedia.org/wiki/哪吒之魔童闹海"));
56 |         } catch (IOException e) {
57 |             System.err.println("获取网页内容时出现错误: " + e.getMessage());
58 |         }
59 |         final long elapsedSeconds = (System.currentTimeMillis() - begunAt) / 1000;
60 |         System.out.println("程序运行时间为：" + elapsedSeconds + " 秒");
61 |     }
62 | 
63 | }


--------------------------------------------------------------------------------
/src/test/java/org/chat/NaiveRAGTest.java:
--------------------------------------------------------------------------------
 1 | //package org.chat;
 2 | //import org.entity.Document;
 3 | //import org.junit.Test;
 4 | //import org.rag.NaiveRAG;
 5 | //
 6 | //public class NaiveRAGTest {
 7 | //
 8 | //    @Test
 9 | //    public void demoNaiveRAG() {
10 | //        NaiveRAG naiveRAG = new NaiveRAG(
11 | //                new Document("./202X企业规划.pdf"),
12 | //                "简要总结这篇文章");
13 | //        try {
14 | //            naiveRAG
15 | //                    // 解析
16 | //                    .parsering()
17 | //                    // 分块
18 | //                    .chunking()
19 | //                    // 向量化
20 | //                    .embedding()
21 | //                    // 排序
22 | //                    .sorting()
23 | //                    // 大模型回复
24 | //                    .LLMChat();
25 | //        } catch (Exception e) {
26 | //            e.printStackTrace();
27 | //            assert false : "error stack trace";
28 | //        }
29 | //        System.out.println(naiveRAG.getResponse());
30 | //    }
31 | //}


--------------------------------------------------------------------------------
/src/test/java/org/db/ESClientTest.java:
--------------------------------------------------------------------------------
 1 | //package org.db;
 2 | //
 3 | //
 4 | //
 5 | //import org.apache.http.auth.AuthScope;
 6 | //import org.apache.http.auth.UsernamePasswordCredentials;
 7 | //import org.apache.http.client.CredentialsProvider;
 8 | //import org.apache.http.client.methods.CloseableHttpResponse;
 9 | //import org.apache.http.client.methods.HttpGet;
10 | //import org.apache.http.impl.client.BasicCredentialsProvider;
11 | //import org.apache.http.impl.client.CloseableHttpClient;
12 | //import org.apache.http.impl.client.HttpClients;
13 | //import org.utils.TrustAllCerts;
14 | //
15 | //import java.io.IOException;
16 | //
17 | //public class ESClientTest {
18 | //
19 | //    private final CloseableHttpClient httpClient;
20 | //    private final String esUrl;
21 | //
22 | //    /**
23 | //     * 构造函数，用于初始化Elasticsearch客户端和基础URL
24 | //     * @param hostname ES服务器的主机名或IP地址
25 | //     * @param username 用户名
26 | //     * @param password 密码
27 | //     */
28 | //    public ESClientTest(String hostname, String username, String password) {
29 | //        TrustAllCerts.disableSSLCertificateChecking(); // 禁用SSL证书检查
30 | //        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
31 | //        credentialsProvider.setCredentials(AuthScope.ANY,
32 | //                new UsernamePasswordCredentials(username, password));
33 | //
34 | //        this.httpClient = HttpClients.custom()
35 | //                .setDefaultCredentialsProvider(credentialsProvider)
36 | //                .build();
37 | //
38 | //        this.esUrl = "https://" + hostname + ":9200";
39 | //    }
40 | //
41 | //    /**
42 | //     * 测试与Elasticsearch的连接
43 | //     * @return 连接是否成功的布尔值
44 | //     */
45 | //    public boolean testConnection() {
46 | //        try {
47 | //            // 发送GET请求到Elasticsearch的健康检查API
48 | //            HttpGet request = new HttpGet(esUrl + "/_cat/health");
49 | //            try (CloseableHttpResponse response = httpClient.execute(request)) {
50 | //                // 检查响应状态码是否为200，表示连接成功
51 | //                int statusCode = response.getStatusLine().getStatusCode();
52 | //                if (statusCode == 200) {
53 | //                    System.out.println("连接到Elasticsearch成功！");
54 | //                    return true;
55 | //                } else {
56 | //                    System.out.println("连接到Elasticsearch失败，状态码：" + statusCode);
57 | //                    return false;
58 | //                }
59 | //            }
60 | //        } catch (IOException e) {
61 | //            e.printStackTrace();
62 | //            return false;
63 | //        }
64 | //    }
65 | //
66 | //    public static void main(String[] args) {
67 | //        try {
68 | //            // 创建ES客户端测试实例
69 | //            ESClientTest esClientTest = new ESClientTest("124.223.85.176", "elastic", "8hbdbMHjAsx9bfDJFh9U");
70 | //
71 | //            // 执行连接测试
72 | //            boolean isConnected = esClientTest.testConnection();
73 | //
74 | //            // 根据需要可以进行其他操作...
75 | //        } catch (Exception e) {
76 | //            e.printStackTrace();
77 | //        }
78 | //    }
79 | //}
80 | 


--------------------------------------------------------------------------------
/src/test/java/org/db/ElasticsearchConnectionTest.java:
--------------------------------------------------------------------------------
 1 | //package org.db;
 2 | //
 3 | //import org.apache.http.auth.AuthScope;
 4 | //import org.apache.http.auth.UsernamePasswordCredentials;
 5 | //import org.elasticsearch.client.RestHighLevelClient;
 6 | //import org.elasticsearch.client.RequestOptions;
 7 | //import org.elasticsearch.client.RestClients;
 8 | //
 9 | //import java.io.IOException;
10 | //
11 | //public class ElasticsearchConnectionTest {
12 | //
13 | //    public static void main(String[] args) {
14 | //        // 创建 RestHighLevelClient 实例
15 | //        RestHighLevelClient client = new RestHighLevelClient(
16 | //                RestClients.create("http://124.223.85.176/")
17 | //                        .setHttpClientConfigCallback(httpClientBuilder ->
18 | //                                httpClientBuilder.setDefaultCredentialsProvider(
19 | //                                        new DefaultCredentialsProvider()
20 | //                                                .add(new AuthScope("124.223.85.176", 443), new UsernamePasswordCredentials("elastic", "8hbdbMHjAsx9bfDJFh9U")))
21 | //                        )
22 | //        );
23 | //
24 | //        try {
25 | //            // 测试连接
26 | //            boolean isConnected = testElasticsearchConnection(client);
27 | //            if (isConnected) {
28 | //                System.out.println("Successfully connected to Elasticsearch.");
29 | //            } else {
30 | //                System.out.println("Failed to connect to Elasticsearch.");
31 | //            }
32 | //        } catch (Exception e) {
33 | //            System.out.println("An error occurred while testing the connection: " + e.getMessage());
34 | //        } finally {
35 | //            // 关闭客户端
36 | //            try {
37 | //                client.close();
38 | //            } catch (IOException e) {
39 | //                System.out.println("Error closing Elasticsearch client: " + e.getMessage());
40 | //            }
41 | //        }
42 | //    }
43 | //
44 | //    private static boolean testElasticsearchConnection(RestHighLevelClient client) throws IOException {
45 | //        try {
46 | //            // 执行一个简单的请求来测试连接
47 | //            client.info(RequestOptions.DEFAULT);
48 | //            return true;
49 | //        } catch (Exception e) {
50 | //            // 如果发生异常，则认为连接失败
51 | //            return false;
52 | //        }
53 | //    }
54 | //}


--------------------------------------------------------------------------------
/src/test/java/org/db/OpenAIChatServiceTest.java:
--------------------------------------------------------------------------------
 1 | //package org.db;
 2 | //
 3 | //import org.json.JSONObject;
 4 | //import org.junit.Before;
 5 | //import org.junit.Test;
 6 | //import org.service.LLM.OpenAIChatService;
 7 | //
 8 | //import java.io.IOException;
 9 | //
10 | //import static org.junit.Assert.*;
11 | //
12 | //public class OpenAIChatServiceTest {
13 | //
14 | //    private OpenAIChatService openAIChatService;
15 | //    private String apiKey = "your_api_key_here"; // 使用有效的API密钥
16 | //
17 | //    @Before
18 | //    public void setUp() {
19 | //        openAIChatService = new OpenAIChatService(apiKey);
20 | //    }
21 | //
22 | //    @Test
23 | //    public void testGenerateTextWithoutChatId() throws IOException {
24 | //        String url = "https://api.openai.com/v1/chat/completions";
25 | //        JSONObject params = new JSONObject()
26 | //                .put("model", "gpt-3.5")
27 | //                .put("messages", new JSONObject[] {
28 | //                        new JSONObject().put("role", "user").put("content", "1+1 = ?")
29 | //                })
30 | //                .put("temperature", 0.3)
31 | //                .put("stream", false);
32 | //
33 | //        String result = openAIChatService.generateText(url, params);
34 | //        assertNotNull(result);
35 | //        assertTrue(result.contains("2"));
36 | //    }
37 | //
38 | //    @Test
39 | //    public void testGenerateTextWithChatId() throws IOException {
40 | //        String url = "https://api.openai.com/v1/chat/completions";
41 | //        String chatId = "chat123";
42 | //        JSONObject newMessage = new JSONObject().put("role", "user").put("content", "What was my last question?");
43 | //
44 | //        String result = openAIChatService.generateText(url, chatId, newMessage);
45 | //        assertNotNull(result);
46 | //        // 根据实际情况调整断言内容
47 | //        assertTrue(result.contains("last question"));
48 | //    }
49 | //}


--------------------------------------------------------------------------------
/webapp/resources/biglog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChinaYiqun/java-rag/8af5310db73c20c77bfb5f33f6bd83d6f44fcc26/webapp/resources/biglog.png


--------------------------------------------------------------------------------
/webapp/resources/ezgif-81180eba7adb9d.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChinaYiqun/java-rag/8af5310db73c20c77bfb5f33f6bd83d6f44fcc26/webapp/resources/ezgif-81180eba7adb9d.gif


--------------------------------------------------------------------------------
/webapp/views/chat.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | 
  4 | <head>
  5 |     <meta charset="UTF-8">
  6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7 |     <title>Chat Page</title>
  8 |     <!-- 引入 Layui CSS 文件 -->
  9 |     <link rel="stylesheet" href="https://cdn.staticfile.org/layui/2.8.12/css/layui.min.css">
 10 |     <style>
 11 |         /* 整体页面容器 */
 12 |         .page-container {
 13 |             min-height: 100vh;
 14 |             display: flex;
 15 |             flex-direction: column;
 16 |         }
 17 | 
 18 |         /* 定义聊天区域的样式 */
 19 |         .chat-container {
 20 |             flex: 1;
 21 |             overflow-y: auto;
 22 |             border: none;
 23 |             padding: 0;
 24 |         }
 25 | 
 26 |         /* 定义消息样式 */
 27 |         .message {
 28 |             margin-bottom: 10px;
 29 |             max-width: 70%;
 30 |             padding: 8px 12px;
 31 |             border-radius: 8px;
 32 |         }
 33 | 
 34 |         /* 对方消息样式 */
 35 |         .message.other {
 36 |             background-color: #f1f0f0;
 37 |             float: left;
 38 |             clear: both;
 39 |         }
 40 | 
 41 |         /* 我方消息样式 */
 42 |         .message.self {
 43 |             background-color: #0084ff;
 44 |             color: white;
 45 |             float: right;
 46 |             clear: both;
 47 |         }
 48 | 
 49 |         /* 定义输入框和发送按钮区域样式 */
 50 |         .input-area {
 51 |             padding: 10px 0;
 52 |             /* 去掉顶部边框 */
 53 |             border-top: none;
 54 |             position: fixed;
 55 |             bottom: 0;
 56 |             left: 0;
 57 |             width: 100%;
 58 |             background-color: white;
 59 |             z-index: 100;
 60 |         }
 61 | 
 62 |         /* 让输入框和按钮在同一行显示 */
 63 |         .layui-form-item {
 64 |             display: flex;
 65 |             align-items: center;
 66 |         }
 67 | 
 68 |         .layui-input {
 69 |             flex: 1;
 70 |             margin-right: 10px;
 71 |         }
 72 | 
 73 |         /* 确保容器内元素双端对齐 */
 74 |         .layui-container {
 75 |             max-width: 800px;
 76 |             margin: 0 auto;
 77 |             width: 90%; /* 可以调整这个值来控制左右边距 */
 78 |         }
 79 |     </style>
 80 | </head>
 81 | 
 82 | <body>
 83 | <!-- 整体页面容器 -->
 84 | <div class="page-container">
 85 |     <!-- 引入 Layui 容器 -->
 86 |     <div class="layui-container">
 87 |         <!-- 引入 Layui 行 -->
 88 |         <div class="layui-row">
 89 |             <!-- 核心区域，占 12 列 -->
 90 |             <div class="layui-col-md12">
 91 |                 <!-- 聊天历史消息展示区域 -->
 92 |                 <div class="chat-container" id="chatHistory">
 93 |                     <div class="message self">我：你好</div>
 94 |                     <!-- 对方消息 -->
 95 |                     <div class="message other">🤖：你好！我是机器人助理，我能回答各类知识问题，如科学、历史、文化等；创作多种文本，像诗歌、小说、计划书；辅助语言学习，包括语法、翻译；启发创意，如广告、手工灵感；陪日常闲聊，倾听分享情绪；解逻辑推理题；整理总结信息；提供编程代码示例并协助解决编程问题。</div>
 96 | 
 97 |                     <div class="message self">我：忙了一天工作。</div>
 98 | 
 99 |                     <div class="message other">🤖：辛苦了呀，忙了一天工作肯定挺累的。快坐下来好好休息休息，放松一下。可以和我说说今天工作上有没有发生什么特别的事儿，不管是开心的还是让你觉得有点小郁闷的，都能跟我聊聊，说不定说出来会感觉轻松不少呢。 </div>
100 |                 </div>
101 |             </div>
102 |         </div>
103 |     </div>
104 |     <!-- 输入框和发送按钮区域，占 12 列 -->
105 |     <div class="layui-row">
106 |         <div class="layui-col-md12 input-area">
107 |             <div class="layui-container">
108 |                 <form class="layui-form" id="chatForm">
109 |                     <div class="layui-form-item">
110 |                         <input type="text" id="inputMessage" placeholder="请输入消息" class="layui-input">
111 |                         <input type="file" id="fileInput" style="display: none;">
112 |                         <button class="layui-btn" id="sendButton">发送</button>
113 |                         <button class="layui-btn" id="uploadButton">上传文件</button>
114 |                     </div>
115 |                 </form>
116 |             </div>
117 |         </div>
118 |     </div>
119 | </div>
120 | 
121 | <!-- 引入 layui.js -->
122 | <script src="//unpkg.com/layui@2.9.21/dist/layui.js"></script>
123 | <script>
124 |     layui.use(['jquery'], function () {
125 |         var $ = layui.jquery;
126 | 
127 |         // 点击发送按钮事件
128 |         $('#sendButton').on('click', function (e) {
129 |             // 阻止表单的默认提交行为，避免页面刷新
130 |             e.preventDefault();
131 | 
132 |             // 打印日志到控制台
133 |             console.log('发送按钮被点击');
134 |             var message = $('#inputMessage').val();
135 |             if (message) {
136 |                 // 添加新消息到聊天历史
137 |                 $('#chatHistory').append('<div class="message self">我：' + message + '</div>');
138 |                 // 清空输入框
139 |                 $('#inputMessage').val('');
140 |                 // 发送消息到服务器
141 |                 $.ajax({
142 |                     url: 'http://localhost:8080/send',
143 |                     method: 'POST',
144 |                     data: { message: message },
145 |                     dataType: 'json', // 指定返回数据类型为 JSON
146 |                     success: function (response) {
147 |                         // 获取生成的回复消息
148 |                         var generatedText = response.generatedText;
149 |                         // 添加机器人的回复消息到聊天历史
150 |                         $('#chatHistory').append('<div class="message other">🤖：' + generatedText + '</div>');
151 |                         // 滚动到聊天历史底部
152 |                         $('#chatHistory').scrollTop($('#chatHistory')[0].scrollHeight);
153 |                     },
154 |                     error: function () {
155 |                         console.log('请求失败');
156 |                     }
157 |                 });
158 |             }
159 |         });
160 | 
161 |         // 监听输入框按下回车键事件
162 |         $('#inputMessage').on('keydown', function (e) {
163 |             if (e.keyCode === 13) {
164 |                 $('#sendButton').click();
165 |             }
166 |         });
167 | 
168 |         // 点击上传文件按钮事件
169 |         $('#uploadButton').on('click', function (e) {
170 |             e.preventDefault();
171 |             $('#fileInput').click();
172 |         });
173 | 
174 |         // 监听文件选择事件
175 |         $('#fileInput').on('change', function () {
176 |             var file = this.files[0];
177 |             if (file) {
178 |                 // 创建 FormData 对象
179 |                 var formData = new FormData();
180 |                 formData.append('isFile', 'true');
181 |                 formData.append('file', file);
182 |                 formData.append('originalFileName', file.name);
183 | 
184 |                 // 显示正在上传的提示
185 |                 $('#chatHistory').append('<div class="message self">我：正在上传文件，请稍候...</div>');
186 |                 $('#chatHistory').scrollTop($('#chatHistory')[0].scrollHeight);
187 | 
188 |                 // 发送文件到服务器
189 |                 $.ajax({
190 |                     url: 'http://localhost:8080/sendFile',
191 |                     method: 'POST',
192 |                     data: formData,
193 |                     contentType: false,
194 |                     processData: false,
195 |                     success: function (response) {
196 |                         try {
197 |                             var responseJson = JSON.parse(response);
198 |                             // 获取生成的回复消息
199 |                             var generatedText = responseJson.generatedText;
200 |                             // 添加机器人的回复消息到聊天历史
201 |                             $('#chatHistory').append('<div class="message other">🤖：' + generatedText + '</div>');
202 |                         } catch (error) {
203 |                             console.log('解析响应数据失败:', error);
204 |                             $('#chatHistory').append('<div class="message other">🤖：解析服务器响应失败，请重试。</div>');
205 |                         }
206 |                         // 滚动到聊天历史底部
207 |                         $('#chatHistory').scrollTop($('#chatHistory')[0].scrollHeight);
208 |                     },
209 |                     error: function () {
210 |                         console.log('文件上传请求失败');
211 |                         $('#chatHistory').append('<div class="message other">🤖：文件上传或处理失败，请重试。</div>');
212 |                         $('#chatHistory').scrollTop($('#chatHistory')[0].scrollHeight);
213 |                     }
214 |                 });
215 |             }
216 |         });
217 |     });
218 | </script>
219 | </body>
220 | 
221 | </html>


--------------------------------------------------------------------------------
/webapp/views/knowledge_base.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | 
  4 | <head>
  5 |     <meta charset="UTF-8">
  6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7 |     <title>Layui Grid Example</title>
  8 |     <!-- 引入 Layui CSS 文件 -->
  9 |     <link rel="stylesheet" href="https://cdn.staticfile.org/layui/2.8.12/css/layui.min.css">
 10 |     <style>
 11 |         /* 为列元素添加左右内边距 */
 12 |        .layui-col-md3,
 13 |        .layui-col-md6 {
 14 |             padding: 0 5px;
 15 |         }
 16 | 
 17 |         /* 定义矩形框的样式 */
 18 |        .rectangle {
 19 |             background-color: #f2f2f2;
 20 |             border: 1px solid #ccc;
 21 |             text-align: center;
 22 |             padding: 20px;
 23 |             margin-bottom: 10px;
 24 |         }
 25 | 
 26 |         /* 定义数字的样式 */
 27 |        .big-number {
 28 |             font-size: 48px;
 29 |             font-weight: bold;
 30 |         }
 31 | 
 32 |         /* 定义信息项的样式 */
 33 |        .info-item {
 34 |             margin-bottom: 5px;
 35 |         }
 36 |     </style>
 37 | </head>
 38 | 
 39 | <body>
 40 | <!-- 引入 Layui 容器 -->
 41 | <div class="layui-container">
 42 |     <!-- 引入 Layui 行 -->
 43 |     <div class="layui-row">
 44 |         <!-- 第一个矩形框，占 3 列 -->
 45 |         <div class="layui-col-md3">
 46 |             <div class="rectangle">
 47 |                 <div class="info-item">知识库名称：知识库 1</div>
 48 |                 <div class="info-item">文档数量：100</div>
 49 |                 <div class="info-item">创建时间：2024-01-01</div>
 50 |                 <div class="info-item">ID：1</div>
 51 |             </div>
 52 |         </div>
 53 |         <!-- 第二个矩形框，占 3 列 -->
 54 |         <div class="layui-col-md3">
 55 |             <div class="rectangle">
 56 |                 <div class="info-item">知识库名称：知识库 2</div>
 57 |                 <div class="info-item">文档数量：200</div>
 58 |                 <div class="info-item">创建时间：2024-02-01</div>
 59 |                 <div class="info-item">ID：2</div>
 60 |             </div>
 61 |         </div>
 62 |         <!-- 第三个矩形框，占 3 列 -->
 63 |         <div class="layui-col-md3">
 64 |             <div class="rectangle">
 65 |                 <div class="info-item">知识库名称：知识库 3</div>
 66 |                 <div class="info-item">文档数量：300</div>
 67 |                 <div class="info-item">创建时间：2024-03-01</div>
 68 |                 <div class="info-item">ID：3</div>
 69 |             </div>
 70 |         </div>
 71 |         <!-- 第四个矩形框，占 3 列 -->
 72 |         <div class="layui-col-md3">
 73 |             <div class="rectangle">
 74 |                 <div class="info-item">知识库名称：知识库 4</div>
 75 |                 <div class="info-item">文档数量：400</div>
 76 |                 <div class="info-item">创建时间：2024-04-01</div>
 77 |                 <div class="info-item">ID：4</div>
 78 |             </div>
 79 |         </div>
 80 |     </div>
 81 |     <!-- 新的一行 -->
 82 |     <div class="layui-row">
 83 |         <!-- 第五个矩形框，占 6 列 -->
 84 |         <div class="layui-col-md6">
 85 |             <div class="rectangle">
 86 |                 <div class="info-item">知识库名称：知识库 5</div>
 87 |                 <div class="info-item">文档数量：500</div>
 88 |                 <div class="info-item">创建时间：2024-05-01</div>
 89 |                 <div class="info-item">ID：5</div>
 90 |             </div>
 91 |         </div>
 92 |         <!-- 第六个矩形框，占 6 列 -->
 93 |         <div class="layui-col-md6">
 94 |             <div class="rectangle">
 95 |                 <div class="info-item">知识库名称：知识库 6</div>
 96 |                 <div class="info-item">文档数量：600</div>
 97 |                 <div class="info-item">创建时间：2024-06-01</div>
 98 |                 <div class="info-item">ID：6</div>
 99 |             </div>
100 |         </div>
101 |     </div>
102 | </div>
103 | 
104 | <!-- 引入 Layui JavaScript 文件 -->
105 | <script src="https://cdn.staticfile.org/layui/2.8.12/layui.min.js"></script>
106 | <script>
107 |     // Initialize Layui
108 |     layui.use(function () {
109 |         var $ = layui.jquery;
110 |         // Initialization code for other Layui modules can be added here
111 |     });
112 | </script>
113 | </body>
114 | 
115 | </html>


--------------------------------------------------------------------------------
/webapp/views/login.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 6 |     <title>Demo</title>
 7 |     <!-- 请勿在项目正式环境中引用该 layui.css 地址 -->
 8 |     <link href="//unpkg.com/layui@2.9.21/dist/css/layui.css" rel="stylesheet">
 9 | </head>
10 | <body>
11 | <style>
12 |     .demo-login-container{width: 320px; margin: 21px auto 0;}
13 |     .demo-login-other .layui-icon{position: relative; display: inline-block; margin: 0 2px; top: 2px; font-size: 26px;}
14 | </style>
15 | <form class="layui-form">
16 |     <div class="demo-login-container">
17 |         <div class="layui-form-item">
18 |             <div class="layui-input-wrap">
19 |                 <div class="layui-input-prefix">
20 |                     <i class="layui-icon layui-icon-username"></i>
21 |                 </div>
22 |                 <input type="text" name="username" value="" lay-verify="required" placeholder="用户名" lay-reqtext="请填写用户名" autocomplete="off" class="layui-input" lay-affix="clear">
23 |             </div>
24 |         </div>
25 |         <div class="layui-form-item">
26 |             <div class="layui-input-wrap">
27 |                 <div class="layui-input-prefix">
28 |                     <i class="layui-icon layui-icon-password"></i>
29 |                 </div>
30 |                 <input type="password" name="password" value="" lay-verify="required" placeholder="密   码" lay-reqtext="请填写密码" autocomplete="off" class="layui-input" lay-affix="eye">
31 |             </div>
32 |         </div>
33 |         <div class="layui-form-item">
34 |             <div class="layui-row">
35 |                 <div class="layui-col-xs7">
36 |                     <div class="layui-input-wrap">
37 |                         <div class="layui-input-prefix">
38 |                             <i class="layui-icon layui-icon-vercode"></i>
39 |                         </div>
40 |                         <input type="text" name="captcha" value="" lay-verify="required" placeholder="验证码" lay-reqtext="请填写验证码" autocomplete="off" class="layui-input" lay-affix="clear">
41 |                     </div>
42 |                 </div>
43 |                 <div class="layui-col-xs5">
44 |                     <div style="margin-left: 10px;">
45 |                         <img src="https://www.oschina.net/action/user/captcha" onclick="this.src='https://www.oschina.net/action/user/captcha?t='+ new Date().getTime();">
46 |                     </div>
47 |                 </div>
48 |             </div>
49 |         </div>
50 |         <div class="layui-form-item">
51 |             <input type="checkbox" name="remember" lay-skin="primary" title="记住密码">
52 |             <a href="#forget" style="float: right; margin-top: 7px;">忘记密码？</a>
53 |         </div>
54 |         <div class="layui-form-item">
55 |             <button class="layui-btn layui-btn-fluid" lay-submit lay-filter="demo-login">登录</button>
56 |         </div>
57 |         <div class="layui-form-item demo-login-other">
58 |             <label>社交账号登录</label>
59 |             <span style="padding: 0 21px 0 6px;">
60 |         <a href="javascript:;"><i class="layui-icon layui-icon-login-qq" style="color: #3492ed;"></i></a>
61 |         <a href="javascript:;"><i class="layui-icon layui-icon-login-wechat" style="color: #4daf29;"></i></a>
62 |         <a href="javascript:;"><i class="layui-icon layui-icon-login-weibo" style="color: #cf1900;"></i></a>
63 |       </span>
64 |             或 <a href="#reg">注册帐号</a>
65 |         </div>
66 |     </div>
67 | </form>
68 | 
69 | <!-- 请勿在项目正式环境中引用该 layui.js 地址 -->
70 | <script src="//unpkg.com/layui@2.9.21/dist/layui.js"></script>
71 | <script>
72 |     // Wire up the demo login form inside the layui.use callback.
73 |     layui.use(function () {
74 |       var form = layui.form;
75 |       var layer = layui.layer;
76 | 
77 |       // Demo-only submit handler: echo the collected field values in an alert.
78 |       function onLoginSubmit(evt) {
79 |         var payload = JSON.stringify(evt.field); // serialized form field values
80 |         layer.alert(payload, { title: '当前填写的字段值' });
81 |         // A real implementation would perform its Ajax call here.
82 |         return false; // prevent the default form navigation
83 |       }
84 | 
85 |       form.on('submit(demo-login)', onLoginSubmit);
86 |     });
87 | </script>
88 | 
89 | </body>
90 | </html>


--------------------------------------------------------------------------------
/webapp/views/main.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="zh-CN">
  3 | 
  4 | <head>
  5 |     <meta charset="UTF-8">
  6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7 |     <title>基于 layui 的界面设计</title>
  8 |     <!-- 引入 layui 的 CSS 文件，注意在正式项目中请替换为正确的路径 -->
  9 |     <link rel="stylesheet" href="//unpkg.com/layui@2.9.21/dist/css/layui.css">
 10 |     <style>
 11 |         body {
 12 |             background-color: #f9f9f9;
 13 |             font-family: Arial, sans-serif;
 14 |         }
 15 | 
 16 |        .layui-nav {
 17 |             height: 60px;
 18 |             background-color: #333;
 19 |             display: flex;
 20 |             align-items: center;
 21 |         }
 22 | 
 23 |        .layui-nav-left {
 24 |             flex: 1;
 25 |         }
 26 | 
 27 |        .layui-nav-center {
 28 |             flex: 2;
 29 |             display: flex;
 30 |             justify-content: center;
 31 |         }
 32 | 
 33 |        .layui-nav-right {
 34 |             flex: 1;
 35 |             display: flex;
 36 |             justify-content: flex-end;
 37 |         }
 38 | 
 39 |        .layui-nav-item a {
 40 |             color: #fff;
 41 |             padding: 0 20px;
 42 |         }
 43 | 
 44 |        .layui-nav-item.layui-this {
 45 |             background-color: #009688;
 46 |         }
 47 | 
 48 |        .layui-nav-child {
 49 |             background-color: #444;
 50 |         }
 51 | 
 52 |        .layui-nav-child dd a {
 53 |             color: #fff;
 54 |         }
 55 | 
 56 |        .content {
 57 |             padding: 20px;
 58 |             background-color: #fff;
 59 |             min-height: 500px;
 60 |         }
 61 |     </style>
 62 | </head>
 63 | 
 64 | <body>
 65 | <!-- 导航栏 -->
 66 | <ul class="layui-nav" lay-filter="demo-filter-nav">
 67 |     <div class="layui-nav-left">
 68 |         <li class="layui-nav-item"><a href="javascript:void(0);" data-url="home.html">首页</a></li>
 69 |     </div>
 70 |     <div class="layui-nav-center">
 71 |         <li class="layui-nav-item layui-this"><a href="javascript:void(0);" data-url="knowledge_base.html">知识库</a></li>
 72 |         <li class="layui-nav-item"><a href="javascript:void(0);" data-url="chat.html">聊天</a></li>
 73 |         <li class="layui-nav-item"><a href="javascript:void(0);" data-url="search.html">搜索</a></li>
 74 |         <li class="layui-nav-item"><a href="javascript:void(0);" data-url="model_management.html">模型管理</a></li>
 75 |         <li class="layui-nav-item"><a href="javascript:void(0);" data-url="vector_management.html">向量管理</a></li>
 76 |     </div>
 77 |     <div class="layui-nav-right">
 78 |         <li class="layui-nav-item"><a href="javascript:void(0);" data-url="end_page.html">设置</a></li>
 79 |     </div>
 80 | </ul>
 81 | <!-- 内容区域 -->
 82 | <div class="content">
 83 |     <!-- 这里可以根据不同的 tab 内容进行填充 -->
 84 | </div>
 85 | <!-- 引入 layui 的 JS 文件，注意在正式项目中请替换为正确的路径 -->
 86 | <script src="//unpkg.com/layui@2.9.21/dist/layui.js"></script>
 87 | <script>
 88 |     // Main shell: loads the page behind each navigation link into the content area.
 89 |     layui.use(['element', 'jquery'], function () {
 90 |         var $ = layui.jquery;
 91 | 
 92 |         // Markup shown in the content area when a page cannot be fetched.
 93 |         var LOAD_ERROR_HTML = '<h2>加载失败</h2><p>无法加载该页面内容，请稍后重试。</p>';
 94 | 
 95 |         /**
 96 |          * Fetch a page with a GET request and inject the response into the
 97 |          * .content element; on failure, show a fixed error message instead.
 98 |          *
 99 |          * @param {string} url - address of the page to load
100 |          */
101 |         function loadPage(url) {
102 |             $.ajax({
103 |                 url: url,
104 |                 method: 'GET',
105 |                 success: function (html) {
106 |                     $('.content').html(html);
107 |                 },
108 |                 error: function () {
109 |                     $('.content').html(LOAD_ERROR_HTML);
110 |                 }
111 |             });
112 |         }
113 | 
114 |         // Navigation clicks: each link names its target page in a data-url attribute.
115 |         $('.layui-nav-item a').click(function () {
116 |             var url = $(this).data('url');
117 |             if (url) {
118 |                 loadPage(url);
119 |             }
120 |         });
121 | 
122 |         // Initial view: the knowledge-base page.
123 |         loadPage('knowledge_base.html');
124 |     });
125 | </script>
126 | </body>
127 | 
128 | </html>


--------------------------------------------------------------------------------
/webapp/views/model_management.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | 
  4 | <head>
  5 |   <meta charset="utf-8">
  6 |   <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |   <title>模型管理 - layui</title>
  8 |   <link rel="stylesheet" href="https://cdn.staticfile.org/layui/2.7.6/css/layui.css">
  9 |   <style>
 10 |     body {
 11 |         padding: 0; /* 将页面边距设置为0 */
 12 |     }
 13 | 
 14 | .layui-table-view {
 15 |         margin: 8px 0;
 16 |     }
 17 | 
 18 | .model-intro {
 19 |         display: none;
 20 |         position: absolute;
 21 |         background-color: #f9f9f9;
 22 |         border: 1px solid #ccc;
 23 |         padding: 5px;
 24 |         z-index: 1;
 25 |     }
 26 | 
 27 |     tr:hover.model-intro {
 28 |         display: block;
 29 |     }
 30 |   </style>
 31 | </head>
 32 | 
 33 | <body>
 34 | 
 35 | <table id="modelTable" lay-filter="modelTable"></table>
 36 | 
 37 | <script src="https://cdn.staticfile.org/layui/2.7.6/layui.js"></script>
 38 | <script>
 39 |   // Model management table: renders mock rows and routes row-button clicks
 40 |   // through Layui's table "tool" event.
 41 |   layui.use(['table', 'layer'], function () {
 42 |       var table = layui.table;
 43 |       var layer = layui.layer;
 44 | 
 45 |       // Mock data; replace with data fetched from the real API.
 46 |       var data = [
 47 |           {
 48 |               modelName: "模型1",
 49 |               globalName: "全局名称1",
 50 |               apiUrl: "https://api.example.com/1",
 51 |               apiKey: "abc123",
 52 |               remainingTokens: 100,
 53 |               modelIntro: "这是模型1的介绍，用于演示目的。"
 54 |           },
 55 |           {
 56 |               modelName: "模型2",
 57 |               globalName: "全局名称2",
 58 |               apiUrl: "https://api.example.com/2",
 59 |               apiKey: "def456",
 60 |               remainingTokens: 200,
 61 |               modelIntro: "这是模型2的介绍，用于演示目的。"
 62 |           }
 63 |       ];
 64 | 
 65 |       // Render the table. The action buttons carry a lay-event name instead of an
 66 |       // inline onclick: inline handlers are resolved in the global scope, where the
 67 |       // functions declared inside this callback are not visible.
 68 |       table.render({
 69 |           elem: '#modelTable',
 70 |           data: data,
 71 |           cols: [[
 72 |               { field: 'modelName', title: '模型名称' },
 73 |               { field: 'globalName', title: '全局名称' },
 74 |               { field: 'apiUrl', title: 'API_URL' },
 75 |               { field: 'apiKey', title: 'API_KEY' },
 76 |               { field:'remainingTokens', title: '剩余token数量' },
 77 |               {
 78 |                   title: '操作',
 79 |                   templet: function () {
 80 |                       return '<button class="layui-btn layui-btn-xs" lay-event="edit">编辑</button>';
 81 |                   }
 82 |               },
 83 |               {
 84 |                   title: 'Mapping Detail Config',
 85 |                   templet: function () {
 86 |                       return '<button class="layui-btn layui-btn-xs" lay-event="expand">扩展</button>';
 87 |                   }
 88 |               }
 89 |           ]]
 90 |       });
 91 | 
 92 |       // Placeholder edit action: shows a message naming the model.
 93 |       function editModel(modelName) {
 94 |           layer.msg('正在编辑模型：' + modelName);
 95 |       }
 96 | 
 97 |       // Placeholder expand-config action: shows a message naming the model.
 98 |       function expandConfig(modelName) {
 99 |           layer.msg('正在扩展模型'+ modelName +'的配置');
100 |       }
101 | 
102 |       // Dispatch row-button clicks; "modelTable" is the table's lay-filter value.
103 |       table.on('tool(modelTable)', function (obj) {
104 |           var modelName = obj.data.modelName; // record of the row that was clicked
105 |           if (obj.event === 'edit') {
106 |               editModel(modelName);
107 |           } else if (obj.event === 'expand') {
108 |               expandConfig(modelName);
109 |           }
110 |       });
111 |   });
112 | </script>
113 | </body>
114 | 
115 | </html>


--------------------------------------------------------------------------------
/webapp/views/register.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 |     <meta charset="utf-8">
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1">
  6 |     <title>Demo</title>
  7 |     <!-- 请勿在项目正式环境中引用该 layui.css 地址 -->
  8 |     <link href="//unpkg.com/layui@2.9.21/dist/css/layui.css" rel="stylesheet">
  9 | </head>
 10 | <body>
 11 | <style>
 12 |     .demo-reg-container{width: 320px; margin: 21px auto 0;}
 13 |     .demo-reg-other .layui-icon{position: relative; display: inline-block; margin: 0 2px; top: 2px; font-size: 26px;}
 14 | </style>
 15 | <form class="layui-form">
 16 |     <div class="demo-reg-container">
 17 |         <div class="layui-form-item">
 18 |             <div class="layui-row">
 19 |                 <div class="layui-col-xs7">
 20 |                     <div class="layui-input-wrap">
 21 |                         <div class="layui-input-prefix">
 22 |                             <i class="layui-icon layui-icon-cellphone"></i>
 23 |                         </div>
 24 |                         <input type="text" name="cellphone" value="" lay-verify="required|phone" placeholder="手机号" lay-reqtext="请填写手机号" autocomplete="off" class="layui-input" id="reg-cellphone">
 25 |                     </div>
 26 |                 </div>
 27 |                 <div class="layui-col-xs5">
 28 |                     <div style="margin-left: 11px;">
 29 |                         <button type="button" class="layui-btn layui-btn-fluid layui-btn-primary" lay-on="reg-get-vercode">获取验证码</button>
 30 |                     </div>
 31 |                 </div>
 32 |             </div>
 33 |         </div>
 34 |         <div class="layui-form-item">
 35 |             <div class="layui-input-wrap">
 36 |                 <div class="layui-input-prefix">
 37 |                     <i class="layui-icon layui-icon-vercode"></i>
 38 |                 </div>
 39 |                 <input type="text" name="vercode" value="" lay-verify="required" placeholder="验证码" lay-reqtext="请填写验证码" autocomplete="off" class="layui-input">
 40 |             </div>
 41 |         </div>
 42 |         <div class="layui-form-item">
 43 |             <div class="layui-input-wrap">
 44 |                 <div class="layui-input-prefix">
 45 |                     <i class="layui-icon layui-icon-password"></i>
 46 |                 </div>
 47 |                 <input type="password" name="password" value="" lay-verify="required" placeholder="密码" autocomplete="off" class="layui-input" id="reg-password" lay-affix="eye">
 48 |             </div>
 49 |         </div>
 50 |         <div class="layui-form-item">
 51 |             <div class="layui-input-wrap">
 52 |                 <div class="layui-input-prefix">
 53 |                     <i class="layui-icon layui-icon-password"></i>
 54 |                 </div>
 55 |                 <input type="password" name="confirmPassword" value="" lay-verify="required|confirmPassword" placeholder="确认密码" autocomplete="off" class="layui-input" lay-affix="eye">
 56 |             </div>
 57 |         </div>
 58 |         <div class="layui-form-item">
 59 |             <div class="layui-input-wrap">
 60 |                 <div class="layui-input-prefix">
 61 |                     <i class="layui-icon layui-icon-username"></i>
 62 |                 </div>
 63 |                 <input type="text" name="nickname" value="" lay-verify="required" placeholder="昵称" autocomplete="off" class="layui-input" lay-affix="clear">
 64 |             </div>
 65 |         </div>
 66 |         <div class="layui-form-item">
 67 |             <input type="checkbox" name="agreement" lay-verify="required" lay-skin="primary" title="同意">
 68 |             <a href="#terms" target="_blank" style="position: relative; top: 6px; left: -15px;">
 69 |                 <ins>用户协议</ins>
 70 |             </a>
 71 |         </div>
 72 |         <div class="layui-form-item">
 73 |             <button class="layui-btn layui-btn-fluid" lay-submit lay-filter="demo-reg">注册</button>
 74 |         </div>
 75 |         <div class="layui-form-item demo-reg-other">
 76 |             <label>社交账号注册</label>
 77 |             <span style="padding: 0 21px 0 6px;">
 78 |         <a href="javascript:;"><i class="layui-icon layui-icon-login-qq" style="color: #3492ed;"></i></a>
 79 |         <a href="javascript:;"><i class="layui-icon layui-icon-login-wechat" style="color: #4daf29;"></i></a>
 80 |         <a href="javascript:;"><i class="layui-icon layui-icon-login-weibo" style="color: #cf1900;"></i></a>
 81 |       </span>
 82 |             <a href="#login">登录已有帐号</a>
 83 |         </div>
 84 |     </div>
 85 | </form>
 86 | 
 87 | <!-- 请勿在项目正式环境中引用该 layui.js 地址 -->
 88 | <script src="//unpkg.com/layui@2.9.21/dist/layui.js"></script>
 89 | <script>
 90 |     // Registration demo: custom validation, submit handling, and the "get code" button.
 91 |     layui.use(function () {
 92 |       var $ = layui.$;
 93 |       var form = layui.form;
 94 |       var layer = layui.layer;
 95 |       var util = layui.util;
 96 | 
 97 |       // Custom rule: the confirmation field must equal the password field.
 98 |       function confirmPassword(value, item) {
 99 |         if ($('#reg-password').val() !== value) {
100 |           return '两次密码输入不一致';
101 |         }
102 |       }
103 |       form.verify({ confirmPassword: confirmPassword });
104 | 
105 |       // Submit handler for the button with lay-filter="demo-reg".
106 |       function onRegisterSubmit(evt) {
107 |         var fields = evt.field; // collected form field values
108 | 
109 |         // Guard: the agreement checkbox must be ticked.
110 |         if (!fields.agreement) {
111 |           layer.msg('您必须勾选同意用户协议才能注册');
112 |           return false;
113 |         }
114 | 
115 |         // Demo only: display the submitted values.
116 |         layer.alert(JSON.stringify(fields), { title: '当前填写的字段值' });
117 | 
118 |         // An Ajax submission would go here.
119 |         return false; // prevent the default form navigation
120 |       }
121 |       form.on('submit(demo-reg)', onRegisterSubmit);
122 | 
123 |       // Plain click events bound through the lay-on attribute.
124 |       util.on('lay-on', {
125 |         // "Get verification code" button
126 |         'reg-get-vercode': function (othis) {
127 |           // Actively trigger validation of the phone field (form.validate, added in v2.7.0).
128 |           if (form.validate('#reg-cellphone')) {
129 |             layer.msg('手机号规则验证通过');
130 |             // "Send verification code" logic can follow here.
131 |           }
132 |         }
133 |       });
134 |     });
135 | </script>
136 | 
137 | </body>
138 | </html>


--------------------------------------------------------------------------------
/webapp/views/search.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="zh-CN">
  3 | 
  4 | <head>
  5 |     <meta charset="UTF-8">
  6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7 |     <title>搜索页面 - 基于 layui 的界面设计</title>
  8 |     <!-- 引入 layui 的 CSS 文件，注意在正式项目中请替换为正确的路径 -->
  9 |     <link rel="stylesheet" href="//unpkg.com/layui@2.9.21/dist/css/layui.css">
 10 |     <style>
 11 |         body {
 12 |             background-color: #f9f9f9;
 13 |             font-family: Arial, sans-serif;
 14 |         }
 15 | 
 16 |         .layui-nav {
 17 |             height: 60px;
 18 |             background-color: #333;
 19 |             display: flex;
 20 |             align-items: center;
 21 |         }
 22 | 
 23 |         .layui-nav-left {
 24 |             flex: 1;
 25 |         }
 26 | 
 27 |         .layui-nav-center {
 28 |             flex: 2;
 29 |             display: flex;
 30 |             justify-content: center;
 31 |         }
 32 | 
 33 |         .layui-nav-right {
 34 |             flex: 1;
 35 |             display: flex;
 36 |             justify-content: flex-end;
 37 |         }
 38 | 
 39 |         .layui-nav-item a {
 40 |             color: #fff;
 41 |             padding: 0 20px;
 42 |         }
 43 | 
 44 |         .layui-nav-item.layui-this {
 45 |             background-color: #009688;
 46 |         }
 47 | 
 48 |         .layui-nav-child {
 49 |             background-color: #444;
 50 |         }
 51 | 
 52 |         .layui-nav-child dd a {
 53 |             color: #fff;
 54 |         }
 55 | 
 56 |         .content {
 57 |             padding: 20px;
 58 |             background-color: #fff;
 59 |             min-height: 500px;
 60 |             display: flex;
 61 |             flex-direction: column;
 62 |             align-items: center;
 63 |         }
 64 | 
 65 |         /* 搜索框样式 */
 66 |         .search-box {
 67 |             margin-bottom: 20px;
 68 |             display: flex;
 69 |             justify-content: center;
 70 |             align-items: center;
 71 |             width: 100%;
 72 |         }
 73 | 
 74 |         .search-box input {
 75 |             width: 300px;
 76 |             height: 30px;
 77 |             padding: 5px;
 78 |         }
 79 | 
 80 |         .search-box button {
 81 |             height: 40px; /* 按钮变大，调整高度 */
 82 |             padding: 0 20px; /* 按钮变大，调整内边距 */
 83 |             margin-left: 10px;
 84 |             font-size: 16px; /* 按钮变大，调整字体大小 */
 85 |         }
 86 | 
 87 |         /* 搜索结果列表样式 */
 88 |         .search-result-list {
 89 |             list-style-type: none;
 90 |             padding: 0;
 91 |             width: 100%;
 92 |         }
 93 | 
 94 |         .search-result-list li {
 95 |             margin-bottom: 10px;
 96 |             border-bottom: 1px solid #ccc;
 97 |             padding-bottom: 5px;
 98 |         }
 99 |     </style>
100 | </head>
101 | 
102 | <body>
103 | 
104 | <!-- 内容区域 -->
105 | <div class="content">
106 |     <div class="search-box">
107 |         <input type="text" id="searchInput" placeholder="请输入搜索关键词">
108 |         <button class="layui-btn" id="searchButton">搜索</button>
109 |     </div>
110 |     <div style="flex: 1; width: 100%;">
111 |         <ul class="search-result-list" id="searchResultList"></ul>
112 |     </div>
113 | </div>
114 | <!-- 引入 layui 的 JS 文件，注意在正式项目中请替换为正确的路径 -->
115 | <script src="//unpkg.com/layui@2.9.21/dist/layui.js"></script>
116 | <script>
117 |     layui.use(['element', 'jquery'], function () {
118 |         var $ = layui.jquery;
119 | 
120 |         // Backend search endpoint.
121 |         // NOTE(review): hard-coded to localhost:8080; confirm before deploying elsewhere.
122 |         var SEARCH_URL = 'http://localhost:8080/search';
123 | 
124 |         /**
125 |          * Search for the keyword currently in the input box and render the HTML
126 |          * returned by the backend into the result list. Empty or whitespace-only
127 |          * keywords are ignored; a failed request shows a message in the list.
128 |          */
129 |         function runSearch() {
130 |             var keyword = ($('#searchInput').val() || '').trim();
131 |             if (!keyword) {
132 |                 return;
133 |             }
134 | 
135 |             // Clear the previous search results.
136 |             var $results = $('#searchResultList');
137 |             $results.empty();
138 | 
139 |             $.ajax({
140 |                 url: SEARCH_URL,
141 |                 method: 'POST',
142 |                 data: { keyword: keyword },
143 |                 dataType: 'html', // the backend responds with HTML content
144 |                 success: function (response) {
145 |                     // Insert the returned markup into the result list.
146 |                     $results.html(response);
147 |                 },
148 |                 error: function (xhr, status, error) {
149 |                     console.log('搜索请求失败', status, error);
150 |                     // Surface the failure to the user instead of leaving an empty list.
151 |                     $results.html('<li>搜索失败，请稍后重试。</li>');
152 |                 }
153 |             });
154 |         }
155 | 
156 |         // Search button click
157 |         $('#searchButton').on('click', runSearch);
158 |     });
159 | </script>
160 | </body>
161 | 
162 | </html>


--------------------------------------------------------------------------------