├── .eslintrc.json
├── .gitignore
├── README.md
├── app
│   ├── api
│   │   ├── llm
│   │   │   ├── ragBotService.js
│   │   │   ├── ragChain.js
│   │   │   └── saveNoteToVectorStore.js
│   │   └── note
│   │       └── actions.js
│   ├── components
│   │   ├── EditButton.js
│   │   ├── Note.js
│   │   ├── NoteEditor.js
│   │   ├── NoteListSkeleton.js
│   │   ├── NotePreview.js
│   │   ├── Sidebar.js
│   │   ├── SidebarFilterNoteItems.js
│   │   ├── SidebarNoteItemContent.js
│   │   ├── SidebarNoteItemHeader.js
│   │   ├── SidebarNoteList.js
│   │   ├── SidebarSearchField.js
│   │   ├── Spinner.js
│   │   └── Uploader.js
│   ├── globals.css
│   ├── layout.js
│   ├── lib
│   │   ├── redis.js
│   │   └── utils.js
│   ├── note
│   │   ├── [id]
│   │   │   ├── loading.js
│   │   │   └── page.js
│   │   ├── chat
│   │   │   ├── page.js
│   │   │   └── styles.module.css
│   │   └── edit
│   │       ├── [id]
│   │       │   └── page.js
│   │       ├── loading.js
│   │       └── page.js
│   ├── page.js
│   └── style.css
├── asset
│   ├── demo.gif
│   └── explain.png
├── jsconfig.json
├── next.config.mjs
├── package-lock.json
├── package.json
├── postcss.config.mjs
├── public
│   ├── checkmark.svg
│   ├── chevron-down.svg
│   ├── chevron-up.svg
│   ├── cross.svg
│   ├── db
│   │   └── placeholder
│   │       ├── docstore.json
│   │       └── faiss.index
│   ├── favicon.ico
│   ├── gemma2Icon.png
│   ├── llmEntryIcon.png
│   ├── logo.svg
│   └── next.svg
└── tailwind.config.js

/.eslintrc.json:
--------------------------------------------------------------------------------
{
  "extends": "next/core-web-vitals"
}

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# env files (can opt-in for committing if needed)
.env*

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Introduction

Hi 👋🏻 ~ This project implements a simple note management system with [Next.js](https://nextjs.org): you can upload, edit, preview, and delete notes 🗒️.

On top of that, it uses [LangChain.js](https://js.langchain.com/docs/introduction/) to build a RAG (Retrieval-Augmented Generation) Q&A bot 🤖 that can chat over your existing note documents, such as internal company docs or requirement documents.

## Demo
### Feature overview
![image](./asset/demo.gif)

### Demo walkthrough
![image](./asset/explain.png)

When a question targets highly specialized content, or even internal company documents, that knowledge makes up only a tiny share of an LLM's training data, so the model is fairly likely to produce low-quality answers. This is the main motivation behind [RAG](https://js.langchain.com/v0.2/docs/tutorials/rag/). In the demo:
- Step 1: With the model allowed to decline questions it is not confident about, asking `什么是孔乙己?` ("Who is Kong Yiji?") gets no definite answer;
- Step 2: A note about `孔乙己` is created and uploaded;
- Step 3: With this extra knowledge base available, the model answers the same question with a correct, highly relevant, high-quality reply.

## Running locally
### Environment setup
- Install the project dependencies, e.g. run `npm i` in the project root.
- Start the local LLM. The project uses [Ollama](https://ollama.com/) to run Google's open-source model [gemma2](https://blog.google/technology/developers/google-gemma-2/). After installing `Ollama`, simply run `ollama run gemma2` from the command line.
  - Note: in testing, `gemma2` occupies roughly `9GB` of GPU memory while running. See [here](https://github.com/ollama/ollama) to switch to a lighter model, or use the OpenAI API instead.
- Start a local Redis instance.

### Start the project
Run `npm run dev` in the project root to start the project in local development mode. It will be served at `http://localhost:3000`.
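The setup notes above mention swapping `gemma2` for a lighter model or for the OpenAI API. As a minimal sketch (the model tag `qwen2:1.5b` is only an example and is not shipped with this repo), the pieces that would change are the `Ollama`/`OllamaEmbeddings` constructors used throughout `app/api/llm`; keep in mind that the FAISS stores under `public/db` have to be rebuilt whenever the embedding model changes:

```js
// Hypothetical sketch: pointing the existing LangChain wrappers at a lighter local model.
// "qwen2:1.5b" is an example tag; any model pulled with `ollama pull <tag>` can be used here.
import { Ollama, OllamaEmbeddings } from "@langchain/ollama"

const model = new Ollama({
  baseUrl: "http://localhost:11434", // default Ollama endpoint, as used elsewhere in the repo
  model: "qwen2:1.5b",
})

const embeddings = new OllamaEmbeddings({
  baseUrl: "http://localhost:11434",
  model: "qwen2:1.5b", // the saved vector stores must be re-created with the same embedding model
})
```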
--------------------------------------------------------------------------------
/app/api/llm/ragBotService.js:
--------------------------------------------------------------------------------
'use server'

import { getRagChain } from "@/api/llm/ragChain"
import { createStreamableValue } from "ai/rsc"
import redis from "@/lib/redis"

const ragChainMap = new Map() // per-session chain cache (one chain instance per session), a stand-in for a worker pool

export async function initRagChatBot(sessionId) {
  if (!ragChainMap.has(sessionId)) {
    const ragChain = await getRagChain({ sessionId })
    ragChainMap.set(sessionId, ragChain)
    redis.set(sessionId, "")
  }
}

export async function executeRagBotTool(sessionId, query) {
  const ragChain = ragChainMap.get(sessionId)
  if (!ragChain) throw new Error("RagBot is not initialized for this session.")

  const stream = createStreamableValue()

  const run = async () => {
    const output = await ragChain.stream({ question: query })
    for await (const chunk of output) {
      stream.update(chunk)
    }
    stream.done()
  }
  run()

  return { streamData: stream.value }
}

export async function removeRagBot(sessionId) {
  ragChainMap.delete(sessionId)
  redis.del(sessionId)
}
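The chat page that calls these actions (`app/note/chat/page.js`) is not included in this section, so the following is only a sketch — the component name, markup, and wiring are assumptions — of how a client component could consume the returned `streamData` with `readStreamableValue` from the same `ai/rsc` package:

```js
'use client'

// Hypothetical consumer of the streaming actions above; not the repo's actual chat page.
import { useState } from 'react'
import { readStreamableValue } from 'ai/rsc'
import { initRagChatBot, executeRagBotTool } from '@/api/llm/ragBotService'

export default function ChatSketch({ sessionId }) {
  const [answer, setAnswer] = useState('')

  const ask = async (question) => {
    await initRagChatBot(sessionId) // builds and caches the chain for this session
    const { streamData } = await executeRagBotTool(sessionId, question)
    let text = ''
    for await (const chunk of readStreamableValue(streamData)) {
      text += chunk // chunks arrive as the model streams its reply
      setAnswer(text)
    }
  }

  return <button onClick={() => ask('什么是孔乙己?')}>{answer || 'Ask'}</button>
}
```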
model: "gemma2", 60 | }), 61 | new StringOutputParser(), 62 | ]) 63 | return summaryChain 64 | } 65 | 66 | async function getRephraseChain() { 67 | const rephraseChainPrompt = ChatPromptTemplate.fromTemplate(` 68 | 你会收到一段历史对话总结和一个后续问题,你的任务是根据历史对话将后续问题转述成一个描述更加具体和清晰的新提问。 69 | 注意:如果没有收到对话历史或者你认为后续问题的描述已经足够清晰,直接使用后续问题。 70 | 71 | 例子 72 | 对话历史:human表示他叫小明,他今年18岁。AI热情地表示很高兴认识他,并询问有什么可以帮助他的? 73 | 后续问题:我今年多少岁了? 74 | 重述后的问题:小明今年多少岁? 75 | 例子结束 76 | 77 | 对话历史:{history_summary} 78 | 后续问题:{question} 79 | 重述后的问题: 80 | `) 81 | 82 | const rephraseChain = RunnableSequence.from([ 83 | rephraseChainPrompt, 84 | new Ollama({ 85 | baseUrl: "http://localhost:11434", 86 | model: "gemma2", 87 | temperature: 0.4, 88 | }), 89 | new StringOutputParser(), 90 | ]) 91 | 92 | return rephraseChain 93 | } 94 | 95 | export async function getRagChain({ sessionId = '' }) { 96 | // 读取向量数据并设置检索策略 97 | const vectorStore = await loadVectorStore() 98 | // const retriever = vectorStore.asRetriever(2) 99 | const retriever = new ContextualCompressionRetriever({ 100 | baseCompressor: LLMChainExtractor.fromLLM(new Ollama({ 101 | baseUrl: "http://localhost:11434", 102 | model: "gemma2", 103 | })), 104 | baseRetriever: ScoreThresholdRetriever.fromVectorStore(vectorStore, { 105 | minSimilarityScore: 0.15, // todo 106 | maxK: 5, 107 | kIncrement: 1 108 | }) 109 | }) 110 | 111 | const convertDocsToString = (documents) => { 112 | return documents.map(document => document.pageContent).join("\n") 113 | } 114 | const contextRetrieverChain = RunnableSequence.from([ 115 | (input) => input.new_question, 116 | retriever, 117 | convertDocsToString, 118 | ]) 119 | 120 | // 配置 RAG 链 121 | const SYSTEM_TEMPLATE = ` 122 | 作为一个专业的知识问答助手,你需要尽可能回答用户问题。在回答时可以参考以下信息,综合考虑后做出回答。 123 | 当然,如果你对当前提问感到疑惑,也可以回答“我不确定”,并直接给出自己的建议。 124 | 125 | 以下是与提问相关的文档内容: 126 | {context} 127 | 128 | 以下是聊天摘要: 129 | {history_summary} 130 | 131 | ` 132 | 133 | const prompt = ChatPromptTemplate.fromMessages([ 134 | ["system", SYSTEM_TEMPLATE], 135 | ["human", "现在,你需要参考以上信息,回答以下问题:\n{new_question}`"], 136 | ]) 137 | 138 | const history = new ChatMessageHistory() 139 | const summaryChain = await getSummaryChain() 140 | const rephraseChain = await getRephraseChain() 141 | const model = new Ollama({ 142 | baseUrl: "http://localhost:11434", 143 | model: "gemma2", 144 | // verbose: true 145 | }) 146 | 147 | const ragChain = RunnableSequence.from([ 148 | { 149 | input: new RunnablePassthrough({ 150 | func: async (input) => { 151 | history.addUserMessage(input.question) 152 | } 153 | }), 154 | question: (input) => input.question, 155 | history_summary: () => redis.get(sessionId), 156 | }, 157 | RunnablePassthrough.assign({ 158 | new_question: rephraseChain, 159 | }), 160 | RunnablePassthrough.assign({ 161 | context: contextRetrieverChain, 162 | }), 163 | prompt, 164 | model, 165 | new StringOutputParser(), 166 | new RunnablePassthrough({ 167 | func: async (input) => { 168 | history.addAIMessage(input) 169 | const messages = await history.getMessages() 170 | const new_lines = getBufferString(messages) 171 | const newSummary = await summaryChain.invoke({ 172 | summary: redis.get(sessionId), 173 | new_lines 174 | }) 175 | redis.set(sessionId, newSummary) 176 | history.clear() 177 | } 178 | }) 179 | ]) 180 | 181 | return ragChain 182 | } 183 | 184 | // current 185 | // let executeRagBotTool = async () => null 186 | 187 | // export async function initRagChatBot() { 188 | // const ragChain = await getRagChain() // todo: 单例模式 189 | 190 | // executeRagBotTool = async (query) => { 191 | // const 
--------------------------------------------------------------------------------
/app/api/llm/saveNoteToVectorStore.js:
--------------------------------------------------------------------------------
'use server'

import "dotenv/config"
import 'faiss-node'
import { Document } from 'langchain/document'
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
import { FaissStore } from "@langchain/community/vectorstores/faiss"
import { OllamaEmbeddings } from "@langchain/ollama"
import { join } from "path"
import dayjs from 'dayjs'

export const saveNoteToVectorStore = async (noteInfo) => {
  if (!noteInfo.content) return
  const docs = [
    new Document({
      pageContent: noteInfo.content,
      metadata: {
        title: noteInfo.title,
        uid: noteInfo.uid,
        updateTime: dayjs(noteInfo.updateTime).format('YYYY-MM-DD HH:mm:ss')
      }
    })
  ]

  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize: 100, // todo: adjust dynamically
    chunkOverlap: 10
  })

  const splitDocs = await splitter.splitDocuments(docs)

  const embeddings = new OllamaEmbeddings({
    model: "gemma2",
    baseUrl: "http://localhost:11434"
  })
  const vectorStore = await FaissStore.fromDocuments(splitDocs, embeddings)

  await vectorStore.save(join(process.cwd(), 'public', 'db', `${noteInfo.uid}`))
}
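The splitter above hard-codes `chunkSize: 100` and carries a TODO about adjusting it dynamically. One possible heuristic — purely an assumption, not something the repo implements — is to scale the chunk size with the note length and keep roughly 10% overlap:

```js
// Hypothetical helper: derive splitter options from the note length.
// The repo currently hard-codes chunkSize: 100 and chunkOverlap: 10.
function getSplitterOptions(content) {
  const chunkSize = Math.min(1000, Math.max(100, Math.floor(content.length / 20)))
  return { chunkSize, chunkOverlap: Math.floor(chunkSize / 10) }
}

// Possible usage inside saveNoteToVectorStore:
// const splitter = new RecursiveCharacterTextSplitter(getSplitterOptions(noteInfo.content))
```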
--------------------------------------------------------------------------------
/app/api/note/actions.js:
--------------------------------------------------------------------------------
'use server'

import { saveNoteToVectorStore } from "@/api/llm/saveNoteToVectorStore"
import { redirect } from 'next/navigation'
import { addNote, updateNote, delNote } from '@/lib/redis'
import { z } from "zod"
import { rm } from 'fs/promises'
import { join } from "path"

const schema = z.object({
  title: z.string().max(200, 'The title should be less than 200 characters.'),
  content: z.string()
})

export async function saveNote(formData) {
  const data = {
    title: formData.get('title'),
    content: formData.get('body'),
    updateTime: new Date()
  }
  // validate the data
  const validated = schema.safeParse(data)
  if (!validated.success) {
    return {
      success: false,
      message: validated.error.issues[0].message
    }
  }

  let noteId = formData.get('noteId')
  const sData = JSON.stringify(data)
  if (noteId) {
    await updateNote(noteId, sData)
  } else {
    noteId = await addNote(sData)
  }
  // save to the vector store
  await saveNoteToVectorStore({
    ...data,
    uid: noteId
  })
  redirect(`/note/${noteId}`)
}

export async function deleteNote(formData) {
  const noteId = formData.get('noteId')
  const flag = await delNote(noteId)
  if (flag) {
    try {
      // remove the note's records from the vector store
      await rm(join(process.cwd(), 'public', 'db', noteId), { recursive: true, force: true })
    } catch (error) {
      console.error(`Error deleting vector store with ID ${noteId}:`, error)
    }
    redirect('/')
  } else {
    console.error("Delete failed.")
  }
}

export async function importNote(file) {
  if (!file) return new Error("File is required.")

  try {
    const buffer = Buffer.from(await file.arrayBuffer())
    const filename = file.name.replace(/\.[^/.]+$/, "")
    // write to the database
    const data = {
      title: filename,
      content: buffer.toString('utf-8'),
      updateTime: new Date()
    }
    const res = await addNote(JSON.stringify(data))
    await saveNoteToVectorStore({
      ...data,
      uid: res
    })
    return { uid: res }
  } catch (e) {
    console.error(e)
    return new Error("Upload failed.")
  }
}
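`Uploader.js`, the component that drives `importNote`, is not part of this section. The sketch below (component name, accepted extensions, and navigation are assumptions) shows how a client component could hand the selected `File` to the action, matching the `file.arrayBuffer()` / `file.name` usage above:

```js
'use client'

// Hypothetical client-side uploader; not the repo's actual Uploader.js.
import { useRouter } from 'next/navigation'
import { importNote } from '@/api/note/actions'

export default function UploaderSketch() {
  const router = useRouter()

  const onChange = async (e) => {
    const file = e.target.files?.[0]
    if (!file) return
    const res = await importNote(file) // the action reads file.arrayBuffer() and file.name
    if (res?.uid) router.push(`/note/${res.uid}`)
  }

  return <input type="file" accept=".txt,.md" onChange={onChange} />
}
```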
--------------------------------------------------------------------------------
/app/components/EditButton.js:
--------------------------------------------------------------------------------
import Link from 'next/link'

export default function EditButton({ noteId, children }) {
  const isDraft = !noteId
  return (
    <Link href={`/note/edit/${noteId || ''}`} className="link--unstyled">
      <button
        className={['edit-button', isDraft ? 'edit-button--solid' : 'edit-button--outline'].join(' ')}
        role="menuitem"
      >
        {children}
      </button>
    </Link>
  )
}

--------------------------------------------------------------------------------
/app/components/Note.js:
--------------------------------------------------------------------------------
import dayjs from 'dayjs'
import NotePreview from '@/components/NotePreview'
import EditButton from '@/components/EditButton'
export default function Note({ noteId, note }) {
  const { title, content, updateTime } = note

  return (
    <div className="note">
      <div className="note-header">
        <h1 className="note-title">{title}</h1>
        <div className="note-menu" role="menubar">
          <small className="note-updated-at" role="status">
            Last updated on {dayjs(updateTime).format('YYYY-MM-DD hh:mm:ss')}
          </small>
          <EditButton noteId={noteId}>Edit</EditButton>
        </div>
      </div>
      <NotePreview>{content}</NotePreview>
    </div>
  )
}

--------------------------------------------------------------------------------
/app/components/NoteEditor.js:
--------------------------------------------------------------------------------
'use client'

import { useState } from 'react'
import { useFormStatus } from 'react-dom'
import { deleteNote, saveNote } from '@/api/note/actions'
import NotePreview from '@/components/NotePreview'

export default function NoteEditor({
  noteId,
  initialTitle,
  initialBody
}) {
  const { pending } = useFormStatus()
  const [title, setTitle] = useState(initialTitle)
  const [body, setBody] = useState(initialBody)

  const isEdit = !!noteId

  const save = async (formData) => {
    const res = await saveNote(formData)
    if (res && !res.success) alert(res.message)
  }

  return (
26 |
27 |
28 | 29 | 45 | {isEdit && 46 | ()} 65 |
66 | 69 | { 75 | setTitle(e.target.value) 76 | }} 77 | /> 78 | 81 |