├── .gitignore
├── img.png
├── img_1.png
├── img_2.png
├── md_template.md
├── api
    ├── __init__.py
    ├── arxiv_client.py
    └── aminer.py
├── front
    ├── __init__.py
    ├── kimi_file_manage.py
    └── st_chat.py
├── llm
    ├── __init__.py
    ├── agent.py
    └── model.py
├── requirements.txt
├── utils.py
├── README.md
├── prompt_template.py
├── pojo.py
├── flow.py
├── hf.py
├── main.py
└── LICENSE


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | data/
3 | output.md
4 | __pycache__
5 | .DS_Store


--------------------------------------------------------------------------------
/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/careywyr/paper-agent/HEAD/img.png


--------------------------------------------------------------------------------
/img_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/careywyr/paper-agent/HEAD/img_1.png


--------------------------------------------------------------------------------
/img_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/careywyr/paper-agent/HEAD/img_2.png


--------------------------------------------------------------------------------
/md_template.md:
--------------------------------------------------------------------------------
 1 | # {title}
 2 | 
 3 | ## Abstract
 4 | {abstract}
 5 | 
 6 | 
 7 | {title_abstract_cn}
 8 | 
 9 | ## FAQ
10 | 


--------------------------------------------------------------------------------
/api/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
@file    : __init__.py
4 | @date    : 2024-07-24
5 | @author  : leafw
6 | """
7 | 


--------------------------------------------------------------------------------
/front/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
@file    : __init__.py
4 | @date    : 2024-08-11
5 | @author  : leafw
6 | """
7 | 


--------------------------------------------------------------------------------
/llm/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
@file    : __init__.py
4 | @date    : 2024-07-11
5 | @author  : leafw
6 | """
7 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | beautifulsoup4==4.12.3
 2 | openai==1.35.13
 3 | pandas==2.2.2
 4 | requests==2.32.3
 5 | streamlit==1.36.0
 6 | streamlit_option_menu==0.3.13
 7 | 
 8 | bs4~=0.0.2
 9 | arxiv~=2.1.3
10 | pymupdf~=1.24.8


--------------------------------------------------------------------------------
/llm/agent.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : agent.py
 4 | @date    : 2024-07-22
 5 | @author  : leafw
 6 | """
 7 | from llm.model import LLM
 8 | import prompt_template
 9 | import utils
10 | 
11 | 
class Agent:
    """Base type for LLM-backed agents.

    Holds the language model and a short description; subclasses override
    ``run`` with their own task.
    """

    def __init__(self, llm: LLM, desc: str = ""):
        self.desc = desc
        self.llm = llm

    def run(self, **kwargs):
        """Do nothing and return ``None``; subclasses supply the real work."""
        return None
19 | 
20 | 
class TranslaterAgent(Agent):
    """Agent that translates text using the ``en_zh`` prompt."""

    def __init__(self, llm: LLM):
        super().__init__(llm, "翻译智能体")

    def run(self, text):
        """Send ``text`` to the model and return the extracted "意译" section.

        The raw reply is passed through ``utils.extract_yy_text``.
        """
        raw_reply = self.llm.chat(text, prompt_template.en_zh)
        translated = utils.extract_yy_text(raw_reply)
        return translated
28 | 
29 | 
class PaperAnswerAgent(Agent):
    """Agent that answers a question about a paper's text."""

    def __init__(self, llm: LLM):
        super().__init__(llm, "Paper 问答")

    def run(self, question, file_content):
        """Return whatever ``llm.chat_pdf`` produces for the question and text."""
        answer = self.llm.chat_pdf(question, file_content)
        return answer
36 | 


--------------------------------------------------------------------------------
/api/arxiv_client.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : arxiv_client.py
 4 | @date    : 2024-07-24
 5 | @author  : leafw
 6 | """
 7 | import arxiv
 8 | from urllib.parse import urlparse
 9 | from pojo import ArxivData
10 | 
11 | 
# Module-level arXiv API client built with default settings; search_by_id
# below issues its queries through it.
client = arxiv.Client()
14 | 
15 | 
def search_by_id(arxiv_id: str) -> ArxivData | None:
    """Look up one arXiv paper by ID through the module-level client.

    Returns an ``ArxivData`` built from the first search result (empty file
    path, the given ID, the result's title and summary). Returns ``None``
    when no result exists or any other error occurs; both cases print a
    message instead of raising.
    """
    query = arxiv.Search(id_list=[arxiv_id])
    paper = None
    try:
        # Only the first result is of interest.
        first_hit = next(client.results(query))
        paper = ArxivData('', arxiv_id, first_hit.title, first_hit.summary)
    except StopIteration:
        # The result iterator was empty.
        print(f"No paper found with ID {arxiv_id}")
    except Exception as e:
        # Any other failure is reported and swallowed.
        print(f"An error occurred: {e}")
    return paper
31 | 
32 | 
def search_by_url(url: str) -> ArxivData | None:
    """Look up a paper from an arXiv page URL.

    The ID is taken from the last path segment of ``url``. A trailing slash
    is ignored and a ``.pdf`` suffix (as in ``/pdf/<id>.pdf`` links) is
    removed, so abstract and PDF links resolve to the same ID.

    Returns the result of ``search_by_id``, or ``None`` (after printing a
    message) when the URL contains no usable ID.
    """
    path = urlparse(url).path.rstrip('/')
    # Last path segment, without the optional PDF extension.
    arxiv_id = path.split('/')[-1].removesuffix('.pdf')
    if not arxiv_id:
        print(f"No arXiv ID found in URL {url}")
        return None
    return search_by_id(arxiv_id)
38 | 
39 | 


--------------------------------------------------------------------------------
/front/kimi_file_manage.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : kimi_file_manage.py
 4 | @date    : 2024-08-11
 5 | @author  : leafw
 6 | """
 7 | import streamlit as st
 8 | import pandas as pd
 9 | from llm.model import KimiLlm
10 | 
11 | 
# Settings page: Kimi file management
def settings(current_llm: KimiLlm):
    """Render the list of files returned by ``current_llm`` with delete buttons.

    Each row shows the file ID, the file name and a "删除" button. Clicking
    the button removes that file through ``current_llm.remove_file`` and
    reruns the Streamlit script.
    """
    st.markdown("<h1 style='text-align: center; font-size: 32px;'>Kimi文件管理(存在Kimi才可使用)</h1>",
                unsafe_allow_html=True)

    file_table = create_files_dataframe(current_llm.list_files())

    # One three-column row per file.
    for _, entry in file_table.iterrows():
        id_col, name_col, action_col = st.columns([3, 7, 2])
        id_col.write(entry["ID"])
        name_col.write(entry["FileName"])
        delete_slot = action_col.empty()
        delete_clicked = delete_slot.button("删除", key=entry["ID"])
        if delete_clicked:
            current_llm.remove_file(entry["ID"])
            st.rerun()
29 | 
30 | 
# Build the DataFrame that backs the file list
def create_files_dataframe(files):
    """Return a DataFrame with ``ID`` and ``FileName`` columns.

    Each item of ``files`` must expose ``id`` and ``filename`` attributes.
    """
    columns = {}
    columns["ID"] = [entry.id for entry in files]
    columns["FileName"] = [entry.filename for entry in files]
    return pd.DataFrame(columns)
39 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : utils.py
 4 | @date    : 2024-07-13
 5 | @author  : leafw
 6 | """
 7 | import os
 8 | import pymupdf
 9 | import re
10 | from pojo import load_Arxiv_data, ArxivData
11 | 
# Default root directory for per-paper data; used as the default root_dir
# of arxiv_dir_path below.
data_dir = './data'
13 | 
14 | 
def ensure_directory_exists(directory_path: str):
    """Create ``directory_path`` (with parents) unless it already exists.

    Prints a message saying whether the directory was created or was
    already there.
    """
    if os.path.exists(directory_path):
        print(f"目录 {directory_path} 已存在")
        return
    os.makedirs(directory_path)
    print(f"目录 {directory_path} 已创建")
21 | 
22 | 
def extract_yy_text(text):
    """Extract the body of the "### 意译" section from a model reply.

    The section runs from the "### 意译" heading to the next "###" or the
    end of the text. An optional ``` fence around the body is removed,
    including a closing fence that is followed by trailing whitespace or a
    newline.

    Returns the stripped body, or "未找到意译部分" when the heading is absent.
    """
    # The closing fence and any whitespace around it sit outside the captured
    # group, so a reply ending in "```\n" no longer leaks the fence.
    pattern = r'### 意译\s*(?:```)?(.+?)\s*(?:```)?\s*(?=###|\Z)'
    match = re.search(pattern, text, re.DOTALL)
    if match is None:
        return "未找到意译部分"
    return match.group(1).strip()
34 | 
35 | 
def read_pdf(file_path: str) -> str:
    """Return the text of every page of the PDF, joined by newlines.

    The document is opened in a ``with`` block so it is closed even when
    text extraction raises.
    """
    with pymupdf.open(file_path) as doc:
        return "\n".join(page.get_text() for page in doc)
46 | 
47 | 
def arxiv_dir_path(arxiv_id: str, root_dir: str = data_dir) -> str:
    """Return ``root_dir`` and ``arxiv_id`` joined by the OS path separator."""
    return os.sep.join((root_dir, arxiv_id))
50 | 
51 | 
def get_data_from_arxiv_id(arxiv_id: str) -> ArxivData:
    """Load the stored record for ``arxiv_id``.

    The JSON file is ``<arxiv_id>.json`` inside the paper's directory as
    given by ``arxiv_dir_path``; the result is whatever ``load_Arxiv_data``
    returns for that path.
    """
    paper_dir = arxiv_dir_path(arxiv_id)
    json_path = paper_dir + os.sep + arxiv_id + '.json'
    return load_Arxiv_data(json_path)


--------------------------------------------------------------------------------
/api/aminer.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : aminer.py
 4 | @date    : 2024-07-24
 5 | @author  : leafw
 6 | 本项目未使用，但是个很有用的网站
 7 | """
 8 | 
 9 | import os
10 | import requests
11 | 
# API key read from the AMINER_KEY environment variable (None when unset).
aminer_key = os.environ.get('AMINER_KEY')

# Request headers sent with every AMiner call in this module
headers = {
    'Authorization': aminer_key
}
18 | 
19 | 
def search(title):
    """Find a paper by title and return its title and abstract.

    Takes the first hit of ``simple_search`` and loads its details with
    ``search_by_id``.

    Returns a dict with ``title`` and ``abstract`` keys, or ``None`` (after
    printing a message) when either the search or the detail lookup yields
    nothing.
    """
    simple_result = simple_search(title)
    if not simple_result:
        print(f'can not find {title}')
        return None
    result = search_by_id(simple_result['id'])
    if not result:
        # The detail request failed; search_by_id has already printed why.
        print(f'can not load details for {title}')
        return None
    return {
        "title": result['title'],
        "abstract": result['abstract']
    }
31 | 
32 | 
def simple_search(title):
    """Return the first AMiner search hit for ``title``, or ``None``.

    The title is passed through ``params`` so that spaces, ``&``, ``#`` and
    non-ASCII characters are URL-encoded rather than corrupting the query
    string. A non-200 response prints the status code and returns ``None``;
    so does a response whose ``data`` field is missing, null or empty.
    """
    url = 'https://datacenter.aminer.cn/gateway/open_platform/api/v3/paper/list/by/publish'
    params = {'page': 1, 'size': 10, 'title': title}
    response = requests.get(url, headers=headers, params=params)

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        return None

    # "data" may be absent or null when nothing matches.
    papers = response.json().get('data') or []
    return papers[0] if papers else None
49 | 
50 | 
def search_by_id(aminer_paper_id: str):
    """Fetch the detail record of one AMiner paper.

    Returns the ``data`` field of the JSON response on HTTP 200. On any
    other status, prints the status code and the response body and
    returns ``None``.
    """
    url = f'https://datacenter.aminer.cn/gateway/open_platform/api/v3/paper/platform/details/not/contain/wos/by/id?id={aminer_paper_id}'
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        # Request failed: report what the server said.
        print(f"Request failed with status code {response.status_code}")
        print(response.text)
        return None

    payload = response.json()
    return payload['data']
66 | 
67 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Paper-Agent
 2 | 
 3 | 一款可以帮助经常阅读论文的同学提升效率的小工具，有两个部分：
 4 | - HuggingFace Weekly Paper 制作工具
 5 | - Arxiv Helper
 6 | 
 7 | ## 前置条件
 8 | 翻译基于 `deepseek` 的服务， 论文十问依赖于 `kimi`， 因此需要这两个的 api key。可以配置到环境变量中，代码中使用的环境变量 key 分别是
 9 | - DEEPSEEK_KEY
10 | - KIMI_KEY
11 | 
12 | 如果不想同时用两家，翻译可以考虑也换成 kimi，需要手动修改代码，将 deepseek 里面的设置换成 kimi的。
13 | 
14 | ## 1. HuggingFace Weekly Paper 制作工具
15 | 我每周博客和公众号上都会发一篇 weekly paper，文章来源于 HuggingFace 的 Daily Paper。 为了减少每次都要一个个点进去通过N次复制粘贴来得到翻译后的结果的痛苦，写了个脚本，可以直接读取本周的点赞超过n次的论文，并生成Weekly Paper。
16 | 
代码在 `hf.py` 文件中，运行 `weekly_paper` 方法后耐心等待即可。如果出现了翻译上的问题或者接口异常，可以从当前目录下的 `output.md` 文件里拿到英文原文，再人工处理。
18 | 
19 | 此脚本依赖的模型是 `deepseek`。 翻译 prompt 来自于微博上宝玉老师的分享。
20 | 
21 | ## 2. Arxiv Helper (0.0.2版本做了大幅度的调整，不再默认使用kimi)
22 | 
23 | 这个使用 `streamlit` 做了前端，输入框里面输入 arxiv 论文的首页地址，回车即可得到它的标题和摘要。
24 | 
25 | 0.0.1版本用的是网页爬虫，0.0.2版本用的arxiv api，速度似乎比爬虫慢一点，但应该更稳定些。
26 | 
27 | 左侧可以进行翻译，右侧是[论文十问](http://leafw.cn/2023/12/25/%e5%88%86%e4%ba%ab%e4%b8%a4%e4%b8%aa%e9%98%85%e8%af%bb%e8%ae%ba%e6%96%87%e7%9a%84%e6%96%b9%e6%b3%95/)的模板，使用初始化的模型的接口进行问答。
28 | 
29 | 论文十问以及系统prompt的配置均在`prompt_template.py` 中，需要的可以自行修改。
30 | 
31 | ![img_2.png](img_2.png)
32 | 
33 | 新增加了生成所有和导出MD的按钮，方便一次性处理所有问题。
34 | 
35 | 使用了pymupdf抽取了pdf内的内容，不再依赖于kimi的文件管理，主要是kimi的api相对贵了点。
36 | 
37 | ![img_1.png](img_1.png)
38 | 
39 | ### 支持deepseek（可自己修改其他模型）、kimi以及本地使用ollama部署的模型
40 | 
41 | ``` python
42 | current_llm = KimiLlm() if use_kimi else OllamaLlm('qwen') if use_ollama else OpenAiLlm('deepseek')
43 | ```
44 | 
- 只要是兼容 OpenAI 接口的模型，均可以使用 `OpenAiLlm` 的实现，在 `llm/model.py` 的 `llm_config` 里面配置对应的 `model_name`、`api_key` 以及 `base_url` 即可。
46 | - Kimi 因为有自己的文件管理接口，因此使用了单独的实现类,也只有使用kimi的情况下才可以使用设置页面
47 | - Ollama的base_url就是ollama服务的地址
48 | 
49 | 使用方式：
50 | 
51 | ``` shell
52 | streamlit run main.py
53 | ```
54 | 
55 | ## 3. 使用flow.py 一键生成功能2里面需要的Markdown文档。
56 | flow.py 就是将2里面的所有功能顺序的放到了一起，修改里面的url即可等待生成需要的论文标题摘要以及对应的QA。
57 | 
58 | 由于Streamlit的限制感觉还是太多了，想要做个更成熟的web产品还是得用正经的前端，包括后端可能也要用自己的主业Java，后续会开发一套成熟一点的Java编写的Web平台。
59 | 
60 | 
61 | ## 0811 update
新增聊天功能：主页面生成的问答可以直接带到聊天页面，切换菜单时会自动带上。注意暂时只支持 DeepSeek。


--------------------------------------------------------------------------------
/prompt_template.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : prompt_template.py
 4 | @date    : 2024-07-10
 5 | @author  : leafw
 6 | """
 7 | 
# System prompt for English -> Simplified Chinese translation. It asks the
# model to answer in three sections (直译 / 问题 / 意译). The text ends with an
# opening ``` fence and nothing after it.
en_zh = """
你是一位精通简体中文的专业翻译，尤其擅长将英文的专业学术论文或文章翻译成面向专业技术人员的中文技术文章。请你帮我将以下英文段落翻译成中文，风格与中文理工技术书籍读物相似。

规则：
- 翻译时要准确传达原文的事实和背景。
- 即使上意译也要保留原始段落格式，以及保留术语，例如 FLAC，JPEG 等。保留公司缩写，例如 Microsoft, Amazon, OpenAI 等。
- 人名不翻译
- 同时要保留引用的论文，例如 [20] 这样的引用。
- 对于 Figure 和 Table，翻译的同时保留原有格式，例如：“Figure 1: ”翻译为“图 1: ”，“Table 1: ”翻译为：“表 1: ”。
- 全角括号换成半角括号，并在左括号前面加半角空格，右括号后面加半角空格。
- 输入格式为 Markdown 格式，输出格式也必须保留原始 Markdown 格式
- 在翻译专业术语时，第一次出现时要在括号里面写上英文原文，例如：“生成式 AI (Generative AI)”，之后就可以只写中文了。
- 注意你翻译内容的受众是专业技术人员，因此不需要对专业术语做口语化的解释。
- 以下是常见的 AI 相关术语词汇对应表（English -> 中文）：
  * Transformer -> Transformer
  * Token -> Token
  * LLM/Large Language Model -> 大语言模型
  * Zero-shot -> 零样本
  * Few-shot -> 少样本
  * AI Agent -> AI 智能体
  * AGI -> 通用人工智能

策略：

分三步进行翻译工作，并打印每步的结果：
1. 根据英文内容直译，保持原有格式，不要遗漏任何信息
2. 根据第一步直译的结果，指出其中存在的具体问题，要准确描述，不宜笼统的表示，也不需要增加原文不存在的内容或格式，包括不仅限于：
  - 不符合中文表达习惯，明确指出不符合的地方
  - 语句不通顺，指出位置，不需要给出修改意见，意译时修复
3. 根据第一步直译的结果和第二步指出的问题，重新进行意译，保证内容的原意的基础上，使其更易于理解，更符合中文的表达习惯，同时保持原有的格式不变

返回格式如下，"{xxx}"表示占位符：

### 直译
{直译结果}

***

### 问题
{直译的具体问题列表}

***

### 意译
```
{意译结果}
```

现在请按照上面的要求从第一行开始翻译以下内容为简体中文：
```
"""
59 | 
# The ten standard questions asked about a paper, in order.
paper_questions = [
    '论文试图解决什么问题？',
    '这是否是一个新的问题？',
    '这篇文章要验证一个什么科学假设？',
    '有哪些相关研究？如何归类？谁是这一课题在领域内值得关注的研究员？',
    '论文中提到的解决方案之关键是什么？',
    '论文中的实验是如何设计的？',
    '用于定量评估的数据集是什么？代码有没有开源？',
    '论文中的实验及结果有没有很好地支持需要验证的科学假设？',
    '这篇论文到底有什么贡献？',
    '研究的下一步呢？有什么工作可以继续深入？'
]

# System prompt that sets the model up as an experienced researcher who
# answers questions from the paper's content.
paper_system = f"""
你是一名资深科研工作者，擅长阅读各种中英文文献，能准确地根据文献内容回答用户的问题。
"""
76 | 
77 | 
def build_paper(content: str, question: str) -> str:
    """Build one user prompt holding the full paper text and a question.

    The layout (leading newline, four-space indentation on every line,
    trailing indentation) is spelled out piece by piece so the whitespace
    is explicit.
    """
    pieces = (
        "\n",
        "    以下是论文的全文:\n",
        f"    {content}\n",
        "    \n",
        "    请你回答我的问题:\n",
        f"    {question}\n",
        "    ",
    )
    return "".join(pieces)
86 | 


--------------------------------------------------------------------------------
/pojo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : pojo.py
 4 | @date    : 2024-07-13
 5 | @author  : leafw
 6 | """
 7 | import json
 8 | 
 9 | 
class ArxivData:
    """One arXiv paper together with everything generated for it.

    Attributes:
        file_path: Path of the downloaded PDF; also decides where the JSON
            record is written (see ``save_to_json``).
        arxiv_id: arXiv identifier of the paper.
        title: Paper title.
        abstract: Paper abstract.
        file_id: Optional remote file ID; empty by default.
        title_abstract_cn: Translated title and abstract.
        content: Extracted text of the paper.
        faq: Question -> answer mapping; an empty dict when not supplied.
        chat_history: Stored chat history; an empty dict when not supplied.
    """

    def __init__(self, file_path: str, arxiv_id: str, title: str, abstract: str, file_id: str = '',
                 title_abstract_cn: str = '', content: str = '', faq=None, chat_history=None):
        self.file_path = file_path
        self.arxiv_id = arxiv_id
        self.title = title
        self.abstract = abstract
        self.file_id = file_id
        self.title_abstract_cn = title_abstract_cn
        self.content = content
        # A fresh container per instance avoids sharing mutable defaults.
        self.faq = faq if faq is not None else {}
        self.chat_history = chat_history if chat_history is not None else {}

    def to_dict(self):
        """Return all fields as a plain dict for JSON serialisation."""
        return {
            'file_path': self.file_path,
            'arxiv_id': self.arxiv_id,
            'file_id': self.file_id,
            'title': self.title,
            'abstract': self.abstract,
            'title_abstract_cn': self.title_abstract_cn,
            'content': self.content,
            'faq': self.faq,
            'chat_history': self.chat_history
        }

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance from a dict, filling missing keys with defaults."""
        return cls(
            file_path=data.get('file_path', ''),
            arxiv_id=data.get('arxiv_id', ''),
            title=data.get('title', ''),
            abstract=data.get('abstract', ''),
            file_id=data.get('file_id', ''),
            title_abstract_cn=data.get('title_abstract_cn', ''),
            content=data.get('content', ''),
            faq=data.get('faq', {}),
            chat_history=data.get('chat_history', {})
        )

    def save_to_json(self):
        """Write the record as UTF-8 JSON next to the PDF.

        For a path ending in ``.pdf`` (any letter case) only the extension
        is swapped for ``.json``, so a directory or file name that happens
        to contain "pdf" is left alone. Any other path keeps the previous
        substring-replacement behaviour.
        """
        import os  # local import: this module otherwise only imports json

        root, ext = os.path.splitext(self.file_path)
        if ext.lower() == '.pdf':
            json_path = root + '.json'
        else:
            # Legacy fallback for paths without a ".pdf" extension.
            json_path = self.file_path.replace('pdf', 'json')

        with open(json_path, 'w', encoding='utf-8') as json_file:
            json.dump(self.to_dict(), json_file, ensure_ascii=False, indent=4)

        print(f"数据已写入 {json_path}")
59 | 
60 | 
def load_Arxiv_data(json_file_path: str) -> ArxivData | None:
    """Read an ``ArxivData`` record from a UTF-8 JSON file.

    Returns ``None`` and prints a message when the file does not exist.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as handle:
            raw = json.load(handle)
    except FileNotFoundError:
        print(f"文件 {json_file_path} 未找到.")
        return None
    return ArxivData.from_dict(raw)
69 | 


--------------------------------------------------------------------------------
/flow.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : flow.py
 4 | @date    : 2024-07-28
 5 | @author  : leafw
 6 | """
 7 | from urllib.parse import urlparse
 8 | from api import arxiv_client
 9 | import utils
10 | import os
11 | import requests
12 | from llm.agent import TranslaterAgent
13 | from llm.model import OpenAiLlm
14 | from prompt_template import paper_questions
15 | 
# Abstract-page URL of the paper to process; edit this to choose the paper.
paper_url = 'https://arxiv.org/abs/2407.18248'

# Model used for both translation and question answering.
current_llm = OpenAiLlm('deepseek')
trans_agent = TranslaterAgent(current_llm)
# Markdown template with {title}, {abstract} and {title_abstract_cn} fields.
md_template_path = 'md_template.md'
21 | 
22 | 
def run(url: str):
    """Download one arXiv paper and write ``<arxiv_id>.md`` with a Q&A digest.

    Steps: download the PDF into the paper's data directory, fetch title and
    abstract, translate them, extract the PDF text, answer every entry of
    ``paper_questions``, save the JSON record and write the Markdown file to
    the current directory.

    Returns ``None``. The function stops early, after printing a message,
    when the download does not return HTTP 200 or the metadata lookup
    returns nothing.
    """
    # Ignore a trailing slash so the last path segment is always the ID.
    arxiv_id = urlparse(url).path.rstrip('/').split('/')[-1]
    with open(md_template_path, 'r', encoding='utf-8') as f:
        template = f.read()

    # Download first. Only the "/abs/" path segment is swapped, so an "abs"
    # elsewhere in the URL is left untouched.
    url = url.replace("/abs/", "/pdf/", 1)
    print(f'开始下载: {url}')
    response = requests.get(url)
    directory_path = utils.arxiv_dir_path(arxiv_id)
    utils.ensure_directory_exists(directory_path)

    file_path = directory_path + os.sep + arxiv_id + '.pdf'

    if response.status_code != 200:
        print(f"文件下载失败，状态码: {response.status_code}")
        return
    with open(file_path, 'wb') as file:
        file.write(response.content)
    print(f"文件下载成功: {file_path}")

    # Title and abstract; search_by_id returns None on any failure.
    arxiv_data = arxiv_client.search_by_id(arxiv_id)
    if arxiv_data is None:
        print(f'标题和摘要获取失败: {arxiv_id}')
        return
    arxiv_data.file_path = file_path
    arxiv_data.save_to_json()
    print(f'标题和摘要获取成功: {arxiv_data.title}')
    print('开始翻译')

    # Translate title and abstract together.
    content = f'## {arxiv_data.title}\n{arxiv_data.abstract}'
    translated = trans_agent.run(content)
    arxiv_data.title_abstract_cn = translated
    print('翻译结束')

    # Everything except the Q&A section comes from the template.
    sections = [
        template.format(title=arxiv_data.title, abstract=arxiv_data.abstract, title_abstract_cn=translated)
    ]

    # Answer the questions against the extracted PDF text.
    arxiv_data.content = utils.read_pdf(arxiv_data.file_path)

    for question in paper_questions:
        print(f'回答问题: {question}')
        # chat_pdf returns a single string, so it must not be tuple-unpacked.
        answer = current_llm.chat_pdf(question, arxiv_data.content)
        arxiv_data.faq[question] = answer
        sections.append('### ' + question + '\n' + answer + '\n\n')

    arxiv_data.save_to_json()

    print('问题回答结束!')

    with open(arxiv_id + '.md', 'w', encoding='utf-8') as f:
        f.write(''.join(sections))

    print('=============== ending! =============== ')
81 | 
82 | 
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not start a download and a series of LLM calls.
if __name__ == "__main__":
    run(paper_url)
84 | 
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/front/st_chat.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @file    : st_chat.py
 4 | @date    : 2024-08-11
 5 | @author  : leafw
 6 | """
 7 | import streamlit as st
 8 | from llm.model import OpenAiLlm
 9 | from utils import get_data_from_arxiv_id
10 | from prompt_template import paper_system
11 | 
12 | 
# Chat backend for this page: an OpenAiLlm built from the 'deepseek' config.
current_llm = OpenAiLlm('deepseek')
15 | 
16 | 
def chatting(arxiv_id):
    """Render the chat page for one paper.

    With an empty ``arxiv_id`` only the title is shown. Otherwise the stored
    record is loaded and the history is seeded from it: the saved chat
    history if there is one, else the two system messages (system prompt and
    paper text) followed by the stored FAQ as user/assistant turns.

    Each user message and each model reply is appended to the history. The
    history is written back to the paper's JSON record only when a record
    was loaded; without one the chat still works but is not saved, where it
    previously raised ``AttributeError``.
    """
    st.markdown("<h1 style='text-align: center; font-size: 32px;'>Chat with LLM</h1>",
                unsafe_allow_html=True)
    if 'history' not in st.session_state:
        st.session_state.history = []
    if arxiv_id == '':
        return

    arxiv_data = get_data_from_arxiv_id(arxiv_id)
    if not arxiv_data:
        st.session_state.history = []
    else:
        if len(arxiv_data.chat_history) > 0:
            st.session_state.history = arxiv_data.chat_history
        else:
            # No earlier conversation: start from the system messages.
            st.session_state.history = [
                {
                    "role": "system",
                    "content": paper_system
                },
                {
                    "role": "system",
                    "content": arxiv_data.content,
                }
            ]

        # A history of at most two entries holds only the system messages,
        # so the FAQ has not been added yet; longer histories already have it.
        if arxiv_data.faq and len(st.session_state.history) <= 2:
            for q, a in arxiv_data.faq.items():
                st.session_state.history.append({'role': 'user', 'content': q})
                st.session_state.history.append({'role': 'assistant', 'content': a})

    def persist_history():
        # Nothing to write to when no record could be loaded for this paper.
        if arxiv_data:
            arxiv_data.chat_history = st.session_state.history
            arxiv_data.save_to_json()

    # Replay the conversation; system messages are never shown.
    for message in st.session_state.history:
        if message['role'] == 'system':
            continue
        speaker = "user" if message['role'] == 'user' else "assistant"
        with st.chat_message(speaker):
            st.markdown(message['content'])

    user_input = st.chat_input(placeholder="", key="input_box")

    if user_input:
        # Record the user's turn before calling the model.
        st.session_state.history.append({"role": "user", "content": user_input})
        persist_history()

        with st.chat_message("user"):
            st.markdown(user_input)

        ai_reply = current_llm.chat(user_input, history=st.session_state.history, stream=True)
        # Stream the reply to the page and keep the full text.
        with st.chat_message("assistant"):
            complete_response = st.write_stream(ai_reply)

        st.session_state.history.append({"role": "assistant", "content": complete_response})
        persist_history()
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/hf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @file    : hf.py
  4 | @date    : 2024-07-11
  5 | @author  : leafw
  6 | """
  7 | 
  8 | import requests
  9 | from bs4 import BeautifulSoup
 10 | from datetime import datetime, timedelta
 11 | from llm.model import OllamaLlm, OpenAiLlm
 12 | from llm.agent import TranslaterAgent
 13 | 
# Root of the Hugging Face site; prepended to paper-list paths and to the
# relative article links found on those pages.
base_url = "https://huggingface.co"
# Alternative backend, currently disabled:
# deepseek = OpenAiLlm("deepseek")
deepseek = OllamaLlm("deepseek-r1")
# Translator used by weekly_paper.
trans_agent = TranslaterAgent(deepseek)
 18 | 
 19 | 
 20 | class Article:
 21 |     def __init__(self, title, arxiv_link, abstract):
 22 |         self.title = title
 23 |         self.arxiv_link = arxiv_link
 24 |         self.abstract = abstract
 25 | 
 26 | 
 27 | def en_content(article: Article):
 28 |     return f"""
 29 | ## {article.title}
 30 | [{article.title}]({article.arxiv_link})
 31 | 
 32 | {article.abstract}
 33 | """
 34 | 
 35 | 
 36 | def home_parse(url):
 37 |     """
 38 |     获取文章列表
 39 |     :return:
 40 |     """
 41 |     response = requests.get(url)
 42 |     html_content = response.text
 43 | 
 44 |     # 解析HTML内容
 45 |     soup = BeautifulSoup(html_content, "html.parser")
 46 | 
 47 |     articles = soup.find_all("article")
 48 | 
 49 |     article_list = []
 50 |     for article in articles:
 51 |         title = article.find("h3").get_text(strip=True)
 52 |         link = article.find("a")["href"]
 53 |         leading_nones = article.find_all("div", class_="leading-none")
 54 |         likes_div = None
 55 |         for item in leading_nones:
 56 |             if item.get("class") == ["leading-none"]:
 57 |                 likes_div = item
 58 |                 break
 59 |         likes = int(likes_div.get_text(strip=True))
 60 |         if likes < 25:
 61 |             break
 62 |         print(f"Title: {title}")
 63 |         print(f"Link: {link}")
 64 |         print(f"Likes: {likes}")
 65 |         print("------")
 66 |         one = {"title": title, "link": base_url + link, "likes": likes}
 67 |         article_list.append(one)
 68 |     return article_list
 69 | 
 70 | 
 71 | def parse_article(url, title):
 72 |     response = requests.get(url)
 73 |     html_content = response.text
 74 |     soup = BeautifulSoup(html_content, "html.parser")
 75 | 
 76 |     article_content = soup.find("p", class_="text-gray-700 dark:text-gray-400")
 77 |     content = article_content.get_text(strip=True)
 78 |     arxiv_link = soup.find("a", class_="btn inline-flex h-9 items-center")["href"]
 79 | 
 80 |     return Article(title, arxiv_link, content)
 81 | 
 82 | 
 83 | def weekly_get():
 84 |     # 获取当前日期
 85 |     today = datetime.today()
 86 | 
 87 |     # 计算当前周的周一日期
 88 |     start_of_week = today - timedelta(days=today.weekday())
 89 | 
 90 |     # 创建一个包含周一到周五日期的列表
 91 |     weekdays = [start_of_week + timedelta(days=i) for i in range(5)]
 92 |     return [day.strftime("%Y-%m-%d") for day in weekdays]
 93 | 
 94 | 
 95 | def weekly_paper(output_path=""):
 96 |     days = weekly_get()
 97 |     if output_path == "":
 98 |         output_path = days[0].replace("-", "") + "-" + days[-1].replace("-", "") + ".md"
 99 |     # 这一份是防止翻译不太好或者其他问题先留存下
100 |     en_articles_content = []
101 |     with open("output.md", "w") as en:
102 |         for day in days:
103 |             print(f"开始处理日期: {day}")
104 |             url = base_url + "/papers?date=" + day
105 |             article_list = home_parse(url)
106 |             print(f"{day} 主页解析完毕")
107 |             for item in article_list:
108 |                 print(f"解析文章{item['title']}开始")
109 |                 article = parse_article(item["link"], item["title"])
110 |                 content = en_content(article)
111 |                 en_articles_content.append(content)
112 |                 en.write(content)
113 |                 print(f"解析文章{item['title']}完毕")
114 |             print(f"日期 {day} 处理结束")
115 |     print("英文输出完毕")
116 |     # 我只要这个
117 |     with open(output_path, "w") as f:
118 |         for en_article in en_articles_content:
119 |             zh = trans_agent.run(en_article)
120 |             f.write(zh + "\n\n")
121 | 
122 | 
# Build the digest only when this file is executed as a script, so that
# importing the module does not start scraping and translating.
if __name__ == "__main__":
    weekly_paper()
124 | 


--------------------------------------------------------------------------------
/llm/model.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @file    : model.py
  4 | @date    : 2024-07-22
  5 | @author  : leafw
  6 | """
  7 | 
  8 | from abc import ABC, abstractmethod
  9 | 
 10 | from openai import OpenAI, Stream
 11 | from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 12 | import requests
 13 | import prompt_template
 14 | import os
 15 | from pathlib import Path
 16 | 
# Backend settings keyed by the name passed to the LLM constructors.
# Each entry supplies the model name, the API key and the base URL.
llm_config = {
    "deepseek": {
        "model_name": "deepseek-chat",
        # Read from the DEEPSEEK_KEY environment variable (None when unset).
        "api_key": os.environ.get("DEEPSEEK_KEY"),
        "base_url": "https://api.deepseek.com",
    },
    "kimi": {
        "model_name": "moonshot-v1-128k",
        # Read from the KIMI_KEY environment variable (None when unset).
        "api_key": os.environ.get("KIMI_KEY"),
        "base_url": "https://api.moonshot.cn/v1",
    },
    "deepseek-r1": {
        "model_name": "deepseek-r1:32b",
        "api_key": "",
        # Local address — presumably an Ollama server; confirm before relying on it.
        "base_url": "http://localhost:11434",
    },
}
 34 | 
 35 | 
 36 | class LLM(ABC):
 37 |     def __init__(self, model_name: str):
 38 |         conf = llm_config.get(model_name)
 39 |         self.model_name = conf["model_name"]
 40 |         self.api_key = conf["api_key"]
 41 |         self.base_url = conf["base_url"]
 42 | 
 43 |     @abstractmethod
 44 |     def chat(
 45 |         self, message: str, system_prompt: str = "", history=None, stream=False
 46 |     ) -> str | Stream[ChatCompletionChunk]:
 47 |         pass
 48 | 
 49 |     @abstractmethod
 50 |     def chat_pdf(self, message: str, file_content) -> str:
 51 |         pass
 52 | 
 53 | 
 54 | class OpenAiLlm(LLM):
 55 |     def __init__(self, model_name: str):
 56 |         super().__init__(model_name)
 57 |         self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
 58 | 
 59 |     def chat(
 60 |         self, message: str, system_prompt: str = "", history=None, stream=False
 61 |     ) -> str | Stream[ChatCompletionChunk]:
 62 |         response = self.client.chat.completions.create(
 63 |             model=self.model_name,
 64 |             messages=history
 65 |             if history is not None
 66 |             else [
 67 |                 {"role": "system", "content": system_prompt},
 68 |                 {"role": "user", "content": message},
 69 |             ],
 70 |             stream=stream,
 71 |         )
 72 |         if stream:
 73 |             return response
 74 |         return response.choices[0].message.content
 75 | 
 76 |     def chat_pdf(self, message: str, file_content) -> str:
 77 |         default_history = [
 78 |             {"role": "system", "content": prompt_template.paper_system},
 79 |             {
 80 |                 "role": "system",
 81 |                 "content": file_content,
 82 |             },
 83 |         ]
 84 |         messages = default_history.copy()
 85 |         messages.append({"role": "user", "content": message})
 86 | 
 87 |         completion = self.client.chat.completions.create(
 88 |             model=self.model_name, messages=messages, stream=False
 89 |         )
 90 |         res = completion.choices[0].message.content
 91 |         return res
 92 | 
 93 | 
 94 | class KimiLlm(OpenAiLlm):
 95 |     def __init__(self):
 96 |         super().__init__("kimi")
 97 | 
 98 |     def upload_file(self, file_path: str) -> str:
 99 |         file_object = self.client.files.create(
100 |             file=Path(file_path), purpose="file-extract"
101 |         )
102 |         return file_object.id
103 | 
104 |     def extract_file(self, file_id: str):
105 |         return self.client.files.content(file_id=file_id).text
106 | 
107 |     def list_files(self):
108 |         file_list = self.client.files.list()
109 |         # 要用到的应该就俩属性: id, filename
110 |         return file_list.data
111 | 
112 |     def remove_file(self, file_id: str):
113 |         self.client.files.delete(file_id=file_id)
114 |         print("remove success")
115 | 
116 | 
class OllamaLlm(LLM):
    """LLM backend that posts chat requests to an Ollama server's ``/api/chat``."""

    def __init__(self, model_name: str):
        super().__init__(model_name)

    def _post_chat(self, messages, stream: bool = False) -> str:
        """POST ``messages`` to ``/api/chat`` and return the reply text.

        Args:
            messages: List of ``{"role": ..., "content": ...}`` dicts.
            stream: Value sent as the request's ``stream`` flag.

        Returns:
            The assistant message content. For a streamed reply the content
            fragments of all chunks are concatenated.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        import json  # local import: only needed to decode streamed chunks

        payload = {
            "model": self.model_name,
            "messages": messages,
            "stream": stream,
        }
        response = requests.post(self.base_url + "/api/chat", json=payload)
        # Fail with the HTTP error instead of a KeyError on a body without "message".
        response.raise_for_status()

        if not stream:
            return response.json()["message"]["content"]

        # A streamed reply is one JSON object per line, so response.json()
        # cannot parse it; decode line by line and stitch the fragments.
        fragments = []
        for line in response.iter_lines():
            if not line:
                continue
            chunk = json.loads(line)
            fragments.append((chunk.get("message") or {}).get("content", ""))
        return "".join(fragments)

    def chat(
        self, message: str, system_prompt: str = "", history=None, stream=False
    ) -> str:
        """Send one chat request and return the reply text.

        Args:
            message: User message; only used when ``history`` is None.
            system_prompt: System prompt; only used when ``history`` is None.
            history: Complete message list to send as-is. When given, it
                replaces the system/user pair built from the other arguments.
            stream: Sent to the server as the ``stream`` flag. The reply is
                still returned as one complete string.
        """
        if history is None:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
        else:
            messages = history
        return self._post_chat(messages, stream=stream)

    def chat_pdf(self, message: str, file_content) -> str:
        """Ask ``message`` about a paper whose text is ``file_content``.

        The paper text and the question are merged into a single user message
        by ``prompt_template.build_paper``; the request is never streamed.
        """
        messages = [
            {"role": "system", "content": prompt_template.paper_system},
            {
                "role": "user",
                "content": prompt_template.build_paper(file_content, message),
            },
        ]
        return self._post_chat(messages, stream=False)
152 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @file    : main.py
  4 | @date    : 2024-07-11
  5 | @author  : leafw
  6 | """
  7 | import requests
  8 | import os
  9 | import utils
 10 | from utils import get_data_from_arxiv_id
 11 | from urllib.parse import urlparse
 12 | import streamlit as st
 13 | from streamlit_option_menu import option_menu
 14 | from prompt_template import paper_questions
 15 | from api import arxiv_client
 16 | from llm.model import OpenAiLlm, KimiLlm, OllamaLlm
 17 | from llm.agent import TranslaterAgent, PaperAnswerAgent
 18 | from front.st_chat import chatting
 19 | from front.kimi_file_manage import settings
 20 | 
 21 | # 是否使用Kimi
 22 | use_kimi = False
 23 | # 是否使用ollama
 24 | use_ollama = False
 25 | 
 26 | data_dir = './data'
 27 | md_template_path = 'md_template.md'
 28 | current_llm = KimiLlm() if use_kimi else OllamaLlm('qwen') if use_ollama else OpenAiLlm('deepseek')
 29 | trans_agent = TranslaterAgent(llm=current_llm)
 30 | paper_answer_agent = PaperAnswerAgent(llm=current_llm)
 31 | 
 32 | 
 33 | def download(url):
 34 |     url = url.replace("abs", "pdf")
 35 |     response = requests.get(url)
 36 |     last = url.rfind('/')
 37 | 
 38 |     arxiv_id = url[last + 1:]
 39 |     # 每个论文放在data/arxiv_id文件夹下
 40 |     directory_path = utils.arxiv_dir_path(arxiv_id)
 41 |     utils.ensure_directory_exists(directory_path)
 42 | 
 43 |     file_path = directory_path + os.sep + arxiv_id + '.pdf'
 44 | 
 45 |     if os.path.exists(file_path):
 46 |         print('文件已经存在')
 47 |         return file_path, arxiv_id
 48 | 
 49 |     if response.status_code == 200:
 50 |         with open(file_path, 'wb') as file:
 51 |             file.write(response.content)
 52 |         print("文件下载成功")
 53 |     else:
 54 |         print(f"文件下载失败，状态码: {response.status_code}")
 55 |     return file_path, arxiv_id
 56 | 
 57 | 
 58 | def parse_home(url: str) -> dict:
 59 |     parsed_url = urlparse(url)
 60 |     arxiv_id = parsed_url.path.split('/')[-1]
 61 | 
 62 |     arxiv_data = get_data_from_arxiv_id(arxiv_id)
 63 |     if arxiv_data:
 64 |         return {
 65 |             "title": arxiv_data.title,
 66 |             "abstract": arxiv_data.abstract,
 67 |             "file_path": arxiv_data.file_path,
 68 |             "arxiv_id": arxiv_id
 69 |         }
 70 | 
 71 |     arxiv_data = arxiv_client.search_by_id(arxiv_id)
 72 |     # 持久化
 73 |     file_path, arxiv_id = download(url)
 74 |     arxiv_data.file_path = file_path
 75 |     arxiv_data.save_to_json()
 76 | 
 77 |     return {
 78 |         "title": arxiv_data.title,
 79 |         "abstract": arxiv_data.abstract,
 80 |         "file_path": file_path,
 81 |         "arxiv_id": arxiv_id
 82 |     }
 83 | 
 84 | 
 85 | def trans(title: str, abstract: str, arxiv_id: str) -> str:
 86 |     arxiv_data = get_data_from_arxiv_id(arxiv_id)
 87 |     if arxiv_data is None:
 88 |         return '系统异常'
 89 | 
 90 |     # 如果翻译过就直接拿翻译的
 91 |     if arxiv_data.title_abstract_cn is not None and arxiv_data.title_abstract_cn != '':
 92 |         return arxiv_data.title_abstract_cn
 93 | 
 94 |     content = f'## {title}\n{abstract}'
 95 |     translated = trans_agent.run(content)
 96 |     arxiv_data.title_abstract_cn = translated
 97 |     arxiv_data.save_to_json()
 98 |     return translated
 99 | 
100 | 
def answer_pdf(index: int, file_id: str, arxiv_id: str) -> tuple[str, str]:
    """Answer one of the predefined paper questions, caching the result.

    Args:
        index: Position of the question in ``paper_questions``.
        file_id: File id currently known to the caller; returned unchanged
            unless the paper is uploaded to Kimi during this call.
        arxiv_id: Id of the paper whose stored record is used.

    Returns:
        A ``(file_id, answer)`` pair. When no record exists for ``arxiv_id``
        the answer slot carries the error text '系统异常'.
    """
    arxiv_data = get_data_from_arxiv_id(arxiv_id)
    if arxiv_data is None:
        # Always a 2-tuple with the message in the answer slot: callers
        # unpack two values and display the second one.
        return file_id, '系统异常'

    # Extract the paper text once and store it on the record.
    if not arxiv_data.content:
        if isinstance(current_llm, KimiLlm):
            # Kimi extracts the text server-side from the uploaded file.
            file_id = current_llm.upload_file(arxiv_data.file_path)
            arxiv_data.file_id = file_id
            arxiv_data.content = current_llm.extract_file(file_id)
        else:
            arxiv_data.content = utils.read_pdf(arxiv_data.file_path)
        arxiv_data.save_to_json()

    question = paper_questions[index]

    # A record may carry no FAQ mapping yet; create it so the answer can be stored.
    if arxiv_data.faq is None:
        arxiv_data.faq = {}

    cached_answer = arxiv_data.faq.get(question)
    if cached_answer:
        return file_id, cached_answer

    answer = current_llm.chat_pdf(question, arxiv_data.content)
    arxiv_data.faq[question] = answer
    arxiv_data.save_to_json()
    return file_id, answer
127 | 
128 | 
def export_md(arxiv_id: str):
    """Export a paper's stored data to ``<arxiv dir>/<arxiv_id>.md``.

    The markdown template is filled with the title, abstract and translation,
    then one ``###`` section is appended per stored FAQ entry.

    Args:
        arxiv_id: Id of the paper to export.

    Raises:
        ValueError: If no stored record exists for ``arxiv_id``.
    """
    arxiv_data = get_data_from_arxiv_id(arxiv_id)
    if arxiv_data is None:
        raise ValueError(f'no stored data for arxiv id {arxiv_id!r}')

    path = utils.arxiv_dir_path(arxiv_id)
    with open(md_template_path, 'r', encoding='utf-8') as f:
        template = f.read()

    sections = [
        template.format(
            title=arxiv_data.title,
            abstract=arxiv_data.abstract,
            # An untranslated paper must not render as the literal "None".
            title_abstract_cn=arxiv_data.title_abstract_cn or '',
        )
    ]
    # The FAQ mapping may be missing when no question has been answered yet.
    sections.extend(
        f'### {question}\n{answer}\n\n'
        for question, answer in (arxiv_data.faq or {}).items()
    )

    with open(path + os.sep + arxiv_id + '.md', 'w', encoding='utf-8') as f:
        f.write(''.join(sections))

    print('导出结束')
146 | 
147 | 
# Home page
def home():
    """Render the home page.

    The page takes an arXiv URL, shows the paper's title and abstract with an
    optional translation in the left column, and one answer form per entry of
    ``paper_questions`` in the right column. The "generate all" mode answers
    one question per script run and reruns until every question is done.
    """
    st.markdown("<h1 style='text-align: center; font-size: 32px;'>Arxiv Helper</h1>", unsafe_allow_html=True)

    state = st.session_state
    question_count = len(paper_questions)

    # Seed every session-state key this page relies on
    initial_values = {
        'responses': [""] * question_count,
        'title': "",
        'abstract': "",
        'url': "",
        'arxiv_id': "",
        'translated_abstract': "",
        'file_id': "",
        'generate_all': False,
        'generate_index': 0,
    }
    for key, initial in initial_values.items():
        if key not in state:
            state[key] = initial

    url = st.text_input("请输入网址", value=state.url, key="url_input")

    def analysis_url():
        # Nothing to analyse until the input box holds a value
        entered = state.url_input
        if not entered:
            return
        parsed = parse_home(entered)
        state.title = parsed['title']
        state.abstract = parsed['abstract']
        state.arxiv_id = parsed['arxiv_id']
        st.rerun()

    # Re-analyse only when the entered URL differs from the remembered one
    if url != state.url:
        state.url = url
        analysis_url()

    # Two-column layout
    col1, col2 = st.columns([2, 3])

    with col1:
        if state.title:
            st.markdown(f"**<h2 style='font-size: 24px;'>标题</h2>** {state.title}", unsafe_allow_html=True)
            st.markdown(f"**<h3 style='font-size: 20px;'>摘要</h3>** {state.abstract}",
                        unsafe_allow_html=True)

            if st.button("翻译"):
                with st.spinner("翻译中，请稍候..."):
                    state.translated_abstract = trans(state.title, state.abstract, state.arxiv_id)
                st.rerun()

            if state.translated_abstract:
                st.markdown(f"**<h3 style='font-size: 20px;'>翻译结果</h3>** {state.translated_abstract}",
                            unsafe_allow_html=True)

    with col2:
        if state.title:
            # Only bound when a title is present; the "generate all" button
            # that triggers its later use is rendered in this same branch.
            spinner_placeholder = st.empty()
            b1, b2 = st.columns(2)
            with b1:
                if st.button("生成所有"):
                    state.generate_all = True
                    state.generate_index = 0
            with b2:
                if st.button("导出MD"):
                    with st.spinner("导出中，请稍候..."):
                        export_md(state.arxiv_id)
                        st.rerun()

            for i, question in enumerate(paper_questions):
                with st.form(key=f"form_{i}"):
                    st.markdown(f"**{question}**", unsafe_allow_html=True)
                    st.markdown(f"{state.responses[i]}", unsafe_allow_html=True)
                    if st.form_submit_button("生成"):
                        with st.spinner("生成中，请稍候..."):
                            _, result = answer_pdf(i, state.file_id, state.arxiv_id)
                            state.responses[i] = result
                            st.rerun()

    # "Generate all": answer one pending question per run, then rerun
    if not (state.generate_all and state.generate_index < question_count):
        return

    i = state.generate_index
    with spinner_placeholder.container():
        with st.spinner(f"正在生成问题 {i + 1}/{question_count}..."):
            _, result = answer_pdf(i, state.file_id, state.arxiv_id)
            state.responses[i] = result
            state.generate_index += 1
            if state.generate_index >= question_count:
                state.generate_all = False
    st.rerun()
247 | 
248 | 
# Entry point
def main():
    """Configure the page, draw the sidebar menu and render the selected page."""
    st.set_page_config(layout="wide")

    options = ["主页", "聊天", "设置"]
    home_page, chat_page, settings_page = options

    with st.sidebar:
        selected = option_menu(
            menu_title="菜单",  # menu title
            options=options,  # menu entries
            icons=["house", "robot", "gear"],  # one icon per entry
            menu_icon="cast",  # icon of the menu itself
            default_index=0,  # entry selected by default
            orientation="vertical",  # menu orientation
        )

    if selected == home_page:
        home()
    elif selected == chat_page:
        # The chat page may be opened before the home page has stored an id
        if 'arxiv_id' in st.session_state:
            active_arxiv_id = st.session_state.arxiv_id
        else:
            active_arxiv_id = ''
        chatting(active_arxiv_id)
    elif selected == settings_page:
        settings(current_llm)


if __name__ == "__main__":
    main()
273 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------