├── .gitignore ├── img.png ├── img_1.png ├── img_2.png ├── md_template.md ├── api ├── __init__.py ├── arxiv_client.py └── aminer.py ├── front ├── __init__.py ├── kimi_file_manage.py └── st_chat.py ├── llm ├── __init__.py ├── agent.py └── model.py ├── requirements.txt ├── utils.py ├── README.md ├── prompt_template.py ├── pojo.py ├── flow.py ├── hf.py ├── main.py └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | data/ 3 | output.md 4 | __pycache__ 5 | .DS_Store -------------------------------------------------------------------------------- /img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/careywyr/paper-agent/HEAD/img.png -------------------------------------------------------------------------------- /img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/careywyr/paper-agent/HEAD/img_1.png -------------------------------------------------------------------------------- /img_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/careywyr/paper-agent/HEAD/img_2.png -------------------------------------------------------------------------------- /md_template.md: -------------------------------------------------------------------------------- 1 | # {title} 2 | 3 | ## Abstract 4 | {abstract} 5 | 6 | 7 | {title_abstract_cn} 8 | 9 | ## FAQ 10 | -------------------------------------------------------------------------------- /api/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : __init__.py.py 4 | @date : 2024-07-24 5 | @author : leafw 6 | """ 7 | -------------------------------------------------------------------------------- /front/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : __init__.py.py 4 | @date : 2024-08-11 5 | @author : leafw 6 | """ 7 | -------------------------------------------------------------------------------- /llm/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : __init__.py.py 4 | @date : 2024-07-11 5 | @author : leafw 6 | """ 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.12.3 2 | openai==1.35.13 3 | pandas==2.2.2 4 | requests==2.32.3 5 | streamlit==1.36.0 6 | streamlit_option_menu==0.3.13 7 | 8 | bs4~=0.0.2 9 | arxiv~=2.1.3 10 | pymupdf~=1.24.8 -------------------------------------------------------------------------------- /llm/agent.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : agent.py 4 | @date : 2024-07-22 5 | @author : leafw 6 | """ 7 | from llm.model import LLM 8 | import prompt_template 9 | import utils 10 | 11 | 12 | class Agent: 13 | def __init__(self, llm: LLM, desc: str = ""): 14 | self.llm = llm 15 | self.desc = desc 16 | 17 | def run(self, **kwargs): 18 | pass 19 | 20 | 21 | class TranslaterAgent(Agent): 22 | def __init__(self, llm: LLM): 23 | super().__init__(llm, "翻译智能体") 24 | 25 | def run(self, text): 26 | s = self.llm.chat(text, prompt_template.en_zh) 27 | return utils.extract_yy_text(s) 28 | 29 | 30 | class PaperAnswerAgent(Agent): 31 | def __init__(self, llm: LLM): 32 | super().__init__(llm, "Paper 问答") 33 | 34 | def run(self, question, file_content): 35 | return self.llm.chat_pdf(question, file_content) 36 | -------------------------------------------------------------------------------- /api/arxiv_client.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : arxiv_client.py 4 | @date : 2024-07-24 5 | @author : leafw 6 | """ 7 | import arxiv 8 | from urllib.parse import urlparse 9 | from pojo import ArxivData 10 | 11 | 12 | # Construct the default API client. 13 | client = arxiv.Client() 14 | 15 | 16 | def search_by_id(arxiv_id: str) -> ArxivData | None: 17 | # Search for the paper with the given ID 18 | id_search = arxiv.Search(id_list=[arxiv_id]) 19 | 20 | try: 21 | # 只拿第一个 22 | result = next(client.results(id_search)) 23 | return ArxivData('', arxiv_id, result.title, result.summary) 24 | except StopIteration: 25 | # Handle the case where no result is found 26 | print(f"No paper found with ID {arxiv_id}") 27 | except Exception as e: 28 | # Handle other potential exceptions 29 | print(f"An error occurred: {e}") 30 | return None 31 | 32 | 33 | def search_by_url(url: str) -> ArxivData | None: 34 | parsed_url = urlparse(url) 35 | # 获取路径的最后一个部分 36 | arxiv_id = parsed_url.path.split('/')[-1] 37 | return search_by_id(arxiv_id) 38 | 39 | -------------------------------------------------------------------------------- /front/kimi_file_manage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : kimi_file_manage.py 4 | @date : 2024-08-11 5 | @author : leafw 6 | """ 7 | import streamlit as st 8 | import pandas as pd 9 | from llm.model import KimiLlm 10 | 11 | 12 | # 定义设置页面 13 | def settings(current_llm: KimiLlm): 14 | st.markdown("

Kimi文件管理(存在Kimi才可使用)

", 15 | unsafe_allow_html=True) 16 | 17 | files = current_llm.list_files() 18 | df = create_files_dataframe(files) 19 | 20 | # 显示文件表格 21 | for index, row in df.iterrows(): 22 | col1, col2, col3 = st.columns([3, 7, 2]) 23 | col1.write(row["ID"]) 24 | col2.write(row["FileName"]) 25 | button_placeholder = col3.empty() 26 | if button_placeholder.button("删除", key=row["ID"]): 27 | current_llm.remove_file(row["ID"]) 28 | st.rerun() 29 | 30 | 31 | # 创建显示文件列表的 DataFrame 32 | def create_files_dataframe(files): 33 | data = { 34 | "ID": [file.id for file in files], 35 | "FileName": [file.filename for file in files] 36 | } 37 | df = pd.DataFrame(data) 38 | return df 39 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : utils.py 4 | @date : 2024-07-13 5 | @author : leafw 6 | """ 7 | import os 8 | import pymupdf 9 | import re 10 | from pojo import load_Arxiv_data, ArxivData 11 | 12 | data_dir = './data' 13 | 14 | 15 | def ensure_directory_exists(directory_path: str): 16 | if not os.path.exists(directory_path): 17 | os.makedirs(directory_path) 18 | print(f"目录 {directory_path} 已创建") 19 | else: 20 | print(f"目录 {directory_path} 已存在") 21 | 22 | 23 | def extract_yy_text(text): 24 | # 使用正则表达式匹配 "### 意译" 后面的文本 25 | pattern = r'### 意译\s*(```)?(.+?)(```)?(?=###|\Z)' 26 | match = re.search(pattern, text, re.DOTALL) 27 | 28 | if match: 29 | # 提取匹配的文本,去除可能存在的 ``` 符号 30 | extracted_text = match.group(2).strip() 31 | return extracted_text 32 | else: 33 | return "未找到意译部分" 34 | 35 | 36 | def read_pdf(file_path: str) -> str: 37 | doc = pymupdf.open(file_path) 38 | all_text = [] 39 | 40 | for page in doc: 41 | text = page.get_text() 42 | all_text.append(text) 43 | 44 | combined_text = "\n".join(all_text) 45 | return combined_text 46 | 47 | 48 | def arxiv_dir_path(arxiv_id: str, root_dir: str = data_dir) -> str: 49 | return root_dir + os.sep + arxiv_id 50 | 51 | 52 | def get_data_from_arxiv_id(arxiv_id: str) -> ArxivData: 53 | json_path = arxiv_dir_path(arxiv_id) + os.sep + arxiv_id + '.json' 54 | return load_Arxiv_data(json_path) -------------------------------------------------------------------------------- /api/aminer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : aminer.py 4 | @date : 2024-07-24 5 | @author : leafw 6 | 本项目未使用,但是个很有用的网站 7 | """ 8 | 9 | import os 10 | import requests 11 | 12 | aminer_key = os.environ.get('AMINER_KEY') 13 | 14 | # 设置请求头 15 | headers = { 16 | 'Authorization': aminer_key 17 | } 18 | 19 | 20 | def search(title): 21 | simple_result = simple_search(title) 22 | if not simple_result: 23 | print(f'can not find {title}') 24 | return 25 | aminer_paper_id = simple_result['id'] 26 | result = search_by_id(aminer_paper_id) 27 | return { 28 | "title": result['title'], 29 | "abstract": result['abstract'] 30 | } 31 | 32 | 33 | def simple_search(title): 34 | url = f'https://datacenter.aminer.cn/gateway/open_platform/api/v3/paper/list/by/publish?page=1&size=10&title={title}' 35 | # 发送GET请求 36 | response = requests.get(url, headers=headers) 37 | 38 | # 检查响应状态码 39 | if response.status_code == 200: 40 | # 请求成功,处理响应数据 41 | data = response.json() 42 | if len(data['data']) > 0: 43 | return data['data'][0] 44 | return None 45 | else: 46 | # 请求失败,打印错误信息 47 | print(f"Request failed with status code {response.status_code}") 48 | return None 49 | 50 | 51 | def search_by_id(aminer_paper_id: str): 52 | url = f'https://datacenter.aminer.cn/gateway/open_platform/api/v3/paper/platform/details/not/contain/wos/by/id?id={aminer_paper_id}' 53 | 54 | response = requests.get(url, headers=headers) 55 | 56 | # 检查响应状态码 57 | if response.status_code == 200: 58 | # 请求成功,处理响应数据 59 | response_data = response.json() 60 | return response_data['data'] 61 | else: 62 | # 请求失败,打印错误信息 63 | print(f"Request failed with status code {response.status_code}") 64 | print(response.text) 65 | return None 66 | 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Paper-Agent 2 | 3 | 一款可以帮助经常阅读论文的同学提升效率的小工具,有两个部分: 4 | - HuggingFace Weekly Paper 制作工具 5 | - Arxiv Helper 6 | 7 | ## 前置条件 8 | 翻译基于 `deepseek` 的服务, 论文十问依赖于 `kimi`, 因此需要这两个的 api key。可以配置到环境变量中,代码中使用的环境变量 key 分别是 9 | - DEEPSEEK_KEY 10 | - KIMI_KEY 11 | 12 | 如果不想同时用两家,翻译可以考虑也换成 kimi,需要手动修改代码,将 deepseek 里面的设置换成 kimi的。 13 | 14 | ## 1. HuggingFace Weekly Paper 制作工具 15 | 我每周博客和公众号上都会发一篇 weekly paper,文章来源于 HuggingFace 的 Daily Paper。 为了减少每次都要一个个点进去通过N次复制粘贴来得到翻译后的结果的痛苦,写了个脚本,可以直接读取本周的点赞超过n次的论文,并生成Weekly Paper。 16 | 17 | 代码就是hf.py文件,运行 `weekly_paper` 方法即可,慢慢等待即可,如果出现了一些翻译上的问题或者接口异常,可以重新从目录下的output.md文件里面拿到英文原版继续人工处理。 18 | 19 | 此脚本依赖的模型是 `deepseek`。 翻译 prompt 来自于微博上宝玉老师的分享。 20 | 21 | ## 2. Arxiv Helper (0.0.2版本做了大幅度的调整,不再默认使用kimi) 22 | 23 | 这个使用 `streamlit` 做了前端,输入框里面输入 arxiv 论文的首页地址,回车即可得到它的标题和摘要。 24 | 25 | 0.0.1版本用的是网页爬虫,0.0.2版本用的arxiv api,速度似乎比爬虫慢一点,但应该更稳定些。 26 | 27 | 左侧可以进行翻译,右侧是[论文十问](http://leafw.cn/2023/12/25/%e5%88%86%e4%ba%ab%e4%b8%a4%e4%b8%aa%e9%98%85%e8%af%bb%e8%ae%ba%e6%96%87%e7%9a%84%e6%96%b9%e6%b3%95/)的模板,使用初始化的模型的接口进行问答。 28 | 29 | 论文十问以及系统prompt的配置均在`prompt_template.py` 中,需要的可以自行修改。 30 | 31 | ![img_2.png](img_2.png) 32 | 33 | 新增加了生成所有和导出MD的按钮,方便一次性处理所有问题。 34 | 35 | 使用了pymupdf抽取了pdf内的内容,不再依赖于kimi的文件管理,主要是kimi的api相对贵了点。 36 | 37 | ![img_1.png](img_1.png) 38 | 39 | ### 支持deepseek(可自己修改其他模型)、kimi以及本地使用ollama部署的模型 40 | 41 | ``` python 42 | current_llm = KimiLlm() if use_kimi else OllamaLlm('qwen') if use_ollama else OpenAiLlm('deepseek') 43 | ``` 44 | 45 | - 只要是支持OpenAI的模型,均可以使用OpenAiLlm的实现,在config里面配置对应的model_name,api_key以及base_url即可。 46 | - Kimi 因为有自己的文件管理接口,因此使用了单独的实现类,也只有使用kimi的情况下才可以使用设置页面 47 | - Ollama的base_url就是ollama服务的地址 48 | 49 | 使用方式: 50 | 51 | ``` shell 52 | streamlit run main.py 53 | ``` 54 | 55 | ## 3. 使用flow.py 一键生成功能2里面需要的Markdown文档。 56 | flow.py 就是将2里面的所有功能顺序的放到了一起,修改里面的url即可等待生成需要的论文标题摘要以及对应的QA。 57 | 58 | 由于Streamlit的限制感觉还是太多了,想要做个更成熟的web产品还是得用正经的前端,包括后端可能也要用自己的主业Java,后续会开发一套成熟一点的Java编写的Web平台。 59 | 60 | 61 | ## 0811 update 62 | 新增聊天功能,在主要生成的问题可以直接带到聊天页面,切换菜单的时候自动带上。注意暂时只支持Deepseek -------------------------------------------------------------------------------- /prompt_template.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : prompt_template.py 4 | @date : 2024-07-10 5 | @author : leafw 6 | """ 7 | 8 | en_zh = """ 9 | 你是一位精通简体中文的专业翻译,尤其擅长将英文的专业学术论文或文章翻译成面向专业技术人员的中文技术文章。请你帮我将以下英文段落翻译成中文,风格与中文理工技术书籍读物相似。 10 | 11 | 规则: 12 | - 翻译时要准确传达原文的事实和背景。 13 | - 即使上意译也要保留原始段落格式,以及保留术语,例如 FLAC,JPEG 等。保留公司缩写,例如 Microsoft, Amazon, OpenAI 等。 14 | - 人名不翻译 15 | - 同时要保留引用的论文,例如 [20] 这样的引用。 16 | - 对于 Figure 和 Table,翻译的同时保留原有格式,例如:“Figure 1: ”翻译为“图 1: ”,“Table 1: ”翻译为:“表 1: ”。 17 | - 全角括号换成半角括号,并在左括号前面加半角空格,右括号后面加半角空格。 18 | - 输入格式为 Markdown 格式,输出格式也必须保留原始 Markdown 格式 19 | - 在翻译专业术语时,第一次出现时要在括号里面写上英文原文,例如:“生成式 AI (Generative AI)”,之后就可以只写中文了。 20 | - 注意你翻译内容的受众是专业技术人员,因此不需要对专业术语做口语化的解释。 21 | - 以下是常见的 AI 相关术语词汇对应表(English -> 中文): 22 | * Transformer -> Transformer 23 | * Token -> Token 24 | * LLM/Large Language Model -> 大语言模型 25 | * Zero-shot -> 零样本 26 | * Few-shot -> 少样本 27 | * AI Agent -> AI 智能体 28 | * AGI -> 通用人工智能 29 | 30 | 策略: 31 | 32 | 分三步进行翻译工作,并打印每步的结果: 33 | 1. 根据英文内容直译,保持原有格式,不要遗漏任何信息 34 | 2. 根据第一步直译的结果,指出其中存在的具体问题,要准确描述,不宜笼统的表示,也不需要增加原文不存在的内容或格式,包括不仅限于: 35 | - 不符合中文表达习惯,明确指出不符合的地方 36 | - 语句不通顺,指出位置,不需要给出修改意见,意译时修复 37 | 3. 根据第一步直译的结果和第二步指出的问题,重新进行意译,保证内容的原意的基础上,使其更易于理解,更符合中文的表达习惯,同时保持原有的格式不变 38 | 39 | 返回格式如下,"{xxx}"表示占位符: 40 | 41 | ### 直译 42 | {直译结果} 43 | 44 | *** 45 | 46 | ### 问题 47 | {直译的具体问题列表} 48 | 49 | *** 50 | 51 | ### 意译 52 | ``` 53 | {意译结果} 54 | ``` 55 | 56 | 现在请按照上面的要求从第一行开始翻译以下内容为简体中文: 57 | ``` 58 | """ 59 | 60 | paper_questions = [ 61 | '论文试图解决什么问题?', 62 | '这是否是一个新的问题?', 63 | '这篇文章要验证一个什么科学假设?', 64 | '有哪些相关研究?如何归类?谁是这一课题在领域内值得关注的研究员?', 65 | '论文中提到的解决方案之关键是什么?', 66 | '论文中的实验是如何设计的?', 67 | '用于定量评估的数据集是什么?代码有没有开源?', 68 | '论文中的实验及结果有没有很好地支持需要验证的科学假设?', 69 | '这篇论文到底有什么贡献?', 70 | '研究的下一步呢?有什么工作可以继续深入?' 71 | ] 72 | 73 | paper_system = f""" 74 | 你是一名资深科研工作者,擅长阅读各种中英文文献,能准确地根据文献内容回答用户的问题。 75 | """ 76 | 77 | 78 | def build_paper(content: str, question: str) -> str: 79 | return f""" 80 | 以下是论文的全文: 81 | {content} 82 | 83 | 请你回答我的问题: 84 | {question} 85 | """ 86 | -------------------------------------------------------------------------------- /pojo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : pojo.py 4 | @date : 2024-07-13 5 | @author : leafw 6 | """ 7 | import json 8 | 9 | 10 | class ArxivData: 11 | def __init__(self, file_path: str, arxiv_id: str, title: str, abstract: str, file_id: str = '', 12 | title_abstract_cn: str = '', content: str = '', faq=None, chat_history=None): 13 | self.file_path = file_path 14 | self.arxiv_id = arxiv_id 15 | self.title = title 16 | self.abstract = abstract 17 | self.file_id = file_id 18 | self.title_abstract_cn = title_abstract_cn 19 | self.content = content 20 | self.faq = faq if faq is not None else {} 21 | self.chat_history = chat_history if chat_history is not None else {} 22 | 23 | def to_dict(self): 24 | return { 25 | 'file_path': self.file_path, 26 | 'arxiv_id': self.arxiv_id, 27 | 'file_id': self.file_id, 28 | 'title': self.title, 29 | 'abstract': self.abstract, 30 | 'title_abstract_cn': self.title_abstract_cn, 31 | 'content': self.content, 32 | 'faq': self.faq, 33 | 'chat_history': self.chat_history 34 | } 35 | 36 | @classmethod 37 | def from_dict(cls, data: dict): 38 | return cls( 39 | file_path=data.get('file_path', ''), 40 | arxiv_id=data.get('arxiv_id', ''), 41 | title=data.get('title', ''), 42 | abstract=data.get('abstract', ''), 43 | file_id=data.get('file_id', ''), 44 | title_abstract_cn=data.get('title_abstract_cn', ''), 45 | content=data.get('content', ''), 46 | faq=data.get('faq', {}), 47 | chat_history=data.get('chat_history', {}) 48 | ) 49 | 50 | def save_to_json(self): 51 | # 将对象转换为字典 52 | data_dict = self.to_dict() 53 | json_path = self.file_path.replace('pdf', 'json') 54 | # 写入JSON文件 55 | with open(json_path, 'w', encoding='utf-8') as json_file: 56 | json.dump(data_dict, json_file, ensure_ascii=False, indent=4) 57 | 58 | print(f"数据已写入 {json_path}") 59 | 60 | 61 | def load_Arxiv_data(json_file_path: str) -> ArxivData | None: 62 | try: 63 | with open(json_file_path, 'r', encoding='utf-8') as json_file: 64 | data = json.load(json_file) 65 | return ArxivData.from_dict(data) 66 | except FileNotFoundError: 67 | print(f"文件 {json_file_path} 未找到.") 68 | return None 69 | -------------------------------------------------------------------------------- /flow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : flow.py 4 | @date : 2024-07-28 5 | @author : leafw 6 | """ 7 | from urllib.parse import urlparse 8 | from api import arxiv_client 9 | import utils 10 | import os 11 | import requests 12 | from llm.agent import TranslaterAgent 13 | from llm.model import OpenAiLlm 14 | from prompt_template import paper_questions 15 | 16 | paper_url = 'https://arxiv.org/abs/2407.18248' 17 | 18 | current_llm = OpenAiLlm('deepseek') 19 | trans_agent = TranslaterAgent(current_llm) 20 | md_template_path = 'md_template.md' 21 | 22 | 23 | def run(url: str): 24 | parsed_url = urlparse(url) 25 | arxiv_id = parsed_url.path.split('/')[-1] 26 | with open(md_template_path, 'r', encoding='utf-8') as f: 27 | template = f.read() 28 | 29 | # 先下载 30 | url = url.replace("abs", "pdf") 31 | print(f'开始下载: {url}') 32 | response = requests.get(url) 33 | directory_path = utils.arxiv_dir_path(arxiv_id) 34 | utils.ensure_directory_exists(directory_path) 35 | 36 | file_path = directory_path + os.sep + arxiv_id + '.pdf' 37 | 38 | if response.status_code == 200: 39 | with open(file_path, 'wb') as file: 40 | file.write(response.content) 41 | print(f"文件下载成功: {file_path}") 42 | else: 43 | print(f"文件下载失败,状态码: {response.status_code}") 44 | return 45 | 46 | # 获取标题和摘要 47 | arxiv_data = arxiv_client.search_by_id(arxiv_id) 48 | arxiv_data.file_path = file_path 49 | arxiv_data.save_to_json() 50 | print(f'标题和摘要获取成功: {arxiv_data.title}') 51 | print('开始翻译') 52 | 53 | # 翻译标题和摘要 54 | content = f'## {arxiv_data.title}\n{arxiv_data.abstract}' 55 | translated = trans_agent.run(content) 56 | arxiv_data.title_abstract_cn = translated 57 | print('翻译结束') 58 | 59 | # 填充问题之外的东西 60 | template_format = template.format(title=arxiv_data.title, abstract=arxiv_data.abstract, title_abstract_cn=translated) 61 | 62 | # 回答问题 63 | file_content = utils.read_pdf(arxiv_data.file_path) 64 | arxiv_data.content = file_content 65 | 66 | for question in paper_questions: 67 | print(f'回答问题: {question}') 68 | answer, _ = current_llm.chat_pdf(question, arxiv_data.content) 69 | arxiv_data.faq[question] = answer 70 | item = '### ' + question + '\n' + answer + '\n\n' 71 | template_format += item 72 | 73 | arxiv_data.save_to_json() 74 | 75 | print(f'问题回答结束!') 76 | 77 | with open(arxiv_id + '.md', 'w', encoding='utf-8') as f: 78 | f.write(template_format) 79 | 80 | print('=============== ending! =============== ') 81 | 82 | 83 | run(paper_url) 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /front/st_chat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : st_chat.py 4 | @date : 2024-08-11 5 | @author : leafw 6 | """ 7 | import streamlit as st 8 | from llm.model import OpenAiLlm 9 | from utils import get_data_from_arxiv_id 10 | from prompt_template import paper_system 11 | 12 | 13 | # 初始化OpenAiLlm 14 | current_llm = OpenAiLlm('deepseek') 15 | 16 | 17 | def chatting(arxiv_id): 18 | st.markdown("

Chat with LLM

", 19 | unsafe_allow_html=True) 20 | if 'history' not in st.session_state: 21 | st.session_state.history = [] 22 | if arxiv_id == '': 23 | return 24 | 25 | arxiv_data = get_data_from_arxiv_id(arxiv_id) 26 | if not arxiv_data: 27 | st.session_state.history = [] 28 | else: 29 | default_history = [ 30 | { 31 | "role": "system", 32 | "content": paper_system 33 | }, 34 | { 35 | "role": "system", 36 | "content": arxiv_data.content, 37 | } 38 | ] 39 | 40 | if len(arxiv_data.chat_history) > 0: 41 | st.session_state.history = arxiv_data.chat_history 42 | 43 | # 说明还没聊过 44 | if len(arxiv_data.chat_history) == 0: 45 | st.session_state.history = default_history 46 | 47 | # 这里加上小于等于2是如果曾经聊过天,就不再把这里的东西再重复拼上去了 48 | if arxiv_data.faq and len(arxiv_data.faq) > 0 and len(st.session_state.history) <= 2: 49 | for q, a in arxiv_data.faq.items(): 50 | user_msg = {'role': 'user', 'content': q} 51 | assistant_msg = {'role': 'assistant', 'content': a} 52 | st.session_state.history.append(user_msg) 53 | st.session_state.history.append(assistant_msg) 54 | 55 | for message in st.session_state.history: 56 | if message['role'] == 'system': 57 | continue 58 | if message['role'] == 'user': 59 | with st.chat_message("user"): 60 | st.markdown(message['content']) 61 | else: 62 | with st.chat_message("assistant"): 63 | st.markdown(message['content']) 64 | 65 | # 用户输入 66 | user_input = st.chat_input(placeholder="", key="input_box") 67 | 68 | if user_input: 69 | # 保存用户输入到对话历史 70 | st.session_state.history.append({"role": "user", "content": user_input}) 71 | arxiv_data.chat_history = st.session_state.history 72 | arxiv_data.save_to_json() 73 | 74 | with st.chat_message("user"): 75 | st.markdown(user_input) 76 | 77 | ai_reply = current_llm.chat(user_input, history=st.session_state.history, stream=True) 78 | # 显示AI回复并将其存储到字符串中 79 | with st.chat_message("assistant"): 80 | complete_response = st.write_stream(ai_reply) 81 | 82 | st.session_state.history.append({"role": "assistant", "content": complete_response}) 83 | arxiv_data.chat_history = st.session_state.history 84 | arxiv_data.save_to_json() 85 | 86 | 87 | -------------------------------------------------------------------------------- /hf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : hf.py 4 | @date : 2024-07-11 5 | @author : leafw 6 | """ 7 | 8 | import requests 9 | from bs4 import BeautifulSoup 10 | from datetime import datetime, timedelta 11 | from llm.model import OllamaLlm, OpenAiLlm 12 | from llm.agent import TranslaterAgent 13 | 14 | base_url = "https://huggingface.co" 15 | # deepseek = OpenAiLlm("deepseek") 16 | deepseek = OllamaLlm("deepseek-r1") 17 | trans_agent = TranslaterAgent(deepseek) 18 | 19 | 20 | class Article: 21 | def __init__(self, title, arxiv_link, abstract): 22 | self.title = title 23 | self.arxiv_link = arxiv_link 24 | self.abstract = abstract 25 | 26 | 27 | def en_content(article: Article): 28 | return f""" 29 | ## {article.title} 30 | [{article.title}]({article.arxiv_link}) 31 | 32 | {article.abstract} 33 | """ 34 | 35 | 36 | def home_parse(url): 37 | """ 38 | 获取文章列表 39 | :return: 40 | """ 41 | response = requests.get(url) 42 | html_content = response.text 43 | 44 | # 解析HTML内容 45 | soup = BeautifulSoup(html_content, "html.parser") 46 | 47 | articles = soup.find_all("article") 48 | 49 | article_list = [] 50 | for article in articles: 51 | title = article.find("h3").get_text(strip=True) 52 | link = article.find("a")["href"] 53 | leading_nones = article.find_all("div", class_="leading-none") 54 | likes_div = None 55 | for item in leading_nones: 56 | if item.get("class") == ["leading-none"]: 57 | likes_div = item 58 | break 59 | likes = int(likes_div.get_text(strip=True)) 60 | if likes < 25: 61 | break 62 | print(f"Title: {title}") 63 | print(f"Link: {link}") 64 | print(f"Likes: {likes}") 65 | print("------") 66 | one = {"title": title, "link": base_url + link, "likes": likes} 67 | article_list.append(one) 68 | return article_list 69 | 70 | 71 | def parse_article(url, title): 72 | response = requests.get(url) 73 | html_content = response.text 74 | soup = BeautifulSoup(html_content, "html.parser") 75 | 76 | article_content = soup.find("p", class_="text-gray-700 dark:text-gray-400") 77 | content = article_content.get_text(strip=True) 78 | arxiv_link = soup.find("a", class_="btn inline-flex h-9 items-center")["href"] 79 | 80 | return Article(title, arxiv_link, content) 81 | 82 | 83 | def weekly_get(): 84 | # 获取当前日期 85 | today = datetime.today() 86 | 87 | # 计算当前周的周一日期 88 | start_of_week = today - timedelta(days=today.weekday()) 89 | 90 | # 创建一个包含周一到周五日期的列表 91 | weekdays = [start_of_week + timedelta(days=i) for i in range(5)] 92 | return [day.strftime("%Y-%m-%d") for day in weekdays] 93 | 94 | 95 | def weekly_paper(output_path=""): 96 | days = weekly_get() 97 | if output_path == "": 98 | output_path = days[0].replace("-", "") + "-" + days[-1].replace("-", "") + ".md" 99 | # 这一份是防止翻译不太好或者其他问题先留存下 100 | en_articles_content = [] 101 | with open("output.md", "w") as en: 102 | for day in days: 103 | print(f"开始处理日期: {day}") 104 | url = base_url + "/papers?date=" + day 105 | article_list = home_parse(url) 106 | print(f"{day} 主页解析完毕") 107 | for item in article_list: 108 | print(f"解析文章{item['title']}开始") 109 | article = parse_article(item["link"], item["title"]) 110 | content = en_content(article) 111 | en_articles_content.append(content) 112 | en.write(content) 113 | print(f"解析文章{item['title']}完毕") 114 | print(f"日期 {day} 处理结束") 115 | print("英文输出完毕") 116 | # 我只要这个 117 | with open(output_path, "w") as f: 118 | for en_article in en_articles_content: 119 | zh = trans_agent.run(en_article) 120 | f.write(zh + "\n\n") 121 | 122 | 123 | weekly_paper() 124 | -------------------------------------------------------------------------------- /llm/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : model.py 4 | @date : 2024-07-22 5 | @author : leafw 6 | """ 7 | 8 | from abc import ABC, abstractmethod 9 | 10 | from openai import OpenAI, Stream 11 | from openai.types.chat.chat_completion_chunk import ChatCompletionChunk 12 | import requests 13 | import prompt_template 14 | import os 15 | from pathlib import Path 16 | 17 | llm_config = { 18 | "deepseek": { 19 | "model_name": "deepseek-chat", 20 | "api_key": os.environ.get("DEEPSEEK_KEY"), 21 | "base_url": "https://api.deepseek.com", 22 | }, 23 | "kimi": { 24 | "model_name": "moonshot-v1-128k", 25 | "api_key": os.environ.get("KIMI_KEY"), 26 | "base_url": "https://api.moonshot.cn/v1", 27 | }, 28 | "deepseek-r1": { 29 | "model_name": "deepseek-r1:32b", 30 | "api_key": "", 31 | "base_url": "http://localhost:11434", 32 | }, 33 | } 34 | 35 | 36 | class LLM(ABC): 37 | def __init__(self, model_name: str): 38 | conf = llm_config.get(model_name) 39 | self.model_name = conf["model_name"] 40 | self.api_key = conf["api_key"] 41 | self.base_url = conf["base_url"] 42 | 43 | @abstractmethod 44 | def chat( 45 | self, message: str, system_prompt: str = "", history=None, stream=False 46 | ) -> str | Stream[ChatCompletionChunk]: 47 | pass 48 | 49 | @abstractmethod 50 | def chat_pdf(self, message: str, file_content) -> str: 51 | pass 52 | 53 | 54 | class OpenAiLlm(LLM): 55 | def __init__(self, model_name: str): 56 | super().__init__(model_name) 57 | self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) 58 | 59 | def chat( 60 | self, message: str, system_prompt: str = "", history=None, stream=False 61 | ) -> str | Stream[ChatCompletionChunk]: 62 | response = self.client.chat.completions.create( 63 | model=self.model_name, 64 | messages=history 65 | if history is not None 66 | else [ 67 | {"role": "system", "content": system_prompt}, 68 | {"role": "user", "content": message}, 69 | ], 70 | stream=stream, 71 | ) 72 | if stream: 73 | return response 74 | return response.choices[0].message.content 75 | 76 | def chat_pdf(self, message: str, file_content) -> str: 77 | default_history = [ 78 | {"role": "system", "content": prompt_template.paper_system}, 79 | { 80 | "role": "system", 81 | "content": file_content, 82 | }, 83 | ] 84 | messages = default_history.copy() 85 | messages.append({"role": "user", "content": message}) 86 | 87 | completion = self.client.chat.completions.create( 88 | model=self.model_name, messages=messages, stream=False 89 | ) 90 | res = completion.choices[0].message.content 91 | return res 92 | 93 | 94 | class KimiLlm(OpenAiLlm): 95 | def __init__(self): 96 | super().__init__("kimi") 97 | 98 | def upload_file(self, file_path: str) -> str: 99 | file_object = self.client.files.create( 100 | file=Path(file_path), purpose="file-extract" 101 | ) 102 | return file_object.id 103 | 104 | def extract_file(self, file_id: str): 105 | return self.client.files.content(file_id=file_id).text 106 | 107 | def list_files(self): 108 | file_list = self.client.files.list() 109 | # 要用到的应该就俩属性: id, filename 110 | return file_list.data 111 | 112 | def remove_file(self, file_id: str): 113 | self.client.files.delete(file_id=file_id) 114 | print("remove success") 115 | 116 | 117 | class OllamaLlm(LLM): 118 | def __init__(self, model_name: str): 119 | super().__init__(model_name) 120 | 121 | def chat( 122 | self, message: str, system_prompt: str = "", history=None, stream=False 123 | ) -> str: 124 | data = { 125 | "model": self.model_name, 126 | "messages": history 127 | if history is not None 128 | else [ 129 | {"role": "system", "content": system_prompt}, 130 | {"role": "user", "content": message}, 131 | ], 132 | "stream": stream, 133 | } 134 | response = requests.post(self.base_url + "/api/chat", json=data) 135 | res = response.json()["message"]["content"] 136 | return res 137 | 138 | def chat_pdf(self, message: str, file_content) -> str: 139 | data = { 140 | "model": self.model_name, 141 | "messages": [ 142 | {"role": "system", "content": prompt_template.paper_system}, 143 | { 144 | "role": "user", 145 | "content": prompt_template.build_paper(file_content, message), 146 | }, 147 | ], 148 | "stream": False, 149 | } 150 | response = requests.post(self.base_url + "/api/chat", json=data) 151 | return response.json()["message"]["content"] 152 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @file : arxiv_client.py 4 | @date : 2024-07-11 5 | @author : leafw 6 | """ 7 | import requests 8 | import os 9 | import utils 10 | from utils import get_data_from_arxiv_id 11 | from urllib.parse import urlparse 12 | import streamlit as st 13 | from streamlit_option_menu import option_menu 14 | from prompt_template import paper_questions 15 | from api import arxiv_client 16 | from llm.model import OpenAiLlm, KimiLlm, OllamaLlm 17 | from llm.agent import TranslaterAgent, PaperAnswerAgent 18 | from front.st_chat import chatting 19 | from front.kimi_file_manage import settings 20 | 21 | # 是否使用Kimi 22 | use_kimi = False 23 | # 是否使用ollama 24 | use_ollama = False 25 | 26 | data_dir = './data' 27 | md_template_path = 'md_template.md' 28 | current_llm = KimiLlm() if use_kimi else OllamaLlm('qwen') if use_ollama else OpenAiLlm('deepseek') 29 | trans_agent = TranslaterAgent(llm=current_llm) 30 | paper_answer_agent = PaperAnswerAgent(llm=current_llm) 31 | 32 | 33 | def download(url): 34 | url = url.replace("abs", "pdf") 35 | response = requests.get(url) 36 | last = url.rfind('/') 37 | 38 | arxiv_id = url[last + 1:] 39 | # 每个论文放在data/arxiv_id文件夹下 40 | directory_path = utils.arxiv_dir_path(arxiv_id) 41 | utils.ensure_directory_exists(directory_path) 42 | 43 | file_path = directory_path + os.sep + arxiv_id + '.pdf' 44 | 45 | if os.path.exists(file_path): 46 | print('文件已经存在') 47 | return file_path, arxiv_id 48 | 49 | if response.status_code == 200: 50 | with open(file_path, 'wb') as file: 51 | file.write(response.content) 52 | print("文件下载成功") 53 | else: 54 | print(f"文件下载失败,状态码: {response.status_code}") 55 | return file_path, arxiv_id 56 | 57 | 58 | def parse_home(url: str) -> dict: 59 | parsed_url = urlparse(url) 60 | arxiv_id = parsed_url.path.split('/')[-1] 61 | 62 | arxiv_data = get_data_from_arxiv_id(arxiv_id) 63 | if arxiv_data: 64 | return { 65 | "title": arxiv_data.title, 66 | "abstract": arxiv_data.abstract, 67 | "file_path": arxiv_data.file_path, 68 | "arxiv_id": arxiv_id 69 | } 70 | 71 | arxiv_data = arxiv_client.search_by_id(arxiv_id) 72 | # 持久化 73 | file_path, arxiv_id = download(url) 74 | arxiv_data.file_path = file_path 75 | arxiv_data.save_to_json() 76 | 77 | return { 78 | "title": arxiv_data.title, 79 | "abstract": arxiv_data.abstract, 80 | "file_path": file_path, 81 | "arxiv_id": arxiv_id 82 | } 83 | 84 | 85 | def trans(title: str, abstract: str, arxiv_id: str) -> str: 86 | arxiv_data = get_data_from_arxiv_id(arxiv_id) 87 | if arxiv_data is None: 88 | return '系统异常' 89 | 90 | # 如果翻译过就直接拿翻译的 91 | if arxiv_data.title_abstract_cn is not None and arxiv_data.title_abstract_cn != '': 92 | return arxiv_data.title_abstract_cn 93 | 94 | content = f'## {title}\n{abstract}' 95 | translated = trans_agent.run(content) 96 | arxiv_data.title_abstract_cn = translated 97 | arxiv_data.save_to_json() 98 | return translated 99 | 100 | 101 | def answer_pdf(index: int, file_id: str, arxiv_id: str) -> (str, str): 102 | arxiv_data = get_data_from_arxiv_id(arxiv_id) 103 | if arxiv_data is None: 104 | return '系统异常', '', [] 105 | 106 | if arxiv_data.content is None or len(arxiv_data.content) == 0: 107 | if isinstance(current_llm, KimiLlm): 108 | file_id = current_llm.upload_file(arxiv_data.file_path) 109 | file_content = current_llm.extract_file(file_id) 110 | arxiv_data.file_id = file_id 111 | arxiv_data.content = file_content 112 | arxiv_data.save_to_json() 113 | else: 114 | file_content = utils.read_pdf(arxiv_data.file_path) 115 | arxiv_data.content = file_content 116 | arxiv_data.save_to_json() 117 | 118 | question = paper_questions[index] 119 | 120 | if arxiv_data.faq is not None and arxiv_data.faq.get(question): 121 | return file_id, arxiv_data.faq.get(question) 122 | 123 | answer = current_llm.chat_pdf(question, arxiv_data.content) 124 | arxiv_data.faq[question] = answer 125 | arxiv_data.save_to_json() 126 | return file_id, answer 127 | 128 | 129 | def export_md(arxiv_id: str): 130 | arxiv_data = get_data_from_arxiv_id(arxiv_id) 131 | path = utils.arxiv_dir_path(arxiv_id) 132 | with open(md_template_path, 'r', encoding='utf-8') as f: 133 | template = f.read() 134 | 135 | template_format = template.format(title=arxiv_data.title, abstract=arxiv_data.abstract, 136 | title_abstract_cn=arxiv_data.title_abstract_cn) 137 | faq = arxiv_data.faq 138 | for key, value in faq.items(): 139 | item = '### ' + key + '\n' + value + '\n\n' 140 | template_format += item 141 | 142 | with open(path + os.sep + arxiv_id + '.md', 'w', encoding='utf-8') as f: 143 | f.write(template_format) 144 | 145 | print('导出结束') 146 | 147 | 148 | # 定义主页 149 | def home(): 150 | st.markdown("

Arxiv Helper

", unsafe_allow_html=True) 151 | 152 | # 初始化 session state 153 | if 'responses' not in st.session_state: 154 | st.session_state.responses = [""] * len(paper_questions) 155 | 156 | if 'title' not in st.session_state: 157 | st.session_state.title = "" 158 | if 'abstract' not in st.session_state: 159 | st.session_state.abstract = "" 160 | if 'url' not in st.session_state: 161 | st.session_state.url = "" 162 | 163 | if 'arxiv_id' not in st.session_state: 164 | st.session_state.arxiv_id = "" 165 | if 'translated_abstract' not in st.session_state: 166 | st.session_state.translated_abstract = "" 167 | 168 | if 'file_id' not in st.session_state: 169 | st.session_state.file_id = "" 170 | 171 | if 'generate_all' not in st.session_state: 172 | st.session_state.generate_all = False 173 | 174 | if 'generate_index' not in st.session_state: 175 | st.session_state.generate_index = 0 176 | 177 | url = st.text_input("请输入网址", value=st.session_state.url, key="url_input") 178 | 179 | def analysis_url(): 180 | if st.session_state.url_input: 181 | analysis_result = parse_home(st.session_state.url_input) 182 | st.session_state.title = analysis_result['title'] 183 | st.session_state.abstract = analysis_result['abstract'] 184 | st.session_state.arxiv_id = analysis_result['arxiv_id'] 185 | st.rerun() 186 | 187 | if url != st.session_state.url: 188 | st.session_state.url = url 189 | analysis_url() 190 | 191 | # 布局分两列 192 | col1, col2 = st.columns([2, 3]) 193 | 194 | with col1: 195 | if st.session_state.title: 196 | st.markdown(f"**

标题

** {st.session_state.title}", unsafe_allow_html=True) 197 | st.markdown(f"**

摘要

** {st.session_state.abstract}", 198 | unsafe_allow_html=True) 199 | 200 | if st.button("翻译"): 201 | with st.spinner("翻译中,请稍候..."): 202 | st.session_state.translated_abstract = trans(st.session_state.title, st.session_state.abstract, 203 | st.session_state.arxiv_id) 204 | st.rerun() 205 | 206 | if st.session_state.translated_abstract: 207 | st.markdown(f"**

翻译结果

** {st.session_state.translated_abstract}", 208 | unsafe_allow_html=True) 209 | 210 | with col2: 211 | if st.session_state.title: 212 | spinner_placeholder = st.empty() 213 | b1, b2 = st.columns(2) 214 | with b1: 215 | if st.button("生成所有"): 216 | st.session_state.generate_all = True 217 | st.session_state.generate_index = 0 218 | with b2: 219 | if st.button("导出MD"): 220 | with st.spinner("导出中,请稍候..."): 221 | export_md(st.session_state.arxiv_id) 222 | st.rerun() 223 | 224 | for i, question in enumerate(paper_questions): 225 | with st.form(key=f"form_{i}"): 226 | st.markdown(f"**{question}**", unsafe_allow_html=True) 227 | st.markdown(f"{st.session_state.responses[i]}", unsafe_allow_html=True) 228 | submitted = st.form_submit_button("生成") 229 | if submitted: 230 | with st.spinner("生成中,请稍候..."): 231 | _, result = answer_pdf(i, st.session_state.file_id, 232 | st.session_state.arxiv_id) 233 | st.session_state.responses[i] = result 234 | st.rerun() 235 | 236 | # 处理生成所有的问题 237 | if st.session_state.generate_all and st.session_state.generate_index < len(paper_questions): 238 | i = st.session_state.generate_index 239 | with spinner_placeholder.container(): 240 | with st.spinner(f"正在生成问题 {i + 1}/{len(paper_questions)}..."): 241 | _, result = answer_pdf(i, st.session_state.file_id, st.session_state.arxiv_id) 242 | st.session_state.responses[i] = result 243 | st.session_state.generate_index += 1 244 | if st.session_state.generate_index >= len(paper_questions): 245 | st.session_state.generate_all = False 246 | st.rerun() 247 | 248 | 249 | # 主函数 250 | def main(): 251 | st.set_page_config(layout="wide") 252 | 253 | options = ["主页", "聊天", "设置"] 254 | with st.sidebar: 255 | selected = option_menu( 256 | menu_title="菜单", # 菜单标题 257 | options=options, # 菜单选项 258 | icons=["house", "robot", "gear"], # 菜单图标 259 | menu_icon="cast", # 菜单图标 260 | default_index=0, # 默认选中菜单项 261 | orientation="vertical", # 菜单方向 262 | ) 263 | if selected == '主页': 264 | home() 265 | elif selected == '聊天': 266 | chatting(st.session_state.arxiv_id if 'arxiv_id' in st.session_state else '') 267 | elif selected == '设置': 268 | settings(current_llm) 269 | 270 | 271 | if __name__ == "__main__": 272 | main() 273 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------