├── README.md
├── install.sh
├── prompt_utils.py
├── qwen-agent.py
└── vllm_server.py

/README.md:
--------------------------------------------------------------------------------
# agent

A Qwen-based agent demo with support for multi-turn chat history.

## Dependencies

Built on the Tongyi Qianwen 200B model served by Alibaba Cloud Bailian.
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Install vLLM with CUDA 11.8.
export VLLM_VERSION=0.2.7
export PYTHON_VERSION=39
pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux1_x86_64.whl -i https://mirrors.aliyun.com/pypi/simple/

pip install langchain modelscope tiktoken requests -i https://mirrors.aliyun.com/pypi/simple/
--------------------------------------------------------------------------------
/prompt_utils.py:
--------------------------------------------------------------------------------
import copy

# Build a Qwen prompt in ChatML format.
def _build_prompt(
        generation_config,
        tokenizer,
        query,
        history=None,
        system=""):
    if history is None:
        history = []

    # Tokens that wrap each utterance
    im_start, im_start_tokens = '<|im_start|>', [tokenizer.im_start_id]
    im_end, im_end_tokens = '<|im_end|>', [tokenizer.im_end_id]
    # Newline token
    nl_tokens = tokenizer.encode("\n")

    # Encode one system/user/assistant utterance in the format {role}\n{content}
    def _tokenize_str(role, content):  # returns a tuple: (text, token ids)
        return f"{role}\n{content}", tokenizer.encode(role) + nl_tokens + tokenizer.encode(content)

    # Remaining token budget
    left_token_space = generation_config.max_window_size

    # Prompt head: the system utterance
    system_text_part, system_tokens_part = _tokenize_str("system", system)  # system_tokens_part --> system\nYou are a helpful assistant.
    system_text = f'{im_start}{system_text_part}{im_end}'
    system_tokens = im_start_tokens + system_tokens_part + im_end_tokens  # <|im_start|>system\nYou are a helpful assistant.<|im_end|>
    left_token_space -= len(system_tokens)

    # Prompt tail: the user query plus the assistant lead-in
    query_text_part, query_tokens_part = _tokenize_str('user', query)
    query_tokens_prefix = nl_tokens + im_start_tokens
    query_tokens_suffix = im_end_tokens + nl_tokens + im_start_tokens + tokenizer.encode('assistant') + nl_tokens
    if len(query_tokens_prefix) + len(query_tokens_part) + len(query_tokens_suffix) > left_token_space:  # truncate an over-long query
        query_token_len = left_token_space - len(query_tokens_prefix) - len(query_tokens_suffix)
        query_tokens_part = query_tokens_part[:query_token_len]
        query_text_part = tokenizer.decode(query_tokens_part)
    query_tokens = query_tokens_prefix + query_tokens_part + query_tokens_suffix
    query_text = f"\n{im_start}{query_text_part}{im_end}\n{im_start}assistant\n"
    left_token_space -= len(query_tokens)

    # Prompt middle: past user/assistant turns
    history_text, history_tokens = '', []
    for hist_query, hist_response in reversed(history):  # prefer the most recent history
        hist_query_text, hist_query_tokens_part = _tokenize_str("user", hist_query)  # user\n<past question>
        hist_response_text, hist_response_tokens_part = _tokenize_str("assistant", hist_response)  # assistant\n<past answer>
        # Assemble this turn
        cur_history_tokens = nl_tokens + im_start_tokens + hist_query_tokens_part + im_end_tokens + nl_tokens + im_start_tokens + hist_response_tokens_part + im_end_tokens
        cur_history_text = f"\n{im_start}{hist_query_text}{im_end}\n{im_start}{hist_response_text}{im_end}"
        # Keep the turn only if it fits the remaining budget
        if len(cur_history_tokens) <= left_token_space:
            history_text = cur_history_text + history_text
            history_tokens = cur_history_tokens + history_tokens
            left_token_space -= len(cur_history_tokens)
        else:
            break

    # Assemble the full prompt
    prompt_str = f'{system_text}{history_text}{query_text}'
    prompt_tokens = system_tokens + history_tokens + query_tokens
    return prompt_str, prompt_tokens

# Strip trailing stop words from a generation
def remove_stop_words(token_ids, stop_words_ids):
    token_ids = copy.deepcopy(token_ids)
    while len(token_ids) > 0:
        if token_ids[-1] in stop_words_ids:
            token_ids.pop(-1)
        else:
            break
    return token_ids
--------------------------------------------------------------------------------
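For reference, a minimal usage sketch of `_build_prompt` (not part of the repo). It assumes the `Qwen/Qwen-7B-Chat` checkpoint, whose tokenizer exposes the `im_start_id`/`im_end_id` attributes used above and whose generation config carries `max_window_size`; the model id and sample conversation are illustrative, and `transformers` is available as a vLLM dependency:

# Illustrative sketch only: any Qwen-1 chat checkpoint with a ChatML tokenizer should work.
from transformers import AutoTokenizer
from transformers.generation import GenerationConfig
from prompt_utils import _build_prompt

tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True)
generation_config = GenerationConfig.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True)

prompt_str, prompt_tokens = _build_prompt(
    generation_config,
    tokenizer,
    query='What about tomorrow?',
    history=[('How is the weather today?', 'It is sunny.')],
    system='You are a helpful assistant.')

# prompt_str now reads:
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# How is the weather today?<|im_end|>
# <|im_start|>assistant
# It is sunny.<|im_end|>
# <|im_start|>user
# What about tomorrow?<|im_end|>
# <|im_start|>assistant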
/qwen-agent.py:
--------------------------------------------------------------------------------
import os
import json
from langchain_community.tools.tavily_search import TavilySearchResults
import broadscope_bailian
import datetime

def llm(query, history=None, user_stop_words=None):  # call the Bailian completion API
    if history is None:
        history = []
    if user_stop_words is None:
        user_stop_words = []

    access_key_id = os.environ.get("ACCESS_KEY_ID")
    access_key_secret = os.environ.get("ACCESS_KEY_SECRET")
    agent_key = os.environ.get("AGENT_KEY")
    app_id = os.environ.get("APP_ID")

    try:
        messages = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
        for hist in history:
            messages.append({'role': 'user', 'content': hist[0]})
            messages.append({'role': 'assistant', 'content': hist[1]})
        messages.append({'role': 'user', 'content': query})
        client = broadscope_bailian.AccessTokenClient(access_key_id=access_key_id,
                                                      access_key_secret=access_key_secret,
                                                      agent_key=agent_key)
        resp = broadscope_bailian.Completions(token=client.get_token()).create(
            app_id=app_id,
            messages=messages,
            result_format="message",
            stop=user_stop_words,
        )
        # print(resp)
        content = resp.get("Data", {}).get("Choices", [])[0].get("Message", {}).get("Content")
        return content
    except Exception as e:
        return str(e)

# Tavily search engine
os.environ['TAVILY_API_KEY'] = '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'  # Tavily search engine API key
tavily = TavilySearchResults(max_results=5)
tavily.description = 'This is a search engine like Google or Baidu. It supports searching for knowledge, weather, stocks, movies, novels, encyclopedia entries and more. If you are unsure about something, you should search for it.'

# Tool list
tools = [tavily, ]

tool_names = ' or '.join([tool.name for tool in tools])  # joined tool names
tool_descs = []  # tool descriptions
for t in tools:
    args_desc = []
    for name, info in t.args.items():
        args_desc.append({'name': name, 'description': info['description'] if 'description' in info else '', 'type': info['type']})
    args_desc = json.dumps(args_desc, ensure_ascii=False)
    tool_descs.append('%s: %s, args: %s' % (t.name, t.description, args_desc))
tool_descs = '\n'.join(tool_descs)

prompt_tpl = '''Today is {today}. Please answer the following questions as best you can. You have access to the following tools:

{tool_descs}

This is the chat history so far:
{chat_history}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {query}
{agent_scratchpad}
'''
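# To make the format concrete, one illustrative (made-up) exchange under this
# template looks as follows. The 'Observation:' stop word passed to llm() in
# agent_execute() below cuts generation off at that point, so the agent can run
# the tool itself and splice the real result back into the scratchpad:
#
#   Question: What is the weather in Beijing today?
#   Thought: I should look this up with the search tool.
#   Action: tavily_search_results_json
#   Action Input: {"query": "Beijing weather today"}
#   Observation: <real Tavily results inserted by the agent, not by the model>
#   Thought: I now know the final answer
#   Final Answer: It is sunny in Beijing today.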
{}).get("Choices", [])[0].get("Message", {}).get("Content") 29 | return content 30 | except Exception as e: 31 | return str(e) 32 | 33 | # travily搜索引擎 34 | os.environ['TAVILY_API_KEY']='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' # travily搜索引擎api key 35 | tavily=TavilySearchResults(max_results=5) 36 | tavily.description='这是一个类似谷歌和百度的搜索引擎,搜索知识、天气、股票、电影、小说、百科等都是支持的哦,如果你不确定就应该搜索一下,谢谢!s' 37 | 38 | # 工具列表 39 | tools=[tavily, ] 40 | 41 | tool_names='or'.join([tool.name for tool in tools]) # 拼接工具名 42 | tool_descs=[] # 拼接工具详情 43 | for t in tools: 44 | args_desc=[] 45 | for name,info in t.args.items(): 46 | args_desc.append({'name':name,'description':info['description'] if 'description' in info else '','type':info['type']}) 47 | args_desc=json.dumps(args_desc,ensure_ascii=False) 48 | tool_descs.append('%s: %s,args: %s'%(t.name,t.description,args_desc)) 49 | tool_descs='\n'.join(tool_descs) 50 | 51 | prompt_tpl='''Today is {today}. Please Answer the following questions as best you can. You have access to the following tools: 52 | 53 | {tool_descs} 54 | 55 | These are chat history before: 56 | {chat_history} 57 | 58 | Use the following format: 59 | 60 | Question: the input question you must answer 61 | Thought: you should always think about what to do 62 | Action: the action to take, should be one of [{tool_names}] 63 | Action Input: the input to the action 64 | Observation: the result of the action 65 | ... (this Thought/Action/Action Input/Observation can be repeated zero or more times) 66 | Thought: I now know the final answer 67 | Final Answer: the final answer to the original input question 68 | 69 | Begin! 70 | 71 | Question: {query} 72 | {agent_scratchpad} 73 | ''' 74 | 75 | def agent_execute(query,chat_history=[]): 76 | global tools,tool_names,tool_descs,prompt_tpl,llm,tokenizer 77 | 78 | agent_scratchpad='' # agent执行过程 79 | while True: 80 | # 1)触发llm思考下一步action 81 | history='\n'.join(['Question:%s\nAnswer:%s'%(his[0],his[1]) for his in chat_history]) 82 | today=datetime.datetime.now().strftime('%Y-%m-%d') 83 | prompt=prompt_tpl.format(today=today,chat_history=history,tool_descs=tool_descs,tool_names=tool_names,query=query,agent_scratchpad=agent_scratchpad) 84 | print('\033[32m---等待LLM返回... ...\n%s\n\033[0m'%prompt,flush=True) 85 | response=llm(prompt,user_stop_words=['Observation:']) 86 | print('\033[34m---LLM返回---\n%s\n---\033[34m'%response,flush=True) 87 | 88 | # 2)解析thought+action+action input+observation or thought+final answer 89 | thought_i=response.rfind('Thought:') 90 | final_answer_i=response.rfind('\nFinal Answer:') 91 | action_i=response.rfind('\nAction:') 92 | action_input_i=response.rfind('\nAction Input:') 93 | observation_i=response.rfind('\nObservation:') 94 | 95 | # 3)返回final answer,执行完成 96 | if final_answer_i!=-1 and thought_i