├── data
│   ├── DB
│   │   ├── 9dd35c25-fd48-4ead-8cd2-da9efb3b30b7
│   │   │   ├── link_lists.bin
│   │   │   ├── header.bin
│   │   │   └── length.bin
│   │   └── chroma.sqlite3
│   └── AutomobileIndustry_raw
│       ├── porsche-fahrwerkentwicklung-rear-axle-steering-en.pdf
│       ├── porsche-fahrwerkentwicklung-chassis-of-the-future-en.pdf
│       ├── porsche-fahrwerkentwicklung-adaptive-air-suspension-en.pdf
│       ├── porsche-fahrwerkentwicklung-electromechanical-steering-en.pdf
│       ├── porsche-fahrwerkentwicklung-porsche-4d-chassis-control-en.pdf
│       ├── porsche-fahrwerkentwicklung-porsche-traction-management-ptm-en.pdf
│       ├── porsche-fahrwerkentwicklung-porsche-stability-management-psm-en.pdf
│       ├── porsche-fahrwerkentwicklung-active-anti-roll-stabilisation-pdcc-en.pdf
│       ├── porsche-fahrwerkentwicklung-porsche-active-suspension-management-pasm-en.pdf
│       ├── porsche-fahrwerkentwicklung-porsche-torque-vectoring-ptv-and-ptv-plus-en.pdf
│       ├── porsche-fahrwerkentwicklung-the-second-cornerstone-chassis-mechanics-en.pdf
│       ├── porsche-fahrwerkentwicklung-the-third-cornerstone-mechatronic-systems-en.pdf
│       ├── porsche-fahrwerkentwicklung-the-first-cornerstone-the-overall-vehicle-concept-en.pdf
│       └── porsche-fahrwerkentwicklung-the-three-cornerstones-of-porsche-chassis-development-en.pdf
├── .idea
│   ├── .gitignore
│   ├── vcs.xml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── modules.xml
│   ├── RAG_simp_DEMO.iml
│   └── misc.xml
├── README.md
├── 01_LoadPDF_Embedding_StoreIntoBD.py
├── 02_Final_RAG_OnlyChat.py
├── 04_TrainLLM.py
├── 03_DEMO_Aiagenttest.py
├── 01_LoadPDF_Embedding_StoreIntoBD2.py
└── 03_02_AIAgent_wASR.py

/data/DB/9dd35c25-fd48-4ead-8cd2-da9efb3b30b7/link_lists.bin:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
--------------------------------------------------------------------------------
/data/DB/chroma.sqlite3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/DB/chroma.sqlite3
--------------------------------------------------------------------------------
/data/DB/9dd35c25-fd48-4ead-8cd2-da9efb3b30b7/header.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/DB/9dd35c25-fd48-4ead-8cd2-da9efb3b30b7/header.bin
--------------------------------------------------------------------------------
/data/DB/9dd35c25-fd48-4ead-8cd2-da9efb3b30b7/length.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/DB/9dd35c25-fd48-4ead-8cd2-da9efb3b30b7/length.bin
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-rear-axle-steering-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-rear-axle-steering-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-chassis-of-the-future-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-chassis-of-the-future-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-adaptive-air-suspension-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-adaptive-air-suspension-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-electromechanical-steering-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-electromechanical-steering-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-4d-chassis-control-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-4d-chassis-control-en.pdf
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-traction-management-ptm-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-traction-management-ptm-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-stability-management-psm-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-stability-management-psm-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-active-anti-roll-stabilisation-pdcc-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-active-anti-roll-stabilisation-pdcc-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-active-suspension-management-pasm-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-active-suspension-management-pasm-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-torque-vectoring-ptv-and-ptv-plus-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-porsche-torque-vectoring-ptv-and-ptv-plus-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-the-second-cornerstone-chassis-mechanics-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-the-second-cornerstone-chassis-mechanics-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-the-third-cornerstone-mechatronic-systems-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-the-third-cornerstone-mechatronic-systems-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-the-first-cornerstone-the-overall-vehicle-concept-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-the-first-cornerstone-the-overall-vehicle-concept-en.pdf
--------------------------------------------------------------------------------
/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-the-three-cornerstones-of-porsche-chassis-development-en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ForestDake/RAG_simp_DEMO/HEAD/data/AutomobileIndustry_raw/porsche-fahrwerkentwicklung-the-three-cornerstones-of-porsche-chassis-development-en.pdf
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/RAG_simp_DEMO.iml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This is the source code for the DEMO; for details, see the accompanying bilibili video.

The following packages must be installed via pip:
- langchain-community
- langchain-core
- Chroma

Please adapt the paths for the DB setup and the raw PDF data to your own PC.
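For example (assuming the `chromadb` PyPI package provides Chroma; the scripts additionally import `langchain`, `sentence-transformers`, `transformers`, `peft`, and `openai`, so you will likely need those too):

    pip install langchain langchain-community langchain-core chromadb sentence-transformers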

--------------------------------------------------------------------------------
/01_LoadPDF_Embedding_StoreIntoBD.py:
--------------------------------------------------------------------------------
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
import os
from langchain_community.embeddings import HuggingFaceEmbeddings

path_docfolder = "/Users/qicao/Documents/GitHub/RAG_simp_DEMO/data/AutomobileIndustry_raw"
path_db = "/Users/qicao/Documents/GitHub/RAG_simp_DEMO/data/DB"

# Choose the embedding model
#model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_name = "sentence-transformers/sentence-t5-large"
embedding = HuggingFaceEmbeddings(model_name=model_name)


def read_pdf_files_in_folder_onebyone_and_Store(path_docfolder, path_db, embedding):
    # Iterate over all files in the folder
    for filename in os.listdir(path_docfolder):
        print(filename)
        if filename.endswith('.pdf'):  # Check if the file is a PDF
            file_path = os.path.join(path_docfolder, filename)
            print(f"Reading file: {file_path}")

            # Load and split the PDF (PyPDFLoader opens the file itself,
            # so no separate open() call is needed)
            loader = PyPDFLoader(file_path)
            pages = loader.load_and_split()
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=260,
                chunk_overlap=20,
            )
            docs = text_splitter.split_documents(pages)
            # Facility Step 3: embed the chunks with the chosen model
            db2 = Chroma.from_documents(docs, embedding, persist_directory=path_db)
            print("Successfully saved the embeddings into the DB")
    return True

read_pdf_files_in_folder_onebyone_and_Store(path_docfolder, path_db, embedding)

#------------------ From here on we need a chat to talk to the DB --------------------
--------------------------------------------------------------------------------
/02_Final_RAG_OnlyChat.py:
--------------------------------------------------------------------------------
# Example: reuse your existing OpenAI setup
from openai import OpenAI
import os
from langchain.chat_models import ChatOpenAI
#from langchain.document_loaders import PyPDFLoader
#from langchain.text_splitter import RecursiveCharacterTextSplitter
##from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

path_db = "/Users/qicao/Documents/GitHub/RAG_simp_DEMO/data/DB"

# Define a function that combines the query with the documents retrieved from the RAG vector DB.
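# For illustration, the augmented prompt produced below looks roughly like this
# (the <chunk i> placeholders stand in for the top-3 chunks returned by the DB):
#
#   Using the contexts below, answer the query:
#   contexts:
#   <chunk 1>
#   <chunk 2>
#   <chunk 3>
#   query: <the original user question>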
def augment_prompt(query: str):
    results = vectorstore.similarity_search(query, k=3)
    source_knowledge = "\n".join([x.page_content for x in results])
    augmented_prompt = f"""Using the contexts below, answer the query:
contexts:
{source_knowledge}
query: {query}"""
    return augmented_prompt
# The output is the original query combined with the top-3 results from the database.

def llm(query, history=[], user_stop_words=[]):  # call the local API server
    try:
        messages = [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content=augment_prompt(query)),
        ]
        res = chat(messages)
        #print("-----Answer of the xPAI to your question is------")
        #print(res.content)
        content = res.content
        return content
    except Exception as e:
        return str(e)
# The output is the response from the LLM.

# Point to the local server
client = OpenAI(base_url="http://localhost:1234/v1", api_key="not-needed")
os.environ["OPENAI_API_KEY"] = "not-needed"
os.environ["OPENAI_API_BASE"] = "http://localhost:1234/v1"

chat = ChatOpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    openai_api_base=os.environ["OPENAI_API_BASE"]
)

from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)

# Facility Step 3: create the embeddings with the chosen model
from langchain.embeddings import HuggingFaceEmbeddings
model_name = "sentence-transformers/sentence-t5-large"
#model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(model_name=model_name)

# load the DB from disk
vectorstore = Chroma(persist_directory=path_db, embedding_function=embedding)

#------------------ From here on we chat against the DB --------------------

def agent_execute(query, chat_history=[]):
    global augment_prompt, llm

    # Ask the LLM. Note: llm() already calls augment_prompt() internally, so we
    # pass the raw query here; augmenting first would augment the prompt twice.
    response = llm(query, user_stop_words=['Observation:'])
    print("-----The Answer of xPAI is ---------")
    print(response)
    chat_history.append((query, response))
    return True, response, chat_history

def agent_execute_with_retry(query, chat_history=[], retry_times=3):
    # Only a single attempt is made; retry_times is kept for interface
    # compatibility with the agent demo scripts.
    for i in range(retry_times):
        success, response, chat_history = agent_execute(query, chat_history=chat_history)
        return success, response, chat_history

#Chat Step 1:
my_history = []
while True:
    query = input('query:')
    success, response, my_history = agent_execute_with_retry(query, chat_history=my_history)
    my_history = my_history[-10:]
--------------------------------------------------------------------------------
/04_TrainLLM.py:
--------------------------------------------------------------------------------
# Fine-tune an LLM from Hugging Face with LoRA: https://arxiv.org/abs/2106.09685
# Step 1: Define your model target.
# Step 2: Decide whether training is supervised or not, then prepare the dataset.
# Step 3: Choose a suitable base model and use its config:
#   https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForSequenceClassification
# Step 4: Start training with PEFT LoRA.
# Step 5: Save the model checkpoint ("output_dir"); reused together with the pretrained model, it yields the adapted one.
# Step 6: Reload it and evaluate it on the evaluation dataset: https://huggingface.co/docs/trl/main/en/use_model
# Step 7: If it is good, you could also upload it to Hugging Face.

# A very detailed guide: https://www.youtube.com/watch?v=eC6Hd1hFvos


from datasets import load_dataset, DatasetDict, Dataset
from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer)

from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import evaluate
import torch
import numpy as np
import time

start = time.perf_counter()

# load dataset
dataset = load_dataset('shawhin/imdb-truncated')

# display the fraction of training data with label=1
print(np.array(dataset['train']['label']).sum() / len(dataset['train']['label']))

# Choose the base model; candidates are listed at https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForSequenceClassification
model_checkpoint = 'distilbert-base-uncased'
#model_checkpoint = 'Qwen/Qwen1.5-7B-Chat' # Base model not yet available; no info in the model card
#model_checkpoint = 'roberta-base' # alternatively use roberta-base, but it is bigger, so training takes longer

# define label maps
id2label = {0: "Negative", 1: "Positive"}
label2id = {"Negative": 0, "Positive": 1}

# generate a classification model from model_checkpoint
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=2, id2label=id2label, label2id=label2id)

# create tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)

# add a pad token if none exists
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

# create tokenize function
def tokenize_function(examples):
    # extract text
    text = examples["text"]

    # tokenize and truncate text
    tokenizer.truncation_side = "left"
    tokenized_inputs = tokenizer(
        text,
        return_tensors="np",
        truncation=True,
        max_length=512
    )

    return tokenized_inputs

# tokenize training and validation datasets
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# create data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# import accuracy evaluation metric
accuracy = evaluate.load("accuracy")

# define an evaluation function to pass into the trainer later
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)

    # accuracy.compute already returns {"accuracy": value}, so return it directly
    return accuracy.compute(predictions=predictions, references=labels)

# define a list of examples
text_list = ["It was good.", "Not a fan, don't recommend.", "Better than the first one.", "This is not worth watching even once.", "This one is a pass.",]

print("Untrained model predictions:")
print(text_list)
print("----------------------------")
for text in text_list:
    # tokenize text
    inputs = tokenizer.encode(text, return_tensors="pt")
    # compute logits
    logits = model(inputs).logits
    # convert logits to a label
    predictions = torch.argmax(logits)
    print(text + " - " + id2label[predictions.tolist()])

peft_config = LoraConfig(task_type="SEQ_CLS",
                         r=4,
                         lora_alpha=32,
                         lora_dropout=0.01,
                         target_modules=['q_lin'])

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# hyperparameters
lr = 1e-3
batch_size = 4
num_epochs = 10

# define training arguments
training_args = TrainingArguments(
    output_dir=model_checkpoint + "-lora-text-classification",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# create trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,  # dynamically pads examples in each batch to equal length
    compute_metrics=compute_metrics,
)

# train model
trainer.train()
end = time.perf_counter()
model.to('mps')  # move to mps for Mac (can alternatively use 'cpu')

print("Trained model predictions:")
print("--------------------------")
for text in text_list:
    inputs = tokenizer.encode(text, return_tensors="pt").to("mps")  # move to mps for Mac (can alternatively use 'cpu')

    logits = model(inputs).logits
    predictions = torch.max(logits, 1).indices

    print(text + " - " + id2label[predictions.tolist()[0]])

print(end - start)
--------------------------------------------------------------------------------
/03_DEMO_Aiagenttest.py:
--------------------------------------------------------------------------------
import os
import json
import datetime
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    SystemMessage,
    HumanMessage
)

# Set up the Bing search tool
from langchain_community.tools import BingSearchRun
from langchain_community.utilities import BingSearchAPIWrapper
os.environ["BING_SUBSCRIPTION_KEY"] = ""
os.environ["BING_SEARCH_URL"] = "https://api.bing.microsoft.com/v7.0/search"
search_api = BingSearchAPIWrapper(k=3)
searchtool = BingSearchRun(api_wrapper=search_api)
searchtool.description = 'This is the Bing search tool. Useful for searching real-time information, such as news.'

# Set up the Wikipedia tool: define its name, description, JSON schema, the function to call, and whether the result is returned to the user directly
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=100)
wikitool = WikipediaQueryRun(api_wrapper=api_wrapper)
wikitool.name = 'Wikipedia'
wikitool.description = 'A wrapper around Wikipedia. Useful for when you need to answer general questions about the definition and description of people, places, facts, history, etc.'
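
# For illustration only: the loop below is expected to render each tool roughly as
# follows (the args schema is read from each tool's .args at runtime, so the exact
# field values here are an assumption):
#   Wikipedia: A wrapper around Wikipedia. ...,args: [{"name": "query", "description": "...", "type": "string"}]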

# Tool list: assemble each tool's key information so the AI agent can choose tools and reflect
tools = [searchtool, wikitool,]
tool_names = ' or '.join([tool.name for tool in tools])  # join the tool names
tool_descs = []  # assemble the tool details
for t in tools:
    args_desc = []
    for name, info in t.args.items():
        args_desc.append(
            {'name': name, 'description': info['description'] if 'description' in info else '', 'type': info['type']})
    args_desc = json.dumps(args_desc, ensure_ascii=False)
    tool_descs.append('%s: %s,args: %s' % (t.name, t.description, args_desc))
tool_descs = '\n'.join(tool_descs)

# Build the prompt template
prompt_tpl = '''Today is {today}. Please answer the following questions as best you can. You have access to the following tools:

{tool_description}

This is the chat history so far:
{chat_history}

Use the following format:
- Question: the input question you must answer
- Thought: you should always think about what to do
- Action: the action to take, should be one of [{tool_names}]
- Action Input: the input to the action
- Observation: the result of the action
... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
- Thought: I now know the final answer
- Final Answer: the final answer to the original input question

Begin!

Question: {query}
{agent_scratchpad}
'''

# Call the LLM
def llm(query, history=[], user_stop_words=[]):  # call the local API server

    os.environ["OPENAI_API_KEY"] = "not-needed"
    os.environ["OPENAI_API_BASE"] = "http://localhost:1234/v1"

    chat = ChatOpenAI(
        openai_api_key=os.environ["OPENAI_API_KEY"],
        openai_api_base=os.environ["OPENAI_API_BASE"]
    )

    try:
        messages = [
            SystemMessage(content="You are a helpful assistant."),
        ]
        for hist in history:
            messages.append(SystemMessage(content=hist[0]))
            messages.append(SystemMessage(content=hist[1]))
        messages.append(HumanMessage(content=query))
        resp = chat(messages)

        # print(resp)
        content = resp.content
        return content
    except Exception as e:
        return str(e)

def agent_execute(query, chat_history=[]):
    global tools, tool_names, tool_descs, prompt_tpl, llm, tokenizer

    agent_scratchpad = ''  # the agent's execution trace
    while True:
        # 1) trigger the LLM to think about the next action
        history = '\n'.join(['Question:%s\nAnswer:%s' % (his[0], his[1]) for his in chat_history])
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        prompt = prompt_tpl.format(today=today, chat_history=history, tool_description=tool_descs, tool_names=tool_names,
                                   query=query, agent_scratchpad=agent_scratchpad)
        print('\033[32m---Waiting for the LLM...\n%s\n\033[0m' % prompt, flush=True)
        response = llm(prompt, user_stop_words=['Observation:'])
        print('\033[34m---LLM response---\n%s\n---\033[0m' % response, flush=True)

        # 2) parse thought + action + action input + observation, or thought + final answer
        thought_i = response.rfind('Thought:')
        final_answer_i = response.rfind('\nFinal Answer:')
        action_i = response.rfind('\nAction:')
        action_input_i = response.rfind('\nAction Input:')
        observation_i = response.rfind('\nObservation:')

        # 3) a final answer was returned; execution is complete
        if final_answer_i != -1 and thought_i < final_answer_i:
            final_answer = response[final_answer_i + len('\nFinal Answer:'):].strip()
            chat_history.append((query, final_answer))
            return True, final_answer, chat_history

        # 4) parse the action
        if not (thought_i < action_i < action_input_i):
            return False, 'malformed LLM response', chat_history
        if observation_i == -1:
            observation_i = len(response)
            response = response + 'Observation: '
        thought = response[thought_i + len('Thought:'):action_i].strip()
        action = response[action_i + len('\nAction:'):action_input_i].strip()
        action_input = response[action_input_i + len('\nAction Input:'):observation_i].strip()

        # 5) match the tool
        the_tool = None
        for t in tools:
            if t.name == action:
                the_tool = t
                break
        if the_tool is None:
            observation = 'the tool does not exist'
            agent_scratchpad = agent_scratchpad + response + observation + '\n'
            continue

        # 6) run the tool
        try:
            action_input = json.loads(action_input)
            tool_ret = the_tool.invoke(input=json.dumps(action_input))
        except Exception as e:
            observation = 'the tool raised an error: {}'.format(e)
        else:
            observation = str(tool_ret)
        agent_scratchpad = agent_scratchpad + response + observation + '\n'

def agent_execute_with_retry(query, chat_history=[], retry_times=3):
    for i in range(retry_times):
        success, result, chat_history = agent_execute(query, chat_history=chat_history)
        if success:
            return success, result, chat_history
    return success, result, chat_history

my_history = []
while True:
    query = input('query:')
    success, result, my_history = agent_execute_with_retry(query, chat_history=my_history)
    my_history = my_history[-10:]
--------------------------------------------------------------------------------
/01_LoadPDF_Embedding_StoreIntoBD2.py:
--------------------------------------------------------------------------------
from langchain_community.document_loaders import PyPDFLoader
#from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import numpy as np
from ltp import StnSplit
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

path_docfolder = "/Users/qicao/Documents/GitHub/RAG_langchain/data/AutomobileIndustry_raw"
path_db = "/Users/qicao/Documents/GitHub/RAG_simp_DEMO/data/DB"

# Choose the embedding model
#model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_name = "sentence-transformers/sentence-t5-large"

embedding_function = SentenceTransformerEmbeddings(model_name=model_name)

# Percentile threshold that controls the chunk size; at 100 the whole article becomes a single chunk
THRESHOLD = 70
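
# How the splitter below works (a summary of the steps implemented in the class):
#   1. cut_sentences: pre-split the text into sentences with LTP's StnSplit.
#   2. combine_sentences: give each sentence a context window of +/- buffer_size neighbours.
#   3. build_sentences_dict: embed each windowed sentence with the SentenceTransformer model.
#   4. calculate_cosine_distances: compute the cosine distance between consecutive windows.
#   5. calculate_indices_above_thresh: mark breakpoints where the distance exceeds the
#      THRESHOLD-th percentile; cut_chunks then joins the sentences between breakpoints.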

class SemanticParagraphSplitter:
    def __init__(self, threshold=THRESHOLD, model_path=model_name):
        self.threshold = threshold
        self.model = SentenceTransformer(model_path)

    @staticmethod
    def cut_sentences(text):
        sentences = StnSplit().split(text)
        return sentences

    @staticmethod
    def combine_sentences(sentences, buffer_size=2):
        # Go through each sentence dict
        for i in range(len(sentences)):

            # Create a string that will hold the joined sentences
            combined_sentence = ''

            # Add sentences before the current one, based on the buffer size
            for j in range(i - buffer_size, i):
                # Check that index j is not negative (to avoid an index error on the first sentences)
                if j >= 0:
                    # Add the sentence at index j to the combined_sentence string
                    combined_sentence += sentences[j]['sentence'] + ' '

            # Add the current sentence
            combined_sentence += sentences[i]['sentence']

            # Add sentences after the current one, based on the buffer size
            for j in range(i + 1, i + 1 + buffer_size):
                # Check that index j is within the range of the sentences list
                if j < len(sentences):
                    # Add the sentence at index j to the combined_sentence string
                    combined_sentence += ' ' + sentences[j]['sentence']

            # Store the combined sentence in the current sentence dict
            sentences[i]['combined_sentence'] = combined_sentence

        return sentences

    def build_sentences_dict(self, sentences):
        indexed_sentences = [{'sentence': x, 'index': i} for i, x in enumerate(sentences)]
        combined_sentences = self.combine_sentences(indexed_sentences)

        embeddings = self.model.encode([x['combined_sentence'] for x in combined_sentences], normalize_embeddings=True)

        for i, sentence in enumerate(combined_sentences):
            sentence['combined_sentence_embedding'] = embeddings[i]

        return combined_sentences

    @staticmethod
    def calculate_cosine_distances(sentences):
        distances = []
        for i in range(len(sentences) - 1):
            embedding_current = sentences[i]['combined_sentence_embedding']
            embedding_next = sentences[i + 1]['combined_sentence_embedding']

            # Calculate cosine similarity (the embeddings are normalized, so a dot product suffices)
            # similarity = cosine_similarity([embedding_current], [embedding_next])[0][0]
            similarity = embedding_current @ embedding_next.T
            # Convert to cosine distance
            distance = 1 - similarity

            # Append the cosine distance to the list
            distances.append(distance)

            # Store the distance in the dictionary
            sentences[i]['distance_to_next'] = distance

        # Optionally handle the last sentence
        # sentences[-1]['distance_to_next'] = None  # or a default value

        return distances, sentences

    def calculate_indices_above_thresh(self, distances):
        breakpoint_distance_threshold = np.percentile(distances, self.threshold)
        # The indices of those breakpoints in the list
        indices_above_thresh = [i for i, x in enumerate(distances) if x > breakpoint_distance_threshold]
        return indices_above_thresh

    @staticmethod
    def cut_chunks(indices_above_thresh, sentences):
        # Initialize the start index
        start_index = 0

        # Create a list to hold the grouped sentences
        chunks = []

        # Iterate through the breakpoints to slice the sentences
        for index in indices_above_thresh:
            # The end index is the current breakpoint
            end_index = index

            # Slice the sentence dicts from the current start index to the end index
            group = sentences[start_index:end_index + 1]
            combined_text = ' '.join([d['sentence'] for d in group])
            chunks.append(combined_text)

            # Update the start index for the next group
            start_index = index + 1

        # The last group, if any sentences remain
        if start_index < len(sentences):
            combined_text = ' '.join([d['sentence'] for d in sentences[start_index:]])
            chunks.append(combined_text)

        return chunks

    def split(self, text):
        single_sentences = self.cut_sentences(text)  # pre-split with the standard sentence splitter
        print(f"{len(single_sentences)} single sentences were found")
        chunks = self.split_passages(single_sentences)
        return chunks

    def split_passages(self, passages):
        combined_sentences = self.build_sentences_dict(passages)
        distances, sentences = self.calculate_cosine_distances(combined_sentences)

        indices_above_thresh = self.calculate_indices_above_thresh(distances)
        chunks = self.cut_chunks(indices_above_thresh, sentences)
        return chunks

def read_pdf_files_in_folder_onebyone_and_Store(path_docfolder, path_db, embedding):
    # Iterate over all files in the folder
    for filename in os.listdir(path_docfolder):
        #print(filename)
        if filename.endswith('.pdf'):  # Check if the file is a PDF
            file_path = os.path.join(path_docfolder, filename)
            print(f"Reading file: {file_path}")

            # Load the PDF (PyPDFLoader opens the file itself)
            loader = PyPDFLoader(file_path)
            pages_pypdf = loader.load()
            # Concatenate all pages of the PDF into one text
            pages = "\n".join(page.page_content for page in pages_pypdf)

            text_splitter = SemanticParagraphSplitter(threshold=THRESHOLD)
            # text_splitter = RecursiveCharacterTextSplitter(
            #     chunk_size=260,
            #     chunk_overlap=20,
            # )
            docs = text_splitter.split(pages)

            # Facility Step 3: embed the chunks with the chosen model
            #db2 = Chroma.from_documents(docs, embedding, persist_directory=path_db)
            db2 = Chroma.from_texts(docs, embedding, persist_directory=path_db)
            print("Successfully saved the embeddings into the DB")
    return True

read_pdf_files_in_folder_onebyone_and_Store(path_docfolder, path_db, embedding_function)

#------------------ From here on we need a chat to talk to the DB --------------------
--------------------------------------------------------------------------------
/03_02_AIAgent_wASR.py:
--------------------------------------------------------------------------------
# https://huggingface.co/distil-whisper/distil-large-v3
from pvrecorder import PvRecorder
import wave, struct
import time

# for index, device in enumerate(PvRecorder.get_available_devices()):
#     print(f"[{index}]{device}")
# [0] "大可小机灵" microphone
# [1] AKG Ara USB Microphone
# [2] MacBook Pro microphone
# [3] LarkAudioDevice
# [4] Microsoft Teams Audio

recorder = PvRecorder(device_index=1, frame_length=512)
folderpath = "/Users/qicao/Documents/GitHub/RAG_langchain/playground/Audio"

def inputviamic(index: int):
    audio = []
    global path
    path = folderpath + "/audio" + str(index) + ".wav"
    print(path)
    print("the recorder runs for 10 seconds")
    recorder.start()
    starttime = time.time()

    print(starttime)
    while (time.time() - starttime) < 10:
        frame = recorder.read()
        audio.extend(frame)

    print("Recording ended")

    recorder.stop()

    with wave.open(path, 'w') as f:
        f.setparams((1, 2, 16000, 512, "NONE", "NONE"))
        f.writeframes(struct.pack("h" * len(audio), *audio))
    return True


import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset

def initializeASR():
    # device = "cuda:0" if torch.cuda.is_available() else "cpu"
    device = 'cpu'
    # torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    torch_dtype = torch.float32
    model_id = "distil-whisper/distil-large-v3"

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        torch_dtype=torch_dtype,
        device=device,
    )
    return pipe

def ASRwavfile(pipe):
    # The ASR pipeline is created once in initializeASR() and passed in here.
    #dataset = load_dataset(folderpath, "default", split="train")
    dataset = load_dataset(folderpath, data_files=path, split="train")
    sample = dataset[0]["audio"]

    result = pipe(sample)
    content = result["text"]
    print(content)
    return content

import os
import json
import datetime
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    SystemMessage,
    HumanMessage
)

# Set up the Bing search tool
from langchain_community.tools import BingSearchRun
from langchain_community.utilities import BingSearchAPIWrapper
os.environ["BING_SUBSCRIPTION_KEY"] = ""
os.environ["BING_SEARCH_URL"] = "https://api.bing.microsoft.com/v7.0/search"
search_api = BingSearchAPIWrapper(k=3)
searchtool = BingSearchRun(api_wrapper=search_api)
searchtool.description = 'This is the Bing search tool. Useful for searching real-time information, such as news.'

# Set up the Wikipedia tool: define its name, description, JSON schema, the function to call, and whether the result is returned to the user directly
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=100)
wikitool = WikipediaQueryRun(api_wrapper=api_wrapper)
wikitool.name = 'Wikipedia'
wikitool.description = 'A wrapper around Wikipedia. Useful for when you need to answer general questions about the definition and description of people, places, facts, history, etc.'

# Tool list: assemble each tool's key information so the AI agent can choose tools and reflect
tools = [searchtool, wikitool,]
tool_names = ' or '.join([tool.name for tool in tools])  # join the tool names
tool_descs = []  # assemble the tool details
for t in tools:
    args_desc = []
    for name, info in t.args.items():
        args_desc.append(
            {'name': name, 'description': info['description'] if 'description' in info else '', 'type': info['type']})
    args_desc = json.dumps(args_desc, ensure_ascii=False)
    tool_descs.append('%s: %s,args: %s' % (t.name, t.description, args_desc))
tool_descs = '\n'.join(tool_descs)

# Build the prompt template
prompt_tpl = '''Today is {today}. Please answer the following questions as best you can. You have access to the following tools:

{tool_description}

This is the chat history so far:
{chat_history}

Use the following format:
- Question: the input question you must answer
- Thought: you should always think about what to do
- Action: the action to take, should be one of [{tool_names}]
- Action Input: the input to the action
- Observation: the result of the action
... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
- Thought: I now know the final answer
- Final Answer: the final answer to the original input question

Begin!

Question: {query}
{agent_scratchpad}
'''

# Call the LLM
def llm(query, history=[], user_stop_words=[]):  # call the local API server

    os.environ["OPENAI_API_KEY"] = "not-needed"
    os.environ["OPENAI_API_BASE"] = "http://localhost:1234/v1"

    chat = ChatOpenAI(
        openai_api_key=os.environ["OPENAI_API_KEY"],
        openai_api_base=os.environ["OPENAI_API_BASE"]
    )

    try:
        messages = [
            SystemMessage(content="You are a helpful assistant."),
        ]
        for hist in history:
            messages.append(SystemMessage(content=hist[0]))
            messages.append(SystemMessage(content=hist[1]))
        messages.append(HumanMessage(content=query))
        resp = chat(messages)

        # print(resp)
        content = resp.content
        return content
    except Exception as e:
        return str(e)

def agent_execute(query, chat_history=[]):
    global tools, tool_names, tool_descs, prompt_tpl, llm, tokenizer

    agent_scratchpad = ''  # the agent's execution trace
    while True:
        # 1) trigger the LLM to think about the next action
        history = '\n'.join(['Question:%s\nAnswer:%s' % (his[0], his[1]) for his in chat_history])
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        prompt = prompt_tpl.format(today=today, chat_history=history, tool_description=tool_descs, tool_names=tool_names,
                                   query=query, agent_scratchpad=agent_scratchpad)
        print('\033[32m---Waiting for the LLM...\n%s\n\033[0m' % prompt, flush=True)
        response = llm(prompt, user_stop_words=['Observation:'])
        print('\033[34m---LLM response---\n%s\n---\033[0m' % response, flush=True)

        # 2) parse thought + action + action input + observation, or thought + final answer
        thought_i = response.rfind('Thought:')
        final_answer_i = response.rfind('\nFinal Answer:')
        action_i = response.rfind('\nAction:')
        action_input_i = response.rfind('\nAction Input:')
        observation_i = response.rfind('\nObservation:')

        # 3) a final answer was returned; execution is complete
        if final_answer_i != -1 and thought_i < final_answer_i:
            final_answer = response[final_answer_i + len('\nFinal Answer:'):].strip()
            chat_history.append((query, final_answer))
            return True, final_answer, chat_history

        # 4) parse the action
        if not (thought_i < action_i < action_input_i):
            return False, 'malformed LLM response', chat_history
        if observation_i == -1:
            observation_i = len(response)
            response = response + 'Observation: '
        thought = response[thought_i + len('Thought:'):action_i].strip()
        action = response[action_i + len('\nAction:'):action_input_i].strip()
        action_input = response[action_input_i + len('\nAction Input:'):observation_i].strip()

        # 5) match the tool
        the_tool = None
        for t in tools:
            if t.name == action:
                the_tool = t
                break
        if the_tool is None:
            observation = 'the tool does not exist'
            agent_scratchpad = agent_scratchpad + response + observation + '\n'
            continue

        # 6) run the tool
        try:
            action_input = json.loads(action_input)
            tool_ret = the_tool.invoke(input=json.dumps(action_input))
        except Exception as e:
            observation = 'the tool raised an error: {}'.format(e)
        else:
            observation = str(tool_ret)
        agent_scratchpad = agent_scratchpad + response + observation + '\n'

def agent_execute_with_retry(query, chat_history=[], retry_times=3):
    for i in range(retry_times):
        success, result, chat_history = agent_execute(query, chat_history=chat_history)
        if success:
            return success, result, chat_history
    return success, result, chat_history

my_history = []
index = 0
loop_count = 5

pipe = initializeASR()

while index <= loop_count:
    print("Let's start round " + str(index))
    time.sleep(1)
    inputviamic(index)
    query = ASRwavfile(pipe)
    success, result, my_history = agent_execute_with_retry(query, chat_history=my_history)
    my_history = my_history[-10:]
    index = index + 1
--------------------------------------------------------------------------------