├── 1.pdf ├── LLMMAS.ipynb ├── README.md ├── SC.png ├── Test_generated_SC.py ├── code.txt └── movie_lines.txt /1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangfeibo/CommLLM/9735c130468b348910fb3197d676cfc8abdd1ea2/1.pdf -------------------------------------------------------------------------------- /LLMMAS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from langchain.chains import RetrievalQA\n", 12 | "from langchain.vectorstores import DocArrayInMemorySearch\n", 13 | "from IPython.display import display, Markdown\n", 14 | "from langchain.prompts import ChatPromptTemplate\n", 15 | "from langchain.document_loaders import PyPDFLoader\n", 16 | "from langchain.embeddings import OpenAIEmbeddings\n", 17 | "from langchain.chat_models import ChatOpenAI\n", 18 | "from langchain.chains import LLMChain, SequentialChain\n", 19 | "import openai\n", 20 | "\n", 21 | "class configs():\n", 22 | " openai_api_key= \"your openai api-key\"\n", 23 | " openai.api_key =openai_api_key\n", 24 | " openai.api_base=\"https://api.closeai-proxy.xyz/v1\"\n", 25 | " # path to the reference paper that describes the semantic communications\n", 26 | " pdf_path = \"1.pdf\"\n", 27 | " # path to the reference code that is as an example\n", 28 | " code_path = \"code.txt\"\n", 29 | " # iteration number\n", 30 | " iterations = 3" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# Define LLM-enhanced multi-agent system\n", 42 | "class LLM_enhanced_multi_agent_system_for_SC():\n", 43 | " def __init__(self, args: configs):\n", 44 | " self.api_key = args.openai_api_key\n", 45 | " self.llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0.9, openai_api_key=self.api_key)\n", 46 | " self.iterations = args.iterations\n", 47 | " self.embeddings = OpenAIEmbeddings(openai_api_key=self.api_key)\n", 48 | "\n", 49 | " # Validate the legitimacy of the input\n", 50 | " def secure_agent(self, query):\n", 51 | " response = openai.Moderation.create(\n", 52 | " input=query,\n", 53 | " )\n", 54 | " moderation_output = response[\"results\"][0]\n", 55 | " print(moderation_output)\n", 56 | " if moderation_output[\"flagged\"]:\n", 57 | " print(\"Illegal input!\")\n", 58 | " return False\n", 59 | " else:\n", 60 | " return query\n", 61 | "\n", 62 | " # Load the paper and generate vector memory, which is the knowledge base\n", 63 | " def condensate_agent(self, pdf_path=\"\"):\n", 64 | " loader = PyPDFLoader(pdf_path)\n", 65 | " docs = loader.load()\n", 66 | " # construct knowledge base\n", 67 | " self.db = DocArrayInMemorySearch.from_documents(\n", 68 | " docs,\n", 69 | " self.embeddings\n", 70 | " )\n", 71 | "\n", 72 | " # Distill relevant SC knowledge for constructing the SC model\n", 73 | " def inference_agent(self, query):\n", 74 | " retriever = self.db.as_retriever()\n", 75 | " qa_stuff = RetrievalQA.from_chain_type(\n", 76 | " llm=self.llm,\n", 77 | " chain_type=\"stuff\", # Stuff means merging text fragments into one paragraph of text\n", 78 | " retriever=retriever,\n", 79 | " verbose=True\n", 80 | " )\n", 81 | " response = qa_stuff.run(query)\n", 82 | " display(Markdown(response))\n", 83 | " return response\n", 84 | "\n", 85 | " # Formulates 
a specific sub-task chain for the SC model\n",
86 | " def planning_agent(self, input_name=\"paper_ref\", output_name=\"sc_scheme\"):\n",
87 | " scheme_prompt = ChatPromptTemplate.from_template(\n",
88 | " \"# Referring to the following description of the semantic communication model: '''{\"\n",
89 | " f\"{input_name}\"\n",
90 | " \"}'''.\"\n",
91 | " \"# Output the design scheme for achieving the semantic communication model.\"\n",
92 | " )\n",
93 | " chain = LLMChain(llm=self.llm, prompt=scheme_prompt,\n",
94 | " output_key=output_name,\n",
95 | " )\n",
96 | " return chain\n",
97 | "\n",
98 | " # sub-task chain for code generation\n",
99 | " def code_generation(self, input_name=\"sc_scheme\", output_name=\"sc_model\"):\n",
100 | " data = [\n",
101 | " \"Hello there!\",\n",
102 | " \"How are you doing?\",\n",
103 | " \"PyTorch is great.\",\n",
104 | " \"Machine learning is fascinating.\",\n",
105 | " \"Let's build something amazing.\",\n",
106 | " \"Semantic communication matters.\",\n",
107 | " \"Understanding context is important.\",\n",
108 | " \"AI is changing the world.\",\n",
109 | " \"Keep learning and growing.\",\n",
110 | " \"Innovation drives progress.\"\n",
111 | " ]\n",
112 | " model_prompt = ChatPromptTemplate.from_template(\n",
113 | " \"# According to the design scheme of the semantic communication model: '''{\"\n",
114 | " f\"{input_name}\"\n",
115 | " \"}'''.\"\n",
116 | " \"# Referring to the reference code: '''{ref_code}''',\"\n",
117 | " \"output the PyTorch framework-based Python code for achieving the semantic communication model, \"\n",
118 | " f\"assuming the input data is '''{data}''', which is a list of sentences. \"\n",
119 | " f\"Specifically, the Additive White Gaussian Noise (AWGN) can serve as the physical channel. The Bilingual Evaluation Understudy (BLEU) score is adopted as the metric that evaluates the SC model. We expect that the generated SC model achieves no less than a 0.6 BLEU score when the Signal-to-Noise Ratio (SNR) is 10 dB. The total number of model parameters must not exceed 2,000,000 due to the resource constraints of devices.\"\n",
120 | " )\n",
121 | " chain = LLMChain(llm=self.llm, prompt=model_prompt,\n",
122 | " output_key=output_name,\n",
123 | " )\n",
124 | " return chain\n",
125 | "\n",
126 | " # evaluate the quality of the generated code\n",
127 | " def evaluate_agent(self, input_name=\"sc_model\", output_name=\"eval_score\"):\n",
128 | " # Manually input BLEU scores for the SC model\n",
129 | " eval_prompt = ChatPromptTemplate.from_template(\n",
130 | " \"The quality of the generated code is evaluated and an overall score is given from three aspects: \"\n",
131 | " \"1. Quality of code, including its reasonableness and completeness.\"\n",
132 | " \"2. The performance of code, using the BLEU score as the evaluation indicator.\"\n",
133 | " \"3. Whether the number of parameters in the generated code meets the limitation.\"\n",
134 | " \"Based on the above requirements, evaluate the quality of the 'generated code':'''{\"\n",
135 | " f\"{input_name}\"\n",
136 | " \"}'''.\"\n",
137 | " \"## Output the evaluated results. The format of the output refers to:\"\n",
138 | " \"```json Dict('model parameters': int \ the model parameters of the 'generated code', 'evaluated score': int \ The evaluated score is based on the performance of the reference code in three aspects. The evaluated score ranges from 0 to 100 and is not the same as the last time.\"\n",
139 | " \"'evaluation': string \ Based on the performance of the reference code in three aspects, give the reviews, including the model parameters, the description of 'generated code', the advantage and disadvantage analysis of the 'generated code', etc.\"\n",
140 | " \"There is an example: 'The Python-based SC model has been successfully implemented, incorporating all necessary modules. The semantic encoder and decoder are realized using Long Short-Term Memory (LSTM) networks. The channel encoder and decoder are constructed based on the Multilayer Perceptron (MLP) architecture. The final SC model can achieve a 0.68 BLEU score when SNR is 10 dB, which meets expectations. In addition, the total number of model parameters is 1,826,762.')```\"\n",
141 | " )\n",
142 | " chain = LLMChain(llm=self.llm, prompt=eval_prompt,\n",
143 | " output_key=f\"{output_name}\",\n",
144 | " )\n",
145 | " return chain\n",
146 | "\n",
147 | " def long_memory_storage(self, inputs, output_name=\"long_term_memory\"):\n",
148 | " memory_prompt = ChatPromptTemplate.from_template(\n",
149 | " \"# According to the code and evaluation results of the current subtask chain:'''{\"\n",
150 | " f\"{inputs[0]}\"\n",
151 | " \"}''' and '''{\"\n",
152 | " f\"{inputs[1]}\"\n",
153 | " \"}'''.\"\n",
154 | " \"# If there is a significant difference in the semantic space of the current subtask chain, i.e. the 'evaluation score' is less than 60.\"\n",
155 | " \"# Then only the module composition of the subtask chain code needs to be output, such as LSTM, MLP, etc.\"\n",
156 | " )\n",
157 | " chain = LLMChain(llm=self.llm, prompt=memory_prompt,\n",
158 | " output_key=f\"{output_name}\",\n",
159 | " )\n",
160 | " return chain\n",
161 | "\n",
162 | " def short_memory_storage(self, inputs, output_name=\"short_term_memory\"):\n",
163 | " memory_prompt = ChatPromptTemplate.from_template(\n",
164 | " \"# According to the code and evaluation results of the current subtask chain:'''{\"\n",
165 | " f\"{inputs[0]}\"\n",
166 | " \"}''' and '''{\"\n",
167 | " f\"{inputs[1]}\"\n",
168 | " \"}'''.\"\n",
169 | " \"# If the current subtask chain has semantic similarity in the semantic space, i.e. the 'evaluation score' is greater than 60.\"\n",
170 | " \"# Not only does it output the module composition of the subtask chain code, such as LSTM, MLP, etc., but it also outputs the evaluation results of the subtask chain.\"\n",
171 | " )\n",
172 | " chain = LLMChain(llm=self.llm, prompt=memory_prompt,\n",
173 | " output_key=f\"{output_name}\",\n",
174 | " )\n",
175 | " return chain\n",
176 | "\n",
177 | " def reflexion_agent(self, inputs, output_name=\"sc_model\"):\n",
178 | " model_prompt = ChatPromptTemplate.from_template(\n",
179 | " \"# Modify the Python code: '''{\"\n",
180 | " f\"{inputs[0]}\"\n",
181 | " \"}'''.\"\n",
182 | " \"# According to the corresponding evaluation results:'''{\"\n",
183 | " f\"{inputs[1]}\"\n",
184 | " \"}'''.\"\n",
185 | " \"# According to short-term memory:'''{\"\n",
186 | " f\"{inputs[2]}\"\n",
187 | " \"}'''.\"\n",
188 | " \"# Extract fine-grained information, similar to how humans recall recent details; considering the performance of the current subtask chain in historical schemes provides valuable small-scale feedback for improving the code.\"\n",
189 | " \"# Output the modified Python code that aims to improve the 'evaluated score' and obtain a score of no less than 90 finally.\"\n",
190 | " )\n",
191 | " chain = LLMChain(llm=self.llm, prompt=model_prompt,\n",
192 | " output_key=output_name,\n",
193 | " )\n",
194 | " return chain\n",
195 | "\n",
196 | " def refinement_agent(self, inputs, output_name=\"sc_model\"):\n",
197 | " model_prompt = ChatPromptTemplate.from_template(\n",
198 | " \"# Modify the Python code: '''{\"\n",
199 | " f\"{inputs[0]}\"\n",
200 | " \"}'''.\"\n",
201 | " \"# According to the corresponding evaluation results:'''{\"\n",
202 | " f\"{inputs[1]}\"\n",
203 | " \"}'''.\"\n",
204 | " \"# According to long-term memory:'''{\"\n",
205 | " f\"{inputs[2]}\"\n",
206 | " \"}'''.\"\n",
207 | " \"# Quote coarse-grained information, similar to the way humans extract important experiences from long-term decisions; considering the performance of the current subtask chain from a global perspective provides large-scale feedback for improving the subtask chain code.\"\n",
208 | " \"# Output the modified Python code that aims to improve the 'evaluated score' and obtain a score of no less than 90 finally.\"\n",
209 | " )\n",
210 | " chain = LLMChain(llm=self.llm, prompt=model_prompt,\n",
211 | " output_key=output_name,\n",
212 | " )\n",
213 | " return chain\n",
214 | "\n",
215 | " # Combine all agents for SC system generation\n",
216 | " def create_chains(self):\n",
217 | " chains = []\n",
218 | " output_keys = []\n",
219 | "\n",
220 | " # define sc scheme 1\n",
221 | " input_name = \"paper_ref\"\n",
222 | " output_name = \"scheme_1\"\n",
223 | " chain = self.planning_agent(input_name, output_name)\n",
224 | " chains.append(chain)\n",
225 | " output_keys.append(output_name)\n",
226 | " # define sc scheme 2\n",
227 | " input_name = \"paper_ref\"\n",
228 | " output_name = \"scheme_2\"\n",
229 | " chain = self.planning_agent(input_name, output_name)\n",
230 | " chains.append(chain)\n",
231 | " output_keys.append(output_name)\n",
232 | " # define code generation 1\n",
233 | " input_name = \"scheme_1\"\n",
234 | " output_name = \"sc_model_0_1\"\n",
235 | " chain = self.code_generation(input_name, output_name)\n",
236 | " chains.append(chain)\n",
237 | " output_keys.append(chain.output_key)\n",
238 | "\n",
239 | " # define code generation 2\n",
240 | " input_name = \"scheme_2\"\n",
241 | " output_name = \"sc_model_0_2\"\n",
242 | " chain = self.code_generation(input_name, output_name)\n",
243 | " chains.append(chain)\n",
244 | " output_keys.append(chain.output_key)\n",
245 | " for i in range(self.iterations):\n",
246 | " # eval sc model\n",
247 | " input_name = f\"sc_model_{i}_1\"\n",
248 | " output_name = f\"eval_score_{i}_1\"\n",
249 | " chain = self.evaluate_agent(input_name, output_name)\n",
250 | " chains.append(chain)\n",
251 | " output_keys.append(chain.output_key)\n",
252 | "\n",
253 | " input_name = f\"sc_model_{i}_2\"\n",
254 | " output_name = f\"eval_score_{i}_2\"\n",
255 | " chain = self.evaluate_agent(input_name, output_name)\n",
256 | " chains.append(chain)\n",
257 | " output_keys.append(chain.output_key)\n",
258 | "\n",
259 | " # memory storage\n",
260 | " inputs = [f\"sc_model_{i}_1\", f\"eval_score_{i}_1\"]\n",
261 | " output_name = f\"long_term_memory_{i}_1\"\n",
262 | " chain = self.long_memory_storage(inputs, output_name)\n",
263 | " chains.append(chain)\n",
264 | " output_keys.append(chain.output_key)\n",
265 | "\n",
266 | " inputs = [f\"sc_model_{i}_1\", f\"eval_score_{i}_1\"]\n",
267 | " output_name = f\"short_term_memory_{i}_1\"\n",
268 | " chain = self.short_memory_storage(inputs, output_name)\n",
269 | " chains.append(chain)\n",
270 | " output_keys.append(chain.output_key)\n",
271 | "\n",
272 | " inputs = [f\"sc_model_{i}_2\", f\"eval_score_{i}_2\"]\n",
273 | " output_name = f\"long_term_memory_{i}_2\"\n",
274 | " chain = self.long_memory_storage(inputs, output_name)\n",
275 | " chains.append(chain)\n",
276 | " output_keys.append(chain.output_key)\n",
277 | "\n",
278 | " inputs = [f\"sc_model_{i}_2\", f\"eval_score_{i}_2\"]\n",
279 | " output_name = f\"short_term_memory_{i}_2\"\n",
280 | " chain = self.short_memory_storage(inputs, output_name)\n",
281 | " chains.append(chain)\n",
282 | " output_keys.append(chain.output_key)\n",
283 | "\n",
284 | " # Reflect and modify the SC model according to the evaluated results and memory\n",
285 | " inputs = [f\"sc_model_{i}_1\", f\"eval_score_{i}_1\", f\"short_term_memory_{i}_1\"]\n",
286 | " output_name = f\"sc_modelx_{i}_1\"\n",
287 | " chain = self.reflexion_agent(inputs, output_name)\n",
288 | " chains.append(chain)\n",
289 | " output_keys.append(chain.output_key)\n",
290 | "\n",
291 | " inputs = [f\"sc_model_{i}_2\", f\"eval_score_{i}_2\", f\"short_term_memory_{i}_2\"]\n",
292 | " output_name = f\"sc_modelx_{i}_2\"\n",
293 | " chain = self.reflexion_agent(inputs, output_name)\n",
294 | " chains.append(chain)\n",
295 | " output_keys.append(chain.output_key)\n",
296 | "\n",
297 | " inputs = [f\"sc_modelx_{i}_1\", f\"eval_score_{i}_1\", f\"long_term_memory_{i}_1\"]\n",
298 | " output_name = f\"sc_model_{i + 1}_1\"\n",
299 | " chain = self.refinement_agent(inputs, output_name)\n",
300 | " chains.append(chain)\n",
301 | " output_keys.append(chain.output_key)\n",
302 | "\n",
303 | " inputs = [f\"sc_modelx_{i}_2\", f\"eval_score_{i}_2\", f\"long_term_memory_{i}_2\"]\n",
304 | " output_name = f\"sc_model_{i + 1}_2\"\n",
305 | " chain = self.refinement_agent(inputs, output_name)\n",
306 | " chains.append(chain)\n",
307 | " output_keys.append(chain.output_key)\n",
308 | "\n",
309 | " # eval the final sc model\n",
310 | " input_name = f\"sc_model_{self.iterations}_1\"\n",
311 | " output_name = f\"eval_score_{self.iterations}_1\"\n",
312 | " chain = self.evaluate_agent(input_name, output_name)\n",
313 | " chains.append(chain)\n",
314 | " output_keys.append(chain.output_key)\n",
315 | "\n",
316 | " input_name = f\"sc_model_{self.iterations}_2\"\n",
= f\"eval_score_{self.iterations}_2\"\n", 318 | " chain = self.evaluate_agent(input_name, output_name)\n", 319 | " chains.append(chain)\n", 320 | " output_keys.append(chain.output_key)\n", 321 | "\n", 322 | " self.overall_chain = SequentialChain(\n", 323 | " chains=chains,\n", 324 | " input_variables=[\"paper_ref\", \"ref_code\"],\n", 325 | " output_variables=output_keys,\n", 326 | " verbose=True,\n", 327 | " )\n", 328 | "\n", 329 | " # Run multi-agent system\n", 330 | " def Run(self, inputs):\n", 331 | " outputs = self.overall_chain(inputs)\n", 332 | " return outputs" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": { 339 | "collapsed": false, 340 | "pycharm": { 341 | "is_executing": true 342 | } 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "# generate sc by LLM enhanced multi-agent system\n", 347 | "args = configs()\n", 348 | "ref_code = open(args.code_path,\"r\",encoding=\"utf-8\").read()\n", 349 | "LL_SC = LLM_enhanced_multi_agent_system_for_SC(args)\n", 350 | "LL_SC.condensate_agent(args.pdf_path)\n", 351 | "paper_query = \"Composition of the semantic communication model\"\n", 352 | "if LL_SC.secure_agent(paper_query):\n", 353 | " SC_Components = LL_SC.inference_agent(paper_query)\n", 354 | " LL_SC.create_chains()\n", 355 | " results = LL_SC.Run({\"paper_ref\":SC_Components,\"ref_code\":ref_code})\n", 356 | "else:\n", 357 | " pass" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": false, 365 | "pycharm": { 366 | "is_executing": true 367 | } 368 | }, 369 | "outputs": [], 370 | "source": [ 371 | "results" 372 | ] 373 | } 374 | ], 375 | "metadata": { 376 | "kernelspec": { 377 | "display_name": "Python 3", 378 | "language": "python", 379 | "name": "python3" 380 | }, 381 | "language_info": { 382 | "codemirror_mode": { 383 | "name": "ipython", 384 | "version": 3 385 | }, 386 | "file_extension": ".py", 387 | "mimetype": "text/x-python", 388 | "name": "python", 389 | "nbconvert_exporter": "python", 390 | "pygments_lexer": "ipython3", 391 | "version": "3.9.19" 392 | } 393 | }, 394 | "nbformat": 4, 395 | "nbformat_minor": 0 396 | } 397 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Large Language Model Enhanced Multi-Agent Systems for 6G Communications 2 | ## Authors 3 | ### Feibo Jiang, Yubo Peng, Li Dong, Kezhi Wang, Kun Yang, Cunhua Pan, Dusit Niyato, and Octavia A. Dobre. 4 | ## Paper 5 | ### https://arxiv.org/abs/2312.07850 6 | ## Code 7 | ### https://github.com/jiangfeibo/CommLLM.git 8 | ## Abstract 9 | The rapid development of the large language model (LLM) presents huge opportunities for 6G communications — for example, network optimization and management — by allowing users to input task requirements to LLMs with natural language. However, directly applying native LLMs in 6G encounters various challenges, such as a lack of communication data and knowledge, and limited logical reasoning, evaluation, and refinement abilities. Integrating LLMs with the capabilities of retrieval, planning, memory, evaluation, and reflection in agents can greatly enhance the potential of LLMs for 6G communications. To this end, we propose CommLLM, a multi-agent system with customized communication knowledge and tools for solving communication-related tasks using natural language. 
--------------------------------------------------------------------------------
/SC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiangfeibo/CommLLM/9735c130468b348910fb3197d676cfc8abdd1ea2/SC.png
--------------------------------------------------------------------------------
/Test_generated_SC.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import numpy as np
5 | import math
6 | import re
7 | # prepare training data
8 | def get_data():
9 | data = []
10 | max_words = 0
11 | with open("movie_lines.txt","r",encoding="utf-8") as f:
12 | content = f.read()
13 | req = ".*?\+\+\+\$\+\+\+.*?\+\+\+\$\+\+\+.*?\+\+\+\$\+\+\+.*?\+\+\+\$\+\+\+ "
14 | lines = re.split(req,content)
15 | print(len(lines))
16 | for i,line in enumerate(lines):
17 | line = line.replace("\n","").strip()
18 | req = "\.|\?"
19 | sentence = re.split(req,line)
20 | sentence = set(sentence)
21 | if '' in sentence:
22 | sentence.remove('')
23 | if ' ' in sentence:
24 | sentence.remove(' ')
25 | data+=list(sentence)
26 | new_data = []
27 | for sen in data:
28 | if len(sen.split(" ")) > 20 or len(sen.split(" ")) < 2:
29 | continue
30 | else:
31 | new_data.append(sen)
32 | max_words = max(len(sen.split(" ")),max_words)
33 | return new_data[:10000]
34 |
35 | training_data = get_data()
36 |
37 | # Tokenize sentences into words
38 | words = [sentence.split() for sentence in training_data]
39 |
40 | # Create a vocabulary
41 | vocab = list(set(word for sentence in words for word in sentence))
42 | vocab_size = 8266
43 | print(vocab_size)
44 |
45 | # Convert words to unique indices
46 | word_to_idx = {word: idx for idx, word in enumerate(vocab)}
47 | idx_to_word = {idx: word for word, idx in word_to_idx.items()}
48 |
49 | # Convert sentences to numerical representation
50 | numerical_sentences = [[word_to_idx[word] for word in sentence] for sentence in words]
51 | # Split training and test data
52 | data_size = len(numerical_sentences)
53 | train_data = numerical_sentences[:int(data_size*0.8)]
54 | test_data = numerical_sentences[int(data_size*0.8):]
55 |
56 | ## Copy the generated code of the SC model here
57 | # Semantic Encoder
58 | class SemanticEncoder(nn.Module):
59 | def __init__(self, vocab_size, embedding_size, hidden_size):
60 | super(SemanticEncoder, self).__init__()
61 | self.embedding = nn.Embedding(vocab_size, embedding_size)
62 | self.lstm = nn.LSTM(embedding_size, hidden_size)
63 |
64 | def forward(self, input_seq):
65 | embedded = self.embedding(input_seq)
66 | print(embedded.shape)
67 | output, (hidden, cell) = self.lstm(embedded)
68 | return output
69 |
70 | # Channel Encoder and Decoder (Simple Identity Mapping)
71 | class ChannelEncoder(nn.Module):
72 | def __init__(self, hidden_size):
73 | super(ChannelEncoder, self).__init__()
74 | self.identity = nn.Identity()
75 |
76 | def forward(self, input_features):
77 | return self.identity(input_features)
78 |
79 | class ChannelDecoder(nn.Module):
80 | def __init__(self, hidden_size):
81 | super(ChannelDecoder, self).__init__()
82 | self.identity = nn.Identity()
83 |
84 | def forward(self, received_features):
85 | return self.identity(received_features)
86 |
87 | # Define the physical channel, which is a Gaussian white noise channel with a given SNR
88 | class PhysicalChannel(nn.Module):
89 | def __init__(self, snr):
90 | super(PhysicalChannel, self).__init__()
91 | self.snr = snr
92 |
93 | def forward(self, x):
94 | x = x.cpu()
95 | # x: (batch_size, output_size)
96 | noise_power = 10 ** (-self.snr / 10) # Calculate the noise power from the SNR
97 | noise = math.sqrt(noise_power) * torch.randn_like(x) # Generate Gaussian white noise with the same shape as x
98 | y = x + noise # Add noise to the signal
99 | y = y.to(device)
100 | return y
101 |
102 | # Semantic Decoder
103 | class SemanticDecoder(nn.Module):
104 | def __init__(self, hidden_size, vocab_size):
105 | super(SemanticDecoder, self).__init__()
106 | self.lstm = nn.LSTM(hidden_size, hidden_size)
107 | self.linear = nn.Linear(hidden_size, vocab_size)
108 |
109 | def forward(self, hidden):
110 | output, _ = self.lstm(hidden)
111 | output = self.linear(output)
112 | return output
113 |
114 | class SC_model(nn.Module):
115 | def __init__(self, vocab_size, embedding_size, hidden_size):
116 | super(SC_model, self).__init__()
117 | self.semantic_encoder = SemanticEncoder(vocab_size, embedding_size, hidden_size)
118 | self.channel_encoder = ChannelEncoder(hidden_size)
119 | self.channel_decoder = ChannelDecoder(hidden_size)
120 | self.semantic_decoder = SemanticDecoder(hidden_size, vocab_size)
121 | # self.physical_channel = PhysicalChannel(snr)
122 |
123 | def forward(self,x):
124 | x = self.semantic_encoder(x)
125 | x = self.channel_encoder(x)
126 | # x = self.physical_channel(x)
127 | x = self.channel_decoder(x)
128 | x = self.semantic_decoder(x)
129 | return x
130 |
131 | # training semantic communication model
132 | def train():
133 | # Define loss and optimizer
134 | criterion = nn.CrossEntropyLoss()
135 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
136 |
137 | # Train the model
138 | num_epochs = 50
139 | for epoch in range(num_epochs):
140 | total_loss = 0.0
141 | model.train()
142 | for sentence in train_data:
143 | if sentence == []:
144 | continue
145 | optimizer.zero_grad()
146 | input_seq = torch.tensor(sentence).to(device).long() # Input: the full sentence
147 | target_seq = torch.tensor(sentence).to(device).long() # Target: the same sentence (reconstruction task)
148 |
149 | output = model(input_seq)
150 | print(output.shape, target_seq.shape)
151 |
152 | loss = criterion(output, target_seq)
153 | loss.backward()
154 | optimizer.step()
155 |
156 | total_loss += loss.item()
157 |
158 | print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss:.4f}')
159 | torch.save(model.state_dict(), f"{snr}_weight.pth")
160 | eval()
161 |
162 | # evaluate semantic communication model
163 | def eval():
164 | # Test the semantic communication model
165 | scores = []
166 | model.eval()
167 | model.load_state_dict(torch.load(f"{snr}_weight.pth", map_location="cpu"))
168 | for i, sentence in enumerate(test_data):
169 | try:
170 | if sentence == []:
171 | continue
172 | test_input = torch.tensor(sentence).to(device)
173 | with torch.no_grad():
174 | output = model(test_input)
175 | predicted_indices = torch.argmax(output, dim=1).cpu().numpy()
176 | predicted_sentence = ' '.join([idx_to_word[idx] for idx in predicted_indices])
177 | src_txt = training_data[i + int(data_size * 0.8)]
178 | tar_txt = predicted_sentence
179 | # print("Original Sentence:", src_txt)
180 | # print("Predicted Sentence:", tar_txt)
181 |
182 | # Tokenize and process each sentence individually
183 | encoded_sentence1 = tokenizer.encode_plus(src_txt, add_special_tokens=True, max_length=64,
184 | truncation=True, return_tensors='pt', padding='max_length')
185 | encoded_sentence2 = tokenizer.encode_plus(tar_txt, add_special_tokens=True, max_length=64,
186 | truncation=True, return_tensors='pt', padding='max_length')
187 |
188 | # Obtain the BERT embeddings for each sentence (detached so sklearn can convert them to NumPy)
189 |
190 | model_output1 = bert(encoded_sentence1['input_ids'], encoded_sentence1['attention_mask'])
191 | embeddings1 = model_output1.last_hidden_state[:, 0, :].detach()
192 |
193 | model_output2 = bert(encoded_sentence2['input_ids'], encoded_sentence2['attention_mask'])
194 | embeddings2 = model_output2.last_hidden_state[:, 0, :].detach()
195 |
196 | # Calculate the similarity using cosine similarity
197 | similarity = cosine_similarity(embeddings1, embeddings2)[0][0]
198 | print(f"Cosine similarity score: {similarity}")
199 | scores.append(similarity)
200 | except Exception as e:
201 | print(e)
202 | pass
203 |
204 | print("SNR:", snr, "sim score:", np.mean(scores))
205 |
206 | if __name__ == '__main__':
207 | # Instantiate the model components
208 | from torchsummary import summary
209 | from transformers import BertTokenizer, BertModel
210 | from
sklearn.metrics.pairwise import cosine_similarity 211 | 212 | # Load the BERT model and tokenizer 213 | tokenizer = BertTokenizer.from_pretrained('Geotrend/bert-base-en-bg-cased') 214 | bert = BertModel.from_pretrained('Geotrend/bert-base-en-bg-cased') 215 | embedding_size = 64 216 | hidden_size = 128 217 | device = "cuda" if torch.cuda.is_available() else "cpu" 218 | model = SC_model(vocab_size, embedding_size, hidden_size).to(device) 219 | for snr in reversed([15,10,5,0,-5]): 220 | train() 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /code.txt: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import numpy as np 5 | import math 6 | # Generate random sentences as data A 7 | data_A = [ 8 | "Hello there!", 9 | "How are you doing?", 10 | "PyTorch is great.", 11 | "Machine learning is fascinating.", 12 | "Let's build something amazing.", 13 | "Semantic communication matters.", 14 | "Understanding context is important.", 15 | "AI is changing the world.", 16 | "Keep learning and growing.", 17 | "Innovation drives progress." 18 | ] 19 | 20 | # Tokenize sentences into words 21 | words = [sentence.split() for sentence in data_A] 22 | 23 | # Create a vocabulary 24 | vocab = list(set(word for sentence in words for word in sentence)) 25 | vocab_size = len(vocab) 26 | 27 | # Convert words to unique indices 28 | word_to_idx = {word: idx for idx, word in enumerate(vocab)} 29 | idx_to_word = {idx: word for word, idx in word_to_idx.items()} 30 | 31 | # Convert sentences to numerical representation 32 | numerical_sentences = [[word_to_idx[word] for word in sentence] for sentence in words] 33 | 34 | 35 | # Semantic Encoder 36 | class SemanticEncoder(nn.Module): 37 | def __init__(self, vocab_size, embedding_size, hidden_size): 38 | super(SemanticEncoder, self).__init__() 39 | self.embedding = nn.Embedding(vocab_size, embedding_size) 40 | self.lstm = nn.LSTM(embedding_size, hidden_size) 41 | 42 | def forward(self, input_seq): 43 | embedded = self.embedding(input_seq) 44 | output, (hidden, cell) = self.lstm(embedded) 45 | return output 46 | 47 | 48 | # Channel Encoder and Decoder (Simple Identity Mapping) 49 | class ChannelEncoder(nn.Module): 50 | def __init__(self, hidden_size): 51 | super(ChannelEncoder, self).__init__() 52 | self.identity = nn.Identity() 53 | 54 | def forward(self, input_features): 55 | return self.identity(input_features) 56 | 57 | 58 | class ChannelDecoder(nn.Module): 59 | def __init__(self, hidden_size): 60 | super(ChannelDecoder, self).__init__() 61 | self.identity = nn.Identity() 62 | 63 | def forward(self, received_features): 64 | return self.identity(received_features) 65 | 66 | # Define the physical channel, which is a Gaussian white noise channel with a given SNR 67 | class PhysicalChannel(nn.Module): 68 | def __init__(self, snr): 69 | super(PhysicalChannel, self).__init__() 70 | self.snr = snr 71 | 72 | def forward(self, x): 73 | # x: (batch_size, output_size) 74 | noise_power = 10 ** (-self.snr / 10) # Calculate the noise power from the SNR 75 | noise = math.sqrt(noise_power) * torch.randn_like(x) # Generate Gaussian white noise with the same shape as x 76 | y = x + noise # Add noise to the signal 77 | return y 78 | 79 | # Semantic Decoder 80 | class SemanticDecoder(nn.Module): 81 | def __init__(self, hidden_size, vocab_size): 82 | super(SemanticDecoder, self).__init__() 83 | self.lstm = nn.LSTM(hidden_size, 
hidden_size)
84 | self.linear = nn.Linear(hidden_size, vocab_size)
85 |
86 | def forward(self, hidden):
87 | output, _ = self.lstm(hidden)
88 | output = self.linear(output)
89 | return output
90 |
91 |
92 | # Instantiate the model components
93 | embedding_size = 64
94 | hidden_size = 128
95 | snr = -10
96 |
97 | semantic_encoder = SemanticEncoder(vocab_size, embedding_size, hidden_size)
98 | channel_encoder = ChannelEncoder(hidden_size)
99 | channel_decoder = ChannelDecoder(hidden_size)
100 | semantic_decoder = SemanticDecoder(hidden_size, vocab_size)
101 | physical_channel = PhysicalChannel(snr)
102 |
103 | # Define loss and optimizer
104 | criterion = nn.CrossEntropyLoss()
105 | optimizer = optim.Adam(list(semantic_encoder.parameters()) +
106 | list(channel_encoder.parameters()) +
107 | list(channel_decoder.parameters()) +
108 | list(semantic_decoder.parameters()), lr=0.001)
109 |
110 | # Train the model
111 | num_epochs = 100
112 | for epoch in range(num_epochs):
113 | total_loss = 0.0
114 | for sentence in numerical_sentences:
115 | optimizer.zero_grad()
116 | input_seq = torch.tensor(sentence) # Input: the full sentence
117 | target_seq = torch.tensor(sentence) # Target: the same sentence (reconstruction task)
118 | semantic_feature = semantic_encoder(input_seq)
119 | encoded_features = channel_encoder(semantic_feature)
120 | # Simulate the channel (add noise)
121 | received_features = physical_channel(encoded_features) #+ torch.randn_like(encoded_features) * 0.1
122 |
123 | decoded_features = channel_decoder(received_features)
124 | output = semantic_decoder(decoded_features)
125 |
126 | loss = criterion(output, target_seq)
127 | loss.backward()
128 | optimizer.step()
129 |
130 | total_loss += loss.item()
131 |
132 | if (epoch + 1) % 10 == 0:
133 | print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss:.4f}')
134 |
135 | # Test the semantic communication model
136 | for i,sentence in enumerate(numerical_sentences):
137 | test_input = torch.tensor(sentence)
138 | with torch.no_grad():
139 | semantic_feature = semantic_encoder(test_input)
140 | encoded_features = channel_encoder(semantic_feature)
141 | # Simulate the channel (add noise)
142 | received_features = encoded_features + torch.randn_like(encoded_features) * 0.1
143 | decoded_features = channel_decoder(received_features)
144 | output = semantic_decoder(decoded_features)
145 | predicted_indices = torch.argmax(output, dim=1).numpy()
146 | predicted_sentence = ' '.join([idx_to_word[idx] for idx in predicted_indices])
147 |
148 | print("Original Sentence:", data_A[i])
149 | print("Predicted Sentence:", predicted_sentence)
150 |
--------------------------------------------------------------------------------