├── Evaluation
│   ├── postprocess_mbpp.py
│   ├── test_ds1000.py
│   ├── test_humaneval.py
│   └── test_mbpp.py
├── LICENSE
├── README.md
├── Web_demo
│   ├── assets
│   │   ├── assistant.pic.jpg
│   │   └── user.pic.jpg
│   ├── chatbot.py
│   ├── code_interpreter
│   │   ├── AutoCoderInterpreter.py
│   │   ├── BaseCodeInterpreter.py
│   │   └── JupyterClient.py
│   ├── sandbox
│   │   ├── cpp
│   │   │   ├── CMakeLists.txt
│   │   │   ├── Dockerfile.cpp
│   │   │   ├── compile_run.sh
│   │   │   ├── script.cpp
│   │   │   └── test
│   │   ├── fortran
│   │   │   ├── Dockerfile.fortran
│   │   │   ├── compile_run.sh
│   │   │   ├── script.f90
│   │   │   └── test
│   │   ├── java
│   │   │   ├── Dockerfile.java
│   │   │   ├── compile_run.sh
│   │   │   ├── pom.xml
│   │   │   ├── src
│   │   │   │   └── main
│   │   │   │       └── java
│   │   │   │           └── script.java
│   │   │   └── target
│   │   │       └── maven-status
│   │   │           └── maven-compiler-plugin
│   │   │               └── compile
│   │   │                   └── default-compile
│   │   │                       ├── createdFiles.lst
│   │   │                       └── inputFiles.lst
│   │   ├── python_0
│   │   │   ├── Dockerfile.python
│   │   │   └── script.py
│   │   └── rust
│   │       ├── Cargo.lock
│   │       ├── Cargo.toml
│   │       ├── Dockerfile.rust
│   │       ├── compile_run.sh
│   │       ├── main.rs
│   │       └── target
│   │           ├── .rustc_info.json
│   │           ├── CACHEDIR.TAG
│   │           └── release
│   │               ├── .cargo-lock
│   │               ├── .fingerprint
│   │               │   └── myapp-0a8bfcae0d226a1c
│   │               │       ├── bin-test
│   │               │       ├── bin-test.json
│   │               │       ├── dep-bin-test
│   │               │       └── invoked.timestamp
│   │               ├── deps
│   │               │   ├── test-0a8bfcae0d226a1c
│   │               │   └── test-0a8bfcae0d226a1c.d
│   │               ├── test
│   │               └── test.d
│   └── utils
│       ├── cleaner.py
│       └── const.py
└── requirements.txt
/Evaluation/postprocess_mbpp.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | 
4 | def read_jsonl(file_path):
5 |     with open(file_path, 'r', encoding='utf-8') as file:
6 |         lines = [json.loads(line.strip()) for line in file]
7 |     return lines
8 | 
9 | def write_jsonl(data, file_path):
10 |     with open(file_path, 'w', encoding='utf-8') as file:
11 |         for entry in data:
12 |             file.write(json.dumps(entry) + '\n')
13 | 
14 | def extract_code_blocks(solution):
15 |     code_blocks = re.findall(r'```python(.*?)```', solution, re.DOTALL)
16 |     return code_blocks
17 | 
18 | # Main function
19 | def process_files(file_path1, output_file):
20 |     data1 = read_jsonl(file_path1)
21 | 
22 |     updated_data = []
23 | 
24 |     for entry in data1:
25 |         new_entry = entry
26 |         code_blocks = extract_code_blocks(entry['solution'])
27 |         if len(code_blocks) > 0:
28 |             cleaned_code = code_blocks[0].strip()
29 |             new_entry['solution'] = cleaned_code
30 |             updated_data.append(new_entry)
31 |         else:
32 |             new_entry['solution'] = ""
33 |             updated_data.append(new_entry)
34 |     write_jsonl(updated_data, output_file)
35 | 
36 | file_path1 = '/data2/bil22003/AutoCoder_git/AutoCoder_6.7B_Mbpp+.jsonl'
37 | output_file = '/data2/bil22003/AutoCoder_git/AutoCoder_6.7B_Mbpp+-sanitized.jsonl'
38 | 
39 | process_files(file_path1, output_file)
40 | 
--------------------------------------------------------------------------------
/Evaluation/test_ds1000.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | from datasets import load_dataset
4 | from transformers import AutoTokenizer, AutoModelForCausalLM
5 | 
6 | model_path = "Bin12345/AutoCoder"
7 | tokenizer = AutoTokenizer.from_pretrained(model_path)
8 | model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
9 | model_identifier = os.path.basename(model_path)
10 | 
11 | ds_1000 = load_dataset("xlangai/DS-1000")
12 | 
13 | answers = []
14 | 
15 | for (index, data) in enumerate(ds_1000["test"]):
16 |     print(f"Working on {index}\n")
17 |     question = data['prompt'].strip()
18 |     metadata = data['metadata']
19 |     data_id = index
20 |     if "SOLUTION START" in question:
21 |         question = question.strip()
22 |     else:
23 |         question = question.strip()
24 |     print(f"Input question:\n{question}\n")
25 |     content = f"""Please help me finish the following code completion and place the executable code between
26 | <code> and </code> tags, without any other non-executable things.
27 | {question}
28 | """
29 |     messages=[
30 |         { 'role': 'user', 'content': content}
31 |     ]
32 |     inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
33 |     outputs = model.generate(inputs,
34 |                              max_new_tokens=1024,
35 |                              do_sample=False,
36 |                              temperature=0.0,
37 |                              top_p=1.0,
38 |                              num_return_sequences=1,
39 |                              eos_token_id=tokenizer.eos_token_id)
40 | 
41 |     answer = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
42 |     print(f"Answer:\n{answer}\n")
43 |     json_data = {"id": data_id,
44 |                  "code": answer,
45 |                  "metadata": metadata}
46 | 
47 |     with open(f'{model_identifier}_DS_1000.jsonl', 'a') as f:
48 |         json.dump(json_data, f)
49 |         f.write('\n')
50 | 
51 | print(f"All data has been saved to {model_identifier}_DS_1000.jsonl")
--------------------------------------------------------------------------------
/Evaluation/test_humaneval.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datasets import load_dataset
3 | from transformers import AutoTokenizer, AutoModelForCausalLM
4 | 
5 | model_path = "Bin12345/AutoCoder"
6 | tokenizer = AutoTokenizer.from_pretrained(model_path)
7 | model = AutoModelForCausalLM.from_pretrained(model_path,
8 |                                              device_map="auto")
9 | 
10 | HumanEval = load_dataset("evalplus/humanevalplus")
11 | 
12 | answers = []
13 | 
14 | for (index, data) in enumerate(HumanEval["test"]):
15 |     print(f"Working on {index}\n")
16 |     print(f"Original question:\n{data['prompt']}\n")
17 |     question = data['prompt'].strip()
18 |     content = f"""Write a solution to the following problem:
19 | ```python
20 | {question}
21 | ```"""
22 |     messages=[
23 |         { 'role': 'user', 'content': content}
24 |     ]
25 |     inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True,
26 |                                            return_tensors="pt").to(model.device)
27 | 
28 |     outputs = model.generate(inputs,
29 |                              max_new_tokens=1024,
30 |                              do_sample=False,
31 |                              temperature=0.0,
32 |                              top_p=1.0,
33 |                              num_return_sequences=1,
34 |                              eos_token_id=tokenizer.eos_token_id)
35 | 
36 |     answer = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
37 |     print(f"Answer:\n{answer}\n")
38 |     json_data = {"task_id": f"HumanEval/{index}",
39 |                  "solution":f"{answer}" }
40 | 
41 |     # Save results to a JSON file
42 |     with open('AutoCoder_HumanEval+.jsonl', 'a') as f:
43 |         json.dump(json_data, f)
44 |         f.write('\n')
45 | 
46 | print("All data has been saved to AutoCoder_HumanEval+.jsonl")
47 | 
--------------------------------------------------------------------------------
/Evaluation/test_mbpp.py:
--------------------------------------------------------------------------------
1 | import json
2 | from datasets import load_dataset
3 | from transformers import AutoTokenizer, AutoModelForCausalLM
4 | 
5 | model_path = "Bin12345/AutoCoder"
6 | tokenizer = AutoTokenizer.from_pretrained(model_path)
7 | model = AutoModelForCausalLM.from_pretrained(model_path,
8 |                                              device_map="auto")
9 | 
10 | mbpp = load_dataset("evalplus/mbppplus")
11 | 
12 | answers = []
13 | 
14 | for (index, data) in enumerate(mbpp["test"]):
15 |     print(f"Working on {index}\n")
16 | 
print(f"Original question:\n{data['prompt']}\n") 17 | question = data['prompt'].strip() 18 | data_id = data['task_id'] 19 | assertion = data['test_list'] 20 | content = f"""{question} 21 | Your code should satisfy the following assertion: 22 | ```python 23 | {assertion} 24 | ``` 25 | """ 26 | messages=[ 27 | { 'role': 'user', 'content': content} 28 | ] 29 | inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device) 30 | outputs = model.generate(inputs, 31 | max_new_tokens=1024, 32 | do_sample=False, 33 | temperature=0.0, 34 | top_p=1.0, 35 | num_return_sequences=1, 36 | eos_token_id=tokenizer.eos_token_id) 37 | 38 | answer = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True) 39 | print(f"Answer:\n{answer}\n") 40 | json_data = {"task_id": f"Mbpp/{data_id}", 41 | "solution":f"{answer}" } 42 | 43 | # Save results to a JSON file 44 | with open('AutoCoder_Mbpp+.jsonl', 'a') as f: 45 | json.dump(json_data, f) 46 | f.write('\n') 47 | 48 | print("All data has been saved to AutoCoder_Mbpp+.jsonl") 49 | 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [2024] [AutoCoder]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AutoCoder
2 | 
3 | ## News :fire:
4 | 
5 | A new model [AutoCoder_QW_7B](https://huggingface.co/Bin12345/AutoCoder_QW_7B) has been uploaded. In this model, we fixed the previous issue where the model would only start the code interpreter when you explicitly asked it to *verify* its code.
6 | 
7 | The base model of AutoCoder_QW_7B is [CodeQwen1.5-7b](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat).
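You can try either checkpoint directly with `transformers`. The snippet below is a minimal sketch that mirrors the pattern used by the evaluation scripts in `Evaluation/` (chat template plus greedy decoding); the prompt string is only a placeholder example.

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = "Bin12345/AutoCoder_QW_7B"  # or "Bin12345/AutoCoder"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

messages = [{"role": "user", "content": "Write a Python function that reverses a string."}]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True,
                                       return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=1024, do_sample=False,
                         eos_token_id=tokenizer.eos_token_id)
# Decode only the newly generated tokens
print(tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True))
```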
8 | 
9 | ## Introduction :mega:
10 | We introduce a new model designed for the code generation task. Its test accuracy on the HumanEval base dataset surpasses that of GPT-4 Turbo (April 2024) (**90.9% vs 90.2%**).
11 | 
12 | Additionally, compared to previous open-source models, AutoCoder offers a new feature: it can **automatically install the required packages** and attempt to run the code until it deems there are no issues, **whenever the user wishes to execute the code**.
13 | 
14 | * Difference between the code interpreter of AutoCoder and that of GPT-4 Turbo:
15 | 
16 | Below are the video demos comparing the code interpreters of GPT-4 Turbo and AutoCoder:
17 | 
18 | GPT-4o cannot access external libraries.
19 | 
20 | [GPT-4o](https://github.com/bin123apple/AutoCoder/assets/99925255/be47b449-4e8a-4b77-981b-ec79b15970cc)
21 | 
22 | AutoCoder can automatically install the required packages. This feature expands the scope of the code interpreter's applications.
23 | 
24 | [AutoCoder](https://github.com/bin123apple/AutoCoder/assets/99925255/1893f904-c1f2-4f59-9ec5-45b69efcc26a)
25 | 
26 | * Difference between the code interpreter of AutoCoder and the current open-source code interpreter [OpenCodeInterpreter](https://opencodeinterpreter.github.io/):
27 | 
28 | The code interpreter of AutoCoder, like GPT-4 Turbo's, is only called when the user needs to verify the code, while OpenCodeInterpreter runs all generated Python code.
29 | 
30 | ## Model :gift:
31 | The models are available on Hugging Face:
32 | 
33 | [AutoCoder (33B)](https://huggingface.co/Bin12345/AutoCoder)
34 | [AutoCoder-S (6.7B)](https://huggingface.co/Bin12345/AutoCoder_S_6.7B)
35 | 
36 | The base models of AutoCoder (33B) and AutoCoder-S (6.7B) are deepseek-coder.
37 | 
38 | [AutoCoder_QW_7B](https://huggingface.co/Bin12345/AutoCoder_QW_7B)
39 | 
40 | The base model of AutoCoder_QW_7B is CodeQwen1.5-7b.
41 | 
42 | ## Quick Start :rocket:
43 | 1. Create the conda env
44 | 
45 |    ```
46 |    conda create -n AutoCoder python=3.11
47 |    conda activate AutoCoder
48 |    pip install -r requirements.txt
49 |    ```
50 | 
51 | 2. Test on HumanEval: **90.9% on base, 78.0% on base + extra**. (Skip to Step 5 if you don't want to test its performance on benchmarks)
52 | 
53 |    ```
54 |    cd Evaluation
55 |    python test_humaneval.py
56 |    ```
57 |    You will receive a file named AutoCoder_HumanEval+.jsonl, which follows the EvalPlus format, after this step.
58 | 
59 |    Then follow the testing framework of the [EvalPlus GitHub](https://github.com/evalplus/evalplus). You will see the results.
60 | 
61 |    **NOTE**:
62 |    * Don't forget to use evalplus's `evalplus.sanitize` to post-process the code.
63 |    * If you don't use the greedy method (for example, if you set `do_sample=True`) for the code generation, you will probably see different results.
64 | 
65 | 3. Test on MBPP: **82.5% on base, 70.6% on base + extra**. (Skip to Step 5 if you don't want to test its performance on benchmarks)
66 | 
67 |    ```
68 |    python test_mbpp.py
69 |    ```
70 | 
71 |    Post-process to delete the natural language before testing:
72 |    ```
73 |    python postprocess_mbpp.py
74 |    ```
75 |    You will get an AutoCoder_Mbpp+-sanitized.jsonl file after this step; it contains all the extracted code blocks.
76 |    Then, test it directly by using [EvalPlus GitHub](https://github.com/evalplus/evalplus) (you don't need to use evalplus's `evalplus.sanitize` to post-process the code this time).
77 | 
78 | 4. Test on DS-1000. (Skip to Step 5 if you don't want to test its performance on benchmarks)
79 | 
80 |    ```
81 |    python test_ds1000.py
82 |    ```
83 | 
84 |    You will get a jsonl file after this step; it contains all the extracted code blocks.
85 |    Then, test it directly by using [DS-1000 GitHub](https://github.com/xlang-ai/DS-1000).
86 | 
87 | 5. Web demo (includes the code interpreter)
88 | 
89 |    Install gradio and run:
90 | 
91 |    ```
92 |    pip install gradio==3.48.0
93 |    cd /Web_demo
94 |    python chatbot.py
95 |    ```
96 | 
97 | ## **NOTE** :warning:
98 | * We suggest setting `do_sample = True` (the default setting here) while using the code interpreter.
99 | 
100 | * It would be preferable to use Linux for deploying everything.
101 | 
102 | ## Contact :email:
103 | If you have any inquiries, please feel free to raise an issue or reach out to leib2765@gmail.com.
104 | 
105 | ## Citation :book:
106 | ```
107 | @misc{lei2024autocoder,
108 |       title={AutoCoder: Enhancing Code Large Language Model with \textsc{AIEV-Instruct}},
109 |       author={Bin Lei and Yuchen Li and Qiuwu Chen},
110 |       year={2024},
111 |       eprint={2405.14906},
112 |       archivePrefix={arXiv},
113 |       primaryClass={cs.SE}
114 | }
115 | ```
116 | 
117 | ## Acknowledgments :pray:
118 | Thanks to Tianyu Zheng, the first author of [OpenCodeInterpreter](https://opencodeinterpreter.github.io/), for guidance on some technical details.
119 | 
--------------------------------------------------------------------------------
/Web_demo/assets/assistant.pic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bin123apple/AutoCoder/a51540e7d27a9d600a783a824de2a9017f599099/Web_demo/assets/assistant.pic.jpg
--------------------------------------------------------------------------------
/Web_demo/assets/user.pic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bin123apple/AutoCoder/a51540e7d27a9d600a783a824de2a9017f599099/Web_demo/assets/user.pic.jpg
--------------------------------------------------------------------------------
/Web_demo/chatbot.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import gradio as gr
3 | import os
4 | import re
5 | import json
6 | import logging
7 | import subprocess
8 | 
9 | import torch
10 | import atexit
11 | from datetime import datetime
12 | 
13 | from threading import Thread
14 | from typing import Optional
15 | from transformers import TextIteratorStreamer
16 | from functools import partial
17 | from huggingface_hub import CommitScheduler
18 | from uuid import uuid4
19 | from pathlib import Path
20 | 
21 | from code_interpreter.JupyterClient import JupyterNotebook
22 | 
23 | MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
24 | 
25 | import warnings
26 | 
27 | warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
28 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
29 | 
30 | '''
31 | This demo code is modified from https://github.com/OpenCodeInterpreter/OpenCodeInterpreter/tree/main/demo.
32 | '''
33 | 
34 | from code_interpreter.AutoCoderInterpreter import AutoCoderInterpreter
35 | 
36 | JSON_DATASET_DIR = Path("json_dataset")
37 | JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
38 | 
39 | scheduler = CommitScheduler(
40 |     repo_id="AutoCoder_user_data",
41 |     repo_type="dataset",
42 |     folder_path=JSON_DATASET_DIR,
43 |     path_in_repo="data",
44 |     private=True
45 | )
46 | 
47 | logging.basicConfig(level=logging.INFO)
48 | 
49 | class StreamingAutoCodeInterpreter(AutoCoderInterpreter):
50 |     streamer: Optional[TextIteratorStreamer] = None
51 | 
52 |     # overwrite the generate function
53 |     @torch.inference_mode()
54 |     def generate(
55 |         self,
56 |         inputs,
57 |         max_new_tokens = 1024,
58 |         do_sample: bool = True,
59 |         top_p: float = 0.95,
60 |         top_k: int = 50,
61 |     ) -> str:
62 | 
63 |         self.streamer = TextIteratorStreamer(
64 |             self.tokenizer, skip_prompt=True, timeout=5
65 |         )
66 |         logging.info(f"inputs:\n{inputs}")
67 |         inputs = inputs.to(self.model.device)
68 | 
69 |         kwargs = dict(
70 |             input_ids = inputs,
71 |             streamer=self.streamer,
72 |             max_new_tokens=max_new_tokens,
73 |             do_sample=do_sample,
74 |             top_k = top_k,
75 |             top_p = top_p,
76 |             eos_token_id=self.tokenizer.eos_token_id,
77 |             pad_token_id=self.tokenizer.eos_token_id,
78 |         )
79 |         logging.info(f"kwargs:\n{kwargs}")
80 | 
81 |         thread = Thread(target=self.model.generate, kwargs=kwargs)
82 |         thread.start()
83 | 
84 |         return ""
85 | 
86 | def save_json(dialog, mode, json_file_path, dialog_id) -> None:
87 |     with scheduler.lock:
88 |         with json_file_path.open("a") as f:
89 |             json.dump({"id": dialog_id, "dialog": dialog, "mode": mode, "datetime": datetime.now().isoformat()}, f, ensure_ascii=False)
90 |             f.write("\n")
91 | 
92 | def convert_history(gradio_history: list[list], interpreter_history: list[dict]):
93 |     interpreter_history = [interpreter_history[0]] if interpreter_history and interpreter_history[0]["role"] == "system" else []
94 |     if not gradio_history:
95 |         return interpreter_history
96 |     for item in gradio_history:
97 |         if item[0] is not None:
98 |             interpreter_history.append({"role": "user", "content": item[0]})
99 |         if item[1] is not None:
100 |             interpreter_history.append({"role": "assistant", "content": item[1]})
101 |     return interpreter_history
102 | 
103 | def update_uuid(dialog_info):
104 |     new_uuid = str(uuid4())
105 |     logging.info(f"allocating new uuid {new_uuid} for conversation...")
106 |     return [new_uuid, dialog_info[1]]
107 | 
108 | def is_valid_python_code(code):
109 |     try:
110 |         ast.parse(code)
111 |         return True
112 |     except SyntaxError:
113 |         return False
114 | 
115 | 
116 | class InputFunctionVisitor(ast.NodeVisitor):
117 |     def __init__(self):
118 |         self.found_input = False
119 | 
120 |     def visit_Call(self, node):
121 |         if isinstance(node.func, ast.Name) and node.func.id == 'input':
122 |             self.found_input = True
123 |         self.generic_visit(node)
124 | 
125 | def has_input_function_calls(code):
126 |     try:
127 |         tree = ast.parse(code)
128 |     except SyntaxError:
129 |         return False
130 |     visitor = InputFunctionVisitor()
131 |     visitor.visit(tree)
132 |     return visitor.found_input
133 | 
134 | def gradio_launch(model_path: str, MAX_TRY: int = 3):
135 |     with gr.Blocks() as demo:
136 |         chatbot = gr.Chatbot(height=600, label="AutoCoder", avatar_images=["assets/user.pic.jpg", "assets/assistant.pic.jpg"], show_copy_button=True)
137 |         with gr.Group():
138 |             with gr.Row():
139 |                 msg = gr.Textbox(
140 |                     container=False,
141 |                     show_label=False,
142 |                     label="Message",
143 |                     placeholder="Type a message...",
144 |                     scale=7,
145 |                     autofocus=True
146 |                 )
147 |                 sub = gr.Button(
148 |                     "Submit",
149 |                     variant="primary",
150 |                     scale=1,
151 |                     min_width=150
152 |                 )
153 |                 # stop = gr.Button(
154 |                 #     "Stop",
155 |                 #     variant="stop",
156 |                 #     visible=False,
157 |                 #     scale=1,
158 |                 #     min_width=150
159 |                 # )
160 | 
161 |         with gr.Row():
162 |             # retry = gr.Button("🔄 Retry", variant="secondary")
163 |             # undo = gr.Button("↩️ Undo", variant="secondary")
164 |             clear = gr.Button("🗑️ Clear", variant="secondary")
165 | 
166 |         session_state = gr.State([])
167 |         dialog_info = gr.State(["", 0])
168 |         demo.load(update_uuid, dialog_info, dialog_info)
169 | 
170 |         def bot(user_message, history, dialog_info,
171 |                 interpreter, image_name, container_name,
172 |                 dockerfile_name, sandbox_path, volume_mount):
173 | 
174 |             ## Initialize everything
175 | 
176 |             # log the user input message
177 |             logging.info(f"user message:\n {user_message}")
178 | 
179 |             # interpreter.dialog is initialized as []
180 |             interpreter.dialog = convert_history(gradio_history=history, interpreter_history=interpreter.dialog)
181 | 
182 |             # Add the user message to the history [user_msg, assistant_msg]
183 |             history.append([user_message, None])
184 | 
185 |             # Add the user message to the dialogue
186 |             interpreter.dialog.append({"role": "user", "content": user_message})
187 | 
188 |             # Initialize HAS_CODE = False
189 |             HAS_CODE = False
190 | 
191 |             ## Generate the assistant response
192 | 
193 |             # Apply the chat template
194 |             inputs = interpreter.dialog_to_prompt(dialog=interpreter.dialog)
195 | 
196 |             # Generate the assistant response
197 |             _ = interpreter.generate(inputs)
198 |             history[-1][1] = ""
199 |             generated_text = ""
200 |             code_blocks = ""
201 |             code_block = ""
202 |             for character in interpreter.streamer:
203 |                 history[-1][1] += character
204 |                 history[-1][1] = history[-1][1].replace("<|EOT|>","").replace("","").replace("","").replace("<|im_end|>","")
205 |                 generated_text += character
206 |                 yield history, history, dialog_info
207 |             print("generated_text",generated_text)
208 | 
209 |             # Add the assistant response to the dialogue
210 |             interpreter.dialog.append(
211 |                 {
212 |                     "role": "assistant",
213 |                     "content": generated_text.replace("_", "")
214 |                     .replace("", "")
215 |                     .replace("<|EOT|>", "")
216 |                     .replace("<|im_end|>",""),
217 |                 }
218 |             )
219 | 
220 |             HAS_CODE, generated_code_block = interpreter.extract_code_blocks(
221 |                 generated_text
222 |             )
223 | 
224 |             logging.info(f"saving current dialog to file {dialog_info[0]}.json...")
225 |             logging.info(f"current dialog: {interpreter.dialog}")
226 |             save_json(interpreter.dialog, mode="openci_only", json_file_path=JSON_DATASET_DIR/f"{dialog_info[0]}.json", dialog_id=dialog_info[0])
227 | 
228 |             # uncomment this line for the no-interpreter demo
229 |             # HAS_CODE = False
230 | 
231 |             # Set up the docker-related path
232 |             attempt = 1
233 | 
234 |             print(f"HAS_Code:{HAS_CODE}")
235 |             # Enter the code interpreter and run the code
236 |             while HAS_CODE:
237 |                 if attempt > MAX_TRY:
238 |                     break
239 | 
240 |                 # if there is no code then it doesn't have to be executed
241 |                 generated_text = ""  # clear generated text
242 | 
243 |                 yield history, history, dialog_info
244 | 
245 |                 # preprocess each kind of generated code
246 |                 for lang, code in generated_code_block.items():
247 |                     processed_code = code.replace("_", "").replace("", "")
248 |                     generated_code_block[lang] = processed_code
249 | 
250 |                 # exclude languages that do not require code execution
251 |                 generated_code_block = {lang: code for lang, code in generated_code_block.items() if code.strip()}
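                # At this point generated_code_block maps language tags to the last
                # code block of each kind, e.g. {"sh": "pip install numpy", "python": "..."};
                # any "sh" entry is treated below as pip install commands.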
print("generated_code_block",generated_code_block) 253 | 254 | # Check if need to install the external library 255 | matches_with_pip = [] 256 | if "sh" in generated_code_block: 257 | matches_with_pip.append(generated_code_block['sh']) 258 | logging.info("We need to install new packages...") 259 | 260 | # create the sandbox enviroment and run each kinds of codes 261 | has_problem, code_blocks_output = interpreter.execute_code_and_return_output(generated_code_block, 262 | matches_with_pip, 263 | image_name, container_name, 264 | dockerfile_name, sandbox_path) 265 | print("code_blocks_output",code_blocks_output) 266 | 267 | # postprocess 268 | result_string = code_blocks_output['python'].rstrip() 269 | print("result_string",result_string) 270 | history.append([result_string, ""]) 271 | 272 | interpreter.dialog.append({"role": "user", "content": result_string}) 273 | 274 | yield history, history, dialog_info 275 | 276 | 277 | ## Generate the assistant response 278 | inputs = interpreter.dialog_to_prompt(dialog=interpreter.dialog) 279 | 280 | logging.info(f"generating answer for dialog {dialog_info[0]}") 281 | _ = interpreter.generate(inputs) 282 | for character in interpreter.streamer: 283 | history[-1][1] += character 284 | history[-1][1] = history[-1][1].replace("<|EOT|>","").replace("","").replace("","").replace("<|im_end|>","") 285 | generated_text += character 286 | yield history, history, dialog_info 287 | logging.info(f"finish generating answer for dialog {dialog_info[0]}") 288 | 289 | interpreter.dialog.append( 290 | { 291 | "role": "assistant", 292 | "content": generated_text.replace("_", "") 293 | .replace("", "") 294 | .replace("<|EOT|>", "") 295 | .replace("<|im_end|>",""), 296 | } 297 | ) 298 | 299 | HAS_CODE, generated_code_block = interpreter.extract_code_blocks( 300 | generated_text 301 | ) 302 | 303 | 304 | # Try more times 305 | attempt += 1 306 | 307 | logging.info(f"saving current dialog to file {dialog_info[0]}.json...") 308 | logging.info(f"current dialog: {interpreter.dialog}") 309 | save_json(interpreter.dialog, mode="openci_only", json_file_path=JSON_DATASET_DIR/f"{dialog_info[0]}.json", dialog_id=dialog_info[0]) 310 | 311 | if generated_text.endswith("<|EOT|>") or generated_text.endswith("<|im_end|>",""): 312 | continue 313 | 314 | return history, history, dialog_info 315 | 316 | 317 | def reset_textbox(): 318 | return gr.update(value="") 319 | 320 | def clean_docker_container(container_name): 321 | try: 322 | subprocess.run(["docker", "rm", "-f", container_name], check=True) 323 | print(f"Container named {container_name} has been removed.") 324 | except subprocess.CalledProcessError as e: 325 | print(f"An error occurred while removing the container {container_name}: {e}") 326 | 327 | def clear_history(history, dialog_info, interpreter, container_name, 328 | image_name, volume_mount): 329 | interpreter.dialog = [] 330 | clean_docker_container(container_name) 331 | 332 | # Keeping the docker backend running 333 | subprocess.run(["docker", "run", "-d", "-v", 334 | volume_mount, "--name", f"{container_name}", 335 | image_name, "tail", "-f", "/dev/null"], check=True) 336 | return [], [], update_uuid(dialog_info) 337 | 338 | def on_exit(): 339 | clean_docker_container(container_name) 340 | 341 | atexit.register(on_exit) 342 | interpreter = StreamingAutoCodeInterpreter(model_path=model_path) 343 | 344 | index = 0 345 | image_name = f"python-sandbox_{index}" 346 | container_name = f"container_python_{index}" 347 | dockerfile_name = "Dockerfile.python" 348 | 
348 |         current_file_path = os.path.abspath(__file__)
349 |         current_directory = os.path.dirname(current_file_path)
350 |         main_path = os.path.join(current_directory, "sandbox")
351 |         sandbox_path = f"{main_path}/python_{index}"
352 |         volume_mount = f"{sandbox_path}:/app"
353 | 
354 |         # Create the docker image
355 |         check_command = ["docker", "images", "-q", image_name]
356 |         image_exists = subprocess.run(check_command, capture_output=True, text=True).stdout.strip()
357 |         if not image_exists:
358 |             dockerfile_path = os.path.join(sandbox_path, dockerfile_name)
359 |             # lang_sandbox_path = f"{sandbox_path}/{lang}/"
360 |             lang_sandbox_path = f"{sandbox_path}"
361 |             build_command = ["docker", "build", "-t", image_name, "-f", dockerfile_path, lang_sandbox_path]
362 |             build_result = subprocess.run(build_command, capture_output=True, text=True)
363 |             if build_result.returncode != 0:
364 |                 print(f"Failed to build image {image_name}: {build_result.stderr}")
365 |                 # code_blocks_output[lang] = f"Failed to build image {image_name}: {build_result.stderr}"
366 | 
367 |         # Keep the docker backend running
368 |         subprocess.run(["docker", "run", "-d", "-v",
369 |                         volume_mount, "--name", f"{container_name}",
370 |                         image_name, "tail", "-f", "/dev/null"], check=True)
371 | 
372 |         sub.click(partial(bot, interpreter=interpreter, image_name = image_name,
373 |                           container_name = container_name, dockerfile_name = dockerfile_name,
374 |                           sandbox_path = sandbox_path, volume_mount = volume_mount),
375 |                   [msg, session_state, dialog_info],
376 |                   [chatbot, session_state, dialog_info])
377 |         sub.click(reset_textbox, [], [msg])
378 | 
379 |         clear.click(partial(clear_history, interpreter=interpreter, container_name = container_name,
380 |                             image_name = image_name, volume_mount = volume_mount),
381 |                     [session_state, dialog_info],
382 |                     [chatbot, session_state, dialog_info], queue=False)
383 | 
384 |     demo.queue(max_size=20)
385 |     demo.launch(share=True, server_port = 8000)
386 | 
387 | 
388 | if __name__ == "__main__":
389 |     import argparse
390 | 
391 |     parser = argparse.ArgumentParser()
392 |     parser.add_argument(
393 |         "--path",
394 |         type=str,
395 |         required=False,
396 |         help="Path to the Model.",
397 |         default="Bin12345/AutoCoder",
398 |     )
399 |     args = parser.parse_args()
400 | 
401 |     gradio_launch(model_path=args.path)
402 | 
--------------------------------------------------------------------------------
/Web_demo/code_interpreter/AutoCoderInterpreter.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | 
4 | prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
5 | sys.path.append(prj_root_path)
6 | 
7 | from code_interpreter.BaseCodeInterpreter import BaseCodeInterpreter
8 | from utils.const import *
9 | 
10 | from typing import List, Tuple, Dict
11 | import re
12 | 
13 | import torch
14 | from transformers import AutoModelForCausalLM, AutoTokenizer
15 | 
16 | 
17 | sys.path.append(os.path.dirname(__file__))
18 | sys.path.append(os.path.dirname(os.path.abspath(__file__)))
19 | 
20 | import warnings
21 | 
22 | warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
23 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
24 | 
25 | 
26 | class AutoCoderInterpreter(BaseCodeInterpreter):
27 |     def __init__(
28 |         self,
29 |         model_path: str,
30 |         load_in_8bit: bool = False,
31 |         load_in_4bit: bool = False,
32 |     ):
33 |         # build tokenizer
34 |         # self.tokenizer = AutoTokenizer.from_pretrained(
35 |         #     model_path,
36 |         #     padding_side="right",
37 |         #     trust_remote_code=True
38 |         # )
39 |         self.tokenizer = AutoTokenizer.from_pretrained(model_path,
40 |                                                         legacy=False)
41 | 
42 |         # self.model = AutoModelForCausalLM.from_pretrained(
43 |         #     model_path,
44 |         #     device_map="auto",
45 |         #     load_in_4bit=load_in_4bit,
46 |         #     load_in_8bit=load_in_8bit,
47 |         #     torch_dtype=torch.float16,
48 |         #     trust_remote_code=True
49 |         # )
50 | 
51 |         # self.model.resize_token_embeddings(len(self.tokenizer))
52 | 
53 |         self.model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
54 | 
55 |         self.model = self.model.eval()
56 | 
57 |         self.dialog = []
58 |         self.MAX_CODE_OUTPUT_LENGTH = 1000
59 | 
60 | 
61 |     def dialog_to_prompt(self, dialog: List[Dict]) -> str:
62 |         inputs = self.tokenizer.apply_chat_template(dialog, return_tensors="pt")
63 | 
64 |         return inputs
65 | 
66 |     # def extract_code_blocks(self, prompt: str) -> Tuple[bool, str]:
67 |     #     pattern = re.escape("```python") + r"(.*?)" + re.escape("```")
68 |     #     matches = re.findall(pattern, prompt, re.DOTALL)
69 | 
70 |     #     if matches:
71 |     #         # Return the last matched code block
72 |     #         return True, matches[-1].strip()
73 |     #     else:
74 |     #         return False, ""
75 | 
76 |     # def extract_code_blocks(self, prompt: str) -> Tuple[str, str, str]:
77 |     #     has_code = False
78 |     #     patterns = {
79 |     #         "python": re.escape("```python") + r"(.*?)" + re.escape("```"),
80 |     #         "cpp": re.escape("```cpp") + r"(.*?)" + re.escape("```"),
81 |     #         "fortran": re.escape("```fortran") + r"(.*?)" + re.escape("```"),
82 |     #     }
83 |     #     generated_code_block = {"python": "", "cpp": "", "fortran": ""}
84 | 
85 |     #     for lang, pattern in patterns.items():
86 |     #         matches = re.findall(pattern, prompt, re.DOTALL)
87 |     #         if matches:
88 |     #             generated_code_block[lang] = matches[-1].strip()
89 |     #             has_code = True
90 | 
91 |     #     return has_code, generated_code_block
92 | 
93 |     def extract_code_blocks(self, prompt: str) -> Tuple[bool, Dict[str, str]]:
94 |         has_code = False
95 |         patterns = {
96 |             "sh": re.escape("```sh") + r"(.*?)" + re.escape("```"),
97 |             "python": re.escape("```python") + r"(.*?)" + re.escape("```"),
98 |         }
99 |         generated_code_block = {"sh": "", "python": ""}
100 | 
101 |         for lang, pattern in patterns.items():
102 |             matches = re.findall(pattern, prompt, re.DOTALL)
103 |             if matches:
104 |                 generated_code_block[lang] = matches[-1].strip()
105 |                 has_code = True
106 | 
107 |         return has_code, generated_code_block
108 | 
109 |     def clean_code_output(self, output: str) -> str:
110 |         if self.MAX_CODE_OUTPUT_LENGTH < len(output):
111 |             return (
112 |                 output[: self.MAX_CODE_OUTPUT_LENGTH // 5]
113 |                 + "\n...(truncated due to length)...\n"
114 |                 + output[-self.MAX_CODE_OUTPUT_LENGTH // 5 :]
115 |             )
116 | 
117 |         return output
118 | 
--------------------------------------------------------------------------------
/Web_demo/code_interpreter/BaseCodeInterpreter.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import re
4 | import logging
5 | import subprocess
6 | 
7 | prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
8 | sys.path.append(prj_root_path)
9 | 
10 | 
11 | from utils.const import *
12 | 
13 | class BaseCodeInterpreter:
14 |     def __init__(self):
15 |         self.dialog = [
16 |             {
17 |                 "role": "system",
18 |                 "content": CODE_INTERPRETER_SYSTEM_PROMPT,
19 |             },
20 |         ]
21 | 
22 |     @staticmethod
23 |     def extract_code_blocks(text: str):
24 |         pattern = r"```(?:python\n)?(.*?)```"  # Match optional 'python\n' but don't capture it
25 |         code_blocks = re.findall(pattern, text, re.DOTALL)
26 |         return [block.strip() for block in code_blocks]
27 | 
28 |     # def execute_code_and_return_output(self, code_str: str, nb):
29 |     #     _, _ = nb.add_and_run(GUARD_CODE)
30 |     #     outputs, error_flag = nb.add_and_run(code_str)
31 |     #     return outputs, error_flag
32 | 
33 |     # def execute_code_and_return_output(self, generated_code_block) -> str:
34 |     #     code_blocks_output= {}
35 |     #     sandbox_path = "/home/uconn/BinLei/Data_engine/NLP_Task/multi_codes_demo/sandbox"
36 |     #     file_map = {"python": "script.py", "cpp": "script.cpp", "fortran": "script.f90"}
37 |     #     image_map = {"python": "python-sandbox", "cpp": "cpp-sandbox", "fortran": "fortran-sandbox"}
38 |     #     dockerfile_map = {"python": "python/Dockerfile.python", "cpp": "cpp/Dockerfile.cpp", "fortran": "fortran/Dockerfile.fortran"}
39 |     #     for lang, code in generated_code_block.items():
40 | 
41 |     #         file_name = file_map[lang]
42 |     #         image_name = image_map[lang]
43 | 
44 |     #         print(f"Writing the script for {lang}...")
45 |     #         file_path = f"{sandbox_path}/{lang}/{file_name}"
46 |     #         with open(file_path, 'w') as file:
47 |     #             file.write(code)
48 | 
49 |     #         # check and build the image
50 |     #         print(f"Checking if the image for {lang}... exist")
51 |     #         image_name = image_map[lang]
52 |     #         check_command = ["docker", "images", "-q", image_name]
53 |     #         image_exists = subprocess.run(check_command, capture_output=True, text=True).stdout.strip()
54 | 
55 |     #         if not image_exists:
56 |     #             print(f"Creating the image for {lang}...")
57 |     #             dockerfile_path = os.path.join(sandbox_path, dockerfile_map[lang])
58 |     #             lang_sandbox_path = f"{sandbox_path}/{lang}/"
59 |     #             build_command = ["docker", "build", "-t", image_name, "-f", dockerfile_path, lang_sandbox_path]
60 |     #             build_result = subprocess.run(build_command, capture_output=True, text=True)
61 |     #             if build_result.returncode != 0:
62 |     #                 print(f"Failed to build image {image_name}: {build_result.stderr}")
63 |     #                 code_blocks_output[lang] = f"Failed to build image {image_name}: {build_result.stderr}"
64 |     #                 continue
65 | 
66 | 
67 |     #         print(f"Running the script for {lang} in the sandbox...")
68 |     #         script_path = f"{sandbox_path}/{lang}/compile_run.sh"
69 |     #         chmod_command = ["chmod", "+x", script_path]
70 |     #         chmod_result = subprocess.run(chmod_command, capture_output=True, text=True)
71 |     #         volume_mount = f"{sandbox_path}/{lang}:/app"
72 |     #         command = ["docker", "run", "--rm", "-v", volume_mount, image_name]
73 |     #         # command = ["docker", "run", "--rm", image_name]
74 |     #         result = subprocess.run(command, capture_output=True, text=True)
75 | 
76 |     #         if result.returncode == 0:
77 |     #             code_blocks_output[lang] = result.stdout
78 |     #         else:
79 |     #             code_blocks_output[lang] = result.stderr
80 |     #     return code_blocks_output
81 | 
82 |     def execute_code_and_return_output(self, generated_code_block, matches_with_pip,
83 |                                        image_name, container_name,
84 |                                        dockerfile_name, sandbox_path) -> tuple:
85 | 
86 |         # Initialize the file/image/dockerfile information for each language
87 |         code_blocks_output = {}
88 |         # sandbox_path = "/home/uconn/BinLei/Data_engine/NLP_Task/multi_codes_demo/sandbox"
89 |         file_map = {"python": "script.py",
90 |                     "cpp": "script.cpp",
91 |                     "fortran": "script.f90"}
92 |         # image_map = {"python": "python-sandbox",
93 |         #              "cpp": "cpp-sandbox",
94 |         #              "fortran": "fortran-sandbox"}
95 |         # dockerfile_map = {"python": "python/Dockerfile.python",
96 |         #                   "cpp": "cpp/Dockerfile.cpp",
97 |         #                   "fortran": "fortran/Dockerfile.fortran"}
98 |         image_map = {"python": f"{image_name}",
99 |                      "cpp": "cpp-sandbox",
100 |                      "fortran": "fortran-sandbox"}
{"python": f"{dockerfile_name}", 102 | "cpp": "cpp/Dockerfile.cpp", 103 | "fortran": "fortran/Dockerfile.fortran"} 104 | has_problem = True 105 | for lang, code in generated_code_block.items(): 106 | if lang == "sh": 107 | lang = "python" 108 | # write the script into the corresponsing file 109 | file_name = file_map[lang] 110 | image_name = image_map[lang] 111 | # file_path = f"{sandbox_path}/{lang}/{file_name}" 112 | file_path = f"{sandbox_path}/{file_name}" 113 | with open(file_path, 'w') as file: 114 | file.write(code) 115 | 116 | # check and build the image 117 | image_name = image_map[lang] 118 | check_command = ["docker", "images", "-q", image_name] 119 | image_exists = subprocess.run(check_command, capture_output=True, text=True).stdout.strip() 120 | 121 | if not image_exists: 122 | dockerfile_path = os.path.join(sandbox_path, dockerfile_map[lang]) 123 | # lang_sandbox_path = f"{sandbox_path}/{lang}/" 124 | lang_sandbox_path = f"{sandbox_path}" 125 | build_command = ["docker", "build", "-t", image_name, "-f", dockerfile_path, lang_sandbox_path] 126 | build_result = subprocess.run(build_command, capture_output=True, text=True) 127 | if build_result.returncode != 0: 128 | print(f"Failed to build image {image_name}: {build_result.stderr}") 129 | code_blocks_output[lang] = f"Failed to build image {image_name}: {build_result.stderr}" 130 | continue 131 | 132 | # give docker the access to run the comile_run file 133 | # script_path = f"{sandbox_path}/{lang}/compile_run.sh" 134 | script_path = f"{sandbox_path}/compile_run.sh" 135 | chmod_command = ["chmod", "+x", script_path] 136 | chmod_result = subprocess.run(chmod_command, capture_output=True, text=True) 137 | # volume_mount = f"{sandbox_path}/{lang}:/app" 138 | volume_mount = f"{sandbox_path}:/app" 139 | 140 | # install external library for python if there are related commonds 141 | pip_command = None 142 | print("matches_with_pip",matches_with_pip) 143 | if lang == "python" and matches_with_pip: 144 | pip_commands = [] 145 | for match in matches_with_pip: 146 | pattern = r'^pip install.*' 147 | matches_pip = re.findall(pattern, match, re.MULTILINE) 148 | print("matches_pip", matches_pip) 149 | for match_pip in matches_pip: 150 | if match_pip: 151 | pip_commands.append(match_pip.replace('\n', ' ').strip()) 152 | print(f"pip_command:{pip_command}") 153 | pip_command = " && ".join(pip_commands) 154 | print(f"pip_command:{pip_command}") 155 | if pip_command: 156 | # command = ["docker", "exec", "container_python", "sh", "-c", f"{pip_command}"] 157 | command = ["docker", "exec", f"{container_name}", "sh", "-c", f"{pip_command}"] 158 | # print(f"command:{command}") 159 | try: 160 | logging.info("Start to install related packages...") 161 | pip_result = subprocess.run(command, check=True, 162 | stdout=subprocess.PIPE, 163 | stderr=subprocess.PIPE, 164 | text=True) 165 | except subprocess.CalledProcessError as e: 166 | pip_result = e 167 | # command = ["docker", "exec", "container_python", "python", "/app/script.py"] 168 | command = ["docker", "exec", f"{container_name}", "python", "/app/script.py"] 169 | 170 | # if there is no external library for python, execute the code directly 171 | else: 172 | # command = ["docker", "exec", "container_python", "python", "/app/script.py"] 173 | command = ["docker", "exec", f"{container_name}", "python", "/app/script.py"] 174 | # result = subprocess.run(command, capture_output=True, text=True) 175 | try: 176 | logging.info("Start to run the code...") 177 | result = subprocess.run(command, 
capture_output=True, text=True, timeout=30) 178 | except subprocess.TimeoutExpired: 179 | code_blocks_output[lang] = "Command execution timed out. This is probably because the function needs to run continuously." 180 | continue 181 | # record all the information into the code_blocks_output 182 | if result.stdout == "": 183 | result.stdout = "None" 184 | if result.stderr == "": 185 | has_problem = False 186 | result.stderr = "None" 187 | if pip_command: 188 | code_blocks_output[lang] = f"pip_result.stdout: \n{pip_result.stdout}\n pip_result.stderr: \n{pip_result.stderr}\n result.stdout:\n{result.stdout}\nresult.stderr:\n{result.stderr}" 189 | else: 190 | code_blocks_output[lang] = f"result.stdout:\n{result.stdout}\nresult.stderr:\n{result.stderr}" 191 | return has_problem, code_blocks_output -------------------------------------------------------------------------------- /Web_demo/code_interpreter/JupyterClient.py: -------------------------------------------------------------------------------- 1 | from jupyter_client import KernelManager 2 | import threading 3 | import re 4 | from utils.const import * 5 | 6 | 7 | class JupyterNotebook: 8 | def __init__(self): 9 | self.km = KernelManager() 10 | self.km.start_kernel() 11 | self.kc = self.km.client() 12 | _ = self.add_and_run(TOOLS_CODE) 13 | 14 | def clean_output(self, outputs): 15 | outputs_only_str = list() 16 | for i in outputs: 17 | if type(i) == dict: 18 | if "text/plain" in list(i.keys()): 19 | outputs_only_str.append(i["text/plain"]) 20 | elif type(i) == str: 21 | outputs_only_str.append(i) 22 | elif type(i) == list: 23 | error_msg = "\n".join(i) 24 | error_msg = re.sub(r"\x1b\[.*?m", "", error_msg) 25 | outputs_only_str.append(error_msg) 26 | 27 | return "\n".join(outputs_only_str).strip() 28 | 29 | def add_and_run(self, code_string): 30 | # This inner function will be executed in a separate thread 31 | def run_code_in_thread(): 32 | nonlocal outputs, error_flag 33 | 34 | # Execute the code and get the execution count 35 | msg_id = self.kc.execute(code_string) 36 | 37 | while True: 38 | try: 39 | msg = self.kc.get_iopub_msg(timeout=20) 40 | 41 | msg_type = msg["header"]["msg_type"] 42 | content = msg["content"] 43 | 44 | if msg_type == "execute_result": 45 | outputs.append(content["data"]) 46 | elif msg_type == "stream": 47 | outputs.append(content["text"]) 48 | elif msg_type == "error": 49 | error_flag = True 50 | outputs.append(content["traceback"]) 51 | 52 | # If the execution state of the kernel is idle, it means the cell finished executing 53 | if msg_type == "status" and content["execution_state"] == "idle": 54 | break 55 | except: 56 | break 57 | 58 | outputs = [] 59 | error_flag = False 60 | 61 | # Start the thread to run the code 62 | thread = threading.Thread(target=run_code_in_thread) 63 | thread.start() 64 | 65 | # Wait for 20 seconds for the thread to finish 66 | thread.join(timeout=20) 67 | 68 | # If the thread is still alive after 20 seconds, it's a timeout 69 | if thread.is_alive(): 70 | outputs = ["Execution timed out."] 71 | # outputs = ["Error"] 72 | error_flag = "Timeout" 73 | 74 | return self.clean_output(outputs), error_flag 75 | 76 | def close(self): 77 | """Shutdown the kernel.""" 78 | self.km.shutdown_kernel() 79 | 80 | def __deepcopy__(self, memo): 81 | if id(self) in memo: 82 | return memo[id(self)] 83 | new_copy = type(self)() 84 | memo[id(self)] = new_copy 85 | return new_copy 86 | -------------------------------------------------------------------------------- /Web_demo/sandbox/cpp/CMakeLists.txt: 
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.14)
2 | project(ScriptTest)
3 | 
4 | set(CMAKE_CXX_STANDARD 11)
5 | 
6 | include(FetchContent)
7 | FetchContent_Declare(
8 |     googletest
9 |     URL https://github.com/google/googletest/archive/release-1.10.0.tar.gz
10 | )
11 | include(GoogleTest)
12 | FetchContent_MakeAvailable(googletest)
13 | 
14 | add_executable(script_test script.cpp)
15 | target_link_libraries(script_test gtest_main)
16 | 
17 | enable_testing()
18 | include(GoogleTest)
19 | gtest_discover_tests(script_test)
--------------------------------------------------------------------------------
/Web_demo/sandbox/cpp/Dockerfile.cpp:
--------------------------------------------------------------------------------
1 | # Use the latest Ubuntu image as the base image
2 | FROM ubuntu:latest
3 | 
4 | # Set non-interactive mode so that apt-get and similar commands do not wait for user input during installation
5 | ENV DEBIAN_FRONTEND=noninteractive
6 | 
7 | # Update the package list and install the required packages
8 | RUN apt-get update && \
9 |     apt-get install -y cmake git build-essential libomp-dev
10 | 
11 | # Clone the Google Test source, then build and install it
12 | RUN git clone https://github.com/google/googletest.git /googletest && \
13 |     cd /googletest && \
14 |     cmake . && \
15 |     make && \
16 |     make install
17 | 
18 | # Set the working directory to /app
19 | WORKDIR /app
20 | 
21 | # Copy all files in the current directory into the container's /app directory
22 | COPY . /app/
23 | 
24 | # Compile the C++ program; the main C++ file is assumed to be script.cpp and the resulting executable is named test
25 | # Note: the -L/usr/local/lib option specifies the library search path, which may be necessary in some cases
26 | # RUN g++ -fopenmp script.cpp -L/usr/local/lib -lgtest -lgtest_main -pthread -o test
27 | 
28 | # Default command executed when the container starts
29 | # CMD ["./test"]
30 | CMD ["/app/compile_run.sh"]
31 | 
--------------------------------------------------------------------------------
/Web_demo/sandbox/cpp/compile_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | set -e
4 | 
5 | # echo "Starting compilation and execution..."
6 | 
7 | # compile
8 | g++ -fopenmp /app/script.cpp -L/usr/local/lib -lgtest -lgtest_main -pthread -o /app/test
9 | 
10 | # run
11 | /app/test --gtest_print_time=0 --gtest_brief=1
12 | 
13 | # echo "Execution completed."
14 | 
15 | # exit_code=$?
16 | # exit $exit_code -------------------------------------------------------------------------------- /Web_demo/sandbox/cpp/script.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <vector> 3 | #include <cassert> 4 | 5 | using namespace std; 6 | 7 | vector<int> convolution(vector<int> a, vector<int> b) { 8 | int n = a.size(); 9 | vector<int> c(2 * n - 1, 0); 10 | for(int i = 0; i < 2 * n - 1; ++i) { 11 | for(int j = max(0, i - n + 1); j <= min(i, n - 1); ++j) { 12 | c[i] += a[j] * b[i - j]; 13 | } 14 | } 15 | return c; 16 | } 17 | 18 | void test_convolution() { 19 | // Test case 1 20 | { 21 | vector<int> a = {1, 2, 3}; 22 | vector<int> b = {4, 5, 6}; 23 | vector<int> expected = {4, 13, 28, 27, 18}; 24 | assert(convolution(a, b) == expected); 25 | } 26 | 27 | // Test case 2: Negative values 28 | { 29 | vector<int> a = {-1, -2, -3}; 30 | vector<int> b = {1, 2, 3}; 31 | vector<int> expected = {-1, -4, -10, -8, -9}; 32 | assert(convolution(a, b) == expected); 33 | } 34 | 35 | // Test case 3: Zeroes 36 | { 37 | vector<int> a = {0, 0, 0}; 38 | vector<int> b = {1, 2, 3}; 39 | vector<int> expected = {0, 0, 0, 0, 0}; 40 | assert(convolution(a, b) == expected); 41 | } 42 | 43 | // Test case 4: Single element arrays 44 | { 45 | vector<int> a = {5}; 46 | vector<int> b = {10}; 47 | vector<int> expected = {50}; 48 | assert(convolution(a, b) == expected); 49 | } 50 | 51 | // Test case 5: Different positive values 52 | { 53 | vector<int> a = {2, 4, 6}; 54 | vector<int> b = {1, 3, 5}; 55 | vector<int> expected = {2, 10, 28, 32, 30}; 56 | assert(convolution(a, b) == expected); 57 | } 58 | 59 | // Test case 6: Larger arrays 60 | { 61 | vector<int> a = {1, 3, 5, 7}; 62 | vector<int> b = {2, 4, 6, 8}; 63 | vector<int> expected = {2, 10, 28, 52, 60, 58, 56}; 64 | assert(convolution(a, b) == expected); 65 | } 66 | 67 | // Test case 7: Arrays with a negative and positive mix 68 | { 69 | vector<int> a = {-1, 2, -3, 4}; 70 | vector<int> b = {5, -6, 7, -8}; 71 | vector<int> expected = {-5, 16, -31, 58, -67, 52, -32}; 72 | assert(convolution(a, b) == expected); 73 | } 74 | 75 | cout << "All test cases passed!" << endl; 76 | } 77 | 78 | int main() { 79 | test_convolution(); 80 | return 0; 81 | } -------------------------------------------------------------------------------- /Web_demo/sandbox/cpp/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bin123apple/AutoCoder/a51540e7d27a9d600a783a824de2a9017f599099/Web_demo/sandbox/cpp/test -------------------------------------------------------------------------------- /Web_demo/sandbox/fortran/Dockerfile.fortran: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | 3 | RUN apt-get update && apt-get install -y gfortran libgomp1 4 | 5 | WORKDIR /app 6 | 7 | COPY . 
/app 8 | 9 | CMD ["/app/compile_run.sh"] 10 | -------------------------------------------------------------------------------- /Web_demo/sandbox/fortran/compile_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # compile 3 | gfortran -fopenmp -J /app/ -o /app/test /app/script.f90 4 | # run 5 | /app/test 6 | -------------------------------------------------------------------------------- /Web_demo/sandbox/fortran/script.f90: -------------------------------------------------------------------------------- 1 | program hello 2 | print *, 'Updated Hello from Fortran' 3 | end program hello -------------------------------------------------------------------------------- /Web_demo/sandbox/fortran/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bin123apple/AutoCoder/a51540e7d27a9d600a783a824de2a9017f599099/Web_demo/sandbox/fortran/test -------------------------------------------------------------------------------- /Web_demo/sandbox/java/Dockerfile.java: -------------------------------------------------------------------------------- 1 | # Use the official Java 11 JDK image as the base image 2 | FROM openjdk:11-jdk 3 | 4 | # Set non-interactive mode so apt-get and similar commands do not wait for user input during installation 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | # Install the required tooling, such as build and debugging tools 8 | RUN apt-get update && \ 9 | apt-get install -y --no-install-recommends \ 10 | git \ 11 | maven \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | WORKDIR /app 15 | 16 | COPY . /app 17 | 18 | CMD ["/app/compile_run.sh"] 19 | -------------------------------------------------------------------------------- /Web_demo/sandbox/java/compile_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | cd /app 6 | 7 | mkdir -p src/main/java 8 | 9 | mv script.java src/main/java/ 10 | 11 | mvn clean package 12 | 13 | java -jar target/test-1.0-SNAPSHOT.jar 14 | -------------------------------------------------------------------------------- /Web_demo/sandbox/java/pom.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 3 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4 |     <modelVersion>4.0.0</modelVersion> 5 | 6 |     <groupId>com.example</groupId> 7 |     <artifactId>test</artifactId> 8 |     <version>1.0-SNAPSHOT</version> 9 | 10 |     <dependencies> 11 |         <dependency> 12 |             <groupId>org.eclipse.paho</groupId> 13 |             <artifactId>org.eclipse.paho.client.mqttv3</artifactId> 14 |             <version>1.2.5</version> 15 |         </dependency> 16 |     </dependencies> 17 | </project> 18 | -------------------------------------------------------------------------------- /Web_demo/sandbox/java/src/main/java/script.java: -------------------------------------------------------------------------------- 1 | import org.eclipse.paho.client.mqttv3.*; 2 | import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence; 3 | 4 | // The public modifier is omitted so that the MqttSubscriber class stays package-private 5 | class MqttSubscriber { 6 | 7 | private MqttClient client; 8 | private MqttConnectOptions options = new MqttConnectOptions(); 9 | 10 | public void connectAndSubscribe(String brokerUrl, String clientId, String topic, int qos) throws MqttException { 11 | String serverURI = "tcp://" + brokerUrl; 12 | client = new MqttClient(serverURI, clientId, new MemoryPersistence()); 13 | options.setCleanSession(true); // assumes you want a clean session 14 | client.connect(options); 15 | client.subscribe(topic, qos, this::messageArrived); 16 | } 17 | 18 | public void disconnect() throws MqttException { 19 | if (client != null && client.isConnected()) { 20 | client.disconnect(); 21 | } 22 | } 23 | 24 | private void messageArrived(String topic, MqttMessage message) { 25 | System.out.println("Received message: " + new String(message.getPayload()) + " on topic: " + topic); 26 | 
} 27 | } 28 | 29 | // A separate class that starts and runs MqttSubscriber 30 | class ScriptMain { 31 | public static void main(String[] args) { 32 | MqttSubscriber subscriber = new MqttSubscriber(); 33 | // Fill in your MQTT broker URL, client ID, topic, and QoS 34 | String brokerUrl = "your_broker_url"; 35 | String clientId = "your_client_id"; 36 | String topic = "your_topic"; 37 | int qos = 1; // Quality of Service level 38 | try { 39 | subscriber.connectAndSubscribe(brokerUrl, clientId, topic, qos); 40 | // Disconnects right after subscribing; add waiting logic here if messages should be received first 41 | subscriber.disconnect(); 42 | } catch (MqttException e) { 43 | e.printStackTrace(); 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /Web_demo/sandbox/java/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bin123apple/AutoCoder/a51540e7d27a9d600a783a824de2a9017f599099/Web_demo/sandbox/java/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst: -------------------------------------------------------------------------------- /Web_demo/sandbox/java/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst: -------------------------------------------------------------------------------- 1 | /app/src/main/java/script.java 2 | -------------------------------------------------------------------------------- /Web_demo/sandbox/python_0/Dockerfile.python: -------------------------------------------------------------------------------- 1 | FROM python:latest 2 | WORKDIR /app 3 | COPY . /app 4 | 5 | # CMD ["python", "script.py"] 6 | -------------------------------------------------------------------------------- /Web_demo/sandbox/python_0/script.py: -------------------------------------------------------------------------------- 1 | def sum_numbers(n): 2 | total = 0 3 | for i in range(n + 1): 4 | total += i 5 | return total 6 | 7 | def main(): 8 | n = 10 # Predefined value for n 9 | result = sum_numbers(n) 10 | print(f"The sum of numbers from 1 to {n} is: {result}") 11 | 12 | if __name__ == "__main__": 13 | main() -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "myapp" 7 | version = "0.1.0" 8 | -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "myapp" 3 | version = "0.1.0" 4 | edition = "2018" 5 | 6 | [[bin]] 7 | name = "test" 8 | path = "main.rs" 9 | -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/Dockerfile.rust: -------------------------------------------------------------------------------- 1 | # Use the latest Ubuntu image as the base image 2 | FROM ubuntu:latest 3 | 4 | # Set non-interactive mode so apt-get and similar commands do not wait for user input during installation 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | # Update the package list 8 | RUN apt-get update 9 | 10 | # Install curl (used to download the Rust installer) and the build-essential package 11 | RUN apt-get install -y curl build-essential 12 | 13 | # Install Rust via the official rustup script, then clean up files that are no longer needed 14 | RUN curl --proto '=https' --tlsv1.2 https://sh.rustup.rs -sSf | sh -s -- -y && \ 15 | rm -rf /var/lib/apt/lists/* 16 | 17 | # Add cargo's binary directory to PATH so the cargo command can be used directly 18 | ENV PATH="/root/.cargo/bin:${PATH}" 19 | 20 | # Set the working directory to /app 21 | WORKDIR /app 22 | 23 | # Copy everything in the current directory into the container's /app directory 24 | COPY . /app/ 25 | 26 | CMD ["/app/compile_run.sh"] 27 | -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/compile_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | mv /app/script.rs /app/main.rs 6 | 7 | # Create the Cargo.toml file 8 | echo '[package] 9 | name = "myapp" 10 | version = "0.1.0" 11 | edition = "2018" 12 | 13 | [[bin]] 14 | name = "test" 15 | path = "main.rs"' > /app/Cargo.toml 16 | 17 | cd /app 18 | 19 | cargo build --release 20 | 21 | ./target/release/test 22 | 23 | -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/main.rs: -------------------------------------------------------------------------------- 1 | // Define the input_handler module 2 | mod input_handler { 3 | pub struct InputHandler {} 4 | 5 | impl InputHandler { 6 | pub fn new() -> Self { 7 | Self {} 8 | } 9 | 10 | pub fn handle_input(&self) { 11 | println!("Handling general input..."); 12 | } 13 | } 14 | } 15 | 16 | // Define the keyboard module 17 | mod keyboard { 18 | pub struct Keyboard {} 19 | 20 | impl Keyboard { 21 | pub fn new() -> Self { 22 | Self {} 23 | } 24 | 25 | pub fn handle_key_press(&self) { 26 | println!("Handling keyboard input..."); 27 | } 28 | } 29 | } 30 | 31 | // Define the mouse module 32 | mod mouse { 33 | pub struct Mouse {} 34 | 35 | impl Mouse { 36 | pub fn new() -> Self { 37 | Self {} 38 | } 39 | 40 | pub fn handle_mouse_move(&self) { 41 | println!("Handling mouse movement..."); 42 | } 43 | } 44 | } 45 | 46 | // Define the gamepad module 47 | mod gamepad { 48 | pub struct Gamepad {} 49 | 50 | impl Gamepad { 51 | pub fn new() -> Self { 52 | Self {} 53 | } 54 | 55 | pub fn handle_button_press(&self) { 56 | println!("Handling gamepad button press..."); 57 | } 58 | } 59 | } 60 | 61 | // Define the input module, re-exporting the other modules 62 | mod input { 63 | pub use crate::input_handler::InputHandler; 64 | pub use crate::keyboard::Keyboard; 65 | pub use crate::mouse::Mouse; 66 | pub use crate::gamepad::Gamepad; 67 | } 68 | 69 | fn main() { 70 | // Instantiate each struct 71 | let handler = input::InputHandler::new(); 72 | let keyboard = input::Keyboard::new(); 73 | let mouse = input::Mouse::new(); 74 | let gamepad = input::Gamepad::new(); 75 | 76 | // Call each instance's methods 77 | handler.handle_input(); 78 
| keyboard.handle_key_press(); 79 | mouse.handle_mouse_move(); 80 | gamepad.handle_button_press(); 81 | 82 | println!("All inputs handled."); 83 | } -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/.rustc_info.json: -------------------------------------------------------------------------------- 1 | {"rustc_fingerprint":5138894494729493183,"outputs":{"4614504638168534921":{"success":true,"status":"","code":0,"stdout":"rustc 1.77.1 (7cf61ebde 2024-03-27)\nbinary: rustc\ncommit-hash: 7cf61ebde7b22796c69757901dd346d0fe70bd97\ncommit-date: 2024-03-27\nhost: x86_64-unknown-linux-gnu\nrelease: 1.77.1\nLLVM version: 17.0.6\n","stderr":""},"15729799797837862367":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/root/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""}},"successes":{}} -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/CACHEDIR.TAG: -------------------------------------------------------------------------------- 1 | Signature: 8a477f597d28d172789f06886806bc55 2 | # This file is a cache directory tag created by cargo. 3 | # For information about cache directory tags see https://bford.info/cachedir/ 4 | -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/.cargo-lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bin123apple/AutoCoder/a51540e7d27a9d600a783a824de2a9017f599099/Web_demo/sandbox/rust/target/release/.cargo-lock -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/.fingerprint/myapp-0a8bfcae0d226a1c/bin-test: -------------------------------------------------------------------------------- 1 | 205748b9c486ad59 -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/.fingerprint/myapp-0a8bfcae0d226a1c/bin-test.json: -------------------------------------------------------------------------------- 1 | {"rustc":16286356497298320803,"features":"[]","declared_features":"","target":10996953811719133845,"profile":18277820415669657429,"path":12539050212544219519,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release/.fingerprint/myapp-0a8bfcae0d226a1c/dep-bin-test"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0} -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/.fingerprint/myapp-0a8bfcae0d226a1c/dep-bin-test: -------------------------------------------------------------------------------- 1 | main.rs -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/.fingerprint/myapp-0a8bfcae0d226a1c/invoked.timestamp: -------------------------------------------------------------------------------- 1 | This file has 
an mtime of when this was started. -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/deps/test-0a8bfcae0d226a1c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bin123apple/AutoCoder/a51540e7d27a9d600a783a824de2a9017f599099/Web_demo/sandbox/rust/target/release/deps/test-0a8bfcae0d226a1c -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/deps/test-0a8bfcae0d226a1c.d: -------------------------------------------------------------------------------- 1 | /app/target/release/deps/test-0a8bfcae0d226a1c: main.rs 2 | 3 | /app/target/release/deps/test-0a8bfcae0d226a1c.d: main.rs 4 | 5 | main.rs: 6 | -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bin123apple/AutoCoder/a51540e7d27a9d600a783a824de2a9017f599099/Web_demo/sandbox/rust/target/release/test -------------------------------------------------------------------------------- /Web_demo/sandbox/rust/target/release/test.d: -------------------------------------------------------------------------------- 1 | /app/target/release/test: /app/main.rs 2 | -------------------------------------------------------------------------------- /Web_demo/utils/cleaner.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | 4 | PYTHON_PREFIX = os.environ.get("CONDA_PREFIX", "/usr/local") 5 | 6 | SITE_PKG_ERROR_PREFIX = f'File {PYTHON_PREFIX}/lib/python3.10/' 7 | 8 | def get_error_header(traceback_str): 9 | lines = traceback_str.split('\n') 10 | for line in lines: 11 | if 'Error:' in line: 12 | return line 13 | return '' # Return an empty string if no error message is found 14 | 15 | def clean_error_msg(error_str: str = ''): 16 | filtered_error_msg = str(error_str).split('An error occurred while executing the following cell')[-1].split("\n------------------\n")[-1] 17 | raw_error_msg = filtered_error_msg 18 | 19 | # Remove escape sequences for colored text 20 | ansi_escape = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]') 21 | error_msg = ansi_escape.sub('', raw_error_msg) 22 | 23 | error_str_out = '' 24 | error_msg_only_cell = error_msg.split(SITE_PKG_ERROR_PREFIX) 25 | 26 | error_str_out += f'{error_msg_only_cell[0]}\n' 27 | error_header = get_error_header(error_msg_only_cell[-1]) 28 | if error_header not in error_str_out: 29 | error_str_out += get_error_header(error_msg_only_cell[-1]) 30 | 31 | return error_str_out -------------------------------------------------------------------------------- /Web_demo/utils/const.py: -------------------------------------------------------------------------------- 1 | TOOLS_CODE = """ 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | from scipy import stats 7 | import os, sys 8 | import re 9 | from datetime import datetime 10 | from sympy import symbols, Eq, solve 11 | import torch 12 | import requests 13 | from bs4 import BeautifulSoup 14 | import json 15 | import math 16 | import yfinance 17 | import time 18 | """ 19 | 20 | write_denial_function = 'lambda *args, **kwargs: (_ for _ in ()).throw(PermissionError("Writing to disk operation is not permitted due to safety reasons. 
Please do not try again!"))' 21 | read_denial_function = 'lambda *args, **kwargs: (_ for _ in ()).throw(PermissionError("Reading from disk operation is not permitted due to safety reasons. Please do not try again!"))' 22 | class_denial = """class Denial: 23 | def __getattr__(self, name): 24 | def method(*args, **kwargs): 25 | return "Using this class is not permitted due to safety reasons. Please do not try again!" 26 | return method 27 | """ 28 | 29 | GUARD_CODE = f""" 30 | import os 31 | 32 | os.kill = {write_denial_function} 33 | os.system = {write_denial_function} 34 | os.putenv = {write_denial_function} 35 | os.remove = {write_denial_function} 36 | os.removedirs = {write_denial_function} 37 | os.rmdir = {write_denial_function} 38 | os.fchdir = {write_denial_function} 39 | os.setuid = {write_denial_function} 40 | os.fork = {write_denial_function} 41 | os.forkpty = {write_denial_function} 42 | os.killpg = {write_denial_function} 43 | os.rename = {write_denial_function} 44 | os.renames = {write_denial_function} 45 | os.truncate = {write_denial_function} 46 | os.replace = {write_denial_function} 47 | os.unlink = {write_denial_function} 48 | os.fchmod = {write_denial_function} 49 | os.fchown = {write_denial_function} 50 | os.chmod = {write_denial_function} 51 | os.chown = {write_denial_function} 52 | os.chroot = {write_denial_function} 53 | os.fchdir = {write_denial_function} 54 | os.lchflags = {write_denial_function} 55 | os.lchmod = {write_denial_function} 56 | os.lchown = {write_denial_function} 57 | os.getcwd = {write_denial_function} 58 | os.chdir = {write_denial_function} 59 | os.popen = {write_denial_function} 60 | 61 | import shutil 62 | 63 | shutil.rmtree = {write_denial_function} 64 | shutil.move = {write_denial_function} 65 | shutil.chown = {write_denial_function} 66 | 67 | import subprocess 68 | 69 | subprocess.Popen = {write_denial_function} # type: ignore 70 | 71 | import sys 72 | 73 | sys.modules["ipdb"] = {write_denial_function} 74 | sys.modules["joblib"] = {write_denial_function} 75 | sys.modules["resource"] = {write_denial_function} 76 | sys.modules["psutil"] = {write_denial_function} 77 | sys.modules["tkinter"] = {write_denial_function} 78 | """ 79 | 80 | CODE_INTERPRETER_SYSTEM_PROMPT = """You are an AI code interpreter. 81 | Your goal is to help users do a variety of jobs by executing Python code. 82 | 83 | You should: 84 | 1. Comprehend the user's requirements carefully & to the letter. 85 | 2. Give a brief description of what you plan to do & call the provided function to run code. 86 | 3. Provide results analysis based on the execution output. 87 | 4. If an error occurs, try to fix it. 88 | 5. Respond in the same language as the user.""" 89 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.25.0 2 | torchneuromorphic 3 | h5py==3.11.0 4 | scipy>=1.0 5 | tensorboardX==2.6.2.2 6 | datasets==2.19.1 7 | transformers==4.40.2 8 | torch==2.0.1 9 | accelerate==0.30.0 10 | gradio==3.48.0 11 | jupyter_client==8.3.0 --------------------------------------------------------------------------------
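A minimal usage sketch for the sandboxes above, assuming Docker is installed (the image tag autocoder-sandbox-cpp is illustrative and not defined anywhere in the repo): each sandbox directory pairs a Dockerfile with a compile_run.sh entrypoint, so a language sandbox can be built and exercised roughly as follows.

# Build the C++ sandbox image from its directory (Dockerfile copies the directory into /app)
docker build -f Web_demo/sandbox/cpp/Dockerfile.cpp -t autocoder-sandbox-cpp Web_demo/sandbox/cpp
# Run it; the image's CMD invokes /app/compile_run.sh, which compiles and executes the script
docker run --rm autocoder-sandbox-cpp

The same pattern applies to the fortran, java, and rust sandboxes: the CMD in each Dockerfile invokes /app/compile_run.sh, which compiles the copied script (script.cpp, script.f90, script.java, or script.rs) and runs the resulting test binary.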