├── README.md
├── api_keys.json
├── configs
│   ├── jun_test_2.yaml
│   ├── llm
│   │   ├── deepseek_llama_70b.yaml
│   │   ├── gpt4-greedy.yaml
│   │   ├── gpt4-nucleus.yaml
│   │   ├── llama3.1_70b-greedy.yaml
│   │   ├── llama3.1_70b-nucleus.yaml
│   │   ├── llama3.1_8b-greedy.yaml
│   │   ├── llama3_70b-greedy.yaml
│   │   ├── llama3_70b-nucleus.yaml
│   │   ├── llama3_8b-greedy.yaml
│   │   ├── llama3_8b-nucleus.yaml
│   │   ├── qwen2.5-greedy.yaml
│   │   ├── qwen2.5-nucleus.yaml
│   │   └── qwq_32b.yaml
│   └── source
│       ├── HumanEval-NFR.yaml
│       ├── HumanEval-like.yaml
│       ├── HumanEval.yaml
│       ├── LivecodeBench-like.yaml
│       └── LivecodeBench.yaml
├── overview_autocode.png
├── requirements.txt
├── templates
│   ├── eval
│   │   └── exec_code_he.txt
│   └── prompt
│       ├── DP
│       │   ├── ai.txt
│       │   └── human.txt
│       └── cot
│           ├── ai.txt
│           └── human.txt
└── third_party
    └── CodeExecContainer
        ├── .gitignore
        ├── Dockerfile
        ├── README.md
        ├── app.py
        ├── requirements.txt
        └── run.sh

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Autocode

Research on technology that automatically generates high-quality source code from requirements written in natural language, execution examples, or partially written source code.

> - Automatic source code generation technology that combines new and existing techniques from machine learning (language models), program synthesis, and software engineering.

![image](./overview_autocode.png)

## Overview

This repository contains tools and frameworks for code generation and evaluation using language models. The project leverages LangGraph and various language model APIs to provide a robust environment for code generation tasks.

## Features

- Code generation capabilities using language models
- Evaluation framework for generated code
- Integration with multiple language model APIs
- Configurable generation and evaluation pipelines
- Support for various programming languages and frameworks

## Prerequisites

- Python 3.10 or higher
- CUDA-compatible GPU (recommended for optimal performance)
- Virtual environment (recommended)

## Installation

1. Create and activate a virtual environment:
```bash
virtualenv venv --python=3.10
source venv/bin/activate
```

2. Install dependencies:
```bash
pip install -r requirements.txt
pip install -e third_party/etri_langgraph
```
## Configuration

1. Create an `api_keys.json` file in the root directory with the following structure:
```json
{
  "OPEN_WEBUI_BASE_URL": "your-model-url",
  "OPENAI_API_KEY": "your-api-key",
  "CODEEXEC_ENDPOINT": "http://localhost:5097/execute"
}
```

## Usage

The project provides two main functionalities:

### Code Generation

```bash
python3 run.py generator \
  --config_path=configs/jun_test_2.yaml \
  - run \
  - merge_json \
  - exit
```

### Code Evaluation

```bash
python run.py evaluator \
  --path=results/jun_test_2/results_merged_0.json \
  --gt_key=passed \
  - run \
  --k=[1] \
  --n=10
```
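The evaluator's `--k` and `--n` flags correspond to pass@k estimation over `n` samples per task. The repository's own implementation lives in `run.py` (not shown here); as a reference point only, the standard unbiased estimator from Chen et al. (2021) can be sketched as:

```python
import math

def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k: n generated samples, c of which passed the tests."""
    if n - c < k:
        return 1.0  # every size-k subset contains at least one passing sample
    return 1.0 - math.comb(n - c, k) / math.comb(n, k)

# e.g. 10 samples per task, 3 passing: pass@1 = 0.3
print(pass_at_k(n=10, c=3, k=1))
```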
## Project Structure

```
autocode/
├── configs/          # Configuration files
├── src/              # Source code
├── templates/        # Template files
├── third_party/      # Third-party dependencies
├── api_keys.json     # API key configuration
├── run.py            # Main entry point
└── requirements.txt  # Python dependencies
```

## Development

- Configuration files are stored in `configs/`
- Templates for code generation are in `templates/`

--------------------------------------------------------------------------------
/api_keys.json:
--------------------------------------------------------------------------------
{
  "OPEN_WEBUI_BASE_URL": "your_model_path",
  "OPENAI_API_KEY": "sk-your-key",
  "CODEEXEC_ENDPOINT": "http://localhost:5097/execute"
}

--------------------------------------------------------------------------------
/configs/jun_test_2.yaml:
--------------------------------------------------------------------------------
source:
  - name: HumanEval
    type: huggingface
    kwargs:
      path: openai/openai_humaneval
      sort_key: task_id
      split: test

dataset:
  - name: target
    type: dict
    kwargs:
      primary_key: id
      fields:
        - name: id
          source: HumanEval
          key: task_id
        - name: entry_point
          source: HumanEval
          key: entry_point
        - name: prompt
          source: HumanEval
          key: prompt
        - name: gold_tc
          source: HumanEval
          key: test

graph:
  entry_point: gen_code

  edges:
    - pair: [gen_code, parse_code]
      type: always
    - pair: [parse_code, gold_tc_exec_code]
      type: always
    - pair: [gold_tc_exec_code, gold_tc_exec_result]
      type: always
    - pair: [gold_tc_exec_result, passed]
      type: always
    - pair: [passed, __end__]
      type: always

  nodes:
    - name: gen_code
      type: llm
      input_keys: [prompt]
      kwargs:
        n: 1
        output_key: llm_jun_out
        llm: !inc configs/llm/llama3_8b-greedy.yaml
        prompt:
          type: chat
          kwargs:
            body_template_paths: ["templates/prompt/DP"]
    - name: parse_code
      type: parser
      input_keys: [llm_jun_out]
      kwargs:
        output_key: code
        type: code_block
    - name: gold_tc_exec_code
      type: apply_template
      input_keys: [code, gold_tc, entry_point]
      key_map: { code: code, gold_tc: testcase, entry_point: entry_point }
      kwargs:
        output_key: gold_tc_exec_code
        template_path: templates/eval/exec_code_he.txt
    - name: gold_tc_exec_result
      type: execute
      input_keys: [gold_tc_exec_code]
      kwargs:
        output_key: gold_tc_exec_result
        type: code_block
    - name: passed
      dependencies: [gold_tc_exec_result]
      input_keys: [gold_tc_exec_result]
      type: custom_lambda
      kwargs:
        src: [gold_tc_exec_result]
        func: "lambda x: 'Exit Code: 0' in x"
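Taken together, the last three nodes render the evaluation template, run it in the execution container, and test the log for a clean exit. A minimal standalone sketch of that check (assuming the CodeExecContainer service from `third_party/` is running at the endpoint configured in `api_keys.json`; `run_gold_tests` is an illustrative name, not a function in this repository):

```python
import requests

def run_gold_tests(code: str, gold_tc: str, entry_point: str,
                   endpoint: str = "http://localhost:5097/execute") -> bool:
    # Mirrors templates/eval/exec_code_he.txt: solution, then tests, then check(entry_point).
    script = f"{code}\n\n{gold_tc}\n\ncheck({entry_point})"
    output = requests.post(endpoint, json={"code": script, "lang": "python"}).json().get("output", "")
    # Same predicate as the custom_lambda node: the container echoes "Exit Code: $?".
    return "Exit Code: 0" in output
```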
--------------------------------------------------------------------------------
/configs/llm/deepseek_llama_70b.yaml:
--------------------------------------------------------------------------------
max_tokens: 16384
model: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
platform: vllm
temperature: 0.6
top_p: 0.95

--------------------------------------------------------------------------------
/configs/llm/gpt4-greedy.yaml:
--------------------------------------------------------------------------------
max_tokens: 4096
model: gpt-4o-2024-11-20
platform: openai
temperature: 0
top_p: 1

--------------------------------------------------------------------------------
/configs/llm/gpt4-nucleus.yaml:
--------------------------------------------------------------------------------
max_tokens: 4096
model: gpt-4o-2024-11-20
platform: openai
temperature: 0.8
top_p: 0.95

--------------------------------------------------------------------------------
/configs/llm/llama3.1_70b-greedy.yaml:
--------------------------------------------------------------------------------
max_tokens: 2048
model: llama3.1:70b-instruct-fp16
platform: ollama
temperature: 0
top_p: 1

--------------------------------------------------------------------------------
/configs/llm/llama3.1_70b-nucleus.yaml:
--------------------------------------------------------------------------------
max_tokens: 2048
model: llama3.1:70b-instruct-fp16
platform: ollama
temperature: 0.8
top_p: 0.95

--------------------------------------------------------------------------------
/configs/llm/llama3.1_8b-greedy.yaml:
--------------------------------------------------------------------------------
max_tokens: 16384
model: meta-llama/Llama-3.1-8B-Instruct
platform: vllm
temperature: 0
top_p: 1

--------------------------------------------------------------------------------
/configs/llm/llama3_70b-greedy.yaml:
--------------------------------------------------------------------------------
max_tokens: 16384
model: meta-llama/Llama-3.1-70B-Instruct
platform: vllm
temperature: 0
top_p: 1

--------------------------------------------------------------------------------
/configs/llm/llama3_70b-nucleus.yaml:
--------------------------------------------------------------------------------
max_tokens: 16384
model: meta-llama/Llama-3.1-70B-Instruct
platform: vllm
temperature: 0.8
top_p: 0.95

--------------------------------------------------------------------------------
/configs/llm/llama3_8b-greedy.yaml:
--------------------------------------------------------------------------------
max_tokens: 4096
model: meta-llama/Meta-Llama-3-8B-Instruct
platform: vllm
temperature: 0
top_p: 1

--------------------------------------------------------------------------------
/configs/llm/llama3_8b-nucleus.yaml:
--------------------------------------------------------------------------------
max_tokens: 16384
model: meta-llama/Llama-3.1-8B-Instruct
platform: vllm
temperature: 0.8
top_p: 0.95

--------------------------------------------------------------------------------
/configs/llm/qwen2.5-greedy.yaml:
--------------------------------------------------------------------------------
max_tokens: 16384
model: Qwen/Qwen2.5-Coder-32B-Instruct
platform: vllm
temperature: 0
top_p: 1

--------------------------------------------------------------------------------
/configs/llm/qwen2.5-nucleus.yaml:
--------------------------------------------------------------------------------
max_tokens: 16384
model: Qwen/Qwen2.5-Coder-32B-Instruct
platform: vllm
temperature: 0.8
top_p: 0.95

--------------------------------------------------------------------------------
/configs/llm/qwq_32b.yaml:
--------------------------------------------------------------------------------
max_tokens: 16000
model: Qwen/QwQ-32B
platform: vllm
temperature: 0.6
top_p: 0.95
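Each `-greedy`/`-nucleus` pair differs only in sampling parameters: greedy decoding pins `temperature: 0`, `top_p: 1`, while nucleus sampling uses `temperature: 0.8`, `top_p: 0.95`. How the framework consumes these files is internal to `etri_langgraph`, but as a rough standalone illustration, a `platform: vllm` config maps onto an OpenAI-compatible request like this (the `base_url` and `api_key` are placeholder assumptions for a locally served vLLM instance):

```python
import yaml
from openai import OpenAI

with open("configs/llm/qwen2.5-nucleus.yaml") as f:
    cfg = yaml.safe_load(f)

# vLLM exposes an OpenAI-compatible server; point base_url at it.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model=cfg["model"],
    max_tokens=cfg["max_tokens"],
    temperature=cfg["temperature"],
    top_p=cfg["top_p"],
    messages=[{"role": "user", "content": "Write a Python hello world."}],
)
print(resp.choices[0].message.content)
```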
--------------------------------------------------------------------------------
/configs/source/HumanEval-NFR.yaml:
--------------------------------------------------------------------------------
name: HumanEval-NFR
type: json
kwargs:
  path: third_party/HumanEval_NFR/humaneval_nfr.json
  sort_key: id

--------------------------------------------------------------------------------
/configs/source/HumanEval-like.yaml:
--------------------------------------------------------------------------------
name: HumanEval-like
type: json
kwargs:
  path: data/humaneval_like.json
  sort_key: id

--------------------------------------------------------------------------------
/configs/source/HumanEval.yaml:
--------------------------------------------------------------------------------
name: HumanEval
type: huggingface
kwargs:
  path: openai/openai_humaneval
  sort_key: task_id
  split: test

--------------------------------------------------------------------------------
/configs/source/LivecodeBench-like.yaml:
--------------------------------------------------------------------------------
name: LivecodeBench-like
type: json
kwargs:
  path: data/livecodebench_like.json
  sort_key: id

--------------------------------------------------------------------------------
/configs/source/LivecodeBench.yaml:
--------------------------------------------------------------------------------
name: LivecodeBench
type: huggingface
kwargs:
  path: livecodebench/code_generation
  sort_key: question_id
  split: test
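A `huggingface`-type source such as `HumanEval.yaml` corresponds to a plain `datasets` load; a minimal equivalent, assuming the framework's loader does roughly the following with `path`, `split`, and `sort_key`:

```python
from datasets import load_dataset

# configs/source/HumanEval.yaml: HF path openai/openai_humaneval, test split, sorted by task_id.
ds = load_dataset("openai/openai_humaneval", split="test").sort("task_id")
print(ds[0]["task_id"], "->", ds[0]["entry_point"])
```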
--------------------------------------------------------------------------------
/overview_autocode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auto-code-etri/autocode/d73ebb9a75c546ef6f01d6010443297da68e6396/overview_autocode.png

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aiohttp==3.9.5
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.4.0
asgiref==3.8.1
asttokens
astunparse==1.6.3
async-timeout==4.0.3
attrs==23.2.0
autoregistry==1.1.2
backoff==2.2.1
bcrypt==4.1.3
beautifulsoup4==4.11.2
blinker==1.8.2
bs4==0.0.2
build==1.2.1
cachetools==5.3.3
certifi==2024.2.2
chardet==5.2.0
charset-normalizer==3.3.2
chroma-hnswlib==0.7.3
chromadb==0.5.3
click==8.1.7
colorama==0.4.6
coloredlogs==15.0.1
comm
contourpy==1.2.1
cycler==0.12.1
dataclasses-json==0.6.6
datasets==2.19.1
debugpy
docker==7.1.0
decorator
Deprecated==1.2.14
dill==0.3.8
distro==1.9.0
dnspython==2.6.1
docker-pycreds==0.4.0
elastic-transport==8.13.1
elasticsearch==8.14.0
email_validator==2.2.0
et-xmlfile==1.1.0
evaluate==0.4.2
exceptiongroup==1.2.1
executing
fastapi==0.111.0
fastapi-cli==0.0.4
filelock==3.14.0
fire==0.6.0
Flask==3.0.3
flatbuffers==24.3.25
fonttools==4.52.4
frozenlist==1.4.1
fsspec==2024.3.1
func_timeout==4.3.5
gensim==4.2.0
gitdb==4.0.11
GitPython==3.1.43
google-auth==2.30.0
googleapis-common-protos==1.63.2
greenlet==3.0.3
grpcio==1.64.1
h11==0.14.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.23.2
humanfriendly==10.0
idna==3.7
importlib_metadata==7.1.0
importlib_resources==6.4.0
iniconfig==2.0.0
ipykernel
ipython
itsdangerous==2.2.0
jedi
Jinja2==3.1.4
jiter==0.5.0
joblib==1.4.2
json5==0.9.25
jsonlines==3.1.0
jsonpatch==1.33
jsonpointer==2.4
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter_client
jupyter_core
kiwisolver==1.4.5
kubernetes==30.1.0
langchain==0.2.5
langchain-chroma==0.1.2
langchain-community==0.2.5
langchain-core==0.2.9
langchain-experimental==0.0.59
langchain-openai==0.1.7
langchain-text-splitters==0.2.0
langfuse==2.39.1
langgraph==0.0.61
langsmith==0.1.81
Levenshtein==0.20.9
litellm==1.47.1
lxml==5.2.2
mando==0.7.1
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.21.2
matplotlib==3.9.0
matplotlib-inline
mdurl==0.1.2
mmh3==4.1.0
monotonic==1.6
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
mypy-extensions==1.0.0
nest_asyncio
networkx==3.3
nltk==3.8.1
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.5.40
nvidia-nvtx-cu12==12.1.105
oauthlib==3.2.2
onnxruntime==1.18.1
openai==1.47.0
openpyxl==3.0.10
opentelemetry-api==1.25.0
opentelemetry-exporter-otlp-proto-common==1.25.0
opentelemetry-exporter-otlp-proto-grpc==1.25.0
opentelemetry-instrumentation==0.46b0
opentelemetry-instrumentation-asgi==0.46b0
opentelemetry-instrumentation-fastapi==0.46b0
opentelemetry-proto==1.25.0
opentelemetry-sdk==1.25.0
opentelemetry-semantic-conventions==0.46b0
opentelemetry-util-http==0.46b0
orjson==3.10.3
overrides==7.7.0
packaging==23.2
pandas==1.4.4
parso
pexpect
pickleshare
Pillow==9.1.1
platformdirs==4.2.2
pluggy==1.5.0
posthog==3.5.0
prompt_toolkit
protobuf==4.25.3
psutil==5.9.8
psycopg2==2.9.9
psycopg2-binary==2.9.9
ptyprocess
pure_eval
pyarrow==16.1.0
pyarrow-hotfix==0.6
pyasn1==0.6.0
pyasn1_modules==0.4.0
pydantic==2.7.1
pydantic_core==2.18.2
Pygments
pyparsing==3.1.2
PyPDF2==3.0.1
PyPika==0.48.9
pyproject_hooks==1.1.0
pytest==8.2.2
python-dateutil
python-docx==1.1.2
python-dotenv==1.0.1
python-multipart==0.0.9
pytz==2024.1
PyYAML==6.0.1
pyyaml-include==2.1
pyzmq
radon==6.0.1
rank-bm25==0.2.2
rapidfuzz==2.15.2
referencing==0.35.1
regex==2024.5.15
reportlab==4.0.9
requests==2.32.2
requests-oauthlib==2.0.0
rich==13.7.1
rpds-py==0.20.0
rsa==4.9
scikit-learn==1.5.0
scipy
sentry-sdk==2.3.1
setproctitle==1.3.3
shellingham==1.5.4
simplejson==3.19.3
six
smart-open==7.0.4
smmap==5.0.1
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.30
sqlparse==0.5.1
stack-data
starlette==0.37.2
sympy==1.12.1
tenacity==8.3.0
termcolor==2.4.0
text-generation==0.7.0
threadpoolctl==3.5.0
tiktoken==0.7.0
tinydb==4.8.0
tokenizers==0.20.0
tomli==2.0.1
torch==2.3.0
tornado
tqdm==4.65.2
traitlets
transformers==4.46.2
triton==2.3.0
typer==0.12.3
typing-inspect==0.9.0
typing_extensions==4.12.0
tzdata==2024.1
ujson==5.10.0
urllib3==2.2.1
uvicorn==0.30.1
uvloop==0.19.0
wandb==0.17.2
watchfiles==0.22.0
wcwidth
websocket-client==1.8.0
websockets==12.0
Werkzeug==3.0.4
wrapt==1.16.0
xxhash==3.4.1
yarl==1.9.4
zipp==3.19.2
gunicorn
virtualenv

--------------------------------------------------------------------------------
/templates/eval/exec_code_he.txt:
--------------------------------------------------------------------------------
{{ code }}

{{ testcase }}

check({{ entry_point }})

--------------------------------------------------------------------------------
/templates/prompt/DP/ai.txt:
--------------------------------------------------------------------------------
```python
{% set lines = code.splitlines() -%}
{% for line in lines %}
{{ line }}
{%- endfor%}
```

--------------------------------------------------------------------------------
/templates/prompt/DP/human.txt:
--------------------------------------------------------------------------------
```python
{{ prompt }}
```

Write a python code for the problem. Package your code in ```python ... ```.

--------------------------------------------------------------------------------
/templates/prompt/cot/ai.txt:
--------------------------------------------------------------------------------
```python
{% set lines = code.splitlines() -%}
{% for line in lines %}
{{ line }}
{%- endfor%}
```

--------------------------------------------------------------------------------
/templates/prompt/cot/human.txt:
--------------------------------------------------------------------------------
```python
{{ cot }}
```

Let's think step-by-step and write python code to solve the problem. Package your response in ```python ... ```
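These prompt files are Jinja templates: `human.txt` carries the task, and `ai.txt` re-emits parsed code for chat-style turns. As a hedged illustration of how one might be rendered outside the framework (the example `prompt` value is made up):

```python
from jinja2 import Template

# Render templates/prompt/DP/human.txt with a HumanEval-style prompt field.
with open("templates/prompt/DP/human.txt") as f:
    human = Template(f.read())
print(human.render(prompt='def add(a: int, b: int) -> int:\n    """Return a + b."""'))
```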
--------------------------------------------------------------------------------
/third_party/CodeExecContainer/.gitignore:
--------------------------------------------------------------------------------
__pycache__

--------------------------------------------------------------------------------
/third_party/CodeExecContainer/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.9-slim

# Install gcc and the libraries needed to build dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    libpng-dev \
    libfreetype6-dev \
    && rm -rf /var/lib/apt/lists/*

# Set the working directory
WORKDIR /app

# Copy the requirements.txt file
COPY requirements.txt /app/

# Install the required libraries
RUN pip install -r requirements.txt

--------------------------------------------------------------------------------
/third_party/CodeExecContainer/README.md:
--------------------------------------------------------------------------------
# CodeExecContainer

This project aims to create a containerized environment for executing code snippets. It provides a secure and isolated environment for running code, making it suitable for online coding platforms, code editors, and other similar applications.

## Features

- Containerized execution environment
- Support for multiple programming languages
- Secure sandboxing to prevent unauthorized access
- Resource limitations to prevent abuse
- Easy integration with existing applications

## Installation

To install and run the CodeExecContainer, follow these steps:

1. Clone the repository:

   ```shell
   git clone https://github.com/your-username/CodeExecContainer.git
   ```

2. Navigate to the project directory:

   ```shell
   cd CodeExecContainer
   ```

3. Install the dependencies:

   ```shell
   pip install -r requirements.txt
   ```

4. Start the application (it listens on port 5097):

   ```shell
   bash run.sh
   ```

## Usage

To use the CodeExecContainer, follow these steps:

1. Make a POST request to the `/execute` endpoint with the following parameters:

   - `code`: The code snippet to be executed.
   - `lang`: The programming language of the code snippet.
   - `version`: The version of the programming language (optional).
   - `stdin`: The standard input for the code snippet (optional).
   - `timeout`: The maximum execution time for the code snippet in seconds (optional).
   - `mem_limit`: The maximum memory available to the code snippet, e.g. `"128m"` (optional).
   - `cpu_limit`: The number of CPU cores available to the code snippet (optional; requires cgroup support).

   Example using cURL:

   ```shell
   curl -X POST -H "Content-Type: application/json" \
     -d '{"code": "print(\"Hello, World!\")", "lang": "python"}' \
     http://localhost:5097/execute
   ```

2. The response will contain the output of the executed code.

   Example response:

   ```json
   {
     "output": "Hello, World!\n"
   }
   ```

3. If an error occurs during execution, the response will contain an error message.

   Example error response:

   ```json
   {
     "error": "Execution failed: TimeoutError: Execution timed out"
   }
   ```
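The same call from Python, using only the fields documented above (a sketch assuming the service is running locally):

```python
import requests

resp = requests.post(
    "http://localhost:5097/execute",
    json={"code": 'print("Hello, World!")', "lang": "python", "timeout": 5},
    timeout=30,
)
body = resp.json()
print(body.get("output") or body.get("error"))
```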
## Contributing

Contributions are welcome! If you have any ideas, suggestions, or bug reports, please open an issue or submit a pull request.

## License

This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.

--------------------------------------------------------------------------------
/third_party/CodeExecContainer/app.py:
--------------------------------------------------------------------------------
import os
import uuid

import docker
from flask import Flask, request

app = Flask(__name__)
client = docker.from_env()
logger = app.logger


def _execute(
    code: str,
    lang: str = "python",
    stdin: str = "",
    version: str = None,
    mem_limit: str = "128m",
    cpu_limit: int = 1,  # needs cgroup support
    timeout: int = 3,
    trace: bool = False,
    **kwargs,
):
    """Run a code snippet in a throwaway Docker container and return its logs."""
    code_id = uuid.uuid4()
    if lang == "python":
        image = "python:3.9-slim-test"
        ext = "py"
        if trace:
            command = f"/bin/sh -c \"timeout {timeout}s /bin/sh -c 'python3 -u -m trace --trace /code.{ext} < /stdin.in; echo Exit Code: $?;' || echo 'Failed'\""
        else:
            command = f"/bin/sh -c \"timeout {timeout}s /bin/sh -c 'python3 -u /code.{ext} < /stdin.in; echo Exit Code: $?;' || echo 'Failed'\""
    elif lang == "c":
        raise NotImplementedError("C is not supported yet")
    elif lang == "cpp":
        raise NotImplementedError("C++ is not supported yet")
    elif lang == "java":
        raise NotImplementedError("Java is not supported yet")
    else:
        raise ValueError("Invalid language")

    try:
        client.images.get(image)
    except docker.errors.ImageNotFound:
        client.images.pull(image)

    # save code to a tmp file
    code_file = f"/tmp/{code_id}.{ext}"
    with open(code_file, "w") as f:
        f.write(code)

    # save stdin to a tmp file
    stdin_file = f"/tmp/{code_id}.in"
    with open(stdin_file, "w") as f:
        f.write(stdin)

    container_name = f"CodeExecContainer_{code_id}"
    try:
        # worker_id = os.getenv("GUNICORN_WORKER_ID", "0")
        # cpu_start = int(worker_id) * cpu_limit
        # cpu_end = cpu_start + cpu_limit - 1
        # cpuset_cpus = f"{cpu_start}-{cpu_end}" if cpu_limit > 1 else f"{cpu_start}"

        container = client.containers.run(
            image,
            command,
            name=container_name,
            detach=True,
            stderr=True,
            stdout=True,
            tty=True,
            # cpuset_cpus=cpuset_cpus,  # needs cgroup support
            mem_limit=mem_limit,
            volumes={
                code_file: {"bind": f"/code.{ext}", "mode": "ro"},
                stdin_file: {"bind": "/stdin.in", "mode": "ro"},
            },
            environment={"PYTHONUNBUFFERED": "1"},
        )
        container.wait()
        response = container.logs().decode("utf-8")
        container.remove()
        return response
    except Exception:
        # Covers docker.errors.ContainerError and any other runtime failure.
        return "Failed"
    finally:
        os.remove(code_file)
        os.remove(stdin_file)


@app.route("/execute", methods=["POST"])
def execute():
    try:
        logger.info(f"Request: {request.json}")
        response = _execute(**request.json)
        logger.info(f"Response: {response}")
        return {"output": response}
    except ValueError as e:
        return {"error": str(e)}, 400
    except Exception as e:
        return {"error": str(e)}, 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5097)
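Note that `_execute` also honors a `trace` flag, rerunning the snippet under `python3 -m trace --trace`, which the README above does not document. A hedged example combining it with `stdin` (assuming the service is running locally):

```python
import requests

payload = {
    "code": "name = input()\nprint('hello', name)",
    "lang": "python",
    "stdin": "autocode\n",  # mounted into the container as /stdin.in
    "trace": True,          # line-by-line execution trace in the output
}
print(requests.post("http://localhost:5097/execute", json=payload, timeout=30).json()["output"])
```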
--------------------------------------------------------------------------------
/third_party/CodeExecContainer/requirements.txt:
--------------------------------------------------------------------------------
blake3
beautifulsoup4
chardet
cryptography
django
docx
faker
flask
flask_mail
folium
geopandas
geopy
gensim
holidays
hypothesis
keras
Levenshtein
lxml
matplotlib
mechanize
natsort
nltk
opencv-python
openpyxl
pandas
prettytable
psutil
pyfakefs
pyquery
pytesseract
pyOpenSSL
PyYAML
requests
requests_mock
rsa
scikit-learn
scipy
sendgrid
scikit-image
soundfile
statsmodels
sympy
tensorflow
textblob
texttable
wikipedia
wordcloud
wordninja
xmltodict
xlwt

--------------------------------------------------------------------------------
/third_party/CodeExecContainer/run.sh:
--------------------------------------------------------------------------------
gunicorn -w 36 --bind 0.0.0.0:5097 --timeout 6000 'app:app'
--------------------------------------------------------------------------------