├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── eval_scripts ├── api_comm.py ├── eval_passk.py ├── exec_outcome.py ├── limits_by_lang.yaml ├── prepare_samples_from_task_data.py ├── prepare_unittest.py ├── print_verdict_by_lang.py ├── requirements.txt └── resource_limit.py └── execution_engine ├── __init__.py ├── app.py ├── code_store.py ├── config.py ├── config.yaml ├── exec_outcome.py ├── execution_engine.py ├── gunicorn.conf.py ├── helper.py ├── job.py ├── limits_by_lang.yaml ├── prlimit.py ├── resource_limit.py ├── runtime.py ├── seccomp_filter.py ├── settings.py ├── start_engine.sh ├── test_codes ├── test.c ├── test.cpp ├── test.go ├── test.java ├── test.js ├── test.kt ├── test.php ├── test.py ├── test.rb └── test.rs ├── unittest.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | temp/ 4 | *.out -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:jammy 2 | WORKDIR /root 3 | 4 | ENV DEBIAN_FRONTEND noninteractive 5 | ENV TZ Etc/UTC 6 | 7 | RUN apt-get update && \ 8 | apt-get install -y \ 9 | gnupg ca-certificates apt-utils \ 10 | build-essential curl unzip && \ 11 | apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF 12 | RUN echo "deb https://download.mono-project.com/repo/ubuntu stable-focal main" | tee /etc/apt/sources.list.d/mono-official-stable.list 13 | RUN apt-get update && \ 14 | apt-get install -y tzdata 15 | 16 | RUN apt-get install -y mono-devel \ 17 | gcc-12 g++-12 18 | 19 | RUN apt-get update && \ 20 | apt-get install -y software-properties-common python3.11-dev 21 | 22 | RUN apt-get update && \ 23 | apt-get install -y -f libasound2 libc6-i386 libc6-x32 libxi6 libxtst6 24 | 25 | ENV JAVA_PKG=https://download.oracle.com/java/21/latest/jdk-21_linux-x64_bin.tar.gz 
\ 26 | JAVA_HOME=/usr/java/jdk-21 27 | 28 | RUN set -eux; \ 29 | JAVA_SHA256=$(curl "$JAVA_PKG".sha256) ; \ 30 | curl --output /tmp/jdk.tgz "$JAVA_PKG" && \ 31 | echo "$JAVA_SHA256 */tmp/jdk.tgz" | sha256sum -c; \ 32 | mkdir -p "$JAVA_HOME"; \ 33 | tar --extract --file /tmp/jdk.tgz --directory "$JAVA_HOME" --strip-components 1 34 | 35 | RUN curl -OL https://github.com/JetBrains/kotlin/releases/download/v1.7.20/kotlin-compiler-1.7.20.zip 36 | RUN unzip kotlin-compiler-1.7.20.zip -d /usr/local && \ 37 | rm kotlin-compiler-1.7.20.zip 38 | 39 | RUN apt-get install -y python2 40 | 41 | RUN curl -OL https://go.dev/dl/go1.19.2.linux-amd64.tar.gz 42 | RUN tar -C /usr/local -xzf go1.19.2.linux-amd64.tar.gz && \ 43 | rm go1.19.2.linux-amd64.tar.gz 44 | 45 | RUN curl -OL https://downloads.python.org/pypy/pypy3.9-v7.3.9-linux64.tar.bz2 46 | RUN tar -C /usr/local -xf pypy3.9-v7.3.9-linux64.tar.bz2 && \ 47 | rm pypy3.9-v7.3.9-linux64.tar.bz2 48 | 49 | RUN curl -OL https://downloads.python.org/pypy/pypy2.7-v7.3.9-linux64.tar.bz2 50 | RUN tar -C /usr/local -xf pypy2.7-v7.3.9-linux64.tar.bz2 && \ 51 | rm pypy2.7-v7.3.9-linux64.tar.bz2 52 | 53 | RUN apt-get install -y clang-14 54 | 55 | ARG NODE_VERSION=v16.17.1 56 | ARG NODE_DISTRO=linux-x64 57 | ARG NODE_ZIP=node-$NODE_VERSION-$NODE_DISTRO.tar.xz 58 | RUN curl -OL https://nodejs.org/dist/$NODE_VERSION/$NODE_ZIP 59 | RUN tar -C /usr/local -xJvf $NODE_ZIP && \ 60 | rm $NODE_ZIP 61 | 62 | ENV RUSTUP_HOME /opt/rust 63 | ENV CARGO_HOME /opt/rust 64 | RUN curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path 65 | RUN apt-get install -y ruby-full 66 | 67 | RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing php8.1 68 | RUN apt-get install -y libgmp-dev libmpfr-dev 69 | 70 | ENV PATH $PATH:/usr/local/go/bin:/usr/local/kotlinc/bin:/usr/local/node-v16.17.1-linux-x64/bin:/opt/rust/bin 71 | 72 | WORKDIR /root 73 | 74 | 75 | RUN update-alternatives --install /usr/bin/java java "$JAVA_HOME"/bin/java 100 && \ 
76 | update-alternatives --install /usr/bin/javac javac "$JAVA_HOME"/bin/javac 100 && \ 77 | update-alternatives --install /usr/bin/jar jar "$JAVA_HOME"/bin/jar 100 && \ 78 | update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \ 79 | update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \ 80 | update-alternatives --install /usr/bin/python python /usr/bin/python3.11 100 && \ 81 | update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 100 && \ 82 | update-alternatives --install /usr/bin/pypy2 pypy2 /usr/local/pypy2.7-v7.3.9-linux64/bin/pypy2 100 && \ 83 | update-alternatives --install /usr/bin/pypy3 pypy3 /usr/local/pypy3.9-v7.3.9-linux64/bin/pypy3 100 84 | 85 | RUN apt-get install -y python3-pip gperf 86 | RUN pip install flask gunicorn flask-cors gmpy2 Cython jsonlines fire 87 | 88 | RUN curl -o libseccomp.tar.gz -L https://github.com/seccomp/libseccomp/releases/download/v2.5.4/libseccomp-2.5.4.tar.gz 89 | RUN tar -xzvf libseccomp.tar.gz && cd libseccomp-2.5.4 && chmod +x configure 90 | WORKDIR /root/libseccomp-2.5.4 91 | RUN ./configure --prefix=/usr --enable-python && make 92 | RUN make install 93 | WORKDIR /root/ 94 | RUN rm -rf libseccomp* 95 | 96 | 97 | ENV RUN_UID 1586 98 | ENV RUN_GID 1586 99 | ENV NUM_WORKERS 16 100 | ENV GUNICORN_PORT 5000 101 | ENV WORKER_CFG_DB /root/worker_cfg_db.csv 102 | ENV LOG_LEVEL info 103 | 104 | # RUN groupadd -g ${RUN_GID} runner${RUN_GID} && useradd -M runner${RUN_UID} -g ${RUN_GID} -u ${RUN_UID} 105 | 106 | EXPOSE ${GUNICORN_PORT} 107 | 108 | RUN mkdir execution_engine 109 | COPY ./execution_engine /root/execution_engine 110 | 111 | WORKDIR /root/execution_engine 112 | 113 | CMD ["bash", "start_engine.sh"] 114 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 NLP Group, Nanyang Technological University 4 | 5 
| Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ExecEval 2 | 3 | A distributed, extensible, secure solution for evaluating machine generated code with unit tests in multiple programming languages. 4 | 5 | This repository is a part of our ongoing effort to build large scale execution based evaluation benchmark published as [xCodeEval: A Large Scale Multilingual Multitask Benchmark for Code Understanding, Generation, Translation and Retrieval](https://arxiv.org/abs/2303.03004). If you are using this tool, please consider citing the paper.
6 | 7 | ``` 8 | @misc{khan2023xcodeeval, 9 | title={xCodeEval: A Large Scale Multilingual Multitask Benchmark for Code Understanding, Generation, Translation and Retrieval}, 10 | author={Mohammad Abdullah Matin Khan and M Saiful Bari and Xuan Long Do and Weishi Wang and Md Rizwan Parvez and Shafiq Joty}, 11 | year={2023}, 12 | eprint={2303.03004}, 13 | archivePrefix={arXiv}, 14 | primaryClass={cs.CL} 15 | } 16 | ``` 17 | Part of this work was submitted as a requirement for the Master of Science degree in Computer Science and Applications at the Islamic University of Technology by Muhammad Abdullah Matin Khan. (The thesis or project report will be added upon publication). 18 | 19 | ``` 20 | @misc{khan2024xcodeeval, 21 | title={Development of a Code Search Engine Using Natural Language Processing Techniques}, 22 | author={Mohammad Abdullah Matin Khan}, 23 | year={2024}, 24 | publication={Journal of Engineering and Technology (JET)}, 25 | url={TBA} 26 | } 27 | ``` 28 | 29 | ## Dependencies: 30 | 31 | - [docker-ce](https://docs.docker.com/engine/install/) 32 | 33 | ## Steps (Assuming dependencies satisfied): 34 | 35 | 1. Clone this [ExecEval repository](https://github.com/ntunlp/ExecEval). 36 | 2. `cd ExecEval` 37 | 3. `docker build . -t exec-eval:1.0` 38 | 4. `docker run -it -p x:y -e NUM_WORKERS=67 exec-eval:1.0`. This will publish port `y` of the docker container (default `5000`, which can be set by the environment variable `GUNICORN_PORT`) as `http://localhost:x` on the local machine. The `NUM_WORKERS` is an environment variable representing the number of parallel execution engine workers. It is recommended not to use all CPUs: if the CPU load reaches 100%, it might affect the execution speed of the codes uncontrollably, so keep some CPUs free for the evaluation script.
A valid example assuming fewer CPUs available: `docker run -it -p 5000:5000 -e NUM_WORKERS=5 exec-eval:1.0` 39 | 40 | ### Expected outcome: 41 | 42 | An HTTP server should be running on `$PORT=y` (default `5000`) which can execute codes in parallel and return their output. 43 | 44 | ## Some helpful definitions: 45 | 46 | ### Definition of ExtendedUnittest: 47 | 48 | ```py 49 | # dataclass 50 | class ExtendedUnittest: 51 | input: str 52 | output: list[str] = field(default_factory=list) 53 | result: str | None = None 54 | exec_outcome: ExecOutcome | None = None 55 | ``` 56 | 57 | ### Definition of ExecOutcome: 58 | 59 | ```py 60 | class ExecOutcome(Enum): 61 | PASSED = "PASSED" # code executes and output matches expected output 62 | WRONG_ANSWER = "WRONG_ANSWER" # code executes and output does NOT match expected output 63 | TIME_LIMIT_EXCEEDED = "TIME_LIMIT_EXCEEDED" # code executes and didn't exit in time, output is ignored in this case 64 | RUNTIME_ERROR = "RUNTIME_ERROR" # code failed to execute (crashed) 65 | COMPILATION_ERROR = "COMPILATION_ERROR" # code failed to compile 66 | MEMORY_LIMIT_EXCEEDED = "MEMORY_LIMIT_EXCEEDED" # code exceeded memory limit during execution 67 | ``` 68 | 69 | ### Definition of ResourceLimits: 70 | 71 | For a detailed description of each attribute, see the [man page of getrlimit](https://man7.org/linux/man-pages/man2/getrlimit.2.html). 72 | 73 | ```py 74 | class ResourceLimits: 75 | core: int = 0 # RLIMIT_CORE 76 | data: int = -1 # RLIMIT_DATA 77 | # nice: int = 20 # RLIMIT_NICE 78 | fsize: int = 0 # RLIMIT_FSIZE 79 | sigpending: int = 0 # RLIMIT_SIGPENDING 80 | # memlock: int = -1 # RLIMIT_MEMLOCK 81 | rss: int = -1 # RLIMIT_RSS 82 | nofile: int = 4 # RLIMIT_NOFILE 83 | msgqueue: int = 0 # RLIMIT_MSGQUEUE 84 | rtprio: int = 0 # RLIMIT_RTPRIO 85 | stack: int = -1 # RLIMIT_STACK 86 | cpu: int = 2 # RLIMIT_CPU, CPU time, in seconds.
87 | nproc: int = 1 # RLIMIT_NPROC 88 | _as: int = 2 * 1024 ** 3 # RLIMIT_AS set to 2GB by default 89 | locks: int = 0 # RLIMIT_LOCKS 90 | # rttime: int = 2 # RLIMIT_RTTIME, Timeout for real-time tasks. 91 | ``` 92 | 93 | ## API endpoints: 94 | 95 | ### API to execute code: 96 | 97 | - End point: /api/execute_code 98 | - Method: POST 99 | - Content-type: application/json 100 | - Post request json format: 101 | 102 | ```py 103 | # json of dict of this dataclass 104 | class JobData: 105 | language: str # language of the code to be executed, usually found in sample["lang"] field 106 | source_code: str #source_code, usually found in sample["source_code"] field 107 | unittests: list[ExtendedUnittest] # unittests, usually found in unittest_db[sample["src_uid"]] field which do contain more key value pairs than input, output; so skip them 108 | compile_cmd: str | None = None # compiler program e.g. gcc, g++, clang++, go, rustc, javac 109 | compile_flags: str | None = None # flags passed during compilation e.g. "-std=c++11 -lm -static ... 110 | execute_cmd: str | None = None # executor program (mainly interpreter for interpreted languages) e.g. python2, pypy2, ruby, php 111 | execute_flags: str | None = None # flags to executor program e.g. "-o -nologo", "-W ignore 112 | limits: ResourceLimits = field(default_factory=ResourceLimits) # Resource limits 113 | block_network: bool = True # block network access for codes executed by ExecEval (True is safer) 114 | stop_on_first_fail: bool = True # stops executing a code if a unit test fails (True for faster execution) 115 | use_sanitizer: bool = False # This kept to allow some codes of xCodeEval (e.g. MS C++) to execute on linux during testing ExecEval with xCodeEval test data. 
(False should be ok) 116 | 117 | ``` 118 | 119 | - Response json format: ExtendedUnittest 120 | 121 | ### API to get list of runtimes available: 122 | 123 | - End point: /api/all_runtimes 124 | - Method: GET 125 | - Content-type: application/json 126 | - Response format: 127 | 128 | ```json 129 | [ 130 | { 131 | "compile_cmd": "gcc", // program to compile with 132 | "compile_flags": "-fno-optimize-sibling-calls -w -fno-strict-aliasing -DONLINE_JUDGE -include limits.h -fno-asm -s -O2 -DONLINE_JUDGE -include math.h -static -lm", // default compiler flags 133 | "execute_cmd": "", 134 | "execute_flags": "", 135 | "has_sanitizer": true, 136 | "is_compiled": true, 137 | "runtime_name": "GNU C", 138 | "timelimit_factor": 1 139 | }, 140 | { 141 | "compile_cmd": "python3", 142 | "compile_flags": "-W ignore -m py_compile", 143 | "execute_cmd": "python3", // program to execute with 144 | "execute_flags": "-W ignore -OO -s -S", // flags to execute with 145 | "has_sanitizer": false, // is a sanitizer implemented in execution_engine/settings.py 146 | "is_compiled": true, // true if there is a compile cmd 147 | "runtime_name": "Python 3", // name which needs to match with the language passed in api for execute code 148 | "timelimit_factor": 3 // a multiplier for time allowed to execute as some languages are slower than others 149 | } 150 | // etc. 151 | ] 152 | ``` 153 | 154 | ## Evaluation 155 | 156 | ### pass@k 157 | 158 | Check the `eval_scripts` directory. The dependencies are mentioned in `requirements.txt`. Run `pip install -r eval_scripts/requirements.txt`. The entry point is through `eval_passk.py`. Run `python eval_scripts/eval_passk.py --help` for description of arguments. 
159 | 160 | #### Example of most typical usage: 161 | 162 | ```sh 163 | python eval_scripts/eval_passk.py $path_to_samples_to_evaluate --k "1,2,5,10" --n_workers 129 --limits_by_lang_cfg_file eval_scripts/limits_by_lang.yaml --unittest_file $path_to_unittest_db_file --execeval_url "http://localhost:5000" --use_sanitizer 0 164 | 165 | ``` 166 | 167 | ## **IMPORTANT** 168 | 169 | - The pip dependencies needed to run the evaluation script are listed in `eval_scripts/requirements.txt`. 170 | - Sanitize functions are available in `execution_engine/settings.py`. 171 | - Default compiler or execution flags are available in `execution_engine/config.yaml`. 172 | - Default resource limits for all supported languages are available in `eval_scripts/limits_by_lang.yaml`. 173 | - The machine generated codes to be executed should be a list of json with the following key-value pairs present to work properly: 174 | - source_code: the code to be executed. 175 | - lang: the language/runtime to use to execute in `ExecEval`. 176 | - src_uid: the unique id to retrieve unittests from unittest_db. 177 | - task_id: a unique id assigned by machine/model trainer to represent the task they are solving. For example, **program synthesis** should have `task_id` same as `src_uid` whereas **Code translation** can have `task_id` same as the index of the test sample for which the code is generated. 178 | - Be extra careful with the files used to run the scripts. For the most part, the provided files (i.e. `unittest_db` by **xCodeEval** and the other files by **ExecEval**) should be okay; the exception is the file with machine generated codes. 179 | 180 | ## Security measures: 181 | 182 | - Use separate unprivileged user for each worker to limit access to different resources. 183 | - Use `prlimit` to limit resources allowed for the execution. 184 | - Use `seccomp` to limit socket syscalls (can be easily extended to arbitrary syscall blocker with the caveat that some syscalls are required by some languages to execute code).
185 | - Thus arbitrary resource usage is restricted. 186 | - Compilation is not so secure as execution with the assumption that the code needs to find vulnerability in the compiler to exploit this point. (This part not tested) 187 | -------------------------------------------------------------------------------- /eval_scripts/api_comm.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | import requests 4 | from exec_outcome import ExecOutcome 5 | from typing import List, Optional, Union, Tuple 6 | 7 | @dataclass 8 | class ExtendedUnittest: 9 | input: str 10 | output: List[str] = field(default_factory=list) 11 | result: Optional[str] = None 12 | exec_outcome: Optional[ExecOutcome] = None 13 | 14 | def json(self): 15 | _json = self.__dict__ 16 | if self.exec_outcome is not None: 17 | _json["exec_outcome"] = self.exec_outcome.name 18 | 19 | return _json 20 | 21 | @classmethod 22 | def from_json(cls, _json): 23 | return cls( 24 | input=_json.get("input", ""), 25 | output=_json.get("output", list()), 26 | result=_json.get("result", None), 27 | exec_outcome=_json.get("exec_outcome", None), 28 | ) 29 | 30 | 31 | class EmptyValueError(Exception): 32 | def __init__(self, *args, **kwargs): 33 | super().__init__(*args, **kwargs) 34 | 35 | 36 | class EmptyUnittestError(EmptyValueError): 37 | pass 38 | 39 | 40 | class EmptyLanguageError(EmptyValueError): 41 | pass 42 | 43 | 44 | class EmptySourceCodeError(EmptyValueError): 45 | pass 46 | 47 | 48 | class APICommunication: 49 | _session: requests.Session 50 | 51 | def __init__(self, server_url: str = "http://localhost:5000"): 52 | self._session = requests.Session() 53 | self.execute_code_url = f"{server_url}/api/execute_code" 54 | self.get_runtimes_url = f"{server_url}/api/all_runtimes" 55 | 56 | def __enter__(self): 57 | return self 58 | 59 | def __exit__(self, *args): 60 | self._session.close() 61 | 62 | def get_runtimes(self): 63 | return 
self._session.get(self.get_runtimes_url).json() 64 | 65 | def execute_code( 66 | self, 67 | language: str, 68 | source_code: str, 69 | unittests: List[dict], 70 | limits: Optional[dict] = None, 71 | block_network: bool = True, 72 | stop_on_first_fail: bool = True, 73 | use_sanitizer: bool = False, 74 | compiler_program_name: Optional[str] = None, 75 | compiler_flags: Optional[str] = None, 76 | interpreter_cmd: Optional[str] = None, 77 | interpreter_flags: Optional[str] = None, 78 | sample_id: Optional[int] = None, 79 | task_id: Union[str, int, None] = None, 80 | ) -> Tuple[List[ExtendedUnittest], Optional[int], Union[str, int, None]]: 81 | if language is None: 82 | raise EmptyLanguageError 83 | 84 | if source_code is None: 85 | raise EmptySourceCodeError 86 | 87 | if unittests is None or len(unittests) == 0: 88 | raise EmptyUnittestError 89 | 90 | request_body = dict( 91 | language=language, 92 | source_code=source_code, 93 | unittests=unittests, 94 | limits=limits, 95 | compile_cmd=compiler_program_name, 96 | compile_flags=compiler_flags, 97 | execute_cmd=interpreter_cmd, 98 | execute_flags=interpreter_flags, 99 | block_network=block_network, 100 | stop_on_first_fail=stop_on_first_fail, 101 | use_sanitizer=use_sanitizer, 102 | ) 103 | json_response = self._session.post( 104 | self.execute_code_url, 105 | json=request_body, 106 | headers={"Content-Type": "application/json"}, 107 | ).json() 108 | 109 | if "data" not in json_response: 110 | return json_response, sample_id, task_id 111 | 112 | return ( 113 | json_response["data"], 114 | sample_id, 115 | task_id, 116 | ) 117 | -------------------------------------------------------------------------------- /eval_scripts/eval_passk.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import json 3 | import os 4 | import sys 5 | from collections import Counter, defaultdict 6 | from concurrent.futures import ThreadPoolExecutor, as_completed 7 | from pathlib import 
Path 8 | 9 | import fire 10 | import jsonlines 11 | import numpy as np 12 | import tqdm 13 | 14 | sys.path.extend( 15 | [Path(__file__).parent.parent, Path(__file__).parent.parent / "execution_engine"] 16 | ) 17 | # exit(0) 18 | # sys.path.extend([ 19 | from api_comm import APICommunication 20 | from exec_outcome import ExecOutcome 21 | from yaml import safe_load 22 | 23 | 24 | def estimate_pass_at_k( 25 | num_samples: int | list[int] | np.ndarray, 26 | num_correct: list[int] | np.ndarray, 27 | k: int, 28 | ) -> np.ndarray: 29 | """ 30 | Estimates pass@k of each problem and returns them in an array. 31 | """ 32 | 33 | def estimator(n: int, c: int, k: int): 34 | """ 35 | Calculates 1 - comb(n - c, k) / comb(n, k). 36 | """ 37 | if n - c < k: 38 | return 1.0 39 | return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)) 40 | 41 | if isinstance(num_samples, int): 42 | num_samples_it = itertools.repeat(num_samples, len(num_correct)) 43 | else: 44 | assert len(num_samples) == len(num_correct) 45 | num_samples_it = iter(num_samples) 46 | 47 | return np.array( 48 | [estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)] 49 | ) 50 | 51 | 52 | def evaluate_functional_correctness( 53 | sample_file: str, 54 | k: list[int] = [1, 10, 100], 55 | n_workers: int = 4, 56 | limits_by_lang: dict = {}, 57 | compile_n_execute_args_by_lang: dict = {}, 58 | eval_result_file: str | None = None, 59 | unittest_file: str = "unittest_db.json", 60 | execeval_url: str = "http://localhost:5000", 61 | block_network: bool = True, 62 | stop_on_first_fail: bool = True, 63 | use_sanitizer: bool = False, 64 | ): 65 | """ 66 | Evaluates the functional correctness of generated samples, and writes 67 | results to f"{sample_file}_results.jsonl.gz" 68 | """ 69 | if eval_result_file is None: 70 | eval_result_file = f"{sample_file.split('.')[0]}-evaluated.jsonl" 71 | 72 | with open(unittest_file) as ut_rp: 73 | unittest_db = json.load(ut_rp) 74 | # Check the generated samples against 
test suites. 75 | 76 | with APICommunication(execeval_url) as execeval: 77 | execute_code = execeval.execute_code 78 | supported_langs = {r["runtime_name"] for r in execeval.get_runtimes()} 79 | with ThreadPoolExecutor(max_workers=n_workers) as executor: 80 | futures = [] 81 | completion_id = Counter() 82 | n_samples = 0 83 | results = defaultdict(list) 84 | with jsonlines.open(sample_file) as sample_rp: 85 | for idx, sample in tqdm.tqdm( 86 | enumerate(sample_rp), desc="Reading samples" 87 | ): 88 | src_uid = sample["src_uid"] 89 | source_code = sample["source_code"] 90 | task_id = sample["task_id"] 91 | lang = sample["lang"] 92 | if src_uid not in unittest_db: 93 | continue 94 | unittests = unittest_db[src_uid] 95 | if len(unittests) == 0: 96 | continue 97 | if lang not in supported_langs: 98 | continue 99 | 100 | args = ( 101 | lang, 102 | source_code, 103 | unittests, 104 | limits_by_lang[lang], 105 | block_network, 106 | stop_on_first_fail, 107 | use_sanitizer, 108 | compile_n_execute_args_by_lang.get(lang, {}).get("compile_cmd"), 109 | compile_n_execute_args_by_lang.get(lang, {}).get( 110 | "compile_flags" 111 | ), 112 | compile_n_execute_args_by_lang.get(lang, {}).get("execute_cmd"), 113 | compile_n_execute_args_by_lang.get(lang, {}).get( 114 | "execute_flags" 115 | ), 116 | idx, 117 | task_id, 118 | ) 119 | 120 | future = executor.submit(execute_code, *args) 121 | futures.append(future) 122 | completion_id[task_id] += 1 123 | n_samples += 1 124 | 125 | print("Running test suites...") 126 | for idx, future in tqdm.tqdm( 127 | enumerate(as_completed(futures)), 128 | desc="Test running", 129 | total=len(futures), 130 | ): 131 | result = future.result() 132 | unittests, sample_idx, task_id = result 133 | if not isinstance(unittests, list) and "error" in unittests: 134 | """ 135 | [TODO] log it 136 | """ 137 | print("ERROR: ", unittests["error"]) 138 | continue 139 | results[task_id].append((sample_idx, unittests)) 140 | print("Calculate pass@k.") 141 | total, 
correct = [], [] 142 | for result in results.values(): 143 | result.sort() 144 | passed = [ 145 | all(x["exec_outcome"] == ExecOutcome.PASSED.value for x in r[1]) 146 | for r in result 147 | ] 148 | total.append(len(passed)) 149 | correct.append(sum(passed)) 150 | total = np.array(total) 151 | correct = np.array(correct) 152 | 153 | ks = k 154 | pass_at_k = { 155 | f"pass@{k}": estimate_pass_at_k(total, correct, k).mean() 156 | for k in ks 157 | if (total >= k).all() 158 | } 159 | 160 | # Finally, save the results in one file: 161 | def combine_results(): 162 | with jsonlines.open(sample_file) as sample_rp: 163 | cnt = 0 164 | for idx, sample in enumerate(sample_rp): 165 | cnt += 1 166 | if sample["lang"] not in supported_langs: 167 | continue 168 | task_id = sample["task_id"] 169 | if len(results[task_id]) == 0: 170 | continue 171 | if results[task_id][0][0] > idx: 172 | continue 173 | result = results[task_id].pop(0) 174 | 175 | sample["unittests"] = result[1] 176 | _exec_outcomes = [ 177 | r["exec_outcome"] 178 | for r in result[1] 179 | if r["exec_outcome"] != ExecOutcome.PASSED.value 180 | ] + [ExecOutcome.PASSED.value] 181 | 182 | sample["exec_outcome"] = _exec_outcomes[0] 183 | yield sample 184 | 185 | print(f"Writing results to {eval_result_file}...") 186 | with jsonlines.open(eval_result_file, "w") as result_wp: 187 | for result in tqdm.tqdm(combine_results(), total=n_samples): 188 | result_wp.write(result) 189 | 190 | return pass_at_k 191 | 192 | 193 | def entry_point( 194 | sample_file: str, 195 | k: str | list | tuple = "1,2,5,10", 196 | n_workers: int = 4, 197 | compile_n_execute_args_by_lang_cfg_file: str | None = None, 198 | limits_by_lang_cfg_file: str | None = None, 199 | unittest_file: str = "unittest_db.json", 200 | execeval_url: str = "http://localhost:5000", 201 | block_network: bool = True, 202 | stop_on_first_fail: bool = True, 203 | use_sanitizer: bool = False, 204 | ): 205 | """ 206 | Evaluates the functional correctness of generated 
samples, and writes 207 | results to f"{sample_file}_results.jsonl.gz" 208 | """ 209 | 210 | """ 211 | [TODO] 212 | compile_n_execute_args_by_lang_cfg_file: str | None = None, 213 | limits_by_lang_cfg_file: str | None = None, 214 | 215 | assume yaml files and consider config.yaml for compile..args, 216 | and resource_limits.py for limits_by_lang 217 | """ 218 | limits_by_lang, compile_n_execute_args_by_lang = None, {} 219 | if limits_by_lang_cfg_file is None: 220 | limits_by_lang_cfg_file = "limits_by_lang.yaml" 221 | if not os.path.exists(limits_by_lang_cfg_file): 222 | print( 223 | "Need resource limit defaults for all runtimes, provide the path to default 'limits_by_lang.yaml' or to the modified one." 224 | ) 225 | exit(-1) 226 | with open(limits_by_lang_cfg_file) as limit_cfg_rp: 227 | limits_by_lang = safe_load(limit_cfg_rp) 228 | 229 | if compile_n_execute_args_by_lang_cfg_file is not None and os.path.exists( 230 | compile_n_execute_args_by_lang_cfg_file 231 | ): 232 | with open( 233 | compile_n_execute_args_by_lang_cfg_file 234 | ) as compile_n_execute_args_by_lang_rp: 235 | compile_n_execute_args_by_lang = safe_load( 236 | compile_n_execute_args_by_lang_rp 237 | ) 238 | 239 | ks = list(map(int, k.split(","))) if isinstance(k, str) else list(k) 240 | results = evaluate_functional_correctness( 241 | sample_file, 242 | ks, 243 | n_workers, 244 | block_network=block_network, 245 | limits_by_lang=limits_by_lang, 246 | compile_n_execute_args_by_lang=compile_n_execute_args_by_lang, 247 | unittest_file=unittest_file, 248 | execeval_url=execeval_url, 249 | stop_on_first_fail=stop_on_first_fail, 250 | use_sanitizer=use_sanitizer, 251 | ) 252 | 253 | print(results) 254 | 255 | 256 | def main(): 257 | fire.Fire(entry_point) 258 | 259 | 260 | sys.exit(main()) 261 | -------------------------------------------------------------------------------- /eval_scripts/exec_outcome.py: -------------------------------------------------------------------------------- 1 | from enum 
class ExecOutcome(Enum):
    """Verdict of running a piece of code against one unit test.

    Every member's value is the member's own name as a string.
    """

    # Code executed and its output matched the expected output.
    PASSED = "PASSED"
    # Code executed but its output did NOT match the expected output.
    WRONG_ANSWER = "WRONG_ANSWER"
    # Code executed but did not exit in time; its output is ignored.
    TIME_LIMIT_EXCEEDED = "TIME_LIMIT_EXCEEDED"
    # Code failed to execute (crashed).
    RUNTIME_ERROR = "RUNTIME_ERROR"
    # Code failed to compile.
    COMPILATION_ERROR = "COMPILATION_ERROR"
    # Code exceeded the memory limit during execution.
    MEMORY_LIMIT_EXCEEDED = "MEMORY_LIMIT_EXCEEDED"
cfg = "apr"

dataset = datasets.load_dataset("NTU-NLP-sg/xCodeEval", cfg)

with jsonlines.open(f"{cfg}_code_samples_heldout.jsonl", "w") as jwp:

    def append_samples(dts):
        """Write every task in *dts* to the output file as flat code samples.

        Each task always yields a "<apr_id>-bug" record built from its bug_*
        columns; a "<apr_id>-fix" record built from the fix_* columns follows
        only when the task has a non-empty fix_source_code.
        """
        for task in tqdm(dts):
            for variant in ("bug", "fix"):
                if variant == "fix" and not task["fix_source_code"]:
                    continue
                # Copy so the dataset row itself is never modified.
                record = deepcopy(task)
                record["source_code"] = record[f"{variant}_source_code"]
                record["exec_outcome"] = record[f"{variant}_exec_outcome"]
                record["task_id"] = f'{record["apr_id"]}-{variant}'
                jwp.write(record)

    # append_samples(dataset["train"])
    for split in ("validation", "test"):
        append_samples(dataset[split])
jsonlines==3.1.0 3 | numpy==1.23.5 4 | tqdm==4.64.1 5 | requests==2.28.1 -------------------------------------------------------------------------------- /eval_scripts/resource_limit.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, fields 2 | 3 | 4 | @dataclass(kw_only=True) 5 | class ResourceLimits: 6 | core: int = 0 # RLIMIT_CORE 7 | data: int = -1 # RLIMIT_DATA 8 | # nice: int = 20 # RLIMIT_NICE 9 | fsize: int = 0 # RLIMIT_FSIZE 10 | sigpending: int = 0 # RLIMIT_SIGPENDING 11 | # memlock: int = -1 # RLIMIT_MEMLOCK 12 | rss: int = -1 # RLIMIT_RSS 13 | nofile: int = 4 # RLIMIT_NOFILE 14 | msgqueue: int = 0 # RLIMIT_MSGQUEUE 15 | rtprio: int = 0 # RLIMIT_RTPRIO 16 | stack: int = -1 # RLIMIT_STACK 17 | cpu: int = 2 # RLIMIT_CPU, CPU time, in seconds. 18 | nproc: int = 1 # RLIMIT_NPROC 19 | _as: int = 2 * 1024 ** 3 # RLIMIT_AS set to 2GB by default 20 | locks: int = 0 # RLIMIT_LOCKS 21 | # rttime: int = 2 # RLIMIT_RTTIME, Timeout for real-time tasks. 
22 | 23 | def fields(self): 24 | for field in fields(self): 25 | yield field.name 26 | 27 | 28 | if __name__ == "__main__": 29 | limits = ResourceLimits() 30 | prlimit_str = " ".join( 31 | f"--{field.name[1:] if field.name.startswith('_') else field.name}={getattr(limits, field.name)}" 32 | for field in fields(limits) 33 | ) 34 | print(prlimit_str) 35 | -------------------------------------------------------------------------------- /execution_engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ntunlp/ExecEval/dd693865a99bd32448c1ff0c40922951d940a0cd/execution_engine/__init__.py -------------------------------------------------------------------------------- /execution_engine/app.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import time 5 | import traceback 6 | from pathlib import Path 7 | 8 | from config import load_config, load_limits_by_lang 9 | from exec_outcome import ExecOutcome 10 | from flask import Flask, request 11 | from flask_cors import CORS 12 | from job import JobData 13 | 14 | sys.path.extend([str(Path(__file__).parent)]) 15 | 16 | from execution_engine import ExecutionEngine 17 | 18 | app = Flask(__name__) 19 | CORS(app) 20 | config_path = Path("config.yaml") 21 | cfg = load_config(config_path) 22 | limits_by_lang_path = Path("limits_by_lang.yaml") 23 | limits_by_lang = load_limits_by_lang(limits_by_lang_path) 24 | 25 | gunicorn_logger = logging.getLogger("gunicorn.error") 26 | app.logger.handlers = gunicorn_logger.handlers 27 | app.logger.setLevel(gunicorn_logger.level) 28 | 29 | worker_cfg_db = os.environ["WORKER_CFG_DB"] 30 | 31 | cfg_db_lines = [] 32 | run_ids = None 33 | with open(worker_cfg_db) as db_rp: 34 | assigned = False 35 | for line in db_rp: 36 | pid, idx, gid, uid = map(int, line.strip().split(",")) 37 | if not assigned and pid == -1: 38 | pid = os.getpid() 39 | 
@app.route("/api/execute_code", methods=["POST"])
def run_job():
    """Execute the posted job against its unit tests and return the verdicts.

    The JSON request body is parsed into a ``JobData`` and handed to the
    execution engine. One summary line per request is written to the app log.

    Returns:
        On success ``{"data": [...]}`` holding one JSON result per unit test.
        On any failure a ``({"error": <message and traceback>}, 400)`` tuple.
    """
    log, ret, st = "", None, time.perf_counter_ns()

    def elapsed_seconds():
        return (time.perf_counter_ns() - st) / (1000_000_000)

    try:
        job = JobData.json_parser(request.json)
        log = f"api/execute_code: lang={job.language}"
        result = execution_engine.check_output_match(job)
        ret = {"data": [r.json() for r in result]}
        # First outcome that is neither missing nor PASSED; PASSED if none.
        first_outcome = next(
            (
                r.exec_outcome
                for r in result
                if not (r.exec_outcome is None or r.exec_outcome is ExecOutcome.PASSED)
            ),
            ExecOutcome.PASSED,
        )
        # -1 marks "no measurement available" in the log line.
        peak_mem = max(
            [int(r.peak_memory_consumed.split()[0]) for r in result if r.peak_memory_consumed] + [-1]
        )
        peak_time = max([r.time_consumed for r in result if r.time_consumed] + [-1])
        log = f"{log} time: {elapsed_seconds()}s, |uts|={len(job.unittests)}, exec_outcome={first_outcome.value}, peak_mem={peak_mem}kB, peak_time={peak_time}s"

    except Exception as e:
        # Request boundary: report every failure to the client as a 400.
        # format_exc() returns the traceback text; print_exc() returns None,
        # which used to put the literal string "None" into the payload.
        ret = {"error": f"{e}\n{traceback.format_exc()}"}, 400
        log = f"{log} time: {elapsed_seconds()}s, {ret}"
    app.logger.info(log)
    return ret
class CodeStore:
    """Private, uniquely named directory that holds source files for one engine.

    The directory and every file written into it are chown'ed to the
    configured uid/gid and chmod'ed to 0o775.
    """

    _source_dir: Path

    def __init__(self, cfg: CodeStoreConfig, run_ids: tuple[int, int]) -> None:
        """Pick a fresh directory name under ``cfg.source_code_dir``.

        Args:
            cfg: Code store configuration providing ``source_code_dir``.
            run_ids: ``(gid, uid)`` that should own the stored files.
        """
        self._source_dir = cfg.source_code_dir / uuid.uuid4().hex
        self.uid = run_ids[1]
        self.gid = run_ids[0]

    def create(self):
        """Create the store directory and hand it over to uid/gid."""
        os.makedirs(self._source_dir, exist_ok=True)
        os.chown(self._source_dir, self.uid, self.gid)
        os.chmod(self._source_dir, 0o775)

    def destroy(self) -> None:
        """Remove the store directory and its content, ignoring errors."""
        shutil.rmtree(self._source_dir, ignore_errors=True)

    def write_source_code(self, source_code: str, filename: Path) -> Path:
        """Write *source_code* to *filename* inside the store.

        Returns:
            The resolved path of the written file.
        """
        filepath = self._source_dir / filename

        with filepath.open("w") as fp:
            fp.write(source_code)

        filepath = filepath.resolve()

        os.chown(filepath, self.uid, self.gid)
        os.chmod(filepath, 0o775)
        return filepath

    def read_source_code(self, filepath: Path) -> str:
        """Return the text content of *filepath*."""
        with filepath.open() as f:
            return f.read()


if __name__ == "__main__":
    from config import load_config

    cfg = load_config(Path("execution_engine/config.yaml"))
    # run_ids is (gid, uid); the demo simply uses the current process's ids.
    code_store = CodeStore(cfg.code_store, (os.getgid(), os.getuid()))
    # The uuid-named directory does not exist until create() is called.
    code_store.create()
    try:
        print(
            code_store.read_source_code(
                code_store.write_source_code("""print("Hello")""", Path("main.py"))
            )
        )
    finally:
        code_store.destroy()
from helper import has_nested_dataclass
from resource_limit import ResourceLimits
from yaml import safe_load


@dataclass
class CodeStoreConfig:
    """Configuration of the code store: the directory source files go under."""

    source_code_dir: Path

    def __post_init__(self):
        """Coerce ``source_code_dir`` to a ``Path`` (it may arrive as a str)."""
        self.source_code_dir = Path(self.source_code_dir)


@dataclass
class LanguageConfig:
    """Settings of one supported language, as listed in the YAML config.

    All fields default to an empty/neutral value, so a YAML entry only has
    to provide the keys it needs.
    """

    language: str = ""
    compile_cmd: str = ""
    compile_flags: str = ""
    execute_cmd: str = ""
    execute_flags: str = ""
    # The *_fn_name fields hold names, presumably resolved to functions by
    # the runtime code elsewhere -- TODO confirm.
    sanitize_fn_name: str = ""
    compile_fn_name: str = ""
    execute_fn_name: str = ""
    file_name_fn_or_str_name: str = ""
    timelimit_factor: int = 1
    extend_mem_for_vm: bool = False
    extend_mem_flag_name: str = ""


@has_nested_dataclass
class Config:
    """Top-level configuration: supported languages plus code store settings."""

    supported_languages: dict[str, LanguageConfig]
    code_store: CodeStoreConfig
    run_uid: int
    run_gid: int

    def __init__(
        self,
        code_store: dict[str, str],
        supported_languages: dict[str, dict[str, str]],
        *args,
        **kwargs
    ):
        """Build the nested config objects from the raw mappings.

        Each entry of *supported_languages* becomes a ``LanguageConfig`` whose
        ``language`` is the mapping key. Remaining arguments are passed on to
        ``super().__init__``.
        """
        tmp = supported_languages.copy()
        self.supported_languages = dict()
        for lang, cfg in tmp.items():
            self.supported_languages[lang] = LanguageConfig(language=lang, **cfg)

        # NOTE(review): the annotation says dict, but `.__dict__` is read here,
        # which a plain dict does not have -- presumably has_nested_dataclass
        # converts the value to an object first; confirm against helper.py.
        self.code_store = CodeStoreConfig(**code_store.__dict__)

        super().__init__(*args, **kwargs)


def load_config(config_file: Path) -> Config:
    """Parse the YAML file *config_file* and build a ``Config`` from its keys."""
    with config_file.open("r") as f:
        cfg = Config(**safe_load(f))

    return cfg


def load_limits_by_lang(limits_by_lang_file: Path) -> dict[str, ResourceLimits]:
    """Load per-language resource limits from a YAML file.

    The file maps a language name to a mapping of limit values; each inner
    mapping is passed as keyword arguments to ``ResourceLimits``.
    """
    limits_by_lang = dict()
    with open(limits_by_lang_file) as lblp:
        for lang, limits_dict in safe_load(lblp).items():
            limits_by_lang[lang] = ResourceLimits(**limits_dict)

    return limits_by_lang


if __name__ == "__main__":
    # Manual smoke test: print the names of the configured languages.
    cfg = load_config(Path("execution_engine/config.yaml"))
    print(cfg.supported_languages.keys())
77 | -------------------------------------------------------------------------------- /execution_engine/config.yaml: -------------------------------------------------------------------------------- 1 | code_store: 2 | source_code_dir: /code_store/ 3 | supported_languages: 4 | GNU C: 5 | compile_cmd: gcc 6 | compile_flags: -fno-optimize-sibling-calls -w -fno-strict-aliasing -DONLINE_JUDGE -include limits.h -fno-asm -s -O2 -DONLINE_JUDGE -include math.h -static -lm 7 | sanitize_fn_name: sanitize_c_cpp 8 | compile_fn_name: generic_c_cpp_compile 9 | execute_fn_name: generic_binary_execute 10 | file_name_fn_or_str_name: test.c 11 | 12 | GNU C11: 13 | compile_cmd: gcc 14 | compile_flags: -std=c11 -fno-optimize-sibling-calls -w -fno-strict-aliasing -DONLINE_JUDGE -include limits.h -fno-asm -s -O2 -DONLINE_JUDGE -include math.h -static -lm 15 | sanitize_fn_name: sanitize_c_cpp 16 | compile_fn_name: generic_c_cpp_compile 17 | execute_fn_name: generic_binary_execute 18 | file_name_fn_or_str_name: test.c 19 | 20 | GNU C++: 21 | compile_cmd: g++ 22 | compile_flags: -s -x c++ -O2 -w -DONLINE_JUDGE -include math.h -include limits.h -static -lm 23 | sanitize_fn_name: sanitize_c_cpp 24 | compile_fn_name: generic_c_cpp_compile 25 | execute_fn_name: generic_binary_execute 26 | file_name_fn_or_str_name: test.cpp 27 | 28 | GNU C++0x: 29 | compile_cmd: g++ 30 | compile_flags: -std=c++0x -s -x c++ -O2 -w -DONLINE_JUDGE -include math.h -include limits.h -static -lm 31 | sanitize_fn_name: sanitize_c_cpp 32 | compile_fn_name: generic_c_cpp_compile 33 | execute_fn_name: generic_binary_execute 34 | file_name_fn_or_str_name: test.cpp 35 | 36 | GNU C++11: 37 | compile_cmd: g++ 38 | compile_flags: -std=c++11 -s -x c++ -O2 -w -DONLINE_JUDGE -include math.h -include limits.h -static -lm 39 | sanitize_fn_name: sanitize_c_cpp 40 | compile_fn_name: generic_c_cpp_compile 41 | execute_fn_name: generic_binary_execute 42 | file_name_fn_or_str_name: test.cpp 43 | 44 | GNU C++14: 45 | compile_cmd: g++ 46 
| compile_flags: -std=c++14 -s -x c++ -O2 -w -DONLINE_JUDGE -include math.h -include limits.h -static -lm 47 | sanitize_fn_name: sanitize_c_cpp 48 | compile_fn_name: generic_c_cpp_compile 49 | execute_fn_name: generic_binary_execute 50 | file_name_fn_or_str_name: test.cpp 51 | 52 | GNU C++17: 53 | compile_cmd: g++ 54 | compile_flags: -std=c++17 -s -x c++ -O2 -w -DONLINE_JUDGE -include limits.h -include math.h -static -lm 55 | sanitize_fn_name: sanitize_c_cpp 56 | compile_fn_name: generic_c_cpp_compile 57 | execute_fn_name: generic_binary_execute 58 | file_name_fn_or_str_name: test.cpp 59 | 60 | GNU C++17 (64): 61 | compile_cmd: g++ 62 | compile_flags: -std=c++17 -s -x c++ -O2 -w -DONLINE_JUDGE -include limits.h -include math.h -static -lm 63 | sanitize_fn_name: sanitize_c_cpp 64 | compile_fn_name: generic_c_cpp_compile 65 | execute_fn_name: generic_binary_execute 66 | file_name_fn_or_str_name: test.cpp 67 | 68 | GNU C++20 (64): 69 | compile_cmd: g++ 70 | compile_flags: -std=c++20 -s -x c++ -O2 -w -DONLINE_JUDGE -include limits.h -include math.h -static -lm 71 | sanitize_fn_name: sanitize_c_cpp 72 | compile_fn_name: generic_c_cpp_compile 73 | execute_fn_name: generic_binary_execute 74 | file_name_fn_or_str_name: test.cpp 75 | 76 | GNU C++20: 77 | compile_cmd: g++ 78 | compile_flags: -std=c++20 -s -x c++ -O2 -w -DONLINE_JUDGE -include limits.h -include math.h -static -lm 79 | sanitize_fn_name: sanitize_c_cpp 80 | compile_fn_name: generic_c_cpp_compile 81 | execute_fn_name: generic_binary_execute 82 | file_name_fn_or_str_name: test.cpp 83 | 84 | GNU C++17 Diagnostics: 85 | compile_cmd: g++ 86 | compile_flags: -std=c++17 -s -x c++ -O2 -w -DONLINE_JUDGE -pedantic -include limits.h -include math.h -static -lm 87 | sanitize_fn_name: sanitize_c_cpp 88 | compile_fn_name: generic_c_cpp_compile 89 | execute_fn_name: generic_binary_execute 90 | file_name_fn_or_str_name: test.cpp 91 | 92 | Clang++17 Diagnostics: 93 | compile_cmd: clang++-14 94 | compile_flags: -std=c++17 -s -x 
c++ -O2 -w -pedantic -DONLINE_JUDGE -include limits.h -include math.h -static -lm 95 | sanitize_fn_name: sanitize_c_cpp 96 | compile_fn_name: generic_c_cpp_compile 97 | execute_fn_name: generic_binary_execute 98 | file_name_fn_or_str_name: test.cpp 99 | 100 | Clang++17: 101 | compile_cmd: clang++-14 102 | compile_flags: -std=c++17 -s -x c++ -O2 -w -DONLINE_JUDGE -include limits.h -include math.h -static -lm 103 | sanitize_fn_name: sanitize_c_cpp 104 | compile_fn_name: generic_c_cpp_compile 105 | execute_fn_name: generic_binary_execute 106 | file_name_fn_or_str_name: test.cpp 107 | 108 | Clang++20 Diagnostics: 109 | compile_cmd: clang++-14 110 | compile_flags: -std=c++20 -s -x c++ -O2 -w -pedantic -DONLINE_JUDGE -include limits.h -include math.h -static -lm 111 | sanitize_fn_name: sanitize_c_cpp 112 | compile_fn_name: generic_c_cpp_compile 113 | execute_fn_name: generic_binary_execute 114 | file_name_fn_or_str_name: test.cpp 115 | 116 | Clang++20: 117 | compile_cmd: clang++-14 118 | compile_flags: -std=c++20 -s -x c++ -O2 -w -DONLINE_JUDGE -include limits.h -include math.h -static -lm 119 | sanitize_fn_name: sanitize_c_cpp 120 | compile_fn_name: generic_c_cpp_compile 121 | execute_fn_name: generic_binary_execute 122 | file_name_fn_or_str_name: test.cpp 123 | 124 | Clang++14: 125 | compile_cmd: clang++-14 126 | compile_flags: -std=c++14 -s -x c++ -O2 -w -DONLINE_JUDGE -include limits.h -include math.h -static -lm 127 | sanitize_fn_name: sanitize_c_cpp 128 | compile_fn_name: generic_c_cpp_compile 129 | execute_fn_name: generic_binary_execute 130 | file_name_fn_or_str_name: test.cpp 131 | 132 | Clang++11: 133 | compile_cmd: clang++-14 134 | compile_flags: -std=c++11 -s -x c++ -O2 -w -DONLINE_JUDGE -include limits.h -include math.h -static -lm 135 | sanitize_fn_name: sanitize_c_cpp 136 | compile_fn_name: generic_c_cpp_compile 137 | execute_fn_name: generic_binary_execute 138 | file_name_fn_or_str_name: test.cpp 139 | 140 | MS C++: 141 | compile_cmd: g++ 142 | 
compile_flags: -s -x c++ -O2 -w -DONLINE_JUDGE -include math.h -include limits.h -static -lm 143 | sanitize_fn_name: sanitize_c_cpp 144 | compile_fn_name: generic_c_cpp_compile 145 | execute_fn_name: generic_binary_execute 146 | file_name_fn_or_str_name: test.cpp 147 | 148 | MS C++ 2017: 149 | compile_cmd: g++ 150 | compile_flags: -s -x c++ -O2 -w -DONLINE_JUDGE -include math.h -include limits.h -static -lm 151 | sanitize_fn_name: sanitize_c_cpp 152 | compile_fn_name: generic_c_cpp_compile 153 | execute_fn_name: generic_binary_execute 154 | file_name_fn_or_str_name: test.cpp 155 | 156 | MS C#: 157 | compile_cmd: csc 158 | compile_flags: -o -nologo 159 | execute_cmd: mono 160 | compile_fn_name: generic_cs_compile 161 | execute_fn_name: generic_interpreted_execute 162 | file_name_fn_or_str_name: test.cs 163 | timelimit_factor: 3 164 | 165 | C# 10: 166 | compile_cmd: csc 167 | compile_flags: -o -nologo 168 | execute_cmd: mono 169 | compile_fn_name: generic_cs_compile 170 | execute_fn_name: generic_interpreted_execute 171 | file_name_fn_or_str_name: test.cs 172 | timelimit_factor: 3 173 | 174 | C# 8: 175 | compile_cmd: csc 176 | compile_flags: -o -nologo 177 | execute_cmd: mono 178 | compile_fn_name: generic_cs_compile 179 | execute_fn_name: generic_interpreted_execute 180 | file_name_fn_or_str_name: test.cs 181 | timelimit_factor: 3 182 | 183 | Mono C#: 184 | compile_cmd: csc 185 | compile_flags: -o -nologo 186 | execute_cmd: mono 187 | compile_fn_name: generic_cs_compile 188 | execute_fn_name: generic_interpreted_execute 189 | file_name_fn_or_str_name: test.cs 190 | timelimit_factor: 3 191 | 192 | .NET Core C#: 193 | compile_cmd: csc 194 | compile_flags: -o -nologo 195 | execute_cmd: mono 196 | compile_fn_name: generic_cs_compile 197 | execute_fn_name: generic_interpreted_execute 198 | file_name_fn_or_str_name: test.cs 199 | timelimit_factor: 3 200 | 201 | PyPy 2: 202 | compile_cmd: pypy2 203 | compile_flags: -W ignore -m py_compile 204 | execute_cmd: pypy2 205 | 
execute_flags: -W ignore -OO -s -S 206 | execute_fn_name: generic_interpreted_execute 207 | file_name_fn_or_str_name: test.py 208 | timelimit_factor: 2 209 | 210 | Python 2: 211 | compile_cmd: python2 212 | compile_flags: -W ignore -m py_compile 213 | execute_cmd: python2 214 | execute_flags: -W ignore -OO -s -S 215 | execute_fn_name: generic_interpreted_execute 216 | file_name_fn_or_str_name: test.py 217 | timelimit_factor: 3 218 | 219 | PyPy 3: 220 | compile_cmd: pypy3 221 | compile_flags: -W ignore -m py_compile 222 | execute_cmd: pypy3 223 | execute_flags: -W ignore -OO -s -S 224 | compile_fn_name: generic_interpreted_compile 225 | execute_fn_name: generic_interpreted_execute 226 | file_name_fn_or_str_name: test.py 227 | timelimit_factor: 2 228 | 229 | PyPy 3-64: 230 | compile_cmd: pypy3 231 | compile_flags: -W ignore -m py_compile 232 | execute_cmd: pypy3 233 | execute_flags: -W ignore -OO -s -S 234 | compile_fn_name: generic_interpreted_compile 235 | execute_fn_name: generic_interpreted_execute 236 | file_name_fn_or_str_name: test.py 237 | timelimit_factor: 2 238 | 239 | Python 3: 240 | compile_cmd: python3 241 | compile_flags: -W ignore -m py_compile 242 | execute_cmd: python3 243 | execute_flags: -W ignore -OO -s -S 244 | compile_fn_name: generic_interpreted_compile 245 | execute_fn_name: generic_interpreted_execute 246 | file_name_fn_or_str_name: test.py 247 | timelimit_factor: 3 248 | 249 | Python 3 + libs: 250 | compile_cmd: python3 251 | compile_flags: -W ignore -m py_compile 252 | execute_cmd: python3 253 | execute_flags: -W ignore -OO -s -S 254 | compile_fn_name: generic_interpreted_compile 255 | execute_fn_name: generic_interpreted_execute 256 | file_name_fn_or_str_name: test.py 257 | timelimit_factor: 3 258 | 259 | JavaScript: 260 | compile_cmd: node 261 | compile_flags: --check 262 | execute_cmd: node 263 | compile_fn_name: generic_interpreted_compile 264 | execute_fn_name: generic_interpreted_execute 265 | file_name_fn_or_str_name: test.js 266 | 
timelimit_factor: 3 267 | 268 | Node js: 269 | compile_cmd: node 270 | compile_flags: --check 271 | execute_cmd: node 272 | compile_fn_name: generic_interpreted_compile 273 | execute_fn_name: generic_interpreted_execute 274 | file_name_fn_or_str_name: test.js 275 | timelimit_factor: 3 276 | 277 | Node.js: 278 | compile_cmd: node 279 | compile_flags: --check 280 | execute_cmd: node 281 | compile_fn_name: generic_interpreted_compile 282 | execute_fn_name: generic_interpreted_execute 283 | file_name_fn_or_str_name: test.js 284 | timelimit_factor: 3 285 | 286 | Rust: 287 | compile_cmd: rustc 288 | compile_flags: -O 289 | compile_fn_name: generic_rust_go_compile 290 | execute_fn_name: generic_binary_execute 291 | file_name_fn_or_str_name: test.rs 292 | 293 | Rust 2021: 294 | compile_cmd: rustc 295 | compile_flags: --edition 2021 -O 296 | compile_fn_name: generic_rust_go_compile 297 | execute_fn_name: generic_binary_execute 298 | file_name_fn_or_str_name: test.rs 299 | 300 | Rust 2018: 301 | compile_cmd: rustc 302 | compile_flags: --edition 2018 -O 303 | compile_fn_name: generic_rust_go_compile 304 | execute_fn_name: generic_binary_execute 305 | file_name_fn_or_str_name: test.rs 306 | 307 | Rust 2015: 308 | compile_cmd: rustc 309 | compile_flags: --edition 2015 -O 310 | compile_fn_name: generic_rust_go_compile 311 | execute_fn_name: generic_binary_execute 312 | file_name_fn_or_str_name: test.rs 313 | 314 | Java 6: 315 | compile_cmd: javac 316 | compile_flags: -target 1.6 -nowarn -cp ".;*" 317 | execute_cmd: java 318 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k 319 | compile_fn_name: generic_java_compile 320 | execute_fn_name: generic_java_execute 321 | file_name_fn_or_str_name: java_file_name_suffix 322 | timelimit_factor: 3 323 | extend_mem_for_vm: true 324 | extend_mem_flag_name: Xmx 325 | 326 | Java 7: 327 | compile_cmd: javac 328 | compile_flags: -target 1.7 -nowarn -cp ".;*" 329 | 
execute_cmd: java 330 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k 331 | compile_fn_name: generic_java_compile 332 | execute_fn_name: generic_java_execute 333 | file_name_fn_or_str_name: java_file_name_suffix 334 | timelimit_factor: 3 335 | extend_mem_for_vm: true 336 | extend_mem_flag_name: Xmx 337 | 338 | Java 1.5: 339 | compile_cmd: javac 340 | compile_flags: -target 1.5 -nowarn -cp ".;*" 341 | execute_cmd: java 342 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k 343 | compile_fn_name: generic_java_compile 344 | execute_fn_name: generic_java_execute 345 | file_name_fn_or_str_name: java_file_name_suffix 346 | timelimit_factor: 3 347 | extend_mem_for_vm: true 348 | extend_mem_flag_name: Xmx 349 | 350 | Java 8: 351 | compile_cmd: javac 352 | compile_flags: -nowarn -cp ".;*" 353 | execute_cmd: java 354 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k 355 | compile_fn_name: generic_java_compile 356 | execute_fn_name: generic_java_execute 357 | file_name_fn_or_str_name: java_file_name_suffix 358 | timelimit_factor: 3 359 | extend_mem_for_vm: true 360 | extend_mem_flag_name: Xmx 361 | 362 | Java 11: 363 | compile_cmd: javac 364 | compile_flags: -nowarn -cp ".;*" 365 | execute_cmd: java 366 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k 367 | compile_fn_name: generic_java_compile 368 | execute_fn_name: generic_java_execute 369 | file_name_fn_or_str_name: java_file_name_suffix 370 | timelimit_factor: 3 371 | extend_mem_for_vm: true 372 | extend_mem_flag_name: Xmx 373 | 374 | Java 17: 375 | compile_cmd: javac 376 | compile_flags: -nowarn -cp ".;*" 377 | execute_cmd: java 378 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g 
-XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k 379 | compile_fn_name: generic_java_compile 380 | execute_fn_name: generic_java_execute 381 | file_name_fn_or_str_name: java_file_name_suffix 382 | timelimit_factor: 3 383 | extend_mem_for_vm: true 384 | extend_mem_flag_name: Xmx 385 | 386 | PHP: 387 | compile_cmd: php 388 | compile_flags: -l 389 | execute_cmd: php 390 | compile_fn_name: generic_interpreted_compile 391 | execute_fn_name: generic_interpreted_execute 392 | file_name_fn_or_str_name: test.php 393 | timelimit_factor: 3 394 | 395 | PHP 8.1: 396 | compile_cmd: php 397 | compile_flags: -l 398 | execute_cmd: php 399 | compile_fn_name: generic_interpreted_compile 400 | execute_fn_name: generic_interpreted_execute 401 | file_name_fn_or_str_name: test.php 402 | timelimit_factor: 3 403 | 404 | Go: 405 | compile_cmd: go build 406 | compile_fn_name: generic_rust_go_compile 407 | execute_fn_name: generic_binary_execute 408 | file_name_fn_or_str_name: test.go 409 | 410 | Ruby: 411 | compile_cmd: ruby 412 | compile_flags: -c 413 | execute_cmd: ruby 414 | compile_fn_name: generic_interpreted_compile 415 | execute_fn_name: generic_interpreted_execute 416 | file_name_fn_or_str_name: test.rb 417 | timelimit_factor: 3 418 | 419 | Ruby 3: 420 | compile_cmd: ruby 421 | compile_flags: -c 422 | execute_cmd: ruby 423 | compile_fn_name: generic_interpreted_compile 424 | execute_fn_name: generic_interpreted_execute 425 | file_name_fn_or_str_name: test.rb 426 | timelimit_factor: 3 427 | 428 | Kotlin: 429 | compile_cmd: kotlinc 430 | compile_flags: -nowarn -language-version 1.7 -include-runtime 431 | execute_cmd: java 432 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k -jar 433 | sanitize_fn_name: sanitize_kotlin 434 | compile_fn_name: generic_kt_compile 435 | execute_fn_name: generic_kotlin_execute 436 | file_name_fn_or_str_name: test.kt 437 | timelimit_factor: 3 438 | 
extend_mem_for_vm: true 439 | extend_mem_flag_name: Xmx 440 | 441 | Kotlin 1.4: 442 | compile_cmd: kotlinc 443 | compile_flags: -nowarn -language-version 1.4 -include-runtime 444 | execute_cmd: java 445 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k -jar 446 | sanitize_fn_name: sanitize_kotlin 447 | compile_fn_name: generic_kt_compile 448 | execute_fn_name: generic_kotlin_execute 449 | file_name_fn_or_str_name: test.kt 450 | timelimit_factor: 3 451 | extend_mem_for_vm: true 452 | extend_mem_flag_name: Xmx 453 | 454 | Kotlin 1.5: 455 | compile_cmd: kotlinc 456 | compile_flags: -nowarn -language-version 1.5 -include-runtime 457 | execute_cmd: java 458 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k -jar 459 | sanitize_fn_name: sanitize_kotlin 460 | compile_fn_name: generic_kt_compile 461 | execute_fn_name: generic_kotlin_execute 462 | file_name_fn_or_str_name: test.kt 463 | timelimit_factor: 3 464 | extend_mem_for_vm: true 465 | extend_mem_flag_name: Xmx 466 | 467 | Kotlin 1.6: 468 | compile_cmd: kotlinc 469 | compile_flags: -nowarn -language-version 1.6 -include-runtime 470 | execute_cmd: java 471 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k -jar 472 | sanitize_fn_name: sanitize_kotlin 473 | compile_fn_name: generic_kt_compile 474 | execute_fn_name: generic_kotlin_execute 475 | file_name_fn_or_str_name: test.kt 476 | timelimit_factor: 3 477 | extend_mem_for_vm: true 478 | extend_mem_flag_name: Xmx 479 | 480 | Kotlin 1.7: 481 | compile_cmd: kotlinc 482 | compile_flags: -nowarn -language-version 1.7 -include-runtime 483 | execute_cmd: java 484 | execute_flags: -DONLINE_JUDGE=true -Xlog:disable -Xmx2g -XX:ReservedCodeCacheSize=64m -XX:-UseCompressedClassPointers -Xss256k -jar 485 | sanitize_fn_name: sanitize_kotlin 486 | 
class ExecOutcome(Enum):
    """Verdict assigned to one run of submitted code against one unit test.

    Every member's value is its own name as a string.
    """

    # The code ran and its output matched the expected output.
    PASSED = "PASSED"
    # The code ran but its output did NOT match the expected output.
    WRONG_ANSWER = "WRONG_ANSWER"
    # The code ran but did not exit in time; its output is ignored.
    TIME_LIMIT_EXCEEDED = "TIME_LIMIT_EXCEEDED"
    # The code failed while executing (crashed).
    RUNTIME_ERROR = "RUNTIME_ERROR"
    # The code failed to compile.
    COMPILATION_ERROR = "COMPILATION_ERROR"
    # The code exceeded the memory limit during execution.
    MEMORY_LIMIT_EXCEEDED = "MEMORY_LIMIT_EXCEEDED"
class MonitorThread(Thread):
    """Sample CPU time and peak virtual memory of a child process from /proc.

    While ``proc.poll()`` reports the process as still running, the thread
    reads ``/proc/<pid>/stat`` and ``/proc/<pid>/status`` and remembers the
    most recent successful sample:

    * ``total_time``  - user + system CPU time of the process and of its
      waited-for children, in seconds;
    * ``peak_memory`` - the raw ``VmPeak`` value from the status file
      (for example ``"1234 kB"``).

    Both attributes stay ``None`` when no sample could be taken, e.g. when the
    process exits before the first read.
    """

    # Pause between two samples, in seconds. Without it the loop spins at
    # full speed and competes for CPU with the very process being measured.
    POLL_INTERVAL = 0.001

    def __init__(self, proc):
        """Create a monitor for ``proc``, an object exposing ``pid`` and ``poll()``."""
        Thread.__init__(self)
        self.total_time = None
        self.peak_memory = None
        self.proc = proc
        # Clock ticks per second: the unit of the time fields in /proc/<pid>/stat.
        self.clk_tck = os.sysconf(os.sysconf_names["SC_CLK_TCK"])

    @staticmethod
    def _parse_cpu_ticks(stat_text):
        """Return utime + stime + cutime + cstime (in clock ticks) from a stat line.

        The second field of the line is the command name in parentheses and
        may itself contain spaces or parentheses, so the line is only split
        after the LAST closing parenthesis.

        Raises:
            ValueError: if the line is truncated or malformed.
        """
        after_comm = stat_text[stat_text.rindex(")") + 1:].split()
        # after_comm[0] is field 3 (state), hence field 14 (utime) is index 11.
        utime, stime, cutime, cstime = after_comm[11:15]
        return float(utime) + float(stime) + float(cutime) + float(cstime)

    @staticmethod
    def _parse_vm_peak(status_lines):
        """Return the value of the first ``VmPeak:`` line, or ``None`` if absent."""
        for line in status_lines:
            if line.startswith("VmPeak:"):
                return line.split(":")[-1].strip()
        return None

    def _sample(self):
        """Take one sample; ``peak_memory`` is kept as-is when VmPeak is missing."""
        pid = self.proc.pid
        with open(f"/proc/{pid}/stat") as pid_stat:
            ticks = self._parse_cpu_ticks(pid_stat.read())
        self.total_time = ticks / self.clk_tck
        with open(f"/proc/{pid}/status") as pid_status:
            vm_peak = self._parse_vm_peak(pid_status)
        if vm_peak is not None:
            self.peak_memory = vm_peak

    def run(self):
        """Poll the process until it terminates, keeping the latest sample."""
        import time  # local import: keeps this class self-contained

        while self.proc.poll() is None:
            try:
                self._sample()
            except (FileNotFoundError, ProcessLookupError, ValueError):
                # Best effort: the process can vanish between poll() and the
                # read, or the read can be truncated. Keep the last good
                # sample and let the next poll() end the loop.
                pass
            time.sleep(self.POLL_INTERVAL)
def get_executor(
    self, job: JobData, limits: ResourceLimits
) -> tuple[str | Path | LanguageError, int]:
    """Prepare the command line that runs the source code of ``job``.

    The source is normalised to LF line endings, optionally sanitized,
    written to the code store and compiled when the runtime requires it.

    Returns:
        ``(command, timelimit_factor)`` on success. On failure the second
        item is ``-1`` and the first item describes the problem: a
        ``LanguageError``, a ``JavaClassNotFoundError`` or the error text
        produced by the compile step.
    """
    language = job.language
    if language is None:
        return LanguageError("Language must be selected to execute a code."), -1
    if language not in self.supported_languages:
        return LanguageError(f"Support for {language} is not implemented."), -1

    runtime = self.supported_languages[language]

    code = convert_crlf_to_lf(job.source_code)
    if runtime.has_sanitizer and job.use_sanitizer:
        code = runtime.sanitize(code)

    target = runtime.get_file_path(code)
    if isinstance(target, JavaClassNotFoundError):
        return target, -1
    written_path = self.code_store.write_source_code(code, target)

    executable, failed = self._get_executable_after_compile(
        language, written_path, cmd=job.compile_cmd, flags=job.compile_flags
    )
    if failed:
        # On failure the first item is the error text, not a path.
        return executable, -1

    flags = job.execute_flags
    if runtime.extend_mem_for_vm and limits._as != -1:
        # Pass the address-space limit on to the VM through its own flag.
        vm_mem_flag = f" -{runtime.extend_mem_flag_name}{limits._as} "
        flags = vm_mem_flag if flags is None else flags + vm_mem_flag

    command = runtime.execute(executable, cmd=job.execute_cmd, flags=flags)
    return command, runtime.timelimit_factor
261 | stdin=subprocess.PIPE, 262 | stdout=subprocess.PIPE, 263 | stderr=subprocess.PIPE, 264 | bufsize=0, 265 | user=self.run_uid, 266 | group=self.run_gid, 267 | preexec_fn=preexec_fn, 268 | cwd=self.code_store._source_dir.resolve(), 269 | env=self.exec_env, 270 | start_new_session=True, 271 | ) as child_process: 272 | monitor = MonitorThread(child_process) 273 | monitor.start() 274 | 275 | def handler(): 276 | if child_process.poll() is None: 277 | child_process.kill() 278 | 279 | timer = Timer(limits.cpu * timelimit_factor + 1, handler) 280 | timer.start() 281 | tot_time, peak_mem = None, None 282 | # self.logger.debug(f"PID: {child_process.pid}") 283 | try: 284 | outs, errs = child_process.communicate( 285 | tc.input.encode("ascii"), timeout=limits.cpu * timelimit_factor 286 | ) 287 | timer.cancel() 288 | except subprocess.TimeoutExpired: 289 | exec_outcome = ExecOutcome.TIME_LIMIT_EXCEEDED 290 | except subprocess.CalledProcessError: 291 | exec_outcome = ExecOutcome.RUNTIME_ERROR 292 | if errs is not None: 293 | result = errs.decode(errors="ignore").strip() 294 | finally: 295 | timer.cancel() 296 | 297 | child_process.kill() 298 | child_process.communicate() 299 | child_process.wait() 300 | monitor.join() 301 | if syscall_filter_loaded: 302 | self.socket_filter.reset() 303 | if exec_outcome is None: 304 | if child_process.returncode == 0 and outs is not None: 305 | result = outs.decode(errors="ignore").strip() 306 | exec_outcome = ( 307 | ExecOutcome.PASSED 308 | if any( 309 | self.output_validator(output, result) 310 | for output in tc.output 311 | ) 312 | else ExecOutcome.WRONG_ANSWER 313 | ) 314 | elif errs is not None and len(errs) != 0: 315 | exec_outcome = ExecOutcome.RUNTIME_ERROR 316 | errs = errs.decode(errors="ignore") 317 | if ( 318 | "out of memory" in errs.lower() 319 | or "bad_alloc" in errs.lower() 320 | or "bad alloc" in errs.lower() 321 | or "memoryerror" in errs.lower() 322 | ): 323 | exec_outcome = ExecOutcome.MEMORY_LIMIT_EXCEEDED 324 | if 
if __name__ == "__main__":
    # Manual smoke test: run the sample "a + b" program of every listed
    # language through the engine and print the resulting unit tests.
    # Paths are relative to the repository root.
    import logging

    from config import load_config
    from job import JobData
    from resource_limit import ResourceLimits

    class Test:
        file: str
        lang: str

        def __init__(self, file, lang):
            self.file = file
            self.lang = lang

    tests = [
        Test("execution_engine/test_codes/test.c", "GNU C"),
        Test("execution_engine/test_codes/test.cpp", "GNU C++17"),
        Test("execution_engine/test_codes/test.go", "Go"),
        Test("execution_engine/test_codes/test.js", "Node js"),
        Test("execution_engine/test_codes/test.php", "PHP"),
        Test("execution_engine/test_codes/test.py", "PyPy 3"),
        Test("execution_engine/test_codes/test.py", "Python 3"),
        Test("execution_engine/test_codes/test.rb", "Ruby"),
        Test("execution_engine/test_codes/test.rs", "Rust"),
        Test("execution_engine/test_codes/test.java", "Java 7"),
        Test("execution_engine/test_codes/test.kt", "Kotlin"),
    ]

    # (stdin, accepted outputs) pairs shared by all sample programs.
    cases = [
        ("1 1", ["2"]),
        ("1 3", ["4"]),
        ("-1 2", ["1"]),
        ("122 2", ["124"]),
    ]

    cfg = load_config(Path("execution_engine/config.yaml"))

    # ExecutionEngine needs per-language limits, (gid, uid) run ids and a logger.
    # NOTE(review): default limits are used for every language here; they may
    # be too strict for VM-based runtimes - confirm or load the real
    # per-language limits instead.
    ce = ExecutionEngine(
        cfg,
        {lang: ResourceLimits() for lang in cfg.supported_languages},
        (os.getgid(), os.getuid()),
        logging.getLogger(__name__),
    )

    # NOTE(review): assumes the code store must be created before sources can
    # be written to it - confirm against CodeStore.
    ce.start()
    try:
        for t in tests:
            with open(t.file) as f:
                s = f.read()
            updated_unittests = ce.check_output_match(
                JobData(
                    language=t.lang,
                    source_code=s,
                    # check_output_match() updates the unit test objects in
                    # place, so every language gets fresh ones.
                    unittests=[ExtendedUnittest(i, o) for i, o in cases],
                    limits=ResourceLimits(),
                )
            )

            print(f"{t.lang} got: \n", updated_unittests)
    finally:
        ce.stop()
def convert_crlf_to_lf(s: str) -> str:
    """Return *s* with every carriage return removed.

    Dropping all ``"\\r"`` characters turns each CRLF pair into a single LF.
    A lone ``"\\r"`` is deleted as well; it is not converted to ``"\\n"``.
    """
    # One pass is enough: once every "\r" is gone no "\r\n" pair can remain.
    return s.replace("\r", "")
"input": ut["input"], 11 | "output": ut["output"], 12 | }) 13 | return uts_fx 14 | 15 | @dataclass 16 | class JobData: 17 | language: str 18 | source_code: str 19 | unittests: list[ExtendedUnittest] 20 | compile_cmd: str | None = None 21 | compile_flags: str | None = None 22 | execute_cmd: str | None = None 23 | execute_flags: str | None = None 24 | limits: ResourceLimits | None = None 25 | block_network: bool = True 26 | stop_on_first_fail: bool = True 27 | use_sanitizer: bool = False 28 | 29 | @classmethod 30 | def json_parser(cls, form): 31 | return cls( 32 | language=form.get("language"), 33 | source_code=form.get("source_code"), 34 | unittests=[ExtendedUnittest(**t) for t in fix_uts(form.get("unittests"))], 35 | compile_cmd=form.get("compile_cmd"), 36 | compile_flags=form.get("compile_flags"), 37 | execute_cmd=form.get("execute_cmd"), 38 | execute_flags=form.get("execute_flags"), 39 | limits=ResourceLimits(**form.get("limits")) if form.get("limits") is not None else None, 40 | block_network=form.get("block_network", True), 41 | stop_on_first_fail=form.get("stop_on_first_fail", True), 42 | use_sanitizer=form.get("use_sanitizer", False), 43 | ) 44 | 45 | 46 | @dataclass 47 | class LanguageError: 48 | msg: str 49 | -------------------------------------------------------------------------------- /execution_engine/limits_by_lang.yaml: -------------------------------------------------------------------------------- 1 | GNU C: 2 | nofile: 0 3 | 4 | GNU C11: 5 | nofile: 0 6 | 7 | GNU C++: 8 | nofile: 0 9 | 10 | GNU C++0x: 11 | nofile: 0 12 | 13 | GNU C++11: 14 | nofile: 0 15 | 16 | GNU C++14: 17 | nofile: 0 18 | 19 | GNU C++17: 20 | nofile: 0 21 | 22 | GNU C++17 (64): 23 | nofile: 0 24 | 25 | GNU C++20 (64): 26 | nofile: 0 27 | 28 | GNU C++20: 29 | nofile: 0 30 | 31 | GNU C++17 Diagnostics: 32 | nofile: 0 33 | 34 | Clang++17 Diagnostics: 35 | nofile: 0 36 | 37 | Clang++17: 38 | nofile: 0 39 | 40 | Clang++20 Diagnostics: 41 | nofile: 0 42 | 43 | Clang++20: 44 | nofile: 
0 45 | 46 | Clang++14: 47 | nofile: 0 48 | 49 | Clang++11: 50 | nofile: 0 51 | 52 | MS C++: 53 | nofile: 0 54 | 55 | MS C++ 2017: 56 | nofile: 0 57 | 58 | MS C#: 59 | nofile: 4 60 | nproc: 4 61 | fsize: 1073741824 62 | 63 | C# 10: 64 | nofile: 4 65 | nproc: 4 66 | fsize: 1073741824 67 | 68 | C# 8: 69 | nofile: 4 70 | nproc: 4 71 | fsize: 1073741824 72 | 73 | Mono C#: 74 | nofile: 4 75 | nproc: 4 76 | fsize: 1073741824 77 | 78 | .NET Core C#: 79 | nofile: 4 80 | nproc: 4 81 | fsize: 1073741824 82 | 83 | PyPy 2: 84 | nofile: 4 85 | 86 | Python 2: 87 | nofile: 4 88 | 89 | PyPy 3: 90 | nofile: 4 91 | 92 | PyPy 3-64: 93 | nofile: 4 94 | 95 | Python 3: 96 | nofile: 4 97 | 98 | Python 3 + libs: 99 | nofile: 4 100 | 101 | JavaScript: 102 | nofile: 25 103 | nproc: 25 104 | 105 | Node js: 106 | nofile: 25 107 | nproc: 25 108 | 109 | Node.js: 110 | nofile: 25 111 | nproc: 25 112 | 113 | Rust: 114 | nofile: 4 115 | 116 | Rust 2021: 117 | nofile: 4 118 | 119 | Rust 2018: 120 | nofile: 4 121 | 122 | Rust 2015: 123 | nofile: 4 124 | 125 | Java 6: 126 | nofile: 25 127 | nproc: 23 128 | 129 | Java 7: 130 | nofile: 25 131 | nproc: 23 132 | 133 | Java 1.5: 134 | nofile: 25 135 | nproc: 23 136 | 137 | Java 8: 138 | nofile: 25 139 | nproc: 23 140 | 141 | Java 11: 142 | nofile: 25 143 | nproc: 23 144 | 145 | Java 17: 146 | nofile: 25 147 | nproc: 23 148 | 149 | PHP: 150 | nofile: 4 151 | 152 | PHP 8.1: 153 | nofile: 4 154 | 155 | Go: 156 | nofile: 0 157 | nproc: 7 158 | 159 | Ruby: 160 | nofile: 10 161 | 162 | Ruby 3: 163 | nofile: 10 164 | 165 | Kotlin: 166 | nofile: 25 167 | nproc: 23 168 | 169 | Kotlin 1.4: 170 | nofile: 25 171 | nproc: 23 172 | 173 | Kotlin 1.5: 174 | nofile: 25 175 | nproc: 23 176 | 177 | Kotlin 1.6: 178 | nofile: 25 179 | nproc: 23 180 | 181 | Kotlin 1.7: 182 | nofile: 25 183 | nproc: 23 184 | -------------------------------------------------------------------------------- /execution_engine/prlimit.py: 
def get_prlimit_str(limits: ResourceLimits, timelimit_factor: int = 1) -> str:
    """Render ``limits`` as a ``prlimit`` command prefix.

    Every field name yielded by ``limits.fields()`` except ``"cpu"`` becomes a
    ``--<name>=<value>`` option, in that order. One leading underscore is
    dropped from the name, so ``_as`` is emitted as ``--as``.

    ``timelimit_factor`` is accepted but not used.
    """
    options = " ".join(
        f"--{name[1:] if name.startswith('_') else name}={getattr(limits, name)}"
        for name in limits.fields()
        if name != "cpu"
    )
    return f"prlimit {options}"
class Runtime:
    """Per-language recipe for naming, sanitizing, compiling and running code.

    The behaviour is assembled from a ``LanguageConfig``: plain strings are
    copied over, while the ``*_fn_name`` entries are resolved to attributes of
    the ``settings`` module.
    """

    language: str
    compile_cmd: str = ""
    execute_cmd: str = ""
    compile_flags: str = ""
    execute_flags: str = ""
    file_name: Callable[[str], str] | str
    sanitize: Callable[[str], str] | None
    _compile: Callable[[Path, str, str], tuple[str | None, Path]] | None
    _execute: Callable[[Path, str, str], str]
    timelimit_factor: int = 1
    extend_mem_for_vm: bool = False
    extend_mem_flag_name: str = ""

    def __init__(self, cfg: LanguageConfig):
        """Copy the settings of ``cfg`` and resolve its hooks in ``settings``."""
        self.language = cfg.language
        self.compile_cmd = cfg.compile_cmd
        self.compile_flags = cfg.compile_flags
        self.execute_cmd = cfg.execute_cmd
        self.execute_flags = cfg.execute_flags
        self.timelimit_factor = cfg.timelimit_factor

        # A name unknown to ``settings`` is kept as a literal file name.
        name_or_fn = cfg.file_name_fn_or_str_name
        self.file_name = getattr(settings, name_or_fn, name_or_fn)
        # Missing sanitizer / compiler hooks resolve to None.
        self.sanitize = getattr(settings, cfg.sanitize_fn_name, None)
        self._compile = getattr(settings, cfg.compile_fn_name, None)
        # A missing execute hook falls back to one yielding an empty command.
        self._execute = getattr(settings, cfg.execute_fn_name, lambda _, __, ___: "")

        self.extend_mem_for_vm = cfg.extend_mem_for_vm
        self.extend_mem_flag_name = cfg.extend_mem_flag_name

    @staticmethod
    def _join_flags(configured, extra):
        """Join configured and per-call flags with one space; None counts as ''."""
        left = "" if configured is None else configured
        right = "" if extra is None else extra
        return left + " " + right

    @property
    def is_compiled_language(self):
        """True when a compile hook is set."""
        return self._compile is not None

    @property
    def has_sanitizer(self):
        """True when a sanitizer hook is set."""
        return self.sanitize is not None

    def get_info(self):
        """Return a dict describing this runtime."""
        return {
            "runtime_name": self.language,
            "compile_cmd": self.compile_cmd,
            "compile_flags": self.compile_flags,
            "execute_cmd": self.execute_cmd,
            "execute_flags": self.execute_flags,
            "timelimit_factor": self.timelimit_factor,
            "is_compiled": self.is_compiled_language,
            "has_sanitizer": self.has_sanitizer,
        }

    def get_file_path(self, source_code: str) -> Path | settings.JavaClassNotFoundError:
        """Return the file name to store ``source_code`` under.

        A callable ``file_name`` is asked to derive the name from the source;
        a ``JavaClassNotFoundError`` it returns is handed back unchanged.
        """
        name = self.file_name
        if not isinstance(name, str):
            name = name(source_code)
            if isinstance(name, settings.JavaClassNotFoundError):
                return name
        return Path(name)

    def compile(
        self,
        source_code_path: Path,
        cmd: str | None = None,
        flags: str | None = None,
    ):
        """Return what the compile hook produces for ``source_code_path``.

        ``cmd`` replaces the configured compile command when given; ``flags``
        are appended to the configured flags. Without a compile hook the
        result is ``[None, source_code_path]``.
        """
        hook = self._compile
        if hook is None:
            return [None, source_code_path]

        command = cmd if cmd is not None else self.compile_cmd
        return hook(
            source_code_path,
            command,
            self._join_flags(self.compile_flags, flags),
        )

    def execute(
        self, executable: Path, cmd: str | None = None, flags: str | None = None
    ):
        """Return what the execute hook produces for ``executable``.

        ``cmd`` replaces the configured execute command when given; ``flags``
        are appended to the configured flags.
        """
        command = cmd if cmd is not None else self.execute_cmd
        return self._execute(
            executable,
            command,
            self._join_flags(self.execute_flags, flags),
        )
def init_sanitize_c_cpp() -> Callable[[str], str]:
    """Build the sanitizer applied to C/C++ sources before compilation.

    The returned ``sanitize(source_code)`` function:

    * drops every non-ASCII character;
    * renames ``*_nolock`` I/O functions, ``__int64`` and the ``__popcnt``
      intrinsics to ``*_unlocked``, ``long long`` and ``__builtin_popcount*``;
    * rewrites ``gets(x)`` calls to ``fgets(x, sizeof(x), stdin)``;
    * renames ``visit`` to ``__visit`` when no ``variant<...>`` is used;
    * filters the source line by line (see the loop below).
    """
    _cpp_variant_lib_pattern = re.compile(r"variant\s*<.+>")
    # A call to gets() itself: the name must not be the tail of a longer
    # identifier such as fgets. A look-behind is used so that the character in
    # front of the call is kept and a call at the very start is found too.
    _gets_call_pattern = re.compile(r"(?<![A-Za-z0-9_])gets\((.+)\)")
    # Applied in order. "__popcnt64" must come before its prefix "__popcnt",
    # otherwise the shorter name is rewritten first and the longer one can
    # never match.
    _renames = (
        ("_getchar_nolock", "getchar_unlocked"),
        ("_getwchar_nolock", "getwchar_unlocked"),
        ("_putc_nolock", "putc_unlocked"),
        ("_putwc_nolock", "putwc_unlocked"),
        ("_getc_nolock", "getc_unlocked"),
        ("_getwc_nolock", "getwc_unlocked"),
        ("_putchar_nolock", "putchar_unlocked"),
        ("_putwchar_nolock", "putwchar_unlocked"),
        ("__int64", "long long"),
        ("__popcnt64", "__builtin_popcountll"),
        ("__popcnt", "__builtin_popcount"),
    )

    def sanitize(source_code: str) -> str:
        """Return ``source_code`` adapted as described on the factory."""
        source_code = "".join(ch for ch in source_code if ord(ch) < 128)

        # _getchar_nolock, _getwchar_nolock, __int64 issue
        for old_name, new_name in _renames:
            source_code = source_code.replace(old_name, new_name)

        # gets to fgets
        source_code = _gets_call_pattern.sub(
            r"fgets(\1, sizeof(\1), stdin)", source_code
        )

        # variant library visit issue c++17: "visit" is only renamed when the
        # source does not use variant<...>.
        if _cpp_variant_lib_pattern.search(source_code) is None:
            source_code = source_code.replace("visit", "__visit")

        _sanitized_lines = []
        for _line in source_code.split("\n"):
            # pragma issue
            if _line.startswith("#pragma GCC"):
                _sanitized_lines.append(" // " + _line)
            # NOTE(review): the two literals below are empty in this copy of
            # the file, so the tests are true for EVERY line and each line
            # that reaches them is dropped. Presumably they were header names
            # lost in extraction - restore and confirm.
            elif "" in _line:
                continue
            elif "" in _line:
                continue
            elif _line.strip().startswith("#include"):
                _sanitized_lines.append(_line.replace("\\", "/").lower())
            elif "#define _GLIBCXX_DEBUG" in _line:
                continue
            else:
                _sanitized_lines.append(_line)

        return "\n".join(_sanitized_lines)

    return sanitize
".java" 111 | 112 | return _java_file_name_suffix 113 | 114 | 115 | java_file_name_suffix = init_java_file_name_suffix() 116 | 117 | generic_binary_execute = lambda x, _, __: str(x) 118 | 119 | generic_interpreted_execute = lambda x, cmd, flags: f"{cmd} {flags} {x}" 120 | 121 | generic_java_execute = ( 122 | lambda x, cmd, flags: f"{cmd} {flags} -cp {x.parent} {x.stem}" 123 | ) 124 | 125 | generic_kotlin_execute = ( 126 | lambda x, cmd, flags: f"{cmd} {flags} {str(x)}" 127 | ) -------------------------------------------------------------------------------- /execution_engine/start_engine.sh: -------------------------------------------------------------------------------- 1 | #echo $NUM_WORKERS 2 | for (( i=0; i<$NUM_WORKERS; i++ )) 3 | do 4 | I_GID=$(($RUN_GID+$i)) 5 | I_UID=$(($RUN_UID+$i)) 6 | groupadd -g $I_GID runner$I_GID && useradd -M runner$I_UID -g $I_GID -u $I_UID 7 | echo "Created " $(id runner$I_UID) 8 | done 9 | 10 | gunicorn \ 11 | -w ${NUM_WORKERS} \ 12 | --bind 0.0.0.0:${GUNICORN_PORT} \ 13 | --timeout 0 \ 14 | --log-level ${LOG_LEVEL} \ 15 | "wsgi:app" 16 | -------------------------------------------------------------------------------- /execution_engine/test_codes/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | int main() { 3 | int a, b; 4 | scanf("%d%d", &a, &b); 5 | printf("%d\n", a+b); 6 | return 0; 7 | } -------------------------------------------------------------------------------- /execution_engine/test_codes/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace std; 3 | int main() { 4 | int a, b; 5 | cin>>a>>b; 6 | cout< { 8 | inputString += inputStdin; 9 | }); 10 | 11 | process.stdin.on('end', _ => { 12 | inputString = inputString 13 | .trim() 14 | .split('\n') 15 | .map(string => { 16 | return string.trim(); 17 | }); 18 | 19 | main(); 20 | }); 21 | 22 | function readline() { 23 | return inputString[currentLine++]; 
24 | } 25 | 26 | function main() { 27 | s = readline(); 28 | // print(s.split(' ')); 29 | let [a, b] = s.split(' ').map(x => parseInt(x)); 30 | print(a + b); 31 | } 32 | -------------------------------------------------------------------------------- /execution_engine/test_codes/test.kt: -------------------------------------------------------------------------------- 1 | import java.util.Scanner 2 | 3 | fun main(args: Array) { 4 | val reader = Scanner(System.`in`) 5 | val first: Int = reader.nextInt() 6 | val second: Int = reader.nextInt() 7 | val sum = first + second 8 | 9 | println(sum) 10 | } -------------------------------------------------------------------------------- /execution_engine/test_codes/test.php: -------------------------------------------------------------------------------- 1 | = line.split_whitespace() 8 | .map(|x| x.parse().expect("Not an integer!")) 9 | .collect(); 10 | println!("{}", inputs[0]+inputs[1]); 11 | } 12 | -------------------------------------------------------------------------------- /execution_engine/unittest.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | from exec_outcome import ExecOutcome 4 | from helper import convert_crlf_to_lf 5 | 6 | 7 | @dataclass 8 | class Unittest: 9 | input: str 10 | output: str 11 | result: str | None = None 12 | exec_outcome: ExecOutcome | None = None 13 | 14 | def __post_init__(self): 15 | self.input = convert_crlf_to_lf(self.input) 16 | self.output = convert_crlf_to_lf(self.output) 17 | 18 | def update_result(self, result): 19 | self.result = result 20 | 21 | def update_exec_outcome(self, exec_outcome): 22 | self.exec_outcome = exec_outcome 23 | 24 | def match_output(self): 25 | return self.result == self.output 26 | 27 | 28 | @dataclass 29 | class ExtendedUnittest: 30 | input: str 31 | output: list[str] = field(default_factory=list) 32 | result: str | None = None 33 | exec_outcome: ExecOutcome | None = None 
34 | time_consumed: float | None = None 35 | peak_memory_consumed: str | None = None 36 | 37 | def __post_init__(self): 38 | self.input = convert_crlf_to_lf(self.input) 39 | self.output = [convert_crlf_to_lf(o) for o in self.output.copy()] 40 | 41 | def update_time_mem(self, tc, mc): 42 | self.time_consumed = tc 43 | self.peak_memory_consumed = mc 44 | 45 | def update_result(self, result): 46 | self.result = result 47 | 48 | def update_exec_outcome(self, exec_outcome): 49 | self.exec_outcome = exec_outcome 50 | 51 | def match_output(self, result=None): 52 | if result is None: 53 | result = self.result 54 | return result in self.output 55 | 56 | def json(self): 57 | _json = self.__dict__.copy() 58 | if self.exec_outcome is not None: 59 | _json["exec_outcome"] = self.exec_outcome.value 60 | 61 | return _json 62 | -------------------------------------------------------------------------------- /execution_engine/wsgi.py: -------------------------------------------------------------------------------- 1 | from app import app 2 | 3 | if __name__ == "__main__": 4 | app.run() 5 | --------------------------------------------------------------------------------