├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── alpha_codium ├── __init__.py ├── code_contests │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── prepare_and_clean_dataset.py │ │ ├── provider.py │ │ └── yaml_vs_json.py │ └── eval │ │ ├── __init__.py │ │ ├── code_contests_metric.py │ │ ├── code_test_runners.py │ │ ├── local_exec.py │ │ ├── pass_at_k_evaluator.py │ │ └── tracer.py ├── evaluate_dataset.py ├── gen │ ├── __init__.py │ ├── coding_competitor.py │ ├── dataset_solver.py │ ├── example.log │ ├── generators.py │ ├── stages │ │ ├── indirect │ │ │ ├── run_analyze_and_fix_test_failure.py │ │ │ ├── run_analyze_tests_failure.py │ │ │ ├── run_fix_code_from_tests_failure.py │ │ │ ├── run_fix_self_reflect.py │ │ │ └── run_validate_ai_test.py │ │ ├── run_baseline.py │ │ ├── run_choose_best_solution.py │ │ ├── run_evaluate_all_ai_tests.py │ │ ├── run_evaluate_public_tests.py │ │ ├── run_generate_ai_test.py │ │ ├── run_generate_possible_solutions.py │ │ ├── run_initial_code_generation.py │ │ ├── run_initial_solve.py │ │ ├── run_self_reflect.py │ │ ├── run_tests.py │ │ └── utils.py │ └── utils.py ├── litellm │ └── proxy │ │ └── _types.py ├── llm │ ├── __init__.py │ ├── ai_handler.py │ ├── ai_invoker.py │ └── token_handler.py ├── log │ └── __init__.py ├── settings │ ├── .secrets_template.toml │ ├── choose_best_solution_direct.toml │ ├── code_contests_prompt_analyze_and_fix.toml │ ├── code_contests_prompt_analyze_and_fix_direct.toml │ ├── code_contests_prompt_analyze_failure.toml │ ├── code_contests_prompts_baseline.toml │ ├── code_contests_prompts_choose_best_solution.toml │ ├── code_contests_prompts_fix_solution.toml │ ├── code_contests_prompts_generate_ai_tests.toml │ ├── code_contests_prompts_generate_possible_solutions.toml │ ├── code_contests_prompts_reflect.toml │ ├── code_contests_prompts_solve.toml │ ├── code_contests_prompts_solve_direct.toml │ ├── code_contests_prompts_validate_ai_tests.toml │ ├── code_contests_prompts_validate_reflection.toml │ ├── config_loader.py │ └── configuration.toml ├── solve_dataset.py ├── solve_my_problem.py └── solve_problem.py ├── docs ├── docs │ ├── CNAME │ ├── assets │ │ ├── favicon.ico │ │ ├── logo.png │ │ └── logo.svg │ ├── css │ │ └── custom.css │ └── index.md ├── mkdocs.yml └── overrides │ ├── main.html │ └── partials │ ├── footer.html │ └── integrations │ └── analytics │ └── custom.html ├── my_problem_example.json ├── pics ├── comparison.png ├── computational_effort.png ├── example_problem.png ├── iterations.png └── proposed_flow.png ├── requirements.txt └── tests ├── __init__.py └── alpha_codium ├── __init__.py └── code_contests ├── __init__.py └── eval ├── __init__.py └── test_local_exec.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set default behavior to automatically normalize line endings. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted to native line endings on checkout. 5 | *.py text eol=lf 6 | 7 | # Declare files that will always have CRLF line endings on checkout. 8 | *.bat text eol=crlf 9 | 10 | # Declare files that should never be normalized. 
11 | *.jpg -text 12 | *.png -text 13 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: docs-ci 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - add-docs-portal 7 | permissions: 8 | contents: write 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Configure Git Credentials 15 | run: | 16 | git config user.name github-actions[bot] 17 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 18 | - uses: actions/setup-python@v5 19 | with: 20 | python-version: 3.x 21 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 22 | - uses: actions/cache@v4 23 | with: 24 | key: mkdocs-material-${{ env.cache_id }} 25 | path: .cache 26 | restore-keys: | 27 | mkdocs-material- 28 | - run: pip install mkdocs-material 29 | - run: pip install "mkdocs-material[imaging]" 30 | - run: mkdocs gh-deploy -f docs/mkdocs.yml --force -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # MacOS 2 | .DS_Store 3 | 4 | # Python virtual environment 5 | venv/ 6 | 7 | # codecontests datasets 8 | # https://huggingface.co/datasets/talrid/CodeContests_valid_and_test_AlphaCodium/blob/main/codecontests_valid_and_test_processed_alpha_codium.zip 9 | valid_and_test_processed/ 10 | 11 | # Python cache files 12 | **/__pycache__/ 13 | 14 | # Cache files generated during documentation builds 15 | docs/.cache/ 16 | 17 | # IDEA project-specific settings and configuration files 18 | .idea/ 19 | 20 | alpha_codium/settings/.secrets.toml 21 | dataset_output.json 22 | example.log 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV TZ=UTC 4 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 5 | 6 | ENV DEBIAN_FRONTEND=noninteractive 7 | 8 | RUN apt-get update && apt-get install -y \ 9 | clang \ 10 | curl \ 11 | git \ 12 | vim \ 13 | build-essential \ 14 | libffi-dev \ 15 | libssl-dev \ 16 | zlib1g-dev \ 17 | libbz2-dev \ 18 | libreadline-dev \ 19 | libsqlite3-dev \ 20 | software-properties-common \ 21 | vim 22 | 23 | 24 | RUN add-apt-repository ppa:deadsnakes/ppa -y 25 | 26 | RUN apt install python3.9-dev -y 27 | 28 | RUN apt install -y python3-pip \ 29 | python3.9-distutils 30 | 31 | RUN python3.9 -m pip install --upgrade pip 32 | 33 | RUN python3.9 --version 34 | 35 | RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 36 | RUN update-alternatives --set python /usr/bin/python3.9 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /alpha_codium/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import numpy as np 5 | 6 | 7 | def set_all_seeds(seed): 8 | random.seed(seed) 9 | np.random.seed(seed) 10 | os.environ["PYTHONHASHSEED"] = str(seed) 11 | 12 | try: 13 | import tensorflow as tf 14 | tf.random.set_seed(seed) 15 | except ImportError: 16 | pass 17 | 18 | try: 19 | import torch 20 | torch.manual_seed(seed) 21 | if torch.cuda.is_available(): 22 | torch.cuda.manual_seed(seed) 23 | torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 
24 | except ImportError: 25 | pass 26 | 27 | set_all_seeds(1337) -------------------------------------------------------------------------------- /alpha_codium/code_contests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/alpha_codium/code_contests/__init__.py -------------------------------------------------------------------------------- /alpha_codium/code_contests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/alpha_codium/code_contests/data/__init__.py -------------------------------------------------------------------------------- /alpha_codium/code_contests/data/prepare_and_clean_dataset.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import copy 3 | import json 4 | import os 5 | import shutil 6 | from collections import OrderedDict 7 | import time 8 | import numpy as np 9 | from datasets import Dataset 10 | 11 | from alpha_codium.code_contests.data.provider import CodeContestDataProvider 12 | from alpha_codium.log import get_logger, setup_logger 13 | from alpha_codium.gen.utils import evaluate_solution_on_subset 14 | from alpha_codium.settings.config_loader import get_settings 15 | 16 | logger = get_logger(__name__) 17 | 18 | 19 | def preapare_and_clean_dataset(dataset_name='valid_and_test'): 20 | 21 | # process base dataset 22 | output_dataset_name = 'valid_and_test_processed' 23 | base_path = os.path.expanduser(get_settings().etl.private_dataset_cache_dir) 24 | output_path = os.path.join(base_path, output_dataset_name) 25 | data_provider = CodeContestDataProvider(dataset_location=dataset_name) 26 | 27 | # add and process the multiple_solutions field 28 | data_provider = add_multiple_solutions_field(data_provider) 29 | 30 | # will add 'is_valid_test' field to all problems 31 | data_provider = add_is_valid_field(data_provider) 32 | 33 | data_provider = problem_3_validation_fix(data_provider) 34 | data_provider = problem_29_test_fix(data_provider) 35 | data_provider = problem_92_test_fix(data_provider) 36 | 37 | # sorting so that 'python' solutions will be first 38 | data_provider = sort_solution_by_language(data_provider) 39 | 40 | # calc if there are valid solutions to the problem. 
if not, mark the problem as invalid 41 | data_provider = calc_is_valid_problem(data_provider) 42 | 43 | # save the dataset 44 | data_provider.dataset.save_to_disk(output_path) 45 | 46 | 47 | def calc_is_valid_problem(data_provider): 48 | get_settings().code_tester.sandbox = False 49 | th_correct = 0.2 # if less than 20% of the solutions are correct, mark the problem as invalid 50 | max_tests = 25 51 | 52 | for split_name in ['valid', 'test']: 53 | ds = data_provider.dataset[split_name] 54 | ds_dict = ds.to_dict() 55 | ds_dict['is_valid_problem'] = [True] * len(ds) 56 | solutions_list = ds_dict['solutions'] 57 | for i, solutions in enumerate(solutions_list): 58 | logger.info(f"processing problem {i} in split '{split_name}' for valid solutions") 59 | problem_dict = ds[i] 60 | s_list = solutions['solution'] 61 | l_list = solutions['language'] 62 | s_list = [s for s, l in zip(s_list, l_list) if 'python' in l.lower()] 63 | l_list = [l for l in l_list if 'python' in l.lower()] 64 | if len(s_list) < 5: 65 | logger.info(f"problem {i} in split '{split_name}' has less than 5 python solutions, cannot validate") 66 | continue 67 | test_failed_private_list = [] 68 | test_failed_generated_list = [] 69 | counter = 0 70 | timeout_len = 60 # 60 seconds 71 | start_time = time.time() 72 | for language, sol in zip(l_list, s_list): 73 | if 'python' not in language.lower(): 74 | continue 75 | counter += 1 76 | if counter > max_tests: 77 | continue 78 | if time.time() > start_time + timeout_len: 79 | continue 80 | # test_results, test_passed_public, test_failed_public, test_timeout_public \ 81 | # = evaluate_solution_on_subset('public_tests', problem_dict, sol, silent=True, break_on_timeout=True) 82 | test_results, test_passed_private, test_failed_private, test_timeout_private \ 83 | = evaluate_solution_on_subset('private_tests', problem_dict, sol, silent=True, 84 | break_on_timeout=True) 85 | test_results, test_passed_generate, test_failed_generate, test_timeout_generate \ 86 | = evaluate_solution_on_subset('generated_tests', problem_dict, sol, silent=True, 87 | break_on_timeout=True) 88 | test_failed_private_list.append(test_failed_private) 89 | test_failed_generated_list.append(test_failed_generate) 90 | if (time.time() > start_time + timeout_len) and counter > 10: 91 | continue 92 | if not test_failed_private_list: 93 | logger.info(f"problem {i} in split '{split_name}' has no python solutions") 94 | continue 95 | test_failed_private_list = np.array(test_failed_private_list) 96 | test_failed_generated_list = np.array(test_failed_generated_list) 97 | frac_correct = np.sum((test_failed_private_list + test_failed_generated_list) == 0) / len( 98 | test_failed_private_list) 99 | 100 | # final decision 101 | if frac_correct < th_correct: 102 | logger.info(f"Failed - problem {i} in split {split_name} is invalid, has {frac_correct*100}% correct solutions, " 103 | f"total of {len(test_failed_private_list)} solutions processed") 104 | ds_dict['is_valid_problem'][i] = False 105 | else: 106 | logger.info(f"Passed - problem {i} in split {split_name} is valid, has {frac_correct*100}% correct solutions, " 107 | f"total of {len(test_failed_private_list)} solutions processed") 108 | 109 | data_provider.dataset[split_name] = Dataset.from_dict(ds_dict) 110 | return data_provider 111 | def add_multiple_solutions_field(data_provider): 112 | for split_name in ['valid', 'test']: 113 | multiple_solutions_list = np.array([False] * len(data_provider.dataset[split_name])) 114 | ds = data_provider.dataset[split_name] 115 | for i, p in 
enumerate(ds): 116 | d_output = p['description'].split('Output\n')[1] 117 | if ('multiple solutions' in p['description'] or 'multiple possible solutions' in p['description'] 118 | or 'multiple possible solutions' in p['description'] or 'multiple' in d_output): 119 | # print(f"problem {i} has multiple solutions") 120 | # print(f"=========\n{p['description']}\n=======\n\n") 121 | multiple_solutions_list[i] = True 122 | else: 123 | multiple_solutions_list[i] = False 124 | 125 | data_provider.dataset[split_name] = data_provider.dataset[split_name].add_column('multiple_solutions', 126 | multiple_solutions_list) 127 | return data_provider 128 | 129 | 130 | def sort_solution_by_language(data_provider): 131 | # sorting so that 'python' solutions will be first 132 | for split_name in ['valid', 'test']: 133 | ds_dict = data_provider.dataset[split_name].to_dict() 134 | solutions_list = ds_dict['solutions'] 135 | for i, p in enumerate(solutions_list): 136 | np_lang = np.array(p['language']) 137 | ind_sorted = np.concatenate( 138 | (np.argwhere(np_lang == 'PYTHON3'), np.argwhere(np_lang == 'CPP'), np.argwhere(np_lang == 'JAVA'))) 139 | p['solution'] = [p['solution'][i[0]] for i in ind_sorted] 140 | p['language'] = [p['language'][i[0]] for i in ind_sorted] 141 | data_provider.dataset[split_name] = Dataset.from_dict(ds_dict) 142 | return data_provider 143 | def add_is_valid_field(data_provider): 144 | for split_name in ['valid', 'test']: 145 | ds_dict = data_provider.dataset[split_name].to_dict() 146 | ds_dict['public_tests'][0]['is_valid_test'] = None 147 | ds_dict['private_tests'][0]['is_valid_test'] = None 148 | ds_dict['generated_tests'][0]['is_valid_test'] = None 149 | data_provider.dataset[split_name] = Dataset.from_dict(ds_dict) 150 | return data_provider 151 | 152 | def problem_3_validation_fix(data_provider): 153 | # problem 3 validation fix generated tests 154 | ind_problem_valid = 3 155 | split_name = 'valid' 156 | dataset_dict = data_provider.dataset[split_name].to_dict() 157 | p_3 = data_provider.dataset[split_name][ind_problem_valid] 158 | p_3_generated_tests = p_3['generated_tests'] 159 | is_valid_test = [True] * len(p_3_generated_tests['input']) 160 | count_false = 0 161 | count_correct = 0 162 | for i, input in enumerate(p_3_generated_tests['input']): 163 | n, m, x = input.splitlines()[0].split() 164 | n = int(n) 165 | m = int(m) 166 | a = input.splitlines()[1].split() 167 | b = input.splitlines()[2].split() 168 | if (n != len(a) or m != len(b)): # according to the description, they should be equal 169 | count_false += 1 170 | is_valid_test[i] = False 171 | else: 172 | count_correct += 1 173 | dataset_dict['generated_tests'][ind_problem_valid]['is_valid_test'] = is_valid_test 174 | data_provider.dataset[split_name] = Dataset.from_dict(dataset_dict) 175 | return data_provider 176 | 177 | def problem_29_test_fix(data_provider): 178 | ind_problem_test = 29 179 | split_name = 'test' 180 | dataset_dict = data_provider.dataset[split_name].to_dict() 181 | p_29 = data_provider.dataset[split_name][ind_problem_test] 182 | p_29_generated_tests = p_29['generated_tests'] 183 | is_valid_arr_generated = [True] * len(p_29_generated_tests['input']) 184 | for i, input in enumerate(p_29_generated_tests['input']): 185 | for l in input.split(): 186 | l_n = np.array(list(map(int, l.split()))) 187 | if any(l_n < 0): # according to the description, they should be >=0 188 | is_valid_arr_generated[i] = False 189 | break 190 | 191 | s = input.split('\n', 1) 192 | n = int(s[0].strip()) 193 | a = 
s[1].strip().split('\n') 194 | for j in range(n): 195 | num_elements = int(a[2 * j].strip()) 196 | if num_elements != len(a[2 * j + 1].strip().split(' ')): # according to the description, they should be equal 197 | is_valid_arr_generated[i] = False 198 | break 199 | 200 | 201 | dataset_dict['generated_tests'][ind_problem_test]['is_valid_test'] = is_valid_arr_generated 202 | data_provider.dataset[split_name] = Dataset.from_dict(dataset_dict) 203 | return data_provider 204 | 205 | def problem_92_test_fix(data_provider): 206 | ind_problem_test = 92 207 | split_name = 'test' 208 | dataset_dict = data_provider.dataset[split_name].to_dict() 209 | p_92 = data_provider.dataset[split_name][ind_problem_test] 210 | p_92_private_tests = p_92['private_tests'] 211 | is_valid_arr_private = [True] * len(p_92_private_tests['input']) 212 | for i, input in enumerate(p_92_private_tests['input']): 213 | if len(set( 214 | input)) != 4: # {'a', 'b', '1', '\n'} - according to the description, the string should contain only 'a' and 'b' 215 | is_valid_arr_private[i] = False 216 | 217 | p_92_generated_tests = p_92['generated_tests'] 218 | is_valid_arr_generated = [True] * len(p_92_generated_tests['input']) 219 | for i, input in enumerate(p_92_generated_tests['input']): 220 | if len(set( 221 | input)) != 4: # {'a', 'b', '1', '\n'} - according to the description, the string should contain only 'a' and 'b' 222 | is_valid_arr_generated[i] = False 223 | 224 | dataset_dict['generated_tests'][ind_problem_test]['is_valid_test'] = is_valid_arr_generated 225 | dataset_dict['private_tests'][ind_problem_test]['is_valid_test'] = is_valid_arr_private 226 | data_provider.dataset[split_name] = Dataset.from_dict(dataset_dict) 227 | return data_provider 228 | 229 | if __name__ == "__main__": 230 | preapare_and_clean_dataset() 231 | -------------------------------------------------------------------------------- /alpha_codium/code_contests/data/provider.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | from typing import Iterable 4 | 5 | import duckdb 6 | import numpy as np 7 | import pandas as pd 8 | from datasets import Dataset, DatasetDict, load_dataset, load_from_disk 9 | from datasets.features.features import Sequence, Value 10 | 11 | from alpha_codium.settings.config_loader import get_settings 12 | 13 | problem_translations = ("source", "difficulty") 14 | 15 | solution_translations = ("solutions", "incorrect_solutions") 16 | 17 | 18 | class CodeContestDataProvider: 19 | 20 | def __init__(self, dataset_location, connection=None): 21 | self.private_datasets_root = os.path.expanduser( 22 | get_settings().config.private_dataset_cache_dir 23 | ) 24 | ( 25 | self.dataset_location, 26 | self.dataset_name, 27 | self.load_from_disk, 28 | ) = self.parse_location(dataset_location) 29 | self.dataset = self.load_dataset() 30 | self.connection = connection or duckdb.connect() 31 | self.connect(self.dataset) 32 | 33 | 34 | @staticmethod 35 | def find_problem(ds, problem_name, split_name=None, evaluation_test_type = None): 36 | if split_name: 37 | ds = ds[split_name] 38 | example = None 39 | if not problem_name: 40 | for e in ds: 41 | if evaluation_test_type: 42 | tests = e.get(evaluation_test_type) 43 | if tests and tests.get("input"): 44 | example = e 45 | break 46 | else: 47 | example = e 48 | break 49 | else: 50 | problems = ds.filter(lambda example: example['name'] == problem_name) 51 | if problems: 52 | example = problems[0] 53 | else: 54 | raise ValueError( 55 | 
f"problem with name {problem_name} doesn't exist in dataset {ds.info.dataset_name} in split {split_name}") 56 | return example 57 | 58 | @staticmethod 59 | def prepare_for_evaluation( 60 | predictions, source_of_truth, evaluation_test_type 61 | ): 62 | preds = predictions 63 | sot = source_of_truth 64 | sot = sot.select_columns(["name", evaluation_test_type]) 65 | sot = sot.rename_column("name", "task_name") 66 | sot = sot.flatten() 67 | sot = sot.rename_column(f"{evaluation_test_type}.input", "tests_inputs") 68 | sot = sot.rename_column(f"{evaluation_test_type}.output", "tests_outputs") 69 | 70 | joined = sot.to_pandas().merge(preds.to_pandas(), on="task_name", how="left") 71 | joined["predictions"] = joined[["task_name", "solution_candidates"]].to_dict( 72 | "records" 73 | ) 74 | joined["references"] = joined[["tests_inputs", "tests_outputs"]].to_dict( 75 | "records" 76 | ) 77 | 78 | # Retain only the 'predictions' and 'references' columns 79 | joined = joined[["predictions", "references"]] 80 | restructured_dataset = Dataset.from_pandas(joined) 81 | return restructured_dataset 82 | 83 | def parse_location(self, dataset_location): 84 | result_location = dataset_location 85 | dataset_name = dataset_location.split(os.path.sep)[-1] 86 | load_from_disk = True 87 | if load_from_disk: 88 | if not result_location.startswith(os.path.sep): 89 | result_location = os.path.join( 90 | self.private_datasets_root, result_location 91 | ) 92 | return result_location, dataset_name, load_from_disk 93 | 94 | @staticmethod 95 | def prepare_code_contest_split_for_eval( 96 | ds, evaluation_test_type="public_tests", task_name_column="name", 97 | path_to_solutions_column="solutions.solution" 98 | ): 99 | solutions = ds.flatten() 100 | solutions = solutions.rename_column( 101 | path_to_solutions_column, "solution_candidates" 102 | ) 103 | solutions = solutions.rename_column(task_name_column, "task_name") 104 | solutions = solutions.select_columns(["task_name", "solution_candidates"]) 105 | return CodeContestDataProvider.prepare_for_evaluation( 106 | predictions=solutions, 107 | source_of_truth=ds, 108 | evaluation_test_type=evaluation_test_type, 109 | ) 110 | 111 | def show(self, ds, paths_to_python, paths_to_free_text): 112 | result = ds.flatte() 113 | 114 | def format_example(example): 115 | for code_col in paths_to_python: 116 | import black 117 | 118 | example[code_col] = black.format_str(example[code_col]) 119 | for col in paths_to_free_text: 120 | example[col] = example[col].replace("\\n", "\n") 121 | 122 | pretty = result.map(format_example) 123 | return pretty 124 | 125 | def load_dataset(self): 126 | if self.load_from_disk: 127 | f = load_from_disk 128 | else: 129 | f = load_dataset 130 | 131 | return f(self.dataset_location) 132 | 133 | def connect(self, ds): 134 | if hasattr(ds, "keys"): 135 | for split in self.dataset.keys(): 136 | split_ds = self.dataset[split] 137 | table = split_ds.data.table 138 | self.connection.register(f"{split_ds.info.dataset_name}_{split}", table) 139 | else: 140 | self.connection.register(f"{ds.info.dataset_name}", ds.data.table) 141 | 142 | def get_splits(self): 143 | return self.dataset.keys() 144 | 145 | @staticmethod 146 | def sample(ds, fraction=0.1): 147 | table = ds 148 | sample_size = int(len(table) * fraction) 149 | indices = np.random.choice(len(table), sample_size, replace=False) 150 | sampled_table = table.select(indices) 151 | return sampled_table 152 | 153 | def query(self, query_string) -> pd.DataFrame: 154 | return self.connection.query(query_string).df() 155 
| 156 | def translate_references(self, ds): 157 | expand = False 158 | if not isinstance(ds, DatasetDict): 159 | to_translate = {"ds": ds} 160 | expand = True 161 | else: 162 | to_translate = ds 163 | for ds_name, ds_val in to_translate.items(): 164 | for col in problem_translations: 165 | translated_col = ds_val.features[col].int2str(ds_val[col]) 166 | ds_val = ds_val.remove_columns([col]) 167 | ds_val = ds_val.add_column(col, translated_col) 168 | 169 | def translate_sequence_references(example, ds): 170 | for col in solution_translations: 171 | translator = ds.features[col].feature["language"] 172 | arr = example[col]["language"] 173 | translated_solution = [translator.int2str(item) for item in arr] 174 | example[col]["language"] = translated_solution 175 | 176 | return example 177 | 178 | new_features = ds_val.features.copy() 179 | for col in solution_translations: 180 | new_features[col] = Sequence( 181 | feature={"language": Value("string"), "solution": Value("string")} 182 | ) 183 | 184 | ds_val = ds_val.map( 185 | lambda example, ds=ds_val: translate_sequence_references( 186 | example=example, ds=ds 187 | ), 188 | features=new_features, 189 | ) 190 | to_translate[ds_name] = ds_val 191 | result = to_translate 192 | if expand: 193 | result = result[ds] 194 | return result 195 | 196 | def filter_solution_by_languages(self, ds, languages: Iterable[str], keep=True): 197 | languages_set = set(languages) 198 | 199 | def filter_solutions_by_languages(example): 200 | for sol_col in solution_translations: 201 | langs = example[sol_col]["language"] 202 | sols = example[sol_col]["solution"] 203 | 204 | filtered_languages = [ 205 | lang for lang in langs if (lang in languages_set) == keep 206 | ] 207 | filtered_solutions = [ 208 | s 209 | for idx, s in enumerate(sols) 210 | if (langs[idx] in languages_set) == keep 211 | ] 212 | 213 | example[sol_col] = { 214 | "language": filtered_languages, 215 | "solution": filtered_solutions, 216 | } 217 | 218 | return example 219 | 220 | ds = ds.map(filter_solutions_by_languages) 221 | return ds 222 | -------------------------------------------------------------------------------- /alpha_codium/code_contests/data/yaml_vs_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | import yaml 3 | s1 = 'print("double quote string")' 4 | s2 = "print('single quote string')" 5 | s3 = 'print("""triple quote string""")' 6 | s4 = f"{s1}\n{s2}\n{s3}" 7 | 8 | # Create a dictionary with keys as variable names and values as the strings 9 | data = {'s1': s1, 's2': s2, 's3': s3, 's4': s4} 10 | 11 | # Convert the dictionary to a JSON-formatted string 12 | json_data = json.dumps(data, indent=2) 13 | print(json_data) 14 | 15 | # Convert the dictionary to a YAML-formatted string, with block scalar style 16 | yaml_data = yaml.dump(data, indent=2, default_style='|') 17 | print(yaml_data) 18 | 19 | -------------------------------------------------------------------------------- /alpha_codium/code_contests/eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/alpha_codium/code_contests/eval/__init__.py -------------------------------------------------------------------------------- /alpha_codium/code_contests/eval/code_contests_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Datasets Authors and the current 
dataset script contributor. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """The CodeContestsEval metric estimates the pass@k metric for code synthesis. 15 | This is an evaluation harness for the code_contests problem solving dataset 16 | described in the paper "Evaluating Large Language Models Trained on Code" 17 | (https://arxiv.org/abs/2107.03374).""" 18 | import itertools 19 | import os 20 | 21 | import datasets 22 | import evaluate 23 | import numpy as np 24 | 25 | from alpha_codium.code_contests.eval.code_test_runners import PythonTestsRunner 26 | 27 | local_runner = 'local' 28 | code_contests_runner = 'code_contests' 29 | 30 | 31 | _CITATION = """\ 32 | 33 | """ 34 | 35 | _DESCRIPTION = """\ 36 | This metric implements the evaluation harness for Deepmind's code_contests dataset. 37 | """ 38 | 39 | _KWARGS_DESCRIPTION = """ 40 | Calculates how good are predictions given some references, using certain scores 41 | Args: 42 | predictions: list of candidates to evaluate. Each candidates should be a list 43 | of strings with several code candidates to solve the problem. 44 | references: a list with a test for each prediction. Each test should evaluate the 45 | correctness of a code candidate. 46 | k: number of code candidates to consider in the evaluation (Default: [1, 10, 100]) 47 | num_workers: number of workers used to evaluate the candidate programs (Default: 4). 48 | timeout: 49 | Returns: 50 | pass_at_k: dict with pass rates for each k 51 | results: dict with granular results of each unittest 52 | Examples: 53 | >>> code_eval = evaluate.load("code_eval") 54 | >>> test_cases = ["assert add(2,3)==5"] 55 | >>> candidates = [["def add(a,b): return a*b", "def add(a, b): return a+b"]] 56 | >>> pass_at_k, results = code_eval.compute(references=test_cases, predictions=candidates, k=[1, 2]) 57 | >>> print(pass_at_k) 58 | {'pass@1': 0.5, 'pass@2': 1.0} 59 | """ 60 | 61 | _WARNING = """ 62 | ################################################################################ 63 | !!!WARNING!!! 64 | ################################################################################ 65 | The "code_eval" metric executes untrusted model-generated code in Python. 66 | Although it is highly unlikely that model-generated code will do something 67 | overtly malicious in response to this test suite, model-generated code may act 68 | destructively due to a lack of model capability or alignment. 69 | Users are strongly encouraged to sandbox this evaluation suite so that it 70 | does not perform destructive actions on their host or network. For more 71 | information on how OpenAI sandboxes its code, see the paper "Evaluating Large 72 | Language Models Trained on Code" (https://arxiv.org/abs/2107.03374). 73 | 74 | Once you have read this disclaimer and taken appropriate precautions, 75 | set the environment variable HF_ALLOW_CODE_EVAL="1". 
Within Python you can to this 76 | with: 77 | 78 | >>> import os 79 | >>> os.environ["HF_ALLOW_CODE_EVAL"] = "1" 80 | 81 | ################################################################################\ 82 | """ 83 | 84 | _LICENSE = """The MIT License 85 | 86 | Copyright (c) OpenAI (https://openai.com) 87 | 88 | Permission is hereby granted, free of charge, to any person obtaining a copy 89 | of this software and associated documentation files (the "Software"), to deal 90 | in the Software without restriction, including without limitation the rights 91 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 92 | copies of the Software, and to permit persons to whom the Software is 93 | furnished to do so, subject to the following conditions: 94 | 95 | The above copyright notice and this permission notice shall be included in 96 | all copies or substantial portions of the Software. 97 | 98 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 99 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 100 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 101 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 102 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 103 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 104 | THE SOFTWARE.""" 105 | 106 | 107 | os.environ["HF_ALLOW_CODE_EVAL"] = "1" 108 | 109 | @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) 110 | class CodeContestsEval(evaluate.Metric): 111 | def _info(self): 112 | if self.config_name not in [ 113 | local_runner, code_contests_runner 114 | ]: 115 | raise KeyError( 116 | "You should supply a configuration name selected in " 117 | f'[{code_contests_runner}, {local_runner}]' 118 | ) 119 | return evaluate.MetricInfo( 120 | # This is the description that will appear on the metrics page. 121 | description=_DESCRIPTION, 122 | citation=_CITATION, 123 | inputs_description=_KWARGS_DESCRIPTION, 124 | # This defines the format of each prediction and reference 125 | features=datasets.Features( 126 | { 127 | "predictions": { 128 | "task_name": datasets.Value("string"), 129 | "solution_candidates": datasets.Sequence( 130 | datasets.Value("string") 131 | ), 132 | }, 133 | "references": { 134 | "tests_inputs": datasets.Sequence(datasets.Value("string")), 135 | "tests_outputs": datasets.Sequence(datasets.Value("string")), 136 | }, 137 | } 138 | ), 139 | homepage="", 140 | codebase_urls=[""], 141 | reference_urls=[""], 142 | license=_LICENSE, 143 | ) 144 | 145 | def _compute( 146 | self, 147 | predictions, 148 | references, 149 | k=[1, 10, 100], # noqa: B006 150 | num_workers=10, 151 | timeout=3.0, 152 | ): 153 | if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1": 154 | raise ValueError(_WARNING) 155 | 156 | if os.name == "nt": 157 | raise NotImplementedError( 158 | "This metric is currently not supported on Windows." 
159 | ) 160 | runner = PythonTestsRunner.factory(self.config_name) 161 | inputs, results = runner.bulk_test(num_workers, predictions, references) 162 | correct, total = self.pass_fail_ratio(results) 163 | total = np.array(total) 164 | correct = np.array(correct) 165 | ks = k 166 | pass_at_k = { 167 | f"pass@{k}": estimate_pass_at_k(total, correct, k).mean() 168 | for k in ks 169 | if (total >= k).all() 170 | } 171 | 172 | return pass_at_k, inputs, results 173 | 174 | def pass_fail_ratio(self, results): 175 | total, correct = [], [] 176 | for task_id, all_candidates_test_results in results.items(): 177 | print(task_id) 178 | print("======================================") 179 | candidate_final_results = [] 180 | for candidate_id, test_results in enumerate(all_candidates_test_results): 181 | _results = [ 182 | test_result.passed for test_result in test_results.test_results 183 | ] 184 | print(f"{candidate_id} test results: {_results}") 185 | candidate_pass_fail = all(_results) 186 | print(f"{candidate_id} final pass/fail: {candidate_pass_fail}") 187 | candidate_final_results.append(candidate_pass_fail) 188 | total.append(len(candidate_final_results)) 189 | correct.append(sum(candidate_final_results)) 190 | print(f"{task_id} candidates: {candidate_final_results}") 191 | print("======================================") 192 | return correct, total 193 | 194 | 195 | def estimate_pass_at_k(num_samples, num_correct, k): 196 | """Estimates pass@k of each problem and returns them in an array.""" 197 | 198 | def estimator(n: int, c: int, k: int) -> float: 199 | """Calculates 1 - comb(n - c, k) / comb(n, k).""" 200 | if n - c < k: 201 | return 1.0 202 | denominator = np.math.factorial(n) / (np.math.factorial(k) * np.math.factorial(n - k)) 203 | numerator = 1.0 204 | for i in range(n - c + 1, n + 1): 205 | numerator *= 1.0 - k / i 206 | return 1.0 - numerator / denominator 207 | 208 | if isinstance(num_samples, int): 209 | num_samples_it = itertools.repeat(num_samples, len(num_correct)) 210 | else: 211 | assert len(num_samples) == len(num_correct) 212 | num_samples_it = iter(num_samples) 213 | 214 | return np.array( 215 | [estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)] 216 | ) 217 | -------------------------------------------------------------------------------- /alpha_codium/code_contests/eval/pass_at_k_evaluator.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from evaluate import load as load_metric 4 | 5 | from alpha_codium.code_contests.data.provider import CodeContestDataProvider 6 | from alpha_codium.settings.config_loader import get_settings 7 | 8 | 9 | def calculate_metrics(ds, k_values=[1, 10, 100]): # noqa: B006 10 | 11 | metric_path = os.path.join( 12 | os.path.dirname(os.path.abspath(__file__)), "code_contests_metric.py" 13 | ) 14 | metric = load_metric(metric_path, config_name=get_settings().code_tester.tester_type, module_type="metric") 15 | pass_at_k, inputs, result = metric.compute( 16 | predictions=ds["predictions"], references=ds["references"], k=k_values 17 | ) 18 | return pass_at_k, inputs, result 19 | 20 | 21 | def evaluate_code_contest_dataset( 22 | dataset_name, 23 | split_name='valid', 24 | k_values=[1, 10, 10], # noqa: B006 25 | evaluation_test_type='private_tests', 26 | path_to_solutions_column='solutions.solution', 27 | task_name_column='name', 28 | sample_rate=0.1, 29 | ): 30 | cc = CodeContestDataProvider(dataset_name) 31 | ds = cc.dataset[split_name] 32 | ds = cc.sample(ds, 
fraction=sample_rate) 33 | ds = CodeContestDataProvider.prepare_code_contest_split_for_eval(ds=ds, 34 | evaluation_test_type=evaluation_test_type, 35 | task_name_column=task_name_column, 36 | path_to_solutions_column=path_to_solutions_column) 37 | pass_at_k, inputs, result = calculate_metrics(ds, k_values=k_values) 38 | print(pass_at_k) 39 | 40 | 41 | def evaluate_gen_dataset(evaluation_test_type, ground_truth_dataset, ground_truth_split, k_values, solution_dataset): 42 | evaluation_set = CodeContestDataProvider(dataset_location=solution_dataset) 43 | gt_set = CodeContestDataProvider(dataset_location=ground_truth_dataset).dataset 44 | if ground_truth_split: 45 | gt_set = gt_set[ground_truth_split] 46 | prepared_solutions = evaluation_set.prepare_for_evaluation(evaluation_set.dataset, gt_set, 47 | evaluation_test_type=evaluation_test_type) 48 | pass_at_k, inputs, evaluation_results = calculate_metrics(prepared_solutions, k_values) 49 | print(pass_at_k) 50 | 51 | 52 | if __name__ == "__main__": 53 | evaluate_code_contest_dataset("assaf_test", evaluation_test_type="private_tests", sample_rate=0.05) 54 | -------------------------------------------------------------------------------- /alpha_codium/code_contests/eval/tracer.py: -------------------------------------------------------------------------------- 1 | import pysnooper 2 | 3 | from alpha_codium.settings.config_loader import get_settings 4 | 5 | filter_out_lines = ["Starting var:", 6 | "exec(", 7 | "Source path:", 8 | "globals_dict", 9 | "tracer.py", 10 | "snooping_inner_function()", 11 | "run_generated_code", 12 | "return function(*args, **kwargs)", 13 | "source_line = source[line_no - 1]", 14 | "Elapsed time:", 15 | "Return value:.. None"] 16 | 17 | snooper_kwargs = { 18 | 'color': False, 19 | 'relative_time': True, 20 | 'normalize': True, 21 | 'depth': get_settings().code_tester.trace_depth 22 | } 23 | 24 | snooper_kwargs_string = ", ".join(f"{key}={value}" for key, value in snooper_kwargs.items()) 25 | 26 | 27 | class FilteringTracer(pysnooper.tracer.Tracer): 28 | def trace(self, frame, event, arg): 29 | if not frame.f_code.co_filename == '<string>': 30 | return None 31 | 32 | return super().trace(frame, event, arg) 33 | 34 | 35 | class MockSourceLoader: 36 | 37 | def __init__(self, source): 38 | self.source = source 39 | 40 | def get_source(self, module_name): 41 | return self.source 42 | 43 | def wrap_solution(check_program): 44 | import_str = "import pysnooper as pysnooper\n" 45 | annotation = f"@custom_snoop(output=tracing, {snooper_kwargs_string})\n" 46 | entrypoint = "def run_code_contests_solution():\n" 47 | func_body = "\n".join([f"\t{line}" for line in check_program.split("\n")]) 48 | invocation = "\nrun_code_contests_solution()" 49 | return (import_str + annotation + entrypoint + func_body + invocation).strip() 50 | 51 | 52 | def trace_code(check_program, tracing): 53 | my_program = wrap_solution(check_program) 54 | # __name__ must be unique otherwise the tracer uses caching mechanisms that break its behavior 55 | globals_dict = {'__loader__': MockSourceLoader(my_program), 56 | 'tracing': tracing, 57 | '__name__': hash(my_program), 58 | 'custom_snoop': FilteringTracer} 59 | exec(my_program, globals_dict, {}) 60 | 61 | 62 | def clean_trace(trace_output): 63 | trace_lines = trace_output.split("\n") 64 | clean_lines = [line for line in trace_lines if not 65 | any(substring in line for substring in filter_out_lines)] 66 | clean_output = "\n".join(clean_lines) 67 | return clean_output 68 |
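The tracer above wraps a candidate solution inside a run_code_contests_solution() function, executes it under a filtering pysnooper tracer that writes to the caller-supplied tracing stream, and then strips pysnooper's own bookkeeping lines from the captured output. A minimal usage sketch, assuming pysnooper is installed and code_tester.trace_depth is set in the configuration; the candidate snippet and the calling code are illustrative only, not part of the repository:

    import io

    from alpha_codium.code_contests.eval.tracer import clean_trace, trace_code

    # Hypothetical candidate program; real callers pass model-generated solution code.
    candidate_code = "total = sum(i * i for i in range(5))\nprint(total)"

    trace_buffer = io.StringIO()                 # pysnooper writes the raw line-by-line trace here
    trace_code(candidate_code, trace_buffer)     # wraps, decorates and exec()s the candidate
    print(clean_trace(trace_buffer.getvalue()))  # trace with the filter_out_lines noise removed
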
-------------------------------------------------------------------------------- /alpha_codium/evaluate_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from collections import OrderedDict 4 | 5 | from alpha_codium.code_contests.data.provider import CodeContestDataProvider 6 | from alpha_codium.log import get_logger 7 | 8 | logger = get_logger(__name__) 9 | 10 | def evaluate_dataset_solution(dataset_name='valid_and_test_processed', 11 | split_name='test', 12 | solution_path_database='valid_database_solution.json'): 13 | """ 14 | Evaluate the performance of dataset solutions. 15 | 16 | Args: 17 | dataset_name (str, optional): The name of the dataset. Defaults to 'valid_and_test_processed'. 18 | split_name (str, optional): The name of the split. Defaults to 'test'. 19 | solution_path_database (str, optional): The path to the solution database file. Defaults to 'valid_database_solution.json'. 20 | """ 21 | 22 | # Load the dataset and solution database 23 | data_provider = CodeContestDataProvider(dataset_location=dataset_name) 24 | ds = data_provider.dataset[split_name] 25 | with open(solution_path_database, 'r') as f: 26 | database_solutions = json.load(f) 27 | database_solutions[split_name] = OrderedDict( 28 | sorted(database_solutions[split_name].items(), key=lambda x: int(x[0]))) 29 | 30 | # Initialize counters for passed and failed problems 31 | total_passed = 0 32 | total_failed = 0 33 | 34 | # Iterate over the solutions in the database 35 | for sol in database_solutions[split_name]: 36 | try: 37 | key_str = sol 38 | key_int = int(key_str) 39 | problem = ds[key_int] 40 | if problem.get('is_valid_problem', True) is False: 41 | print(f"problem {key_int} is not valid") 42 | continue 43 | solution = database_solutions[split_name][sol] 44 | passed_current = -1 45 | 46 | # scanning the iterations 47 | v_iter =[v for v in solution.values() if (v is not None and 'solution' in v)] 48 | for v in v_iter: 49 | if not v: 50 | continue 51 | test_failed_generate = v['test_failed_generate'] 52 | test_failed_private = v['test_failed_private'] 53 | test_passed_generate = v['test_passed_generate'] 54 | test_passed_private = v['test_passed_private'] 55 | if 'test_timeout_generate' in v: 56 | test_timeout_generate = v['test_timeout_generate'] 57 | test_timeout_private = v['test_timeout_private'] 58 | else: 59 | test_timeout_generate = 0 60 | test_timeout_private = 0 61 | 62 | if ((test_failed_generate + test_timeout_generate + test_failed_private + test_timeout_private) == 0 and 63 | (test_passed_generate + test_passed_private) > 0): 64 | print(f"problem {key_int} passed all tests") 65 | passed_current=1 66 | break 67 | else: 68 | passed_current = 0 69 | if passed_current == 1: 70 | total_passed += 1 71 | elif passed_current == 0: 72 | total_failed += 1 73 | except Exception as e: 74 | print(f"Error: {e}") 75 | pass 76 | 77 | # Print the total number of passed and failed problems 78 | print(f"total_passed: {total_passed}, total_failed: {total_failed}") 79 | 80 | # Calculate the pass rate 81 | pass_rate = total_passed / (total_passed + total_failed) 82 | print(f"pass rate: {pass_rate}") 83 | 84 | parser = argparse.ArgumentParser() 85 | parser.add_argument("--dataset_name", type=str, default="valid_and_test_processed") 86 | parser.add_argument("--split_name", type=str, default="valid") 87 | parser.add_argument("--database_solution_path", type=str, default="./gpt_3_solution_database_valid.json") 88 | 89 | if __name__ == 
"__main__": 90 | args = parser.parse_args() 91 | evaluate_dataset_solution(dataset_name=args.dataset_name, 92 | split_name=args.split_name, 93 | solution_path_database=args.database_solution_path) 94 | -------------------------------------------------------------------------------- /alpha_codium/gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/alpha_codium/gen/__init__.py -------------------------------------------------------------------------------- /alpha_codium/gen/coding_competitor.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | from jinja2 import Environment, StrictUndefined 5 | 6 | from alpha_codium.code_contests.data.provider import CodeContestDataProvider 7 | from alpha_codium.gen.stages.run_baseline import run_baseline 8 | from alpha_codium.gen.stages.run_choose_best_solution import run_choose_best_solution 9 | from alpha_codium.gen.stages.run_evaluate_all_ai_tests import run_evaluate_all_ai_tests 10 | from alpha_codium.gen.stages.run_evaluate_public_tests import run_evaluate_public_tests 11 | from alpha_codium.gen.stages.run_generate_ai_test import run_generate_ai_tests 12 | from alpha_codium.gen.stages.run_generate_possible_solutions import run_generate_possible_solutions 13 | from alpha_codium.gen.stages.run_self_reflect import run_self_reflect 14 | from alpha_codium.gen.stages.run_initial_code_generation import run_initial_code_generation 15 | from alpha_codium.gen.stages.utils import set_configurations 16 | from alpha_codium.gen.utils import evaluate_solution_on_subset 17 | from alpha_codium.llm.ai_handler import AiHandler 18 | from alpha_codium.log import get_logger 19 | from alpha_codium.settings.config_loader import get_settings 20 | 21 | 22 | class CodeContestsCompetitor: 23 | def __init__(self): 24 | self.prompt = {} 25 | for set in get_settings(): 26 | if 'code_contests_prompt' in set.lower(): 27 | self.prompt[set.lower()] = get_settings()[set] 28 | self.ai_handler = AiHandler() 29 | 30 | def render(self, problem_json, prompt: str): 31 | environment = Environment(undefined=StrictUndefined) 32 | environment.globals["zip"] = zip 33 | environment.globals["enumerate"] = enumerate 34 | sys_prompt = environment.from_string(self.prompt[prompt].system).render(problem_json) 35 | usr_prompt = environment.from_string(self.prompt[prompt].user).render(problem_json) 36 | if hasattr(self.prompt[prompt], 'temperature'): 37 | temperature = self.prompt[prompt].temperature 38 | else: 39 | temperature = 0.2 40 | if hasattr(self.prompt[prompt], 'frequency_penalty'): 41 | frequency_penalty = self.prompt[prompt].frequency_penalty 42 | else: 43 | frequency_penalty = None 44 | return sys_prompt, usr_prompt, temperature, frequency_penalty 45 | 46 | async def _run(self, model, problem, prompt:str = "code_contests_prompt_reflect"): 47 | system_prompt, user_prompt, temperature, frequency_penalty = self.render(problem, prompt) 48 | 49 | if frequency_penalty == None: 50 | frequency_penalty = get_settings().get("config.frequency_penalty") 51 | 52 | response, finish_reason = await self.ai_handler.chat_completion( 53 | model=model, system=system_prompt, user=user_prompt, 54 | temperature=temperature, frequency_penalty=frequency_penalty, 55 | ) 56 | return response, finish_reason 57 | 58 | async def run(self, problem, iteration=0, logger_ext=None): 59 | if logger_ext: 
60 | logger = logger_ext 61 | else: 62 | logger = get_logger(__name__) 63 | logger.info(f"Running code contests competitor, model {get_settings().config['model']}") 64 | 65 | try: 66 | if get_settings().get("solve.use_baseline", False): 67 | problem['code_recent_solution'] = await run_baseline(self, problem) 68 | else: 69 | # configurations 70 | problem = set_configurations(problem, iteration) 71 | 72 | # self-reflect 73 | problem = await run_self_reflect(self, problem) 74 | 75 | # generate solutions 76 | problem = await run_generate_possible_solutions(self, problem) 77 | 78 | # choose best solution 79 | problem = await run_choose_best_solution(self, problem) 80 | 81 | # generate ai tests 82 | problem = await run_generate_ai_tests(self, problem) 83 | 84 | # initial code generation 85 | problem = await run_initial_code_generation(self, problem) 86 | 87 | # evaluate on public tests 88 | problem = await run_evaluate_public_tests(self, problem) 89 | 90 | # evaluate on ai tests 91 | problem = await run_evaluate_all_ai_tests(self, problem) 92 | 93 | return problem['code_recent_solution'] 94 | except Exception as e: 95 | logging.error(f"Error: {e}") 96 | return "" 97 | 98 | def solve_problem_in_dataset(self, example, iteration=0, logger_ext=None): 99 | problem = {k: example.get(k) for k in ["name", "description", 'public_tests']} 100 | prediction = asyncio.run(self.run(problem=problem, iteration=iteration, logger_ext=logger_ext)) 101 | return prediction 102 | 103 | 104 | def solve_problem(dataset_name, 105 | split_name="valid", 106 | problem_name="", 107 | problem_number=0): 108 | 109 | # load dataset 110 | logger = get_logger(__name__) 111 | data_provider = CodeContestDataProvider(dataset_location=dataset_name) 112 | if problem_number and problem_name: 113 | logger.info(f"problem_number and problem_name are both specified, using problem_name") 114 | if not problem_name and problem_number: 115 | problem_name = data_provider.dataset[split_name][int(problem_number)]['name'] 116 | logger.info(f"problem_name: {problem_name}") 117 | 118 | # find problem 119 | problem = data_provider.find_problem(ds=data_provider.dataset, problem_name=problem_name, split_name=split_name) 120 | logger.info(f"problem['name']: {problem['name']}") 121 | 122 | # # check if problem is valid (at least one of the provided solutions actually passes the generated tests) 123 | # if not problem.get('is_valid_problem', True): 124 | # logger.info(f"problem['is_valid_problem'] == False, skipping") 125 | # return None, None 126 | 127 | # evaluate prev solutions 128 | evaluate_prev_solutions = get_settings().get("dataset.evaluate_prev_solutions", False) 129 | if evaluate_prev_solutions: 130 | try: 131 | if not problem['solutions']['solution']: 132 | logger.info("No public solutions for this problem") 133 | found_solution = False 134 | for index_published, sol_published in enumerate(problem['solutions']['solution']): 135 | if 'python' not in problem['solutions']['language'][index_published].lower(): 136 | found_solution = True 137 | continue 138 | logger.info(f"evaluating public solution {index_published} on private tests...") 139 | test_results, test_passed_private, test_failed_private, test_timeout_private \ 140 | = evaluate_solution_on_subset('private_tests', problem, sol_published, silent=True) 141 | logger.info(f"evaluating public solution {index_published} on generated tests...") 142 | test_results, test_passed_generate, test_failed_generate, test_timeout_generate = ( 143 | evaluate_solution_on_subset('generated_tests', problem, 
sol_published, silent=True)) 144 | 145 | if (test_failed_private == test_failed_generate == test_timeout_private == test_timeout_generate == 0) \ 146 | and test_passed_private + test_passed_generate > 0: 147 | logger.info(f"sol_published index {index_published} passed all tests:\n{sol_published}") 148 | found_solution = True 149 | break 150 | 151 | if not found_solution: 152 | logger.info(f"None of the public solutions passed all tests") 153 | except Exception as e: 154 | logger.error(f"Error evaluating public solutions: {e}") 155 | pass 156 | 157 | 158 | return solve_my_problem(problem) 159 | 160 | 161 | def solve_my_problem(problem): 162 | 163 | base_path = os.getcwd() 164 | logger = get_logger(__name__) 165 | 166 | solver = CodeContestsCompetitor() 167 | os.chdir(base_path) 168 | solution = solver.solve_problem_in_dataset(problem) 169 | logger.info(f"testing solution on private tests with prediction:\n{solution}") 170 | 171 | logger.info(f"evaluating solution on public tests...") 172 | test_results, test_passed_public, test_failed_public, test_timeout_public = evaluate_solution_on_subset('public_tests', 173 | problem, 174 | solution, 175 | silent=True) 176 | 177 | 178 | logger.info(f"evaluating solution on private tests...") 179 | test_results, test_passed_private, test_failed_private, test_timeout_private = evaluate_solution_on_subset('private_tests', 180 | problem, 181 | solution, 182 | silent=True) 183 | 184 | logger.info(f"evaluating solution on generated tests...") 185 | test_results, test_passed_generate, test_failed_generate, test_timeout_generate = evaluate_solution_on_subset( 186 | 'generated_tests', problem, solution, silent=True) 187 | 188 | logger.info(f"\ntest_passed_generate: {test_passed_generate}, test_passed_private: {test_passed_private}, test_passed_public: {test_passed_public}" 189 | f"\ntest_failed_generate: {test_failed_generate}, test_failed_private: {test_failed_private}, test_failed_public: {test_failed_public}" 190 | f"\ntest_timeout_generate: {test_timeout_generate}, test_timeout_private: {test_timeout_private}, test_timeout_public: {test_timeout_public}") 191 | 192 | return solution, test_results 193 | -------------------------------------------------------------------------------- /alpha_codium/gen/dataset_solver.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | from collections import OrderedDict 5 | 6 | from alpha_codium.code_contests.data.provider import CodeContestDataProvider 7 | from alpha_codium.gen.coding_competitor import CodeContestsCompetitor 8 | from alpha_codium.gen.utils import evaluate_solution_on_subset 9 | from alpha_codium.log import setup_logger, get_logger 10 | from alpha_codium.settings.config_loader import get_settings 11 | 12 | 13 | def solve_dataset(dataset_name='valid_and_test_processed', 14 | split_name='valid', 15 | database_solution_path='solution_database.json'): 16 | 17 | # load dataset 18 | data_provider = CodeContestDataProvider(dataset_location=dataset_name) 19 | setting = get_settings() 20 | num_problems = len(data_provider.dataset[split_name]) 21 | base_path = os.getcwd() 22 | setting.solve.reduce_verbose = True 23 | 24 | ## load previous solution-database if exists 25 | try: 26 | with open(database_solution_path, 'r') as f: 27 | database = json.load(f) 28 | database[split_name] = OrderedDict(sorted(database[split_name].items(), key=lambda x: int(x[0]))) 29 | except: 30 | print(f"Failed to load database from {database_solution_path}") 31 
| database = {split_name: {}} 32 | 33 | # iterate on problems 34 | for problem_number in range(0, num_problems): 35 | 36 | # skip if already ran 37 | logger = setup_logger() 38 | 39 | num_iterations = setting.get("dataset.num_iterations", 1) 40 | prev = database[split_name].get(str(problem_number), {}).get(f'iteration_{num_iterations-1}', {}) 41 | if not ((prev == {}) or (prev is None)): 42 | print(f"problem_number {problem_number} already ran") 43 | continue 44 | 45 | # check if problem is valid (at least one of the provided solutions actually passes the generated tests) 46 | if data_provider.dataset[split_name][problem_number].get('is_valid_problem', True) is False: 47 | logger.info(f"problem {problem_number} is not valid") 48 | continue 49 | 50 | os.chdir(base_path) 51 | logger.info(f"problem_number: {problem_number}") 52 | problem_name = data_provider.dataset[split_name][int(problem_number)]['name'] 53 | logger.info(f"problem_name: {problem_name}") 54 | problem = data_provider.find_problem(ds=data_provider.dataset, problem_name=problem_name, split_name=split_name) 55 | logger.info(f"problem['cf_tags']: {problem['cf_tags']}") 56 | 57 | # solve problem 58 | problem_database = {problem_number: {}} 59 | solver = CodeContestsCompetitor() 60 | for iteration in range(setting.get("dataset.num_iterations", 1)): 61 | it_str = f"iteration_{iteration}" 62 | problem_database[problem_number][it_str] = {} 63 | 64 | # skip if iteration already ran 65 | prev_iter = database[split_name].get(str(problem_number), {}).get(it_str, {}) 66 | if not ((prev_iter == {}) or (prev_iter is None)): 67 | print(f"prev_iter {iteration} already ran") 68 | problem_database[problem_number][it_str] = prev_iter 69 | if is_solved(prev_iter): 70 | logger.info(f"codium solved problem {problem_number} in iteration {iteration}") 71 | break 72 | continue 73 | 74 | # solve problem 75 | solution = solver.solve_problem_in_dataset(problem, iteration, logger) 76 | 77 | logger.info(f"solution code:\n{solution}") 78 | if not solution: 79 | logger.info(f"Failed to solve problem {problem_number} in iteration {iteration}") 80 | continue 81 | logger.info(f"Evaluating solution on public tests...") 82 | test_results, test_passed_public, test_failed_public, test_timeout_public = evaluate_solution_on_subset( 83 | 'public_tests', problem, solution, silent=True) 84 | 85 | logger.info(f"evaluating solution on private tests...") 86 | test_results, test_passed_private, test_failed_private, test_timeout_private = evaluate_solution_on_subset( 87 | 'private_tests', problem, solution, silent=True) 88 | 89 | logger.info(f"evaluating solution on generated tests...") 90 | test_results, test_passed_generate, test_failed_generate, test_timeout_generate = evaluate_solution_on_subset( 91 | 'generated_tests', problem, solution, silent=True) 92 | 93 | logger.info( 94 | f"\ntest_passed_public: {test_passed_public}, test_failed_public: {test_failed_public}, test_timeout_public: {test_timeout_public}\n" 95 | f"test_passed_private: {test_passed_private}, test_failed_private: {test_failed_private}, test_timeout_private: {test_timeout_private}\n" 96 | f"test_passed_generate: {test_passed_generate}, test_failed_generate: {test_failed_generate}, test_timeout_generate: {test_timeout_generate}\n") 97 | 98 | problem_database[problem_number][it_str]['solution'] = solution 99 | problem_database[problem_number][it_str]['test_passed_private'] = test_passed_private 100 | problem_database[problem_number][it_str]['test_failed_private'] = test_failed_private 101 | 
problem_database[problem_number][it_str]['test_timeout_private'] = test_timeout_private 102 | problem_database[problem_number][it_str]['test_passed_generate'] = test_passed_generate 103 | problem_database[problem_number][it_str]['test_failed_generate'] = test_failed_generate 104 | problem_database[problem_number][it_str]['test_timeout_generate'] = test_timeout_generate 105 | problem_database[problem_number][it_str]['test_passed_public'] = test_passed_public 106 | problem_database[problem_number][it_str]['test_failed_public'] = test_failed_public 107 | problem_database[problem_number][it_str]['test_timeout_public'] = test_timeout_public 108 | os.chdir(base_path) 109 | if is_solved(problem_database[problem_number][it_str]): 110 | logger.info(f"codium solved problem {problem_number} in iteration {iteration}") 111 | break 112 | else: 113 | logger.info(f"codium failed to solve problem {problem_number} in iteration {iteration}") 114 | database[split_name][problem_number] = problem_database[problem_number] 115 | os.chdir(base_path) 116 | with open(database_solution_path, 'w') as f: 117 | json.dump(database, f) 118 | 119 | 120 | def is_solved(s): 121 | if s['test_failed_private'] == 0 and s['test_failed_generate'] == 0 and \ 122 | s['test_timeout_private'] == 0 and s['test_timeout_generate'] == 0 and \ 123 | (s['test_passed_private'] + s['test_passed_generate']) > 0: 124 | return True 125 | else: 126 | return False 127 | -------------------------------------------------------------------------------- /alpha_codium/gen/generators.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import functools 3 | 4 | from alpha_codium.llm.ai_handler import AiHandler 5 | from alpha_codium.llm.ai_invoker import send_inference 6 | 7 | 8 | class SimplePrompt: 9 | def __init__(self, system_prompt="", temperature=0.2, frequency_penalty=0): 10 | self.system_prompt = system_prompt 11 | self.temperature = temperature 12 | self.frequency_penalty = frequency_penalty 13 | self.ai_handler = AiHandler() 14 | 15 | async def _run(self, model, user_prompt): 16 | response, finish_reason = await self.ai_handler.chat_completion( 17 | model=model, 18 | temperature=self.temperature, 19 | frequency_penalty=self.frequency_penalty, 20 | system=self.system_prompt, 21 | user=user_prompt, 22 | ) 23 | return response 24 | 25 | async def run(self, user_prompt): 26 | f = functools.partial(self._run, user_prompt=user_prompt) 27 | response = await send_inference(f) 28 | return response 29 | 30 | 31 | if __name__ == "__main__": 32 | p = SimplePrompt() 33 | asyncio.run(p.run("what is the capital city of Israel")) 34 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/indirect/run_analyze_and_fix_test_failure.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import difflib 3 | import functools 4 | import logging 5 | import yaml 6 | from alpha_codium.llm.ai_invoker import send_inference 7 | from alpha_codium.log import get_logger 8 | from alpha_codium.settings.config_loader import get_settings 9 | 10 | logger = get_logger(__name__) 11 | 12 | 13 | async def run_analyze_and_fix_test_failure(self, problem, error_str): 14 | counter_retry = 0 15 | while True: 16 | try: 17 | problem['error_str'] = error_str 18 | f = functools.partial(self._run, problem=problem, prompt=choose_prompt()) 19 | response_analyze_failure, _ = await send_inference(f) 20 | problem['error_str'] = '' 21 | 22 | 
response_analyze_failure = response_analyze_failure.rstrip("'` \n") # remove trailing spaces and newlines from yaml response 23 | if response_analyze_failure.startswith("```yaml"): 24 | response_analyze_failure = response_analyze_failure[8:] 25 | response_analyze_failure_yaml = yaml.safe_load(response_analyze_failure) 26 | problem['response_analyze_failure'] = response_analyze_failure 27 | code_recent_solution = response_analyze_failure_yaml['fixed_code'].rstrip("'` \n") 28 | 29 | # some cleaning 30 | if code_recent_solution .startswith("```python"): 31 | code_recent_solution= code_recent_solution[10:] 32 | elif code_recent_solution.startswith("python"): 33 | code_recent_solution = code_recent_solution[6:] 34 | try: 35 | ast.parse(code_recent_solution) 36 | except: 37 | code_recent_solution_fallback = '\n'.join(code_recent_solution.splitlines()[:-1]).rstrip("'` \n") 38 | try: 39 | ast.parse(code_recent_solution_fallback) 40 | code_recent_solution = code_recent_solution_fallback 41 | except: 42 | logger.error(f"Invalid code:\n{code_recent_solution}") 43 | return problem 44 | problem['code_recent_solution'] = code_recent_solution 45 | 46 | # diff patch 47 | diff = difflib.unified_diff(problem['code_prev_solution'].splitlines(keepends=True), 48 | problem['code_recent_solution'].splitlines(keepends=True)) 49 | # patch = ''.join(diff) 50 | # if get_settings().solve.reduce_verbose: 51 | # logger.debug(f"diff:\n{patch}") 52 | # else: 53 | # logger.info(f"diff:\n{patch}") 54 | 55 | return problem 56 | except Exception as e: 57 | logging.error(f"'analyze_and_fix_test_failure' stage, counter_retry {counter_retry}, Error: {e}") 58 | counter_retry += 1 59 | if counter_retry > 2: 60 | raise e 61 | 62 | def choose_prompt(): 63 | if get_settings().get("solve.use_direct_solutions", False): 64 | return "code_contests_prompt_analyze_and_fix_direct" 65 | else: 66 | return "code_contests_prompt_analyze_and_fix" 67 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/indirect/run_analyze_tests_failure.py: -------------------------------------------------------------------------------- 1 | import difflib 2 | import functools 3 | import logging 4 | import yaml 5 | 6 | from alpha_codium.settings.config_loader import get_settings 7 | from alpha_codium.llm.ai_invoker import send_inference 8 | from alpha_codium.log import get_logger 9 | 10 | logger = get_logger(__name__) 11 | 12 | 13 | async def run_analyze_test_failure(self, problem,error_str): 14 | counter_retry = 0 15 | while True: 16 | try: 17 | problem['error_str'] = error_str 18 | f = functools.partial(self._run, problem=problem, prompt="code_contests_prompt_analyze_failure") 19 | response_analyze_failure, _ = await send_inference(f) 20 | problem['error_str'] = '' 21 | 22 | response_analyze_failure = response_analyze_failure.rstrip("'` \n") # remove trailing spaces and newlines from yaml response 23 | if response_analyze_failure.startswith("```yaml"): 24 | response_analyze_failure = response_analyze_failure[8:] 25 | problem['response_analyze_failure'] = response_analyze_failure 26 | response_analyze_failure_yaml = yaml.safe_load(response_analyze_failure) 27 | problem['what_went_wrong'] = response_analyze_failure_yaml['what_went_wrong'] 28 | problem['fixed_flow'] = response_analyze_failure_yaml['fixed_flow'] 29 | return problem 30 | except Exception as e: 31 | logging.error(f"'analyze_test_failure' stage, counter_retry {counter_retry}, Error: {e}") 32 | counter_retry += 1 33 | if counter_retry > 2: 34 | 
raise e -------------------------------------------------------------------------------- /alpha_codium/gen/stages/indirect/run_fix_code_from_tests_failure.py: -------------------------------------------------------------------------------- 1 | import difflib 2 | import functools 3 | import logging 4 | from alpha_codium.settings.config_loader import get_settings 5 | from alpha_codium.llm.ai_invoker import send_inference 6 | from alpha_codium.log import get_logger 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | async def run_fix_code_from_tests_failure(self, problem,error_str): 12 | counter_retry = 0 13 | while True: 14 | try: 15 | problem['error_str'] = error_str 16 | f = functools.partial(self._run, problem=problem, prompt="code_contests_prompt_fix_solution") 17 | response_fixed_code, _ = await send_inference(f) 18 | problem['error_str'] = '' 19 | 20 | # some cleaning 21 | response_fixed_code = response_fixed_code.rstrip("'` \n") # remove trailing spaces and newlines from yaml response 22 | if response_fixed_code.startswith("```python"): 23 | response_fixed_code = response_fixed_code[10:] 24 | problem['code_recent_solution'] = response_fixed_code 25 | 26 | # diff patch 27 | diff = difflib.unified_diff(problem['code_prev_solution'].splitlines(keepends=True), 28 | response_fixed_code.splitlines(keepends=True)) 29 | # patch = ''.join(diff) 30 | # if get_settings().solve.reduce_verbose: 31 | # logger.debug(f"diff:\n{patch}") 32 | # else: 33 | # logger.info(f"diff:\n{patch}") 34 | 35 | return problem 36 | 37 | except Exception as e: 38 | logging.error(f"fix_code_from_tests_failure' stage, counter_retry {counter_retry}, Error: {e}") 39 | counter_retry += 1 40 | if counter_retry > 2: 41 | raise e 42 | 43 | 44 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/indirect/run_fix_self_reflect.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import functools 3 | import logging 4 | import yaml 5 | 6 | from alpha_codium.settings.config_loader import get_settings 7 | from alpha_codium.gen.utils import postprocess_response 8 | from alpha_codium.llm.ai_invoker import send_inference 9 | from alpha_codium.log import get_logger 10 | 11 | logger = get_logger(__name__) 12 | 13 | 14 | async def run_validate_self_reflect(self, problem): 15 | try: 16 | logger.info("--validate reflection stage--") 17 | f = functools.partial(self._run, problem=problem, prompt="code_contests_prompts_validate_reflection") 18 | 19 | # inference 20 | response_validate_reflect, _ = await send_inference(f) 21 | response_validate_reflect = response_validate_reflect.rstrip("` \n") 22 | if response_validate_reflect.startswith("```yaml"): 23 | response_validate_reflect = response_validate_reflect[8:] 24 | try: 25 | response_validate_reflect_yaml = yaml.safe_load(response_validate_reflect) 26 | except yaml.YAMLError: 27 | response_validate_reflect = postprocess_response(response_validate_reflect) # try to include only the yaml part 28 | response_validate_reflect_yaml = yaml.safe_load(response_validate_reflect) 29 | 30 | # check number of tests 31 | actual_number_of_tests = len(problem['public_tests']['input']) 32 | calculated_number_of_tests = len(response_validate_reflect_yaml['fixed_tests_explanations']) 33 | if actual_number_of_tests != calculated_number_of_tests: 34 | raise (f"Error: number of tests in validate self-reflection ({calculated_number_of_tests}) " 35 | f"does not match the actual number of tests 
({actual_number_of_tests})") 36 | 37 | problem['response_validate_self_reflect'] = response_validate_reflect 38 | problem['tests_explanations'] = response_validate_reflect_yaml['fixed_tests_explanations'] 39 | problem['tests_explanations_str'] = response_validate_reflect.split('tests_explanations:')[1] 40 | 41 | # re-order the public tests from easiest to hardest 42 | problem['public_tests']['original'] = copy.deepcopy(problem['public_tests']) 43 | problem['public_tests']['input'] = [t['input'] for t in problem['tests_explanations']] 44 | problem['public_tests']['output'] = [t['output'] for t in problem['tests_explanations']] 45 | problem['public_tests']['explanation'] = [t['explanation'] for t in problem['tests_explanations']] 46 | 47 | return problem 48 | except Exception as e: 49 | logging.error(f"Failed 'run_validate_self_reflect', Error: {e}") 50 | return problem 51 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/indirect/run_validate_ai_test.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | 4 | from alpha_codium.gen.utils import load_yaml 5 | from alpha_codium.llm.ai_invoker import send_inference 6 | from alpha_codium.log import get_logger 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | async def run_validate_ai_tests(self, problem): 12 | counter_retry = 0 13 | while True: 14 | try: 15 | logger.info("--validate ai tests stage--") 16 | 17 | f = functools.partial(self._run, problem=problem, prompt="code_contests_prompts_validate_ai_tests") 18 | response_problem_tests, _ = await send_inference(f) 19 | problem['problem_ai_tests'] = load_yaml(response_problem_tests, 20 | keys_fix_yaml=["input:", "output:", "explanation", "what_was_wrong:"])['tests'] 21 | 22 | # clean up and parse the response 23 | for p in problem['problem_ai_tests']: 24 | p['input'] = str(p['input']).replace('\\n', '\n') 25 | p['output'] = str(p['output']).replace('\\n', '\n') 26 | 27 | return problem 28 | except Exception as e: 29 | logging.error(f"'validate ai tests' stage, counter_retry {counter_retry}, Error: {e}") 30 | counter_retry += 1 31 | if counter_retry > 2: 32 | # raise e 33 | return problem 34 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_baseline.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | from alpha_codium.gen.utils import postprocess_response 4 | from alpha_codium.llm.ai_invoker import send_inference 5 | from alpha_codium.log import get_logger 6 | 7 | logger = get_logger(__name__) 8 | 9 | 10 | async def run_baseline(self, problem): 11 | try: 12 | logging.info("Using baseline prompt") 13 | f = functools.partial(self._run, problem=problem, prompt="code_contests_prompts_baseline") 14 | response_baseline, _ = await send_inference(f) 15 | recent_solution = postprocess_response(response_baseline) 16 | return recent_solution 17 | except Exception as e: 18 | logging.error(f"Error: {e}") 19 | exit(-1) 20 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_choose_best_solution.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | from alpha_codium.llm.ai_invoker import send_inference 4 | from alpha_codium.log import get_logger 5 | from alpha_codium.gen.utils import load_yaml 6 | from 
alpha_codium.settings.config_loader import get_settings 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | async def run_choose_best_solution(self, problem): 12 | counter_retry = 0 13 | while True: 14 | try: 15 | logger.info("--choose best solution stage--") 16 | 17 | # get settings 18 | f = functools.partial(self._run, problem=problem, prompt=choose_prompt()) 19 | 20 | # inference 21 | response_best_solution, _ = await send_inference(f) 22 | response_best_solution_yaml = load_yaml(response_best_solution, 23 | keys_fix_yaml=["name:", "content:", "why:", "- "]) 24 | 25 | # update best solution 26 | problem['s_best_solution'] = response_best_solution 27 | if 's_possible_solutions' in problem: 28 | problem['s_other_solutions'] = [] 29 | for solution in problem['s_possible_solutions']: 30 | if solution['name'] != response_best_solution_yaml['name']: 31 | problem['s_other_solutions'].append(solution) 32 | 33 | return problem 34 | except Exception as e: 35 | logging.error(f"'run_choose_best_solution' stage, counter_retry {counter_retry}, Error: {e}") 36 | counter_retry += 1 37 | if counter_retry > 2: 38 | raise e 39 | 40 | 41 | def choose_prompt(): 42 | if get_settings().get("solve.use_direct_solutions", False): 43 | return "code_contests_prompts_choose_best_solution_direct" 44 | else: 45 | return "code_contests_prompts_choose_best_solution" -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_evaluate_all_ai_tests.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | 4 | from alpha_codium.gen.stages.indirect.run_analyze_and_fix_test_failure import run_analyze_and_fix_test_failure 5 | from alpha_codium.gen.stages.run_tests import run_tests 6 | from alpha_codium.log import get_logger 7 | from alpha_codium.settings.config_loader import get_settings 8 | 9 | logger = get_logger(__name__) 10 | 11 | 12 | async def run_evaluate_all_ai_tests(self, problem): 13 | try: 14 | logger.info("--iterate on all ai tests stage--") 15 | 16 | ai_tests = problem['problem_ai_tests'] 17 | max_allowed_calls = get_settings().get("ai_tests.max_allowed_calls", 6) 18 | 19 | # evaluate ai tests 20 | actual_number_of_calls = 0 21 | for i, test in enumerate(ai_tests): 22 | counter = 0 23 | test_inputs = test['input'] 24 | test_outputs = test['output'] 25 | if not isinstance(test_inputs, list): 26 | test_inputs = [test_inputs] 27 | test_outputs = [test_outputs] 28 | 29 | # run the solution on the tests 30 | problem, test_passed, non_empty_output, error_str, trace_str, tests_timeout, d_tot \ 31 | = run_tests(self, problem, counter, test_inputs, test_outputs) 32 | 33 | # we passed without changing the code. Add the test to the passed tests list 34 | if test_passed: 35 | if test_inputs not in problem['passed_tests']['inputs']: 36 | logger.info(f"Passed ai tests without code fixing. adding to passed tests list") 37 | problem['passed_tests']['inputs'] += test_inputs 38 | problem['passed_tests']['outputs'] += test_outputs 39 | else: 40 | # cap the number of calls to the ai 41 | if actual_number_of_calls >= max_allowed_calls: 42 | if i < len(ai_tests) - len(problem['public_tests']['input']): # don't skip public tests 43 | logger.error(f"Failed to pass ai test. reached max number of calls") 44 | continue 45 | 46 | logger.error(f"Failed to pass ai tests. 
trying to fix code") 47 | last_code_solution = copy.deepcopy(problem['code_recent_solution']) 48 | 49 | # run 'analyze_and_fix_test_failure' stage 50 | problem = await run_analyze_and_fix_test_failure(self, problem, error_str) 51 | actual_number_of_calls += 1 52 | 53 | problem, test_passed2, non_empty_output2, error_str2, trace_str2, tests_timeout2, d_tot2 \ 54 | = run_tests(self, problem, counter, test_inputs, test_outputs) 55 | 56 | if not test_passed2 and (not 'sandbox error: ' in error_str): 57 | logger.error(f"Failed to pass ai tests with fixed code.") 58 | problem['code_recent_solution'] = last_code_solution 59 | else: # we passed the test after fixing the code 60 | 61 | # running previous passed tests again to make sure we didn't break anything 62 | if problem['passed_tests']['inputs']: 63 | problem, all_passed_prev, non_empty_output, error_str, trace_str, tests_timeout, d_tot \ 64 | = run_tests(self, problem, counter, 65 | problem['passed_tests']['inputs'], 66 | problem['passed_tests']['outputs']) 67 | if not all_passed_prev: 68 | logger.error(f"The fix broke prev passed tests. reverting to last solution") 69 | problem['code_recent_solution'] = last_code_solution 70 | continue 71 | 72 | if test_passed2: 73 | logger.info(f"Fixed current test, and passed prev tests. using new solution") 74 | if test_inputs not in problem['passed_tests']['inputs']: 75 | problem['passed_tests']['inputs'] += test_inputs 76 | problem['passed_tests']['outputs'] += test_outputs 77 | else: 78 | logger.info(f"Code doesnt crash, but still fails the test. using new solution") 79 | 80 | return problem 81 | except Exception as e: 82 | logging.error(f"Error in 'run_evaluate_all_ai_tests': {e}") 83 | return problem 84 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_evaluate_public_tests.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | 4 | from alpha_codium.gen.stages.indirect.run_analyze_and_fix_test_failure import run_analyze_and_fix_test_failure 5 | from alpha_codium.gen.stages.indirect.run_analyze_tests_failure import run_analyze_test_failure 6 | from alpha_codium.gen.stages.indirect.run_fix_code_from_tests_failure import run_fix_code_from_tests_failure 7 | from alpha_codium.settings.config_loader import get_settings 8 | from alpha_codium.gen.stages.run_tests import run_tests 9 | from alpha_codium.log import get_logger 10 | 11 | logger = get_logger(__name__) 12 | 13 | 14 | async def run_evaluate_public_tests(self, problem): 15 | counter_retry = 0 16 | while True: 17 | try: 18 | logger.info("--iterate on public tests stage--") 19 | 20 | # configurations 21 | problem['use_self_reflection_public'] = get_settings().get('public_tests.use_self_reflection', False) 22 | max_allowed_fixes = get_settings().get("public_tests.max_allowed_calls", 6) 23 | max_fixes_per_test = get_settings().get("public_tests.max_fixes_per_test", 3) 24 | if len(problem['public_tests']['input']) == 1: 25 | max_fixes_per_test += 1 26 | 27 | # evaluate on public tests one by one 28 | test_inputs_all = problem['public_tests']['input'] 29 | test_outputs_all = problem['public_tests']['output'] 30 | test_explanations_all = problem['tests_explanations'] 31 | all_passed_public = True 32 | number_of_llm_fixes = 0 33 | for test_inputs, test_outputs, test_explanation in zip(test_inputs_all, test_outputs_all, 34 | test_explanations_all): 35 | if not isinstance(test_inputs, list): 36 | test_inputs = [test_inputs] 37 | 
test_outputs = [test_outputs] 38 | problem['test_explanation_current'] = test_explanation 39 | problem['use_test_explanations_public'] = get_settings().get('public_tests.use_test_explanations', False) 40 | 41 | # loop to fix specific test 42 | counter_test = 0 43 | passed_specific_test = False 44 | last_code_solution = copy.deepcopy(problem['code_recent_solution']) 45 | best_solution = copy.deepcopy(problem['code_recent_solution']) 46 | best_d = float('inf') 47 | while not passed_specific_test: 48 | 49 | # run the code on the test 50 | problem, passed_specific_test, non_empty_output, error_str, trace_str, tests_timeout, d_tot \ 51 | = run_tests(self, problem, counter_test, test_inputs, test_outputs) 52 | 53 | # save the best solution so far 54 | if -1 < d_tot < best_d: 55 | if counter_test > 0: 56 | logger.info(f"Found better solution, d_tot: {d_tot}") 57 | best_solution = copy.deepcopy(problem['code_recent_solution']) 58 | best_d = d_tot 59 | 60 | # cap the number of calls to the ai 61 | if not passed_specific_test and number_of_llm_fixes >= max_allowed_fixes: 62 | logger.debug(f"Failed to pass public test. reached max number of calls") 63 | break 64 | 65 | # analyze the tests results 66 | counter_test += 1 67 | logger.info(f"counter: {counter_test}") 68 | if passed_specific_test: 69 | logger.info(f"Passed a public test after {counter_test-1} attempts") 70 | if test_inputs not in problem['passed_tests']['inputs']: 71 | problem['passed_tests']['inputs'] += test_inputs 72 | problem['passed_tests']['outputs'] += test_outputs 73 | break 74 | elif counter_test > max_fixes_per_test: 75 | logger.debug(f"Failed to pass public tests after {max_fixes_per_test} attempts") 76 | break 77 | elif not non_empty_output: 78 | logging.debug("Failed to pass public tests. actual_output is empty") 79 | problem['code_recent_solution'] = last_code_solution 80 | continue 81 | else: 82 | # tests run. save the last solution 83 | problem['code_prev_solution'] = copy.deepcopy(problem['code_recent_solution']) 84 | 85 | if not get_settings().get("public_tests.single_stage_fix", False): 86 | # run 'analyze_and_fix_test_failure' stage 87 | problem = await run_analyze_and_fix_test_failure(self, problem, error_str) 88 | else: 89 | # run 'analyze_test_failure' stage 90 | problem = await run_analyze_test_failure(self, problem, error_str) 91 | 92 | # run 'fix_code_from_tests_failure' stage 93 | problem = await run_fix_code_from_tests_failure(self, problem, error_str) 94 | number_of_llm_fixes += 1 95 | 96 | # evaluate previous tests that passed. if they fail, revert to last solution 97 | if problem['passed_tests']['inputs']: 98 | problem, passed_prev_test, non_empty_output, error_str, trace_str, tests_timeout, d_tot \ 99 | = run_tests(self, problem, counter_test, 100 | problem['passed_tests']['inputs'], 101 | problem['passed_tests']['outputs']) 102 | if not passed_prev_test: 103 | logger.error(f"The fix broke prev passed tests. reverting to last solution") 104 | problem['code_recent_solution'] = last_code_solution 105 | continue 106 | 107 | # if not passed_specific_test: 108 | # if problem['passed_tests']['inputs']: 109 | # logger.error(f"Public test - reverting to initial solution, where: '{problem['passed_tests']['inputs']}' passed") 110 | # problem['code_recent_solution'] = last_code_solution 111 | # else: # no solution passed so far. 112 | # pass 113 | # logger.error("No solution passed so far. 
continuing to next test") 114 | # # logger.error(f'Public test - Reverting to best solution so far, d_tot: {best_d}') 115 | # # problem['code_recent_solution'] = best_solution 116 | all_passed_public = all_passed_public and passed_specific_test 117 | 118 | if all_passed_public: 119 | logger.info(f"==================") 120 | logger.info(f"Passed all public tests") 121 | logger.info(f"==================") 122 | else: 123 | logger.info(f"==================") 124 | logger.info(f"Failed to pass all public tests") 125 | logger.info(f"==================") 126 | 127 | return problem 128 | except Exception as e: 129 | logging.error(f"'public tests' stage, counter_retry {counter_retry}, Error: {e}") 130 | counter_retry += 1 131 | if counter_retry > 2: 132 | raise e 133 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_generate_ai_test.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | 4 | from alpha_codium.gen.stages.indirect.run_validate_ai_test import run_validate_ai_tests 5 | from alpha_codium.gen.utils import load_yaml 6 | from alpha_codium.settings.config_loader import get_settings 7 | from alpha_codium.llm.ai_invoker import send_inference 8 | from alpha_codium.log import get_logger 9 | 10 | logger = get_logger(__name__) 11 | 12 | 13 | async def run_generate_ai_tests(self, problem): 14 | counter_retry = 0 15 | while True: 16 | try: 17 | logger.info("--generate ai tests stage--") 18 | 19 | # get settings 20 | validate_ai_tests = get_settings().get('generate_ai_tests.validate_ai_tests', False) 21 | problem['number_of_ai_tests'] = get_settings().get("generate_ai_tests.number_of_ai_tests", 8) 22 | problem['use_test_explanations_possible_solutions'] = get_settings().get('generate_ai_tests.use_test_explanations') 23 | 24 | # get prompt 25 | f = functools.partial(self._run, problem=problem, prompt="code_contests_prompts_generate_ai_tests") 26 | 27 | # inference 28 | response_problem_tests, _ = await send_inference(f) 29 | problem['problem_ai_tests'] = load_yaml(response_problem_tests, 30 | keys_fix_yaml=["input:", "output:", "explanation:"])['tests'] 31 | problem['problem_ai_simple_test'] = problem['problem_ai_tests'][0] 32 | 33 | if validate_ai_tests: 34 | problem = await run_validate_ai_tests(self, problem) 35 | 36 | # adding public tests to the beginning and end of the list, for the ai-iterate stage 37 | if get_settings().get('generate_ai_tests.add_public_tests_to_ai_tests', False): 38 | for public_input, public_output in zip(problem['public_tests']['input'], 39 | problem['public_tests']['output']): 40 | # to the beginning of the list 41 | problem['problem_ai_tests'].insert(0, {'input': public_input, 'output': public_output}) 42 | # to the end of the list 43 | problem['problem_ai_tests'].append({'input': public_input, 'output': public_output}) 44 | 45 | return problem 46 | except Exception as e: 47 | logging.error(f"'generate ai tests' stage, counter_retry {counter_retry}, Error: {e}") 48 | counter_retry += 1 49 | if counter_retry > 2: 50 | raise e 51 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_generate_possible_solutions.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import functools 3 | import logging 4 | import yaml 5 | 6 | from alpha_codium.gen.utils import load_yaml 7 | from alpha_codium.settings.config_loader import get_settings 8 
| from alpha_codium.llm.ai_invoker import send_inference 9 | from alpha_codium.log import get_logger 10 | 11 | logger = get_logger(__name__) 12 | 13 | 14 | async def run_generate_possible_solutions(self, problem): 15 | counter_retry = 0 16 | while True: 17 | try: 18 | logger.info("--generate possible solutions stage--") 19 | if get_settings().get("solve.use_direct_solutions", False): 20 | return problem 21 | 22 | # get settings 23 | problem['max_num_of_possible_solutions'] = get_settings().get('possible_solutions.max_num_of_possible_solutions') 24 | problem['use_test_explanations_possible_solutions'] = get_settings().get('possible_solutions.use_test_explanations') 25 | f = functools.partial(self._run, problem=problem, prompt="code_contests_prompt_generate_possible_solutions") 26 | 27 | # inference 28 | response_possible_solutions, _ = await send_inference(f) 29 | response_possible_solutions_yaml = load_yaml(response_possible_solutions) 30 | 31 | if get_settings().get('possible_solutions.remove_bruce_force_solutions'): 32 | for i, s in enumerate(response_possible_solutions_yaml['possible_solutions']): 33 | if 'brute' in s['name'].lower(): 34 | response_possible_solutions_yaml['possible_solutions'].pop(i) 35 | response_possible_solutions = yaml.dump(response_possible_solutions_yaml, sort_keys=False, line_break="\n") 36 | break 37 | problem['s_possible_solutions'] = response_possible_solutions_yaml['possible_solutions'] 38 | problem['s_possible_solutions_str'] = response_possible_solutions.split('possible_solutions:')[1].strip() 39 | 40 | return problem 41 | except Exception as e: 42 | logging.error(f"'possible solutions' stage, counter_retry {counter_retry}, Error: {e}") 43 | counter_retry += 1 44 | if counter_retry > 2: 45 | raise e 46 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_initial_code_generation.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | 4 | from alpha_codium.settings.config_loader import get_settings 5 | from alpha_codium.gen.stages.run_initial_solve import run_initial_solve 6 | from alpha_codium.gen.stages.run_tests import run_tests 7 | from alpha_codium.log import get_logger 8 | 9 | logger = get_logger(__name__) 10 | 11 | 12 | async def run_initial_code_generation(self, problem): 13 | counter_retry = 0 14 | while True: 15 | try: 16 | logger.info("--run initial code generation stage--") 17 | 18 | max_attempts = get_settings().get('initial_code_generation.max_attempts', 5) 19 | counter = 0 20 | 21 | # set the public tests as input 22 | test_input = problem['public_tests']['input'] 23 | test_output = problem['public_tests']['output'] 24 | 25 | # generate an initial code, using the top solution from the previous stage 26 | problem = await run_initial_solve(self, problem) 27 | 28 | # run the solution on the selected tests 29 | problem, passed_tests, non_empty_output, error_str, trace_str, tests_timeout, d_tot \ 30 | = run_tests(self, problem, counter, test_input, test_output) 31 | 32 | best_solution = copy.deepcopy(problem['code_recent_solution']) 33 | best_d = float('inf') # distance to the correct solution 34 | 35 | # set the distance to the correct solution 36 | if -1 < d_tot < best_d: 37 | best_solution = copy.deepcopy(problem['code_recent_solution']) 38 | best_d = d_tot 39 | 40 | while not passed_tests: 41 | counter += 1 42 | if counter > max_attempts: 43 | logger.error(f"Failed to pass tests after {counter - 1} attempts. 
exiting the stage") 44 | break 45 | 46 | s_best_solution_original = problem['s_best_solution'] 47 | if counter > 1 and 's_possible_solutions' in problem: 48 | # give two attempts to the highest ranked solution 49 | problem['s_best_solution'] = problem['s_possible_solutions'][ 50 | counter % len(problem['s_possible_solutions'])] 51 | problem = await run_initial_solve(self, problem) 52 | problem['s_best_solution'] = s_best_solution_original 53 | 54 | problem, passed_tests, non_empty_output, error_str, trace_str, tests_timeout, d_tot \ 55 | = run_tests(self, problem, counter, test_input, test_output) 56 | 57 | if passed_tests: 58 | logger.info(f"Passed tests after {counter} attempts") 59 | break 60 | else: 61 | logger.info(f"Failed to pass tests after {counter} attempts, d: {d_tot}, best_d so far: {best_d}") 62 | 63 | # save the best solution so far 64 | if -1 < d_tot < best_d: 65 | best_solution = copy.deepcopy(problem['code_recent_solution']) 66 | best_d = d_tot 67 | 68 | # set the best solution 69 | if not passed_tests and best_d < float('inf'): 70 | logger.error(f'Reverting to best solution so far, d_tot: {best_d}') 71 | problem['code_recent_solution'] = best_solution 72 | 73 | return problem 74 | except Exception as e: 75 | logging.error(f"'initial code generation' stage, counter_retry {counter_retry}, Error: {e}") 76 | counter_retry += 1 77 | if counter_retry > 2: 78 | raise e 79 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_initial_solve.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | from alpha_codium.llm.ai_invoker import send_inference 4 | from alpha_codium.log import get_logger 5 | from alpha_codium.settings.config_loader import get_settings 6 | 7 | logger = get_logger(__name__) 8 | 9 | 10 | async def run_initial_solve(self, problem): 11 | counter_retry = 0 12 | while True: 13 | try: 14 | logger.info("--initial solve stage--") 15 | 16 | f = functools.partial(self._run, problem=problem, prompt=choose_prompt()) 17 | response_solve, _ = await send_inference(f) 18 | 19 | # clean up the response 20 | response_solve = response_solve.rstrip("` \n") 21 | if response_solve.startswith("```python"): 22 | response_solve = response_solve[10:] 23 | elif response_solve.startswith("python"): 24 | response_solve = response_solve[6:] 25 | 26 | # save the response 27 | problem['code_recent_solution'] = response_solve 28 | problem['code_prev_solution'] = response_solve 29 | return problem 30 | except Exception as e: 31 | logging.error(f"'initial solve' stage, counter_retry {counter_retry}, Error: {e}") 32 | counter_retry += 1 33 | if counter_retry > 2: 34 | raise e 35 | 36 | def choose_prompt(): 37 | if get_settings().get("solve.use_direct_solutions", False): 38 | return "code_contests_prompts_solve_direct" 39 | else: 40 | return "code_contests_prompts_solve" -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_self_reflect.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import yaml 4 | 5 | from alpha_codium.gen.stages.indirect.run_fix_self_reflect import run_validate_self_reflect 6 | from alpha_codium.settings.config_loader import get_settings 7 | from alpha_codium.gen.utils import postprocess_response 8 | from alpha_codium.llm.ai_invoker import send_inference 9 | from alpha_codium.log import get_logger 10 | 11 | logger 
= get_logger(__name__) 12 | 13 | 14 | async def run_self_reflect(self, problem): 15 | counter_retry = 0 16 | while True: 17 | try: 18 | logger.info("--reflection stage--") 19 | 20 | # get settings 21 | validate_self_reflection = get_settings().get('self_reflection.validate_self_reflection', False) 22 | actual_number_of_tests = len(problem['public_tests']['input']) 23 | problem['actual_number_of_tests'] = actual_number_of_tests 24 | f = functools.partial(self._run, problem=problem, prompt="code_contests_prompt_reflect") 25 | 26 | # inference 27 | response_reflect, _ = await send_inference(f) 28 | response_reflect = response_reflect.rstrip("` \n") 29 | if response_reflect.startswith("```yaml"): 30 | response_reflect = response_reflect[8:] 31 | try: 32 | response_reflect_yaml = yaml.safe_load(response_reflect) 33 | except yaml.YAMLError: 34 | response_reflect = postprocess_response(response_reflect) # try to include only the yaml part 35 | response_reflect_yaml = yaml.safe_load(response_reflect) 36 | 37 | # check number of tests 38 | actual_number_of_tests = len(problem['public_tests']['input']) 39 | calculated_number_of_tests = len(response_reflect_yaml['tests_explanations']) 40 | if actual_number_of_tests != calculated_number_of_tests: 41 | raise ValueError(f"Error: number of tests in self-reflection ({calculated_number_of_tests}) " 42 | f"does not match the actual number of tests ({actual_number_of_tests})") 43 | problem['response_reflect'] = response_reflect 44 | try: 45 | problem['self_reflection'] = '- ' + '\n- '.join(response_reflect_yaml['self_reflection']) 46 | if problem['self_reflection'].startswith('- - '): 47 | problem['self_reflection'] = problem['self_reflection'][2:] 48 | except Exception: 49 | problem['self_reflection'] = response_reflect_yaml['self_reflection'] 50 | problem['tests_explanations'] = response_reflect_yaml['tests_explanations'] 51 | problem['tests_explanations_str'] = response_reflect.split('tests_explanations:')[1] 52 | 53 | # double validation self-reflection 54 | if validate_self_reflection: 55 | problem = await run_validate_self_reflect(self, problem) 56 | 57 | for s in problem['tests_explanations']: 58 | s['input'] = s['input'].replace('\\n', '\n') 59 | s['output'] = s['output'].replace('\\n', '\n') 60 | s['explanation'] = s['explanation'].replace('\\n', '\n') 61 | 62 | return problem 63 | except Exception as e: 64 | logging.error(f"'run_self_reflect' stage, counter_retry {counter_retry}, Error: {e}") 65 | counter_retry += 1 66 | if counter_retry > 2: 67 | raise e 68 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/run_tests.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import numpy as np 4 | from alpha_codium.code_contests.eval.code_test_runners import eval_solution 5 | from alpha_codium.gen.utils import render_trace 6 | from alpha_codium.log import get_logger 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | def run_tests(self, problem, counter, test_inputs, test_outputs): 12 | try: 13 | # run the solution on the public tests 14 | logging.info(f"evaluating public tests.
attempt {counter}") 15 | test_inputs, results = eval_solution(example=problem, 16 | prediction=problem['code_recent_solution'], 17 | test_inputs=test_inputs, 18 | test_outputs=test_outputs, ) 19 | 20 | # analyze the tests results 21 | error_str = trace_str = "" 22 | all_passed = True 23 | non_empty_output = True 24 | tests_timeout = False 25 | if str(results.compilation_result.program_status) == 'ProgramStatus.kTimeout': 26 | tests_timeout = True 27 | all_passed = False 28 | for i, t in enumerate(results.test_results): 29 | error_str += f"test input:\n{test_inputs[i]}\n" \ 30 | f"expected output:\n{t.expected_output}\n" 31 | if t.actual_output: 32 | error_str += f"code output:\n{t.actual_output}\n'Timeout, took too long to run next test'\n" 33 | else: 34 | error_str += f"code output:\n'Timeout, took too long to run the test'\n" 35 | elif str(results.test_results[0].program_status) == 'ProgramStatus.kFailed': 36 | logger.error("failed to run solution") 37 | error_str = results.test_results[0].sandbox_result 38 | trace_str = f"trace information:\n{render_trace(results.test_results[0].trace)}\n\n" 39 | all_passed = False 40 | else: # ProgramStatus.passed 41 | # initially assume all tests passed 42 | all_passed = True 43 | non_empty_output = True 44 | 45 | # build the error string 46 | error_str = "" 47 | trace_str = "" 48 | for i, t in enumerate(results.test_results): 49 | if str(t.program_status) == 'ProgramStatus.kTimeout': 50 | if t.actual_output.strip(): 51 | t.actual_output += "\nTimeout, took too long to run the next test" 52 | else: 53 | t.actual_output = 'Timeout, took too long to run' 54 | t.passed = False 55 | elif str(t.program_status) == 'ProgramStatus.kFailed': 56 | t.actual_output = t.sandbox_result 57 | t.passed = False 58 | error_str += f"test input:\n{test_inputs[i]}\n" \ 59 | f"expected output:\n{t.expected_output}\n" \ 60 | f"code output:\n{t.actual_output}\n" \ 61 | # f"====================\n====================\n" 62 | 63 | trace_str += f"trace:\n{render_trace(t.trace)}\n" \ 64 | f"====================\n====================\n" 65 | 66 | # if get_settings().code_tester.calc_trace: 67 | # logger.debug(f"trace_str:\n{trace_str}") 68 | 69 | # is_all_passed_public = actual_output == expected_output 70 | all_passed = all_passed and t.passed 71 | non_empty_output = non_empty_output and t.actual_output 72 | 73 | # calculate the distance between the expected and actual output 74 | d_tot = calc_distance_between_results(non_empty_output, tests_timeout, test_outputs, results) 75 | 76 | return problem, all_passed, non_empty_output, error_str, trace_str, tests_timeout, d_tot 77 | except Exception as e: 78 | logging.error(f"Error: {e}") 79 | exit(-1) 80 | 81 | def calc_distance_between_results(non_empty_output, tests_timeout, test_outputs, results): 82 | try: 83 | d_tot = float('inf') 84 | if non_empty_output and not tests_timeout: 85 | d_tot = 0 86 | for i in range(len(test_outputs)): 87 | # logger.info(f"test_outputs[i]:\n{test_outputs[i]}") 88 | # logger.info(f"results.test_results[i].stdout:\n{results.test_results[i].stdout}") 89 | expected = test_outputs[i].rstrip().split('\n') 90 | actual = results.test_results[i].stdout.rstrip().split('\n') 91 | try: 92 | t1 = np.array(list(map(float, actual))) 93 | t2 = np.array(list(map(float, expected))) 94 | if t1.size == 0: 95 | return float('inf') 96 | d_tot += np.sum(np.abs(t1 - t2)) 97 | except: 98 | t1 = np.array(actual) 99 | t2 = np.array(expected) 100 | if t1.size == 0: 101 | return float('inf') 102 | d_tot += np.sum(t1 != t2) 103 | 
except: 104 | d_tot = float('inf') 105 | return d_tot 106 | -------------------------------------------------------------------------------- /alpha_codium/gen/stages/utils.py: -------------------------------------------------------------------------------- 1 | from alpha_codium.log import get_logger 2 | 3 | logger = get_logger(__name__) 4 | 5 | 6 | def set_configurations(problem, iteration=0): 7 | # configurations 8 | problem = {k: problem.get(k) for k in ["name", "description", "public_tests"]} 9 | problem['iteration'] = iteration 10 | 11 | # initialize passed tests field 12 | problem['passed_tests'] = {} 13 | problem['passed_tests']['inputs'] = [] 14 | problem['passed_tests']['outputs'] = [] 15 | 16 | # shorter description, without the input-output examples 17 | if '\nExample\n' in problem['description']: 18 | problem['description_short'] = problem['description'].split('\nExample\n')[0].strip() 19 | elif '\nExamples\n' in problem['description']: 20 | problem['description_short'] = problem['description'].split('\nExamples\n')[0].strip() 21 | else: 22 | logger.info(f"could not split description to short description, description: {problem['description']}") 23 | problem['description_short'] = problem['description'] 24 | return problem -------------------------------------------------------------------------------- /alpha_codium/gen/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import List 3 | 4 | import yaml 5 | 6 | from alpha_codium.code_contests.eval.code_test_runners import eval_solution 7 | from alpha_codium.settings.config_loader import get_settings 8 | from alpha_codium.log import get_logger 9 | 10 | logger = get_logger(__name__) 11 | 12 | 13 | def clip_string(s: str, max_lines: int = None): 14 | lines = s.split("\n") 15 | if max_lines is not None and 0 < max_lines < len(lines): 16 | logger.debug(f"clipping string from {len(lines)} to {max_lines}") 17 | half_lines = int(max_lines / 2) 18 | lines = ( 19 | lines[:half_lines] + 20 | [f"\n.... 
{len(lines) - max_lines} omitted lines ....\n"] + 21 | lines[-half_lines:] 22 | ) 23 | return "\n".join(lines) 24 | else: 25 | return s 26 | 27 | 28 | def render_trace(trace_data): 29 | if not trace_data: 30 | return '' 31 | 32 | max_trace_lines = get_settings().code_tester.get("max_trace_lines") 33 | trace_data = clip_string(trace_data, max_trace_lines) 34 | return trace_data 35 | 36 | 37 | def postprocess_response(response): 38 | response = str(response) 39 | if response.endswith("stop"): 40 | response = response[:-4] 41 | pattern = r'```\w*\n(.*?)```' 42 | matches = re.findall(pattern, response, re.DOTALL) 43 | if matches: 44 | response = matches[0] 45 | return response 46 | 47 | 48 | def evaluate_solution_on_subset(evaluation_test_type, problem, solution, silent=False, break_on_timeout=True): 49 | # evaluate solution 50 | test_results = None 51 | if evaluation_test_type: 52 | test_results = eval_solution(evaluation_test_type=evaluation_test_type, example=problem, prediction=solution, 53 | silent=silent, break_on_timeout=break_on_timeout) 54 | 55 | if test_results[1] == []: 56 | if not silent: 57 | logger.info("=====================================") 58 | logger.info("No tests") 59 | logger.info("=====================================") 60 | return test_results, 0, 0, 0 61 | 62 | if (hasattr(test_results[1], 'compilation_result') and 63 | test_results[1].compilation_result.program_status.name == 'kTimeout'): 64 | if not silent: 65 | logger.info("=====================================") 66 | logger.info("Timeout") 67 | logger.info("=====================================") 68 | return test_results, 0, 0, len(test_results[0]) 69 | 70 | test_passed = 0 71 | test_failed = 0 72 | test_timeout = 0 73 | if not problem[evaluation_test_type]['input']: 74 | logger.info(f"No {evaluation_test_type} for this problem") 75 | else: 76 | for test in test_results[1].test_results: 77 | if (hasattr(test, 'program_status') and test.program_status.name == 'kTimeout'): 78 | test_timeout += 1 79 | elif not test.passed: 80 | test_failed += 1 81 | else: 82 | test_passed += 1 83 | if not silent: 84 | logger.info("=====================================") 85 | logger.info(f"test_passed: {test_passed}, test_failed: {test_failed}, test_timeout: {test_timeout}") 86 | logger.info("=====================================") 87 | 88 | return test_results, test_passed, test_failed, test_timeout 89 | 90 | 91 | def evaluate_on_private_tests(evaluation_test_type, problem, solution, silent=True): 92 | # evaluate solution 93 | test_results = None 94 | if evaluation_test_type: 95 | test_results = eval_solution(evaluation_test_type=evaluation_test_type, example=problem, prediction=solution, silent=silent) 96 | 97 | test_passed = 0 98 | test_failed = 0 99 | test_timeout = 0 100 | 101 | if not test_results[1]: 102 | logger.info("No tests were run") 103 | return test_results, 0, 0 104 | 105 | for test in test_results[1].test_results: 106 | if test.program_status.name=='kTimeout': 107 | test_timeout += 1 108 | elif not test.passed: 109 | test_failed += 1 110 | else: 111 | test_passed += 1 112 | 113 | 114 | logger.info("=====================================") 115 | logger.info(f"test_passed: {test_passed}, test_failed: {test_failed}, test_timeout: {test_timeout}") 116 | logger.info("=====================================") 117 | 118 | return test_results, test_passed, test_failed, test_timeout 119 | 120 | 121 | def load_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict: 122 | response_text = response_text.rstrip("` \n") 123 | 
response_text = response_text.removeprefix('```yaml').rstrip('`') 124 | try: 125 | data = yaml.safe_load(response_text) 126 | except Exception as e: 127 | data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml) 128 | if not data: 129 | get_logger().info(f"Failed to parse AI YAML prediction: {e}") 130 | return data 131 | 132 | 133 | def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict: 134 | response_text_lines = response_text.split('\n') 135 | 136 | keys = keys_fix_yaml 137 | response_text_lines_copy = response_text_lines.copy() 138 | for i in range(0, len(response_text_lines_copy)): 139 | for key in keys: 140 | if response_text_lines_copy[i].strip().startswith(key) and '|' not in response_text_lines_copy[i]: 141 | response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}', 142 | f'{key} |-\n ') 143 | try: 144 | data = yaml.safe_load('\n'.join(response_text_lines_copy)) 145 | get_logger().info(f"Successfully parsed AI prediction after adding |-\n") 146 | return data 147 | except Exception as e: 148 | raise ValueError("yaml parsing error") from e -------------------------------------------------------------------------------- /alpha_codium/llm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/alpha_codium/llm/__init__.py -------------------------------------------------------------------------------- /alpha_codium/llm/ai_handler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import litellm 5 | import openai 6 | from aiolimiter import AsyncLimiter 7 | from litellm import acompletion 8 | from litellm import RateLimitError 9 | from litellm.exceptions import APIError 10 | # from openai.error import APIError, RateLimitError, Timeout, TryAgain 11 | from retry import retry 12 | 13 | from alpha_codium.settings.config_loader import get_settings 14 | from alpha_codium.log import get_logger 15 | 16 | logger = get_logger(__name__) 17 | OPENAI_RETRIES = 5 18 | 19 | 20 | class AiHandler: 21 | """ 22 | This class handles interactions with the OpenAI API for chat completions. 23 | It initializes the API key and other settings from a configuration file, 24 | and provides a method for performing chat completions using the OpenAI ChatCompletion API. 25 | """ 26 | 27 | def __init__(self): 28 | """ 29 | Initializes the OpenAI API key and other settings from a configuration file. 30 | Raises a ValueError if the OpenAI key is missing. 31 | """ 32 | self.limiter = AsyncLimiter(get_settings().config.max_requests_per_minute) 33 | try: 34 | if "gpt" in get_settings().get("config.model").lower(): 35 | openai.api_key = get_settings().openai.key 36 | litellm.openai_key = get_settings().openai.key 37 | self.azure = False 38 | if "deepseek" in get_settings().get("config.model"): 39 | litellm.register_prompt_template( 40 | model="huggingface/deepseek-ai/deepseek-coder-33b-instruct", 41 | roles={ 42 | "system": { 43 | "pre_message": "", 44 | "post_message": "\n" 45 | }, 46 | "user": { 47 | "pre_message": "### Instruction:\n", 48 | "post_message": "\n### Response:\n" 49 | }, 50 | }, 51 | 52 | ) 53 | except AttributeError as e: 54 | raise ValueError("OpenAI key is required") from e 55 | 56 | @property 57 | def deployment_id(self): 58 | """ 59 | Returns the deployment ID for the OpenAI API.
60 | """ 61 | return get_settings().get("OPENAI.DEPLOYMENT_ID", None) 62 | 63 | @retry( 64 | exceptions=(AttributeError, RateLimitError), 65 | tries=OPENAI_RETRIES, 66 | delay=2, 67 | backoff=2, 68 | jitter=(1, 3), 69 | ) 70 | async def chat_completion( 71 | self, model: str, 72 | system: str, 73 | user: str, 74 | temperature: float = 0.2, 75 | frequency_penalty: float = 0.0, 76 | ): 77 | try: 78 | deployment_id = self.deployment_id 79 | if get_settings().config.verbosity_level >= 2: 80 | logging.debug( 81 | f"Generating completion with {model}" 82 | f"{(' from deployment ' + deployment_id) if deployment_id else ''}" 83 | ) 84 | 85 | async with self.limiter: 86 | logger.info("-----------------") 87 | logger.info("Running inference ...") 88 | logger.debug(f"system:\n{system}") 89 | logger.debug(f"user:\n{user}") 90 | if "deepseek" in get_settings().get("config.model"): 91 | response = await acompletion( 92 | model="huggingface/deepseek-ai/deepseek-coder-33b-instruct", 93 | messages=[ 94 | {"role": "system", "content": system}, 95 | {"role": "user", "content": user}, 96 | ], 97 | api_base=get_settings().get("config.model"), 98 | temperature=temperature, 99 | repetition_penalty=frequency_penalty+1, # the scale of TGI is different from OpenAI 100 | force_timeout=get_settings().config.ai_timeout, 101 | max_tokens=2000, 102 | stop=['<|EOT|>'], 103 | ) 104 | response["choices"][0]["message"]["content"] = response["choices"][0]["message"]["content"].rstrip() 105 | if response["choices"][0]["message"]["content"].endswith("<|EOT|>"): 106 | response["choices"][0]["message"]["content"] = response["choices"][0]["message"]["content"][:-7] 107 | else: 108 | response = await acompletion( 109 | model=model, 110 | deployment_id=deployment_id, 111 | messages=[ 112 | {"role": "system", "content": system}, 113 | {"role": "user", "content": user}, 114 | ], 115 | temperature=temperature, 116 | frequency_penalty=frequency_penalty, 117 | force_timeout=get_settings().config.ai_timeout, 118 | ) 119 | except (APIError) as e: 120 | logging.error("Error during OpenAI inference") 121 | raise 122 | except RateLimitError as e: 123 | logging.error("Rate limit error during OpenAI inference") 124 | raise 125 | except Exception as e: 126 | logging.error("Unknown error during OpenAI inference: ", e) 127 | raise APIError from e 128 | if response is None or len(response["choices"]) == 0: 129 | raise APIError 130 | resp = response["choices"][0]["message"]["content"] 131 | finish_reason = response["choices"][0]["finish_reason"] 132 | logger.debug(f"response:\n{resp}") 133 | logger.info('done') 134 | logger.info("-----------------") 135 | return resp, finish_reason 136 | -------------------------------------------------------------------------------- /alpha_codium/llm/ai_invoker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import traceback 3 | from typing import Callable, List 4 | 5 | from alpha_codium.settings.config_loader import get_settings 6 | 7 | 8 | async def send_inference(f: Callable): 9 | all_models = _get_all_models() 10 | all_deployments = _get_all_deployments(all_models) 11 | # try each (model, deployment_id) pair until one is successful, otherwise raise exception 12 | for i, (model, deployment_id) in enumerate(zip(all_models, all_deployments)): 13 | try: 14 | get_settings().set("openai.deployment_id", deployment_id) 15 | return await f(model) 16 | except Exception: 17 | logging.warning( 18 | f"Failed to generate prediction with {model}" 19 | f"{(' from 
deployment ' + deployment_id) if deployment_id else ''}: " 20 | f"{traceback.format_exc()}" 21 | ) 22 | if i == len(all_models) - 1: # If it's the last iteration 23 | raise # Re-raise the last exception 24 | 25 | 26 | def _get_all_models() -> List[str]: 27 | model = get_settings().config.model 28 | fallback_models = get_settings().config.fallback_models 29 | if not isinstance(fallback_models, list): 30 | fallback_models = [m.strip() for m in fallback_models.split(",")] 31 | all_models = [model] + fallback_models 32 | return all_models 33 | 34 | 35 | def _get_all_deployments(all_models: List[str]) -> List[str]: 36 | deployment_id = get_settings().get("openai.deployment_id", None) 37 | fallback_deployments = get_settings().get("openai.fallback_deployments", []) 38 | if not isinstance(fallback_deployments, list) and fallback_deployments: 39 | fallback_deployments = [d.strip() for d in fallback_deployments.split(",")] 40 | if fallback_deployments: 41 | all_deployments = [deployment_id] + fallback_deployments 42 | if len(all_deployments) < len(all_models): 43 | raise ValueError( 44 | f"The number of deployments ({len(all_deployments)}) " 45 | f"is less than the number of models ({len(all_models)})" 46 | ) 47 | else: 48 | all_deployments = [deployment_id] * len(all_models) 49 | return all_deployments 50 | -------------------------------------------------------------------------------- /alpha_codium/llm/token_handler.py: -------------------------------------------------------------------------------- 1 | from jinja2 import Environment, StrictUndefined 2 | from tiktoken import encoding_for_model, get_encoding 3 | 4 | from alpha_codium.settings.config_loader import get_settings 5 | 6 | 7 | def get_token_encoder(): 8 | return ( 9 | encoding_for_model(get_settings().config.model) 10 | if "gpt" in get_settings().config.model 11 | else get_encoding("cl100k_base") 12 | ) 13 | 14 | 15 | class TokenHandler: 16 | """ 17 | A class for handling tokens in the context of a pull request. 18 | 19 | Attributes: 20 | - encoder: An object of the encoding_for_model class from the tiktoken module. Used to encode strings and count the 21 | number of tokens in them. 22 | - limit: The maximum number of tokens allowed for the given model, as defined in the MAX_TOKENS dictionary in the 23 | pr_agent.algo module. 24 | - prompt_tokens: The number of tokens in the system and user strings, as calculated by the _get_system_user_tokens 25 | method. 26 | """ 27 | 28 | def __init__(self, message=None, vars: dict = {}, system="", user=""): # noqa: B006 29 | """ 30 | Initializes the TokenHandler object. 31 | 32 | Args: 33 | - pr: The pull request object. 34 | - vars: A dictionary of variables. 35 | - system: The system string. 36 | - user: The user string. 37 | """ 38 | self.encoder = get_token_encoder() 39 | if message is not None: 40 | self.prompt_tokens = self._get_system_user_tokens( 41 | message, self.encoder, vars, system, user 42 | ) 43 | 44 | def _get_system_user_tokens(self, message, encoder, vars: dict, system, user): 45 | """ 46 | Calculates the number of tokens in the system and user strings. 47 | 48 | Args: 49 | - message: The pull request object. 50 | - encoder: An object of the encoding_for_model class from the tiktoken module. 51 | - vars: A dictionary of variables. 52 | - system: The system string. 53 | - user: The user string. 54 | 55 | Returns: 56 | The sum of the number of tokens in the system and user strings. 
57 | """ 58 | environment = Environment(undefined=StrictUndefined) 59 | system_prompt = environment.from_string(system).render(vars) 60 | user_prompt = environment.from_string(user).render(vars) 61 | system_prompt_tokens = len(encoder.encode(system_prompt)) 62 | user_prompt_tokens = len(encoder.encode(user_prompt)) 63 | return system_prompt_tokens + user_prompt_tokens 64 | 65 | def count_tokens(self, patch: str) -> int: 66 | """ 67 | Counts the number of tokens in a given patch string. 68 | 69 | Args: 70 | - patch: The patch string. 71 | 72 | Returns: 73 | The number of tokens in the patch string. 74 | """ 75 | return len(self.encoder.encode(patch, disallowed_special=())) 76 | -------------------------------------------------------------------------------- /alpha_codium/log/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import sys 4 | from enum import Enum 5 | 6 | from loguru import logger 7 | 8 | 9 | class LoggingFormat(str, Enum): 10 | CONSOLE = "CONSOLE" 11 | JSON = "JSON" 12 | 13 | 14 | def json_format(record: dict) -> str: 15 | return record["message"] 16 | 17 | 18 | def setup_logger(logger_path: str = "./example.log", 19 | level: str = "INFO", 20 | fmt: LoggingFormat = LoggingFormat.CONSOLE): 21 | level: int = logging.getLevelName(level.upper()) 22 | if type(level) is not int: 23 | level = logging.INFO 24 | 25 | fileHandler = logging.FileHandler(logger_path, mode='w') 26 | 27 | if fmt == LoggingFormat.JSON: 28 | logger.remove(None) 29 | logger.add( 30 | sys.stdout, 31 | level=level, 32 | format="{message}", 33 | colorize=False, 34 | serialize=True, 35 | ) 36 | elif fmt == LoggingFormat.CONSOLE: 37 | logger.remove(None) 38 | logger.add(sys.stdout, level=level, colorize=True) 39 | logger.add(fileHandler, level=logging.DEBUG) 40 | 41 | return logger 42 | 43 | 44 | def get_logger(*args, **kwargs): 45 | return logger 46 | -------------------------------------------------------------------------------- /alpha_codium/settings/.secrets_template.toml: -------------------------------------------------------------------------------- 1 | [openai] 2 | #key = "..." 3 | -------------------------------------------------------------------------------- /alpha_codium/settings/choose_best_solution_direct.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompts_choose_best_solution_direct] 2 | temperature = 0.3 3 | system = """\ 4 | To solve the problem, utilize an exhaustive high-computational approach like simulation, recursion, brute-force or other direct approach. Ignore the problem constraints regarding large input size. 5 | """ 6 | User="""\ 7 | You are given a code contest problem, self-reflection on the problem, and a solution concept. 8 | 9 | problem description: 10 | ========== 11 | {{description|trim}} 12 | ========== 13 | 14 | 15 | self-reflection on the problem: 16 | ============ 17 | {{ self_reflection|trim }} 18 | ============ 19 | 20 | 21 | solution concept: 22 | ========== 23 | 'To solve the problem, utilize an exhaustive high-computational approach like simulation, recursion, brute-force or other direct approach. Ignore the problem constraints regarding large input size.' 24 | ========== 25 | 26 | 27 | Using the inputs above, your goal is to present a full exhaustive solution to the code contest problem. 
28 | The output must be a YAML object equivalent to type $ExhaustiveProblemSolution, according to the following Pydantic definitions: 29 | ===== 30 | class Test(BaseModel): 31 | input: str 32 | output: str 33 | 34 | class ExhaustiveProblemSolution(BaseModel): 35 | name: str = Field(description="The name of the best solution") 36 | content: str = Field(description="Describe in words content of the solution") 37 | problem_rules: str = Field(description="Describe the problem rules, in bullet points") 38 | problem_stopping_criteria: str = Field(description="Describe the stopping criteria problem") 39 | pseudo_code: str = Field(description="Describe a pseudo code of the solution. Be specific and detailed") 40 | ===== 41 | 42 | 43 | Example YAML output: 44 | ```yaml 45 | name: | 46 | ... 47 | content: | 48 | ... 49 | problem_rules: | 50 | ... 51 | problem_stopping_criteria: | 52 | ... 53 | pseudo_code: | 54 | ... 55 | ``` 56 | 57 | Each YAML output MUST be after a newline, indented, with block scalar indicator ('|'). 58 | 59 | Answer: 60 | ```yaml\ 61 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompt_analyze_and_fix.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompt_analyze_and_fix] 2 | temperature = 0.2 3 | system = """\ 4 | - You must divide the fixed code into small sub-functions, with meaningful names and functionality. Each function should be no longer than 10 lines of code. 5 | - The fixed code should be robust and general, and work for other input examples as well. 6 | - The fixed should be different from the original code, and not just a copy-paste of the original code. 7 | """ 8 | user="""\ 9 | {%- if use_self_reflection_public %} 10 | You are given a code contest problem, and a self-reflection on the problem: 11 | 12 | problem description: 13 | ============= 14 | {{ description|trim }} 15 | ============= 16 | 17 | 18 | self-reflection on the problem: 19 | ====== 20 | {{ self_reflection|trim }} 21 | ====== 22 | 23 | {%- else %} 24 | 25 | You are given a code contest problem: 26 | ============= 27 | {{ description_short|trim }} 28 | ============= 29 | 30 | {%- endif %} 31 | 32 | 33 | A Python code solution was generated for the problem: 34 | ============= 35 | {{ code_recent_solution|trim }} 36 | ============= 37 | 38 | 39 | However, when running the input-output example test, the code solution failed to produce the expected output, and gave the following error message: 40 | ============= 41 | {{ error_str|trim }} 42 | ============= 43 | 44 | 45 | Using all the information above, your goal is generate a fixed code solution, that will pass the test. 46 | 47 | The output must be a YAML object equivalent to type $FixedCode, according to the following Pydantic definitions: 48 | ===== 49 | class FixedCode(BaseModel): 50 | failed_tests: str = Field(description="List the input-output tests that failed. use the format [{input: .., expected_output: .., code_output: ..}, ...]") 51 | what_went_wrong: str = Field(description="Describe shortly, in bullet points, what went wrong with the previous solutions.") 52 | fixed_flow: str = Field(description="Describe a fixed flow leading to the correct solution, in pseudo-code. Be specific and detailed.") 53 | fixed_code: str = Field(description="A fixed code solution. Don't explain your answer. 
Just provide the code itself, and nothing else") 54 | ===== 55 | 56 | Example YAML output: 57 | ```yaml 58 | failed_tests: | 59 | ... 60 | what_went_wrong: | 61 | ... 62 | fixed_flow: | 63 | ... 64 | fixed_code: | 65 | ... 66 | ``` 67 | 68 | Additional guidelines for generating the fixed code: 69 | - Divide the generated code into small sub-functions, with meaningful names and functionality. Keep the code simple and short. 70 | - The fixed code should be robust and general, and work for other input examples as well 71 | - Double-check the new solution code, and make sure it is correct and complete. 72 | - The fixed code must have a reasonable runtime - less than three seconds on a modern computer, given the problem constraints for large input. 73 | - The code needs to be self-contained, and executable as-is. 74 | 75 | 76 | Answer: 77 | ```yaml 78 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompt_analyze_and_fix_direct.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompt_analyze_and_fix_direct] 2 | temperature = 0.2 3 | system = """\ 4 | - You must divide the new code into small sub-functions, with meaningful names and functionality. Variable names should also be meaningful. 5 | - The new code still needs to utilize an exhaustive approach like simulation, recursion, brute-force or direct solution. Ignore the problem constraints regarding large input size. 6 | - The new code should be different from the original code, and not just a copy-paste of the original code. 7 | """ 8 | user="""\ 9 | You are given a code contest problem. 10 | 11 | 12 | problem description: 13 | ================ 14 | {{ description_short|trim }} 15 | ================ 16 | 17 | 18 | A code solution was generated for the problem: 19 | ============= 20 | {{ code_recent_solution|trim }} 21 | ============= 22 | 23 | 24 | However, when running the input-output example test, the code solution failed to produce the expected output, and gave the following error message: 25 | ============= 26 | {{ error_str|trim }} 27 | ============= 28 | 29 | 30 | Using the information above, your goal is to generate a fixed Python code, that will correctly solve the problem. 31 | - The fixed code still needs to utilize an exhaustive approach like simulation, recursion, brute-force or direct solution. Ignore the problem constraints regarding large input size. 32 | - If possible, provide minor optimizations to the code, but this is not required. 33 | - Make sure the fixed code covers relevant edge cases of the problem. 34 | 35 | The output must be a YAML object equivalent to type $FixedCode, according to the following Pydantic definitions: 36 | ===== 37 | class FixedCode(BaseModel): 38 | failed_test: str = Field(description="list the input-output test that failed. use the format {input: .., expected_output: .., code_output: ..}") 39 | what_went_wrong: str = Field(description="explain what went wrong with the code solution") 40 | fixed_code: str = Field(description="A fixed code solution. Don't explain your answer. Just provide a fixed code, and nothing else") 41 | ===== 42 | 43 | Example YAML output: 44 | ```yaml 45 | failed_test: |- 46 | ... 47 | what_went_wrong: |- 48 | ... 49 | fixed_code: |- 50 | ... 51 | ``` 52 | 53 | Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-'). 
54 | 55 | 56 | Answer: 57 | ```yaml 58 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompt_analyze_failure.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompt_analyze_failure] 2 | temperature = 0.3 3 | system = """\ 4 | """ 5 | user="""\ 6 | You are given a code contest problem, and a self-reflection on the problem: 7 | 8 | 9 | problem description: 10 | ====== 11 | {{ description_short|trim }} 12 | ====== 13 | 14 | 15 | self-reflection on the problem: 16 | ====== 17 | {{ self_reflection|trim }} 18 | ====== 19 | 20 | 21 | A Python code solution was generated for the problem: 22 | ====== 23 | {{ code_recent_solution|trim }} 24 | ====== 25 | 26 | 27 | However, when running the following input example, the code solution above failed to produce the expected output: 28 | ====== 29 | {{ error_str|trim }} 30 | ====== 31 | 32 | {%- if use_test_explanations_public %} 33 | 34 | Here is an explanation of how the input should have led to the expected output: 35 | ====== 36 | {{ test_explanation_current|trim }} 37 | ====== 38 | {%- endif %} 39 | 40 | 41 | Your goal is to analyze the code solution and the error, and propose a fix so the code will produce the expected output for the provided test input. 42 | The fix should keep the solution robust, and work for all other input examples as well. 43 | Make sure the fix has a reasonable runtime - less than three seconds on a modern computer, given the problem constraints for large input. 44 | 45 | 46 | The output must be a YAML object equivalent to type $FixedSolution, according to the following Pydantic definitions: 47 | ====== 48 | class FixedSolution(BaseModel): 49 | failed_tests: str = Field(description="List the input-output tests that failed. use the format [{input: .., expected_output: .., code_output: ..}, ...]") 50 | what_went_wrong: str = Field(description="Explanation shortly, in words, what was the problem with the code solution, and how should it be fix. Be as specific as possible. Don't generate actuall code.") 51 | fixed_flow: str = Field(description="Describe, in bullet points, a fixed flow that will calculate the correct output. be specific and elaborate. Emphasize the fixed parts, and how they apply to getting the correct output") 52 | ====== 53 | 54 | 55 | Example YAML output: 56 | ```yaml 57 | failed_tests: | 58 | ... 59 | what_went_wrong: | 60 | ... 61 | fixed_flow: | 62 | ... 63 | ``` 64 | 65 | 66 | Answer: 67 | ```yaml 68 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_baseline.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompts_baseline] 2 | temperature = 0.3 3 | system= """\ 4 | """ 5 | user=""" 6 | You are given a code contest problem: 7 | 8 | problem description: 9 | ============= 10 | {{description}} 11 | ============= 12 | 13 | 14 | Your goal is to generate a valid Python code that correctly solves the problem. 15 | Make sure to fully address the problem goals, rules and constraints. 16 | The code should be robust and general, and work for other input examples as well, not just the one given in the problem description. 17 | 18 | guidelines: 19 | - Generate only code, without any additional explanations or comments. 20 | - Make sure to include all the necessary module imports, properly initialize the variables, and address the problem constraints. 
21 | - The code needs to be self-contained, and executable as-is. 22 | 23 | The code output must follow this structure: 24 | ``` 25 | def f1(...): 26 | ... 27 | return ... 28 | 29 | def f2(...): 30 | ... 31 | return ... 32 | ... 33 | 34 | if __name__ == "__main__": 35 | ... 36 | ``` 37 | The code should read the input using the 'input()' method. Make sure to properly parse the input, according to the problem description. 38 | The output should be printed without additional words using the 'print()' method. 39 | 40 | answer: 41 | ```python 42 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_choose_best_solution.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompts_choose_best_solution] 2 | temperature = 0.2 3 | system = """\ 4 | """ 5 | 6 | User="""\ 7 | You are given a code contest problem, and a self-reflection on the problem: 8 | 9 | 10 | problem description: 11 | ======= 12 | {{description|trim}} 13 | ======= 14 | 15 | 16 | self-reflection on the problem: 17 | ======= 18 | {{ self_reflection|trim }} 19 | ======= 20 | 21 | 22 | Here is a list of {{ s_possible_solutions|length }} possible solutions to the problem: 23 | ======= 24 | {{ s_possible_solutions_str|trim }} 25 | ======= 26 | 27 | 28 | Using the inputs above, your goal is to choose the best solution to the code contest problem. 29 | Don't just pick the most efficient solution. The main consideration is that the solution can fully solve the problem in a simple and robust manner. 30 | Make sure the chosen solution has a reasonable runtime - less than three seconds on a modern computer, given the problem constraints regarding large inputs. 31 | 32 | The output must be a YAML object equivalent to type $ProblemSolution, according to the following Pydantic definitions: 33 | ======= 34 | class Test(BaseModel): 35 | input: str 36 | output: str 37 | 38 | class ProblemSolution(BaseModel): 39 | name: str = Field(description="The name of the best solution") 40 | content: str = Field(description="The content of the best solution") 41 | why: str = Field(description="Shortly explain why is this the best solution") 42 | flow: List[str] = Field(description="Describe of the flow of the solution, in bullet points") 43 | problem_tests: List[Test] = Field("List the input-output examples that are provided in the problem description.") 44 | input_output_examples_flow: List[str] = Field(description="Describe, in bullet points, how the proposed flow will lead to getting the expected output for the provided input examples") 45 | ======= 46 | 47 | 48 | Example YAML output: 49 | ```yaml 50 | name: | 51 | ... 52 | content: | 53 | ... 54 | why: | 55 | ... 56 | flow: 57 | - | 58 | ... 59 | - | 60 | ... 61 | ... 62 | problem_tests: 63 | - input: | 64 | ... 65 | output: | 66 | ... 67 | input_output_examples_flow: 68 | - | 69 | ... 70 | - | 71 | ... 72 | ``` 73 | 74 | Each YAML output MUST be after a newline, indented, with block scalar indicator ('|'). 75 | 76 | Answer: 77 | ```yaml\ 78 | """ 79 | -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_fix_solution.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompt_fix_solution] 2 | temperature = 0.3 3 | system="""\ 4 | - You must divide the fixed code into small sub-functions, with meaningful names and functionality. 
Each function should be no longer than 10 lines of code. 5 | - The fixed code should be robust and general, and work for other input examples as well. 6 | - The fixed should be different from the original code, and not just a copy-paste of the original code. 7 | """ 8 | user="""\ 9 | You are given a code contest problem: 10 | ============= 11 | {{ description_short|trim }} 12 | ============= 13 | 14 | 15 | A previous Python solution code was generated for the problem: 16 | ============= 17 | {{ code_recent_solution|trim }} 18 | ============= 19 | 20 | 21 | However, when running the input-output example test, the code failed to produce the expected output: 22 | ===================================== 23 | Error message when running the 'solution code': 24 | ' 25 | {{ error_str|trim }} 26 | ' 27 | ===================================== 28 | 29 | 30 | We analyzed the error message, and concluded the following about the problem: 31 | ============= 32 | {{ what_went_wrong|trim }} 33 | ============= 34 | 35 | 36 | Here is a fixed flow, that a correct solution code should follow: 37 | ============= 38 | {{ fixed_flow|trim }} 39 | ============= 40 | 41 | 42 | Using the analysis above, you need to generate a fixed solution code, that will pass all the tests. 43 | Additional guidelines for generating the fixed code: 44 | - The fixed solution code must pass all the tests, and have a reasonable runtime - less than three seconds on a modern computer, under the problem constraints. 45 | - Make sure the new solution code generalizes to all possible input-output examples, not just the provided input-output examples. 46 | - You must divide the new solution code into small sub-functions, with meaningful names and functionality 47 | 48 | 49 | The code output must follow this structure: 50 | ```` 51 | def f1(...): 52 | ... 53 | return ... 54 | 55 | def f2(...): 56 | ... 57 | return ... 58 | ... 59 | 60 | if __name__ == "__main__": 61 | ... 62 | ``` 63 | The code should read the input using the 'input()' method. Make sure to properly parse the input, according to the problem description. 64 | The output should be printed without additional words using the 'print()' method. 65 | 66 | 67 | Answer: 68 | ```python 69 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_generate_ai_tests.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompts_generate_ai_tests] 2 | temperature = 0.2 3 | system = """\ 4 | """ 5 | 6 | User="""\ 7 | You are given a code contest problem and a self-reflection on the problem: 8 | 9 | 10 | problem description: 11 | ====== 12 | {{ description|trim }} 13 | ====== 14 | 15 | 16 | self-reflection on the problem: 17 | ====== 18 | {{ self_reflection|trim }} 19 | ====== 20 | 21 | {%- if use_test_explanations_possible_solutions %} 22 | 23 | 24 | Here are also explanations for the problem test cases: 25 | ============ 26 | {{ tests_explanations_str|trim }} 27 | ============ 28 | {%- endif %} 29 | 30 | 31 | Your task is to generate additional {{ number_of_ai_tests }} diverse input-output examples for the code contest problem. 32 | Try to cover cases that are not covered by the original tests. Also include a test for large inputs. 33 | The generated tests should be sorted by difficulty, from easiest to hardest. 34 | All the inputs should be valid, and the outputs are correct. Double check them, and validate they match the problem description and rules. 
35 | 36 | The output must be a valid YAML object equivalent to type $ProblemTests, according to the following Pydantic definitions: 37 | ====== 38 | class Test(BaseModel): 39 | input: str 40 | output: str 41 | explanation: str = Field(description='Short explanation how we got the output from the input. Be specific') 42 | 43 | class ProblemTests(BaseModel): 44 | tests: List[Test] = Field(min_items={{number_of_ai_tests}}, max_items={{number_of_ai_tests}}) 45 | ====== 46 | 47 | 48 | Example YAML output: 49 | ```yaml 50 | tests: 51 | - input: | 52 | ... 53 | output: | 54 | ... 55 | explanation: | 56 | ... 57 | ... 58 | ``` 59 | 60 | Each YAML output MUST be after a newline, indented, with block scalar indicator ('|'). 61 | 62 | Answer: 63 | ```yaml\ 64 | """ 65 | -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_generate_possible_solutions.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompt_generate_possible_solutions] 2 | temperature = 0.3 3 | system= """\ 4 | Pay attention to small details and nuances in the problem description. 5 | """ 6 | user="""You are given a code contest problem, and a self-reflection on the problem: 7 | 8 | problem description: 9 | ===== 10 | {{description}} 11 | ===== 12 | 13 | 14 | self-reflection on the problem: 15 | ============ 16 | {{ self_reflection|trim }} 17 | ============ 18 | 19 | {%- if use_test_explanations_possible_solutions %} 20 | 21 | 22 | Here are also explanations for the problem test cases: 23 | ============ 24 | {{ tests_explanations_str|trim }} 25 | ============ 26 | {%- endif %} 27 | 28 | 29 | Your goal is to come up with possible solutions to the code contest problem. 30 | 31 | Guidelines: 32 | - Make sure each solution fully addresses the problem goals, constraints, examples, and notes. 33 | - Each solution must have reasonable runtime and memory complexity - less than three seconds on a modern computer, given the problem constraints for large inputs. 34 | - Double-check the solutions. Each possible solution must be able to generalize to additional test cases, not just the ones provided in the problem description. 35 | 36 | The output must be a YAML object equivalent to type $ProblemSolutions, according to the following Pydantic definitions: 37 | ====== 38 | class Solution(BaseModel): 39 | name: str = Field(description="The name of the solution") 40 | content: str = Field(description="A description of the solution") 41 | why_it_works: str = Field(description="Shortly explain why this solution correctly solves the problem. Be specific and detailed regarding the problem rules and goals.") 42 | labels: List[str] = Field(description="A list of labels for the solution. For example (partial list): binary search, dynamic programming, trees, combinatorics, dfs, bfs, graphs, greedy, math, data structures, geometry, number theory, two pointers, simulation, direct approach, probabilities, ...") 43 | complexity: str = Field(description="The complexity of the solution") 44 | 45 | 46 | class $ProblemSolutions(BaseModel): 47 | possible_solutions: List[Solution] = Field(max_items={{max_num_of_possible_solutions}}, description="A list of possible solutions to the problem. Make sure each solution fully addresses the problem rules and goals.") 48 | ====== 49 | 50 | 51 | Example YAML output: 52 | ```yaml 53 | possible_solutions: 54 | - name: | 55 | ... 56 | content: | 57 | ... 58 | why_it_works: | 59 | ... 
60 | labels: 61 | - ... 62 | - ... 63 | complexity: | 64 | ... 65 | ``` 66 | 67 | Answer: 68 | ```yaml\ 69 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_reflect.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompt_reflect] 2 | temperature = 0.2 3 | system= """\ 4 | The self-reflection must cover every aspect of the problem. Pay attention to small details and nuances in the problem description. 5 | """ 6 | user="""You are given a code contest problem: 7 | 8 | problem name: '{{name}}' 9 | 10 | 11 | problem description: 12 | ===== 13 | {{description|trim}} 14 | ===== 15 | 16 | 17 | Given the code contest problem, you have two tasks: 18 | 1) Reflect on the problem, and describe it in your own words, in bullet points. Pay attention to small details, nuances, notes and examples in the problem description. 19 | 2) Explain how each provided example input leads to the corresponding output (in total {{ actual_number_of_tests }} examples are provided). 20 | Read carefully the problem description. Make sure the test explanations are consistent with them, and between themselves. 21 | The explanation must coherently and logically lead from the input to the output. Be as specific as possible. 22 | 23 | The output must be a YAML object equivalent to type $ProblemReflection, according to the following Pydantic definitions: 24 | ===== 25 | Class InputOutput(BaseModel): 26 | input: str 27 | output: str 28 | explanation: str = Field(description="Short explanation how the test input leads to the test output.") 29 | 30 | 31 | class ProblemReflection(BaseModel): 32 | self_reflection: str = Field(description="Describe the problem in your own words, in bullet points. Address the problem goals, inputs, outputs, rules, constraints, and other relevant details.") 33 | tests_explanations: list[InputOutput] = Field(max_items={{ actual_number_of_tests }}, description="List of explanations for each test case") 34 | ===== 35 | 36 | Example YAML output: 37 | ```yaml 38 | self_reflection: 39 | - | 40 | ... 41 | - | 42 | ... 43 | tests_explanations: 44 | - input: | 45 | ... 46 | output: | 47 | .. 48 | explanation: | 49 | ... 50 | ... 51 | ``` 52 | 53 | 54 | Answer: 55 | ```yaml 56 | """ 57 | -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_solve.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompts_solve] 2 | temperature = 0.3 3 | system= """\ 4 | - You must divide the generated code into small sub-functions, with meaningful names and functionality. Each function should be no longer than 10 lines of code. 5 | - Double-check the solution code. The generated solution must generalize to any valid input, and not just the provided examples. 6 | """ 7 | user="""\ 8 | You are given a code contest problem, and a self-reflection on the problem. 
9 | 10 | 11 | problem description: 12 | ============= 13 | {{ description|trim }} 14 | ============= 15 | 16 | 17 | self-reflection on the problem: 18 | ====== 19 | {{ self_reflection|trim }} 20 | ====== 21 | 22 | 23 | Your goal is to generate a valid Python code that correctly solves the code contest problem, using the following algorithm: 24 | ============= 25 | {{ s_best_solution|trim }} 26 | ============= 27 | 28 | 29 | 30 | Guidelines: 31 | - You must divide the generated code into small sub-functions, with meaningful names and functionality. Variables names should also be meaningful. 32 | - Double-check the generated code. It should generalize to any valid input, and not just the provided examples. 33 | - Make sure to include all the necessary module imports, properly initialize the variables, and address the problem constraints. 34 | - The code needs to be self-contained, and executable as-is. 35 | 36 | 37 | 38 | The generated code must follow this structure: 39 | ``` 40 | def f1(...): 41 | ... 42 | return ... 43 | 44 | def f2(...): 45 | ... 46 | return ... 47 | ... 48 | 49 | if __name__ == "__main__": 50 | ... 51 | ``` 52 | The code should read the input using the 'input()' method. Make sure to properly parse the input, according to the problem description. 53 | The output should be printed without additional words using the 'print()' method. 54 | 55 | 56 | Answer: 57 | ```python 58 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_solve_direct.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompts_solve_direct] 2 | temperature = 0.3 3 | system= """\ 4 | - You must divide the generated code into small sub-functions, with meaningful names and functionality. Each function should be no longer than 10 lines of code. 5 | - The code should ignore the problem constraints for large inputs. 6 | """ 7 | user="""\ 8 | You are given a code contest problem, and a self-reflection on the problem. 9 | 10 | 11 | problem description: 12 | ============= 13 | {{ description|trim }} 14 | ============= 15 | 16 | 17 | self-reflection on the problem: 18 | ====== 19 | {{ self_reflection|trim }} 20 | ====== 21 | 22 | 23 | Your goal is to generate a valid Python code that correctly solves the code contest problem, using the following algorithm: 24 | ============= 25 | {{ s_best_solution|trim }} 26 | ============= 27 | 28 | 29 | 30 | Guidelines: 31 | - You must divide the generated code into small sub-functions, with meaningful names and functionality. Variables names should also be meaningful. 32 | - Double-check the solution code. Make sure to include all the necessary module imports, properly initialize the variables, and address the problem constraints. 33 | - The code needs to be self-contained, and executable as-is. Output only code, without any explanations or comments. 34 | 35 | 36 | 37 | The code output must follow this structure: 38 | ``` 39 | def f1(...): 40 | ... 41 | return ... 42 | 43 | def f2(...): 44 | ... 45 | return ... 46 | ... 47 | 48 | if __name__ == "__main__": 49 | ... 50 | ``` 51 | The code should read the input using the 'input()' method. Make sure to properly parse the input, according to the problem description. 52 | The output should be printed without additional words using the 'print()' method. 
53 | 54 | 55 | Answer: 56 | ```python 57 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_validate_ai_tests.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompts_validate_ai_tests] 2 | temperature = 0.2 3 | system = """\ 4 | Your goal is to consider each AI-generated test, and make sure its output and its explanation are correct. Be critical - they could be wrong. 5 | guidelines: 6 | - Read carefully the problem description. Make sure the output and the explanations are consistent with them, and between themselves. 7 | - Make sure you understand problem constraints, rules, and examples. 8 | - The tests explanations must coherently and logically lead from the input to the output. 9 | """ 10 | 11 | User="""\ 12 | You are given a code contest problem and a self-reflection on the problem: 13 | 14 | 15 | problem description: 16 | ====== 17 | {{ description|trim }} 18 | ====== 19 | 20 | 21 | self-reflection on the problem: 22 | ====== 23 | {{ self_reflection|trim }} 24 | ====== 25 | 26 | 27 | Here are additional tests for the problem, generated by an AI: 28 | 29 | AI-generated tests: 30 | ============ 31 | {{ problem_ai_tests|trim }} 32 | ============ 33 | 34 | 35 | Your goal is to consider each AI-generated test, and make sure the output and the explanation are correct. Be critical - they could be wrong. 36 | 37 | Guidelines: 38 | - Read the problem description carefully. Make sure the output and the explanations are consistent with them, and between themselves. 39 | - The test explanations must coherently and logically lead from the input to the output. 40 | 41 | The output must be a YAML object equivalent to type $ProblemTests, according to the following Pydantic definitions: 42 | ===== 43 | Class Test(BaseModel): 44 | input: str 45 | output: str 46 | explanation: str = Field(description="Short explanation of how the input leads to the output.") 47 | 48 | class ProblemTests(BaseModel): 49 | tests: List[Test] = Field(min_items={{number_of_ai_tests}}, max_items={{number_of_ai_tests}}) 50 | ===== 51 | 52 | 53 | Example YAML output: 54 | ```yaml 55 | tests: 56 | - input: | 57 | ... 58 | output: | 59 | ... 60 | explanation: | 61 | ... 62 | ... 63 | ``` 64 | 65 | Each YAML output MUST be after a newline, indented, with block scalar indicator ('|'). 66 | 67 | Answer: 68 | ```yaml\ 69 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/code_contests_prompts_validate_reflection.toml: -------------------------------------------------------------------------------- 1 | [code_contests_prompts_validate_reflection] 2 | temperature = 0.2 3 | system = """\ 4 | """ 5 | 6 | User="""\ 7 | You are given a code contest problem, and ai-generated explanations of how each input example leads to the corresponding output: 8 | 9 | 10 | problem description: 11 | ============ 12 | {{description|trim}} 13 | ============ 14 | 15 | 16 | tests explanations: 17 | ============ 18 | {{ tests_explanations_str|trim }} 19 | ============ 20 | 21 | 22 | Your goal is to consider each test explanation, and make sure it is correct and complete. Be critical - the provided explanations may be wrong or incomplete. 23 | Read carefully the problem description. Make sure the test explanations are consistent with them, and between themselves. 
24 | The explanations must coherently and logically lead from the input to the output, with the actual flow. Be specific as possible, and describe in detail how the input leads to the output. 25 | Pay attention to the problem constraints, and small details. 26 | 27 | 28 | The output must be a YAML object equivalent to type $InputOutputExplanation, according to the following Pydantic definitions: 29 | ===== 30 | Class InputOutput(BaseModel): 31 | input: str 32 | output: str 33 | explanation: str = Field(description="Short explanation of how the input leads to the output. Be specific as possible.") 34 | 35 | 36 | class $InputOutputExplanation(BaseModel): 37 | fixed_tests_explanations: list[InputOutput] = Field(max_items = {{ actual_number_of_tests }}) 38 | ===== 39 | 40 | 41 | Example YAML output: 42 | ```yaml 43 | fixed_tests_explanations: 44 | - input: | 45 | ... 46 | output: | 47 | .. 48 | explanation: | 49 | ... 50 | ... 51 | ``` 52 | 53 | Answer: 54 | ```yaml\ 55 | """ -------------------------------------------------------------------------------- /alpha_codium/settings/config_loader.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from os import listdir 3 | from os.path import abspath, dirname, join, isfile 4 | import glob 5 | 6 | from dynaconf import Dynaconf 7 | 8 | PR_AGENT_TOML_KEY = "pr-agent" 9 | 10 | current_dir = dirname(abspath(__file__)) 11 | # setting_dir = join(current_dir, "settings") 12 | setting_dir = current_dir 13 | 14 | 15 | 16 | toml_files = list(pathlib.Path(join(setting_dir)).glob('*.toml')) # includes hidden files 17 | global_settings = Dynaconf( 18 | envvar_prefix=False, 19 | merge_enabled=True, 20 | settings_files=toml_files, 21 | ) 22 | 23 | 24 | def get_settings(): 25 | return global_settings 26 | -------------------------------------------------------------------------------- /alpha_codium/settings/configuration.toml: -------------------------------------------------------------------------------- 1 | [config] 2 | model="gpt-4-0125-preview" 3 | # model="gpt-4o-2024-05-13" 4 | # model="gpt-4-0613" 5 | # model="gpt-3.5-turbo-16k" 6 | frequency_penalty=0.1 7 | ai_timeout=90 # seconds 8 | fallback_models =[] 9 | verbosity_level=0 # 0,1,2 10 | private_dataset_cache_dir="~/.cache/huggingface/datasets/alpha_codium" 11 | max_requests_per_minute=60 12 | 13 | [dataset] 14 | evaluate_prev_solutions=false 15 | num_iterations=1 # X iterations to try to solve the problem 16 | use_iteration_scheme=true 17 | 18 | [solve] 19 | reduce_verbose = false 20 | use_baseline = false 21 | use_direct_solutions=false 22 | 23 | [self_reflection] 24 | validate_self_reflection=false 25 | 26 | [possible_solutions] 27 | max_num_of_possible_solutions=3 28 | use_test_explanations=true 29 | remove_bruce_force_solutions=true 30 | 31 | [generate_ai_tests] 32 | validate_ai_tests=false 33 | number_of_ai_tests=6 34 | use_test_explanations=true 35 | add_public_tests_to_ai_tests=true 36 | 37 | [initial_code_generation] 38 | max_attempts=8 39 | 40 | [public_tests] 41 | max_allowed_calls=4 42 | max_fixes_per_test=3 43 | use_test_explanations=false 44 | single_stage_fix=true 45 | use_self_reflection=false 46 | 47 | [ai_tests] 48 | max_allowed_calls=4 49 | 50 | [code_tester] 51 | tester_type="local" # local, code_contests 52 | order_matters=true 53 | sandbox=true 54 | delta=0.0001 55 | # trace 56 | calc_trace=false 57 | use_trace=false 58 | max_trace_lines=50 59 | trace_depth=4 60 | 61 | [code_contests_tester] 62 | stop_on_first_failure = false 
63 | timeout = 3 64 | path_to_python_bin = "./venv/bin/python3.9" 65 | path_to_python_lib = ["./venv/lib", "./venv/lib/python3.9"] 66 | -------------------------------------------------------------------------------- /alpha_codium/solve_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from alpha_codium.gen.dataset_solver import solve_dataset 4 | from alpha_codium.log import get_logger, setup_logger 5 | 6 | logger = get_logger(__name__) 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--dataset_name", type=str, default="valid_and_test_processed") 10 | parser.add_argument("--split_name", type=str, default="valid") 11 | parser.add_argument("--database_solution_path", type=str, default="") 12 | if __name__ == "__main__": 13 | args = parser.parse_args() 14 | setup_logger() 15 | 16 | # set default database_solution_path 17 | args.database_solution_path = args.database_solution_path 18 | if not args.database_solution_path: 19 | args.database_solution_path = f"./{args.dataset_name}_{args.split_name}_solution_database.json" 20 | logger.info(f"args.database_solution_path: {args.database_solution_path}") 21 | 22 | solve_dataset(dataset_name=args.dataset_name, 23 | split_name=args.split_name, 24 | database_solution_path=args.database_solution_path) 25 | -------------------------------------------------------------------------------- /alpha_codium/solve_my_problem.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from alpha_codium.gen.coding_competitor import solve_problem, solve_my_problem 5 | from alpha_codium.log import setup_logger 6 | from alpha_codium.settings.config_loader import get_settings 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--my_problem_json_file", type=str, default="my_problem_example.json") 10 | 11 | if __name__ == "__main__": 12 | args = parser.parse_args() 13 | setup_logger() 14 | 15 | with open(args.my_problem_json_file, "r") as my_problem: 16 | solve_my_problem(json.load(my_problem)) 17 | -------------------------------------------------------------------------------- /alpha_codium/solve_problem.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from alpha_codium.gen.coding_competitor import solve_problem 4 | from alpha_codium.log import setup_logger 5 | from alpha_codium.settings.config_loader import get_settings 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--dataset_name", type=str, default="valid_and_test_processed") 9 | parser.add_argument("--split_name", type=str, default="valid") 10 | parser.add_argument("--problem_number", type=int, default=0) 11 | parser.add_argument("--problem_name", type=str, default="") 12 | 13 | if __name__ == "__main__": 14 | args = parser.parse_args() 15 | setup_logger() 16 | solve_problem(dataset_name=args.dataset_name, 17 | split_name=args.split_name, 18 | problem_number=args.problem_number, 19 | problem_name=args.problem_name) 20 | -------------------------------------------------------------------------------- /docs/docs/CNAME: -------------------------------------------------------------------------------- 1 | qodo-flow-docs.qodo.ai -------------------------------------------------------------------------------- /docs/docs/assets/favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/docs/docs/assets/favicon.ico -------------------------------------------------------------------------------- /docs/docs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/docs/docs/assets/logo.png -------------------------------------------------------------------------------- /docs/docs/assets/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 78 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 122 | 124 | 125 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 135 | 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /docs/docs/css/custom.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --md-primary-fg-color: #765bfa; 3 | --md-accent-fg-color: #AEA1F1; 4 | } 5 | .md-nav__title, .md-nav__link { 6 | font-size: 16px; 7 | } 8 | 9 | .md-tabs__link { 10 | font-size: 16px; 11 | } 12 | 13 | .md-header__title { 14 | font-size: 20px; 15 | margin-left: 0px !important; 16 | } -------------------------------------------------------------------------------- /docs/docs/index.md: -------------------------------------------------------------------------------- 1 | # AlphaCodium 2 | 3 | Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering. 4 | 5 | [Paper](https://arxiv.org/abs/2401.08500) | 6 | [Dataset](https://huggingface.co/datasets/talrid/CodeContests_valid_and_test_AlphaCodium/blob/main/codecontests_valid_and_test_processed_alpha_codium.zip) 7 | 8 | **Official Implementation:** 9 | 10 | Tal Ridnik, Dedy Kredo, Itamar Friedman 11 | 12 | ## Abstract 13 | 14 | Code generation problems differ from common natural language problems - they require matching the exact syntax of the target language, identifying happy paths and edge cases, paying attention to numerous small details in the problem spec, and addressing other code-specific issues and requirements. Hence, many of the optimizations and tricks that have been successful in natural language generation may not be effective for code tasks. 15 | 16 | In this work, we propose a new approach to code generation by LLMs, which we call AlphaCodium - a test-based, multi-stage, code-oriented iterative flow, that improves the performances of LLMs on code problems. 17 | 18 | We tested AlphaCodium on a challenging code generation dataset called CodeContests, which includes competitive programming problems from platforms such as Codeforces. The proposed flow consistently and significantly improves results. 19 | On the validation set, for example, GPT-4 accuracy (pass@5) increased from 19% with a single well-designed direct prompt to 44% with the AlphaCodium flow. 
20 | 21 | Many of the principles and best practices we acquired in this work, we believe, are broadly applicable to general code generation tasks. 22 | 23 | ![Pre-processedf flow](https://github.com/Codium-ai/AlphaCodium/blob/main/pics/proposed_flow.png?raw=true) 24 | ![Iterations](https://github.com/Codium-ai/AlphaCodium/blob/main/pics/iterations.png?raw=true) 25 | 26 | ## Installation 27 | 28 | 1. setup a virtual environment and run: `pip install -r requirements.txt` 29 | 30 | 2. Duplicate the file `alpha_codium/settings/.secrets_template.toml`, rename it as `.secrets.toml`, and fill in your OpenAI API key: 31 | ``` 32 | [openai] 33 | key = "..." 34 | ``` 35 | 36 | 3. Download the processed CodeContest validation and test dataset from [hugging face](https://huggingface.co/datasets/talrid/CodeContests_valid_and_test_AlphaCodium/blob/main/codecontests_valid_and_test_processed_alpha_codium.zip), extract the zip file, and placed the extracted folder in the root of the project. 37 | 38 | ## How to run 39 | 40 | ### Configuration 41 | The file: `alpha_codium/settings/configuration.toml` contains the configuration for the project. 42 | In the `config` section you can choose the model you want to use ("gpt-4", "gpt-3.5-turbo-16k", or others). 43 | 44 | ### Solving a specific problem 45 | To solve a specific problem with AlphaCodium, from the root folder run: 46 | ``` 47 | python -m alpha_codium.solve_problem \ 48 | --dataset_name /path/to/dataset \ 49 | --split_name test \ 50 | --problem_number 0 51 | ``` 52 | - The `dataset_name` is the path to the dataset folder you downloaded in the installation step. 53 | 54 | - Note that the validation set contains 117 problems, and the test set contains 165 problems, so the `problem_number` parameter should be accordingly (zero-based) 55 | 56 | - The `split_name` can be either `valid` or `test`. 57 | 58 | - The following sections in the configuration file: 59 | `solve`, `self_reflection`,`possible_solutions`,`generate_ai_tests`,`initial_code_generation`,`public_tests`, `ai_tests` 60 | enable to adjust possible configurations for the different stages of the flow. 61 | 62 | - Each run logs the results to a file named `alpha_codium/example.log`. Reviewing the log file is a good way to understand what is going on in each stage of the flow. 63 | 64 | ![Example problem (test set, problem number 12)](https://github.com/Codium-ai/AlphaCodium/blob/main/pics/example_problem.png?raw=true) 65 | 66 | ### Solving the entire dataset 67 | to solve the entire dataset with AlphaCodium, from the root folder run: 68 | ``` 69 | python -m alpha_codium.solve_dataset \ 70 | --dataset_name /path/to/dataset \ 71 | --split_name test 72 | --database_solution_path /path/to/output/dir/dataset_output.json 73 | ``` 74 | 75 | - The `split_name` can be either `valid` or `test`. 76 | - `database_solution_path` is the path to the directory where the solutions will be saved. 77 | - The `dataset` section in the configuration file contains the configuration for the running and evaluation of a dataset. 78 | - Note that this is a long process, and it may take a few days to complete with large models (e.g. GPT-4) and several iterations per problem. 79 | - `dataset.num_iterations` defines the number of iterations for each problem (pass@K). For a large number of iterations, it is recommended to introduce some randomness and different options for each iteration to achieve top results. 
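As an alternative to the CLI shown above, the dataset solver can also be driven from a short Python script. The sketch below is based on the entry point in `alpha_codium/solve_dataset.py`; the paths are placeholders that you should replace with your own dataset folder and output file.

```python
from alpha_codium.gen.dataset_solver import solve_dataset
from alpha_codium.log import setup_logger

# Placeholder paths -- point these at your extracted dataset and desired output file.
DATASET_DIR = "./valid_and_test_processed"
OUTPUT_JSON = "./dataset_output.json"

if __name__ == "__main__":
    setup_logger()  # writes stage-by-stage logs to ./example.log by default
    solve_dataset(
        dataset_name=DATASET_DIR,
        split_name="valid",  # or "test"
        database_solution_path=OUTPUT_JSON,
    )
```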
80 | 81 | ### Running the evaluation 82 | 83 | Once you generate a solution for the entire dataset (valid or test), you can evaluate it by running: 84 | ``` 85 | python -m alpha_codium.evaluate_dataset\ 86 | --dataset_name /path/to/dataset\ 87 | --split_name test\ 88 | --database_solution_path /path/to/output/dir/dataset_output.json 89 | ``` 90 | 91 | ## Technical Q&A 92 | Aggregating some technical questions we received about this project: 93 | ___ 94 | **Q: How much time did you spend on "prompt engineering" compared to "flow engineering"?** 95 | 96 | **A:** Structured output almost completely eliminates the need for simple prompt engineering. 97 | We estimate that ~95% of the time we did more high-level design, reasoning, and injecting data at the correct places, ..., a.k.a. "flow engineering". 98 | ___ 99 | 100 | **Q: How do you know that there wasn't a data leakage?** 101 | 102 | **A:** The test set of CodeContests dataset comprises problems published after September 2021, while the GPT-4 model variant we used (gpt-4-0613) has a data cutoff of September 2021. Hence, there is no data leakage for GPT4, on the test set. 103 | For other models like DeepSeek, we cannot be sure. However, note that our [main result](https://github.com/Codium-ai/AlphaCodium/blob/main/pics/comparison.png?raw=true) is a comparison of "direct prompt" vs. "AlphaCodium flow". Data leakage would help both approaches, so the relative improvement of AlphaCodium flow is still valid. 104 | ___ 105 | 106 | **Q: Is this project relevant only to specific programming languages?** 107 | 108 | **A:** No. The proposed flow is language agnostic. We generated solutions in Python, but the flow can be applied to any language. 109 | ___ 110 | 111 | **Q: How did you manage the context window?** 112 | 113 | **A:** We used models with a context window of 8192 tokens, and we did not encounter cases where it did not suffice. 114 | However, we clearly observed that as the context we used in practice grows larger (let's say, above 4000 tokens), the model starts to "ignore" some of the information in the context. Hence, there is a clear tradeoff: 115 | - Injecting the results of previous stages into the context, may help the model to generate better code. 116 | - However, it may also cause the model to ignore specific details and nuances from the problem description. 117 | ___ 118 | 119 | **Q: Is this work "realistic" in terms of the number of LLM calls?** 120 | 121 | **A:** In comparison to AlphaCode, we do four orders of magnitude (!) fewer [calls](https://github.com/Codium-ai/AlphaCodium/blob/main/pics/computational_effort.png?raw=true) (per solution AlphaCodium does 15-20 calls). 122 | Yet we acknowledge that for some applications, this may still be too much, and more optimizations are needed. We however believe that many of the ideas and principles we acquired in this work are broadly applicable, even when the number of calls is further limited. 123 | ___ 124 | **Q: Why do you iterate only on the generated code, and not on the AI-generated tests?** 125 | 126 | **A:** For code problems in CodeContests, the tests are a list of input-output pairs. Hence, you don't really learn anything new when you "fix" a test - you just change its output to the prediction of the generated code. Instead of fixing tests, we preferred to always try and fix the code, while using "test anchors". (see the [paper](https://arxiv.org/abs/2401.08500) for more details). 
127 | However, for other code generation tasks, where the tests are more complex and contain runnable code, iterating on the tests, in addition to iterating on the generated code, may be beneficial. 128 | 129 | 130 | ## Broader Applicability 131 | While this work presents results on CodeContests dataset, we believe that it has a broader applicability. 132 | 133 | First and foremost, we feel that the proposed AlphaCodium [flow](https://github.com/Codium-ai/AlphaCodium/blob/main/pics/proposed_flow.png?raw=true), with reasonable adjustments, can be used as a more general framework for other code generation tasks. 134 | 135 | Secondly, many of the design concepts, principles, and tricks we acquired in this work are broadly applicable as-is to any general code generation tasks. For example: 136 | - **YAML Structured output**: asking the model to generate an output in YAML format, equivalent to a given Pydantic class 137 | 138 | - **Semantic reasoning via bullet points analysis**: Bullet points analysis encourages an in-depth understanding of the problem, and forces the model to divide the output into logical semantic sections, leading to improved results 139 | 140 | - **LLMs do better when generating a modular code**: when asking the model to: `divide the generated code into small sub-functions, with meaningful names and functionality`, we observe a better-produced code, with fewer bugs, and higher success rates for the iterative fixing stages. 141 | 142 | - **Soft decisions with double validation**: with a double validation process, we add an extra step where, given the generated output, the model is asked to re-generate the same output, but correct it if needed 143 | 144 | - **Leave room for exploration**: since the model can be wrong, it’s better to avoid irreversible decisions, and leave room for exploration and code iterations with different possible solutions 145 | 146 | The list above is partial. See the [paper](https://arxiv.org/abs/2401.08500) for more details. The code provided [in the repo](https://github.com/Codium-ai/AlphaCodium/tree/main/alpha_codium/settings) can be used as a reference for better understanding the proposed concepts, and for applying them to other code generation tasks. 147 | 148 | 149 | ## Example Problem 150 | In this section, we present an example for a full problem from CodeContests dataset (test-set, problem 1), in order to demonstrate the complexity of the problems in the dataset, and the challenges they pose to LLMs. 151 | 152 | ``` 153 | problem name: '1575_B. Building an Amusement Park' 154 | 155 | problem description: 156 | Mr. Chanek lives in a city represented as a plane. He wants to build an amusement park in the shape of a circle of radius r. 157 | The circle must touch the origin (point (0, 0)). 158 | There are n bird habitats that can be a photo spot for the tourists in the park. The i-th bird habitat is at point p_i = (x_i, y_i). 159 | 160 | Find the minimum radius r of a park with at least k bird habitats inside. 161 | 162 | A point is considered to be inside the park if and only if the distance between p_i and the center of the park is less than or equal 163 | to the radius of the park. 164 | Note that the center and the radius of the park do not need to be integers. 165 | 166 | In this problem, it is guaranteed that the given input always has a solution with r ≤ 2 ⋅ 10^5. 
167 | 168 | Input 169 | 170 | The first line contains two integers n and k (1 ≤ n ≤ 10^5, 1 ≤ k ≤ n) — the number of bird habitats in the city and the number of bird 171 | habitats required to be inside the park. 172 | The i-th of the next n lines contains two integers x_i and y_i (0 ≤ |x_i|, |y_i| ≤ 10^5) — the position of the i-th bird habitat. 173 | 174 | Output 175 | 176 | Output a single real number r denoting the minimum radius of a park with at least k bird habitats inside. It is guaranteed that the given 177 | input always has a solution with r ≤ 2 ⋅ 10^5. 178 | Your answer is considered correct if its absolute or relative error does not exceed 10^{-4}. 179 | Formally, let your answer be a, and the jury's answer be b. Your answer is accepted if and only if \frac{|a - b|}{max{(1, |b|)}} ≤ 10^{-4}. 180 | 181 | Examples 182 | 183 | Input 184 | 185 | 8 4 186 | -3 1 187 | -4 4 188 | 1 5 189 | 2 2 190 | 2 -2 191 | -2 -4 192 | -1 -1 193 | -6 0 194 | 195 | Output 196 | 197 | 3.1622776589 198 | 199 | 200 | Input 201 | 202 | 1 1 203 | 0 0 204 | 205 | 206 | Output 207 | 208 | 0.0000000000 209 | 210 | Note 211 | 212 | In the first example, Mr. Chanek can put the center of the park at (-3, -1) with radius √{10} ≈ 3.162. It can be proven this is the minimum r. 213 | ``` 214 | 215 | 216 | ## Acknowledgments 217 | Our process CodeContests dataset is based on the original [CodeContests](https://huggingface.co/datasets/deepmind/code_contests) dataset. 218 | We removed the train set (which is not relevant to our work) and did some post-processing and cleaning to the validation and test sets. 219 | 220 | 221 | ## Citation 222 | ``` 223 | @misc{ridnik2024code, 224 | title={Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering}, 225 | author={Tal Ridnik and Dedy Kredo and Itamar Friedman}, 226 | year={2024}, 227 | eprint={2401.08500}, 228 | archivePrefix={arXiv}, 229 | primaryClass={cs.LG} 230 | } 231 | ``` 232 | 233 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: AlphaCodium Documentation 2 | description: Documentation for AlphaCodium - From Prompt Engineering to Flow Engineering. 
3 | repo_url: https://github.com/Codium-ai/AlphaCodium 4 | repo_name: Codium-ai/AlphaCodium 5 | 6 | nav: 7 | - 'index.md' 8 | 9 | theme: 10 | logo: assets/logo.svg 11 | favicon: assets/favicon.ico 12 | name: material 13 | icon: 14 | repo: fontawesome/brands/github 15 | features: 16 | - navigation.tabs 17 | - navigation.expand 18 | - navigation.path 19 | - navigation.top 20 | - navigation.tracking 21 | - navigation.indexes 22 | - search.suggest 23 | - search.highlight 24 | - content.tabs.link 25 | - content.code.annotation 26 | - content.code.copy 27 | - toc.integrate 28 | language: en 29 | custom_dir: overrides 30 | 31 | palette: 32 | - media: "(prefers-color-scheme)" 33 | toggle: 34 | icon: material/brightness-auto 35 | name: Switch to light mode 36 | - media: "(prefers-color-scheme: light)" 37 | scheme: default 38 | toggle: 39 | icon: material/toggle-switch-off-outline 40 | name: Switch to dark mode 41 | primary: custom 42 | accent: custom 43 | - media: "(prefers-color-scheme: dark)" 44 | scheme: slate 45 | toggle: 46 | icon: material/toggle-switch 47 | name: Switch to light mode 48 | primary: custom 49 | accent: custom 50 | 51 | plugins: 52 | - social 53 | - search 54 | 55 | extra: 56 | generator: false 57 | social: 58 | - icon: fontawesome/brands/github 59 | link: https://github.com/Codium-ai 60 | - icon: fontawesome/brands/discord 61 | link: https://discord.com/invite/SgSxuQ65GF 62 | - icon: fontawesome/brands/youtube 63 | link: https://www.youtube.com/@Codium-AI 64 | - icon: fontawesome/brands/linkedin 65 | link: https://www.linkedin.com/company/codiumai 66 | - icon: fontawesome/brands/twitter 67 | link: https://twitter.com/CodiumAI 68 | - icon: fontawesome/brands/instagram 69 | link: https://www.instagram.com/codiumai/ 70 | analytics: 71 | provider: custom 72 | property: ${{ secrets.GOOGLE_ANALYTICS_ID }} 73 | 74 | extra_css: 75 | - css/custom.css 76 | 77 | markdown_extensions: 78 | - pymdownx.highlight: 79 | anchor_linenums: true 80 | - pymdownx.inlinehilite 81 | - pymdownx.snippets 82 | - admonition 83 | - pymdownx.arithmatex: 84 | generic: true 85 | - footnotes 86 | - pymdownx.details 87 | - pymdownx.superfences 88 | - pymdownx.mark 89 | - attr_list 90 | - pymdownx.emoji: 91 | emoji_index: !!python/name:material.extensions.emoji.twemoji 92 | emoji_generator: !!python/name:materialx.emoji.to_svg 93 | - toc: 94 | title: On this page 95 | toc_depth: 3 96 | permalink: true 97 | 98 | copyright: | 99 | © 2024 CodiumAI 100 | -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block scripts %} 4 | {{ super() }} 5 | 6 | 7 | 9 | 10 | {% endblock %} -------------------------------------------------------------------------------- /docs/overrides/partials/footer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Footer 7 | 80 | 81 | 82 | 83 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /docs/overrides/partials/integrations/analytics/custom.html: -------------------------------------------------------------------------------- 1 | 2 | 7 | -------------------------------------------------------------------------------- /my_problem_example.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Sorting Andi and Budi Books Problem", 3 | "description": 
"Andi and Budi were given an assignment to tidy up their bookshelf of n books. Each book is represented by the book title — a string s_i numbered from 1 to n, each with length m. Andi really wants to sort the book lexicographically ascending, while Budi wants to sort it lexicographically descending.\n\nSettling their fight, they decided to combine their idea and sort it asc-desc-endingly, where the odd-indexed characters will be compared ascendingly, and the even-indexed characters will be compared descendingly.\n\nA string a occurs before a string b in asc-desc-ending order if and only if in the first position where a and b differ, the following holds:\n\n * if it is an odd position, the string a has a letter that appears earlier in the alphabet than the corresponding letter in b; \n * if it is an even position, the string a has a letter that appears later in the alphabet than the corresponding letter in b. \n\nInput\n\nThe first line contains two integers n and m (1 ≤ n ⋅ m ≤ 10^6).\n\nThe i-th of the next n lines contains a string s_i consisting of m uppercase Latin letters — the book title. The strings are pairwise distinct.\n\nOutput\n\nOutput n integers — the indices of the strings after they are sorted asc-desc-endingly.\n\nExample\n\nInput\n\n\n5 2\nAA\nAB\nBB\nBA\nAZ\n\n\nOutput\n\n\n5 2 1 3 4\n\nNote\n\nThe following illustrates the first example.\n\n", 4 | "public_tests": { 5 | "input": [ 6 | "5 2\nAA\nAB\nBB\nBA\nAZ\n" 7 | ], 8 | "is_valid_test": null, 9 | "output": [ 10 | "5 2 1 3 4 \n" 11 | ] 12 | }, 13 | "private_tests": { 14 | "input": [], 15 | "is_valid_test": null, 16 | "output": [] 17 | }, 18 | "generated_tests": { 19 | "input": [ 20 | "5 2\nAA\nAB\nBB\nBA\nZA\n", 21 | "5 2\nAA\nAB\nCB\nBA\nAZ\n", 22 | "2 2\nAA\nAB\nCB\nBA\nAZ\n", 23 | "2 2\nAA\nBC\nCB\nAB\nAZ\n", 24 | "1 2\nAA\nAC\nBC\nBA\nZA\n", 25 | "5 2\nAA\nAB\nBC\nBA\nAZ\n", 26 | "3 2\nAA\nAB\nCB\nAB\nAZ\n", 27 | "4 2\nBA\nAC\nEA\nAA\nZ@\n", 28 | "3 2\nAA\nBA\nCB\nAB\nAZ\n", 29 | "4 2\nAA\nAC\nCB\nBA\nAZ\n", 30 | "4 2\nBA\nBC\nEA\nAB\nZ@\n", 31 | "3 2\nBA\nAD\nAB\nBA\nC[\n", 32 | "3 2\nAA\nBA\nBC\nAB\nAZ\n", 33 | "5 2\nAA\nBA\nCC\nBB\nAZ\n", 34 | "2 2\nAA\nAB\nCB\nAB\nAZ\n", 35 | "2 2\nAA\nAC\nCB\nAB\nAZ\n", 36 | "2 2\nAA\nBC\nCB\nAB\nZA\n", 37 | "2 2\nAA\nBC\nBC\nAB\nZA\n", 38 | "2 2\nAA\nAC\nBC\nAB\nZA\n", 39 | "2 2\nAA\nAC\nBC\nBA\nZA\n", 40 | "1 2\nAA\nAC\nBC\nAA\nZA\n", 41 | "1 2\nAA\nAC\nCB\nAA\nZA\n", 42 | "1 2\nAA\nAC\nCB\nAA\nZ@\n", 43 | "2 2\nAA\nAC\nCB\nAA\nZ@\n", 44 | "2 2\nAA\nAC\nCA\nAA\nZ@\n", 45 | "2 2\nAB\nAC\nCA\nAA\nZ@\n", 46 | "2 2\nAB\nAC\nCA\nBA\nZ@\n", 47 | "2 2\nAB\nAC\nCA\nAB\nZ@\n", 48 | "2 2\nAB\nAC\nCA\nAB\n@Z\n", 49 | "2 2\nAB\nAC\nCA\nAB\n@Y\n", 50 | "5 2\nAA\nAB\nBB\nBA\nZB\n", 51 | "2 2\nAA\nAD\nCB\nAB\nAZ\n", 52 | "2 2\nAA\nBC\nBB\nAB\nAZ\n", 53 | "2 2\nBA\nBC\nCB\nAB\nZA\n", 54 | "2 2\nAA\nBC\nBD\nAB\nZA\n", 55 | "2 2\nAA\nAC\nBC\nAB\n[A\n", 56 | "2 2\nAA\nAC\nBC\nBA\nAZ\n", 57 | "1 2\nAA\nAB\nBC\nBA\nZA\n", 58 | "1 2\nAA\nAC\nBC\nAA\nAZ\n", 59 | "1 2\nBA\nAC\nCB\nAA\nZ@\n", 60 | "2 2\nAA\nAC\nBC\nAA\nZ@\n", 61 | "2 2\nAA\nAC\nDA\nAA\nZ@\n", 62 | "2 2\nAB\nAC\nCA\nAA\n[@\n", 63 | "2 2\nAB\nCA\nCA\nBA\nZ@\n", 64 | "2 2\nAB\nAC\nAC\nAB\nZ@\n", 65 | "2 2\nAB\nAC\nBA\nAB\n@Z\n", 66 | "2 2\nAB\nCA\nCA\nAB\n@Y\n", 67 | "5 2\nAA\nAB\nCC\nBA\nAZ\n", 68 | "2 2\nBA\nBC\nBB\nAB\nZA\n", 69 | "2 2\nAA\nAC\nAC\nAB\n[A\n", 70 | "2 2\nAA\nAC\nCB\nBA\nAZ\n", 71 | "1 2\nAA\nAB\nBC\nBA\nAZ\n", 72 | "1 2\nAA\nAC\nBC\nAA\n@Z\n", 73 | "1 2\nBA\nAC\nCA\nAA\nZ@\n", 74 | "2 2\nAA\nAC\nBC\nAA\nZ?\n", 75 | 
"3 2\nAA\nAC\nDA\nAA\nZ@\n", 76 | "2 2\nBA\nAC\nCA\nAA\n[@\n", 77 | "1 2\nAB\nCA\nCA\nBA\nZ@\n", 78 | "2 2\nAB\nCA\nAC\nAB\nZ@\n", 79 | "2 2\nAB\nAC\nBA\nBB\n@Z\n", 80 | "2 2\nAB\nCB\nCA\nAB\n@Y\n", 81 | "3 2\nAA\nAB\nCC\nBA\nAZ\n", 82 | "1 2\nBA\nBC\nBB\nAB\nZA\n", 83 | "2 2\nAA\nAC\nAC\nAC\n[A\n", 84 | "2 2\nAA\nAB\nCB\nCA\nAZ\n", 85 | "1 2\nAA\nAB\nBC\nBA\nA[\n", 86 | "1 2\nAA\nAC\nBC\nAB\n@Z\n", 87 | "1 2\nBA\nAC\nCA\nAA\n@Z\n", 88 | "2 2\nAA\nAC\nBC\nAA\n?Z\n", 89 | "3 2\nAA\nAC\nEA\nAA\nZ@\n", 90 | "2 2\nBA\nAC\nAC\nAA\n[@\n", 91 | "1 2\nBB\nCA\nCA\nBA\nZ@\n", 92 | "2 2\nAB\nBA\nAC\nAB\nZ@\n", 93 | "2 2\nAB\nAC\nBA\nBB\n?Z\n", 94 | "2 2\nAB\nCB\nCA\nAC\n@Y\n", 95 | "1 2\nBA\nCB\nBB\nAB\nZA\n", 96 | "2 2\nAA\nAC\nAC\nAC\n[B\n", 97 | "1 2\nAA\nAC\nCB\nBA\nAZ\n", 98 | "2 2\nAA\nAB\nBC\nBA\nA[\n", 99 | "1 2\nAA\nAC\nCB\nAB\n@Z\n", 100 | "1 2\nBA\nAC\nCA\nAB\n@Z\n", 101 | "2 2\nAA\nAC\nBC\nA@\n?Z\n", 102 | "3 2\nBA\nAC\nEA\nAA\nZ@\n", 103 | "1 2\nBB\nAC\nCA\nBA\nZ@\n", 104 | "2 2\nAA\nAC\nBA\nBB\n?Z\n", 105 | "2 2\nBA\nCB\nCA\nAC\n@Y\n", 106 | "2 2\nBA\nCB\nBB\nAB\nZA\n", 107 | "1 2\nAA\nAC\nAC\nAC\n[B\n", 108 | "1 2\nAA\nAC\nCB\nBA\nZA\n", 109 | "2 2\nAA\nAB\nCB\nBA\nA[\n", 110 | "1 2\nAA\nAC\nCB\nAB\nZ@\n", 111 | "1 2\nBA\nAC\nCB\nAB\n@Z\n", 112 | "2 2\nBA\nAC\nBC\nA@\n?Z\n", 113 | "1 2\nBB\nAC\nCA\nAB\nZ@\n", 114 | "2 2\nAA\nBC\nBA\nBB\n?Z\n", 115 | "2 2\nBA\nBC\nCA\nAC\n@Y\n", 116 | "1 2\nAA\nAC\nAC\nAC\n[C\n", 117 | "2 2\nAA\nAC\nCB\nBA\nA[\n", 118 | "1 2\nAA\nAC\nDB\nAB\nZ@\n", 119 | "2 2\nBA\nAD\nBC\nA@\n?Z\n", 120 | "4 2\nBA\nAC\nEA\nAB\nZ@\n", 121 | "1 2\nCB\nAC\nCA\nAB\nZ@\n", 122 | "2 2\nAA\nBC\nBA\nBB\nZ?\n", 123 | "2 2\nBA\nBB\nCA\nAC\n@Y\n", 124 | "1 2\nAA\nAC\nAC\nAD\n[C\n", 125 | "2 2\nAA\nCA\nCB\nBA\nA[\n", 126 | "2 2\nBA\nAD\nBC\nA?\n?Z\n", 127 | "1 2\nCB\nCA\nCA\nAB\nZ@\n", 128 | "2 2\nAA\nBC\nBB\nBB\nZ?\n", 129 | "2 2\nBA\nBB\nCA\nCA\n@Y\n", 130 | "1 2\nAA\nCA\nAC\nAD\n[C\n", 131 | "2 2\nAA\nCA\nBB\nBA\nA[\n", 132 | "2 2\nBA\nAD\nBB\nA?\n?Z\n", 133 | "2 2\nBA\nBB\nCA\nDA\n@Y\n", 134 | "2 2\nAA\nCA\nBB\nAB\nA[\n", 135 | "2 2\nBA\nDA\nBB\nA?\n?Z\n", 136 | "2 2\nAB\nBB\nCA\nDA\n@Y\n", 137 | "2 2\nAA\nCA\nBB\nAA\nA[\n", 138 | "2 2\nBA\nAD\nBB\nA?\n?Y\n", 139 | "2 2\nBA\nBB\nCA\nD@\n@Y\n", 140 | "2 2\nAA\nDA\nBB\nBA\nA[\n", 141 | "2 2\nAB\nAD\nBB\nA?\n?Y\n", 142 | "2 2\nBA\nBB\nAC\nD@\n@Y\n", 143 | "2 2\nAA\nDA\nBB\nAB\nA[\n", 144 | "2 2\nAB\nAD\nBA\nA?\n?Y\n", 145 | "2 2\nAA\nDA\nAB\nAB\nA[\n", 146 | "2 2\nAB\nAD\nAB\nA?\n?Y\n", 147 | "2 2\nAA\nDA\nAB\nBA\nA[\n", 148 | "2 2\nAB\nAD\nCA\nA?\n?Y\n", 149 | "2 2\nAA\nDA\nAB\nBA\nB[\n", 150 | "2 2\nAB\nAD\nCA\nA>\n?Y\n", 151 | "2 2\nAA\nDA\nAB\nCA\nB[\n", 152 | "1 2\nAA\nDA\nAB\nCA\nB[\n", 153 | "1 2\nAA\nAD\nAB\nCA\nB[\n", 154 | "1 2\nAA\nAD\nAB\nAC\nB[\n", 155 | "1 2\nAA\nAD\nAB\nAC\nC[\n", 156 | "1 2\nAA\nAD\nAB\nCA\nC[\n", 157 | "1 2\nAA\nAD\nAB\nBA\nC[\n", 158 | "2 2\nAA\nAD\nAB\nBA\nC[\n", 159 | "2 2\nBA\nAD\nAB\nBA\nC[\n", 160 | "2 2\nBA\nAE\nAB\nBA\nC[\n", 161 | "2 2\nBA\nAE\nAB\nBA\n[C\n", 162 | "2 2\nAA\nAB\nCB\nAC\nAZ\n", 163 | "3 2\nAA\nAC\nCB\nAB\nAZ\n", 164 | "2 2\nAA\nBC\nDB\nAB\nAZ\n", 165 | "2 2\nAA\nBC\nCB\nBA\nZA\n", 166 | "2 2\nAA\nBC\nBC\nAC\nZA\n", 167 | "2 2\nAA\nAC\nCB\nAB\nZA\n", 168 | "2 2\nAA\nAC\nCC\nBA\nZA\n", 169 | "2 2\nAA\nCA\nBC\nBA\nZA\n", 170 | "1 2\nAA\nAB\nBC\nAA\nZA\n", 171 | "1 2\nAA\nAC\nCC\nAA\nZA\n", 172 | "1 2\nAA\nAD\nCB\nAA\nZ@\n", 173 | "2 2\nAA\nAC\nCB\n@A\nZ@\n", 174 | "2 2\nAB\nCA\nCA\nAB\nZ@\n", 175 | "2 2\nAA\nAC\nCA\nAB\nZ@\n", 176 | "2 2\nAB\nAC\nAC\nAB\n@Z\n", 177 | "2 
2\nAB\nAC\nAC\nAB\n@Y\n", 178 | "2 2\nAA\nBC\nBD\nAB\nYA\n", 179 | "3 2\nAA\nAC\nBC\nAB\n[A\n", 180 | "2 2\nAA\nAC\nAC\nBA\nAZ\n", 181 | "1 1\nAA\nAC\nBC\nAA\nAZ\n", 182 | "1 2\nBA\nAC\nCB\nAA\n@Z\n", 183 | "2 2\nAB\nAC\nDA\nAA\nZ@\n", 184 | "2 2\nBA\nAC\nCB\nAA\n[@\n", 185 | "2 1\nAB\nCA\nCA\nBA\nZ@\n", 186 | "2 2\nBA\nAC\nCA\nAB\n@Z\n", 187 | "2 2\nAB\nBC\nBA\nAB\n@Z\n", 188 | "2 2\nAB\nCA\nAC\nAB\n@Y\n", 189 | "5 2\nAA\nAB\nCC\nBB\nAZ\n", 190 | "2 2\nBA\nCB\nBB\nAB\nAZ\n", 191 | "1 2\nAA\nAB\nBC\nBA\nAY\n", 192 | "1 2\nAA\nAC\nBC\nBA\n@Z\n", 193 | "2 2\nAB\nAC\nCB\nAA\n[@\n", 194 | "2 2\nAB\nCA\nAD\nAB\nZ@\n", 195 | "2 2\nAB\nCB\nCB\nAB\n@Y\n", 196 | "3 2\nAA\nAB\nCC\nBA\nAY\n", 197 | "1 2\nBA\nBC\nBB\nBB\nZA\n", 198 | "2 2\nAA\nCA\nAC\nAC\n[A\n", 199 | "2 2\nAA\nAB\nBC\nBA\nAZ\n", 200 | "1 2\nAA\nAB\nBB\nBA\nA[\n", 201 | "1 2\nAA\nAC\nBC\nAB\n@[\n", 202 | "1 2\nBA\nBC\nCA\nAA\nZ@\n", 203 | "2 2\nAA\nCA\nBC\nAA\n?Z\n", 204 | "3 2\nAB\nAC\nEA\nAA\nZ@\n", 205 | "2 2\nBA\nAC\nAC\nAA\nZ@\n", 206 | "1 2\nBB\nCA\nCA\nBA\n@Z\n", 207 | "2 2\nAB\nBA\nAC\nBA\nZ@\n", 208 | "2 2\nBB\nCB\nCA\nAC\n@Y\n", 209 | "1 2\nBA\nCB\nBB\nBA\nZA\n", 210 | "2 2\nBA\nAC\nAC\nAC\n[B\n", 211 | "1 2\nAA\nAC\nCB\nAA\nAZ\n", 212 | "2 2\nAA\nAB\nBC\nB@\nA[\n", 213 | "1 2\nAA\nAC\nCB\nAA\n@Z\n", 214 | "2 2\nAA\nAC\nCA\nBB\n?Z\n", 215 | "2 2\nAB\nCB\nBB\nAB\nZA\n", 216 | "1 2\nAA\nCA\nAC\nAC\n[B\n", 217 | "3 2\nAA\nAB\nCB\nBA\nA[\n", 218 | "1 1\nAA\nAC\nCB\nAB\nZ@\n", 219 | "1 2\nBA\nAC\nCC\nAB\n@Z\n" 220 | ], 221 | "is_valid_test": null, 222 | "output": [ 223 | "2 1 3 4 5\n", 224 | "5 2 1 4 3\n", 225 | "2 1\n", 226 | "1 2\n", 227 | "1\n", 228 | "5 2 1 3 4\n", 229 | "2 1 3\n", 230 | "2 4 1 3\n", 231 | "1 2 3\n", 232 | "2 1 4 3\n", 233 | "4 2 1 3\n", 234 | "2 3 1\n", 235 | "1 3 2\n", 236 | "5 1 4 2 3\n", 237 | "2 1\n", 238 | "2 1\n", 239 | "1 2\n", 240 | "1 2\n", 241 | "2 1\n", 242 | "2 1\n", 243 | "1\n", 244 | "1\n", 245 | "1\n", 246 | "2 1\n", 247 | "2 1\n", 248 | "2 1\n", 249 | "2 1\n", 250 | "2 1\n", 251 | "2 1\n", 252 | "2 1\n", 253 | "2 1 3 4 5\n", 254 | "2 1\n", 255 | "1 2\n", 256 | "2 1\n", 257 | "1 2\n", 258 | "2 1\n", 259 | "2 1\n", 260 | "1\n", 261 | "1\n", 262 | "1\n", 263 | "2 1\n", 264 | "2 1\n", 265 | "2 1\n", 266 | "1 2\n", 267 | "2 1\n", 268 | "2 1\n", 269 | "1 2\n", 270 | "5 2 1 4 3\n", 271 | "2 1\n", 272 | "2 1\n", 273 | "2 1\n", 274 | "1\n", 275 | "1\n", 276 | "1\n", 277 | "2 1\n", 278 | "2 1 3\n", 279 | "2 1\n", 280 | "1\n", 281 | "1 2\n", 282 | "2 1\n", 283 | "1 2\n", 284 | "2 1 3\n", 285 | "1\n", 286 | "2 1\n", 287 | "2 1\n", 288 | "1\n", 289 | "1\n", 290 | "1\n", 291 | "2 1\n", 292 | "2 1 3\n", 293 | "2 1\n", 294 | "1\n", 295 | "1 2\n", 296 | "2 1\n", 297 | "1 2\n", 298 | "1\n", 299 | "2 1\n", 300 | "1\n", 301 | "2 1\n", 302 | "1\n", 303 | "1\n", 304 | "2 1\n", 305 | "2 1 3\n", 306 | "1\n", 307 | "2 1\n", 308 | "1 2\n", 309 | "1 2\n", 310 | "1\n", 311 | "1\n", 312 | "2 1\n", 313 | "1\n", 314 | "1\n", 315 | "2 1\n", 316 | "1\n", 317 | "1 2\n", 318 | "2 1\n", 319 | "1\n", 320 | "2 1\n", 321 | "1\n", 322 | "2 1\n", 323 | "2 4 1 3\n", 324 | "1\n", 325 | "1 2\n", 326 | "2 1\n", 327 | "1\n", 328 | "1 2\n", 329 | "2 1\n", 330 | "1\n", 331 | "1 2\n", 332 | "2 1\n", 333 | "1\n", 334 | "1 2\n", 335 | "2 1\n", 336 | "2 1\n", 337 | "1 2\n", 338 | "1 2\n", 339 | "1 2\n", 340 | "1 2\n", 341 | "2 1\n", 342 | "2 1\n", 343 | "1 2\n", 344 | "2 1\n", 345 | "2 1\n", 346 | "1 2\n", 347 | "2 1\n", 348 | "1 2\n", 349 | "2 1\n", 350 | "1 2\n", 351 | "2 1\n", 352 | "1 2\n", 353 | "2 1\n", 354 | "1 2\n", 355 | "1\n", 356 | 
"1\n", 357 | "1\n", 358 | "1\n", 359 | "1\n", 360 | "1\n", 361 | "2 1\n", 362 | "2 1\n", 363 | "2 1\n", 364 | "2 1\n", 365 | "2 1\n", 366 | "2 1 3\n", 367 | "1 2\n", 368 | "1 2\n", 369 | "1 2\n", 370 | "2 1\n", 371 | "2 1\n", 372 | "1 2\n", 373 | "1\n", 374 | "1\n", 375 | "1\n", 376 | "2 1\n", 377 | "1 2\n", 378 | "2 1\n", 379 | "2 1\n", 380 | "2 1\n", 381 | "1 2\n", 382 | "2 1 3\n", 383 | "2 1\n", 384 | "1\n", 385 | "1\n", 386 | "2 1\n", 387 | "2 1\n", 388 | "1 2\n", 389 | "2 1\n", 390 | "1 2\n", 391 | "1 2\n", 392 | "5 2 1 4 3\n", 393 | "1 2\n", 394 | "1\n", 395 | "1\n", 396 | "2 1\n", 397 | "1 2\n", 398 | "1 2\n", 399 | "2 1 3\n", 400 | "1\n", 401 | "1 2\n", 402 | "2 1\n", 403 | "1\n", 404 | "1\n", 405 | "1\n", 406 | "1 2\n", 407 | "2 1 3\n", 408 | "2 1\n", 409 | "1\n", 410 | "1 2\n", 411 | "1 2\n", 412 | "1\n", 413 | "2 1\n", 414 | "1\n", 415 | "2 1\n", 416 | "1\n", 417 | "2 1\n", 418 | "1 2\n", 419 | "1\n", 420 | "2 1 3\n", 421 | "1\n", 422 | "1\n" 423 | ] 424 | } 425 | } -------------------------------------------------------------------------------- /pics/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/pics/comparison.png -------------------------------------------------------------------------------- /pics/computational_effort.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/pics/computational_effort.png -------------------------------------------------------------------------------- /pics/example_problem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/pics/example_problem.png -------------------------------------------------------------------------------- /pics/iterations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/pics/iterations.png -------------------------------------------------------------------------------- /pics/proposed_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/pics/proposed_flow.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dynaconf==3.1.12 2 | fastapi==0.99.0 3 | PyGithub==1.59.* 4 | retry==0.9.2 5 | Jinja2==3.1.2 6 | tiktoken==0.5.2 7 | uvicorn==0.22.0 8 | pytest==7.4.0 9 | aiohttp==3.9.3 10 | atlassian-python-api==3.39.0 11 | GitPython==3.1.32 12 | PyYAML==6.0.1 13 | starlette-context==0.3.6 14 | boto3==1.28.25 15 | google-cloud-storage==2.10.0 16 | ujson==5.8.0 17 | azure-devops==7.1.0b3 18 | msrest==0.7.1 19 | ## 20 | openai 21 | litellm 22 | duckdb==0.9.2 23 | datasets 24 | notebook 25 | black 26 | evaluate 27 | click 28 | code_contests_tester==0.1.6 29 | aiolimiter 30 | Jinja2 31 | tqdm 32 | pysnooper 33 | loguru 34 | numpy 35 | retry 36 | pydantic>=2.8.2 37 | # uninstall ipython to catch breakpoints on debug with sandbox==false 38 | -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/tests/__init__.py -------------------------------------------------------------------------------- /tests/alpha_codium/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/tests/alpha_codium/__init__.py -------------------------------------------------------------------------------- /tests/alpha_codium/code_contests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/tests/alpha_codium/code_contests/__init__.py -------------------------------------------------------------------------------- /tests/alpha_codium/code_contests/eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Codium-ai/AlphaCodium/eb7577dbe998ae7e55696264591ac3c5dde75638/tests/alpha_codium/code_contests/eval/__init__.py -------------------------------------------------------------------------------- /tests/alpha_codium/code_contests/eval/test_local_exec.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import inspect 3 | import io 4 | import math 5 | import os 6 | import sys 7 | import tempfile 8 | from contextlib import contextmanager 9 | from functools import partial 10 | from typing import Callable, List 11 | 12 | import pytest as pytest 13 | from pysnooper import snoop 14 | 15 | from alpha_codium.code_contests.eval.local_exec import MultiTestResult, ProgramStatus, execute_candidate_code 16 | from alpha_codium.code_contests.eval.tracer import snooper_kwargs 17 | 18 | timeout = 3 19 | 20 | 21 | @contextmanager 22 | def mock_input_output(mock_input_value): 23 | new_out = io.StringIO() 24 | new_in = io.StringIO(mock_input_value + '\n') 25 | 26 | old_out = sys.stdout 27 | old_in = sys.stdin 28 | 29 | sys.stdout = new_out 30 | sys.stdin = new_in 31 | 32 | yield new_out 33 | 34 | sys.stdout = old_out 35 | sys.stdin = old_in 36 | 37 | 38 | class SandboxCaseContainer: 39 | 40 | def __init__(self, f: Callable): 41 | self.f = f 42 | 43 | def execute_as_string(self, input: str, sandbox=False): 44 | return self.execute_as_str_inner([input], trace=False, sandbox=sandbox) 45 | 46 | def execute_as_string_with_tracing(self, input: str, sandbox=False): 47 | return self.execute_as_str_inner([input], trace=True, sandbox=sandbox) 48 | 49 | def execute_as_str_inner(self, inputs: List[str], trace=False, sandbox=False): 50 | check_program = self.get_body() 51 | f = partial(execute_candidate_code, candidate=check_program, inputs=inputs, test_id=self.f.__name__, 52 | timeout=timeout, sandbox=sandbox, snoop=trace) 53 | if sandbox: 54 | with tempfile.TemporaryDirectory() as temp_dir: 55 | os.chdir(temp_dir) 56 | result = f() 57 | else: 58 | result = f() 59 | 60 | return result 61 | 62 | def get_body(self): 63 | function_body = inspect.getsource(self.f) 64 | func_ast = ast.parse(function_body) 65 | func_def = [node for node in ast.walk(func_ast) if isinstance(node, ast.FunctionDef)][0] 66 | body = func_def.body 67 | lines = [ast.unparse(node).strip() for node in body] 68 | result = "\n".join(lines).strip() 69 | print(result) 70 | return result 71 | 72 | 73 | def 
io_solution(): 74 | x = input() 75 | print(x) 76 | 77 | 78 | def one_level_and_loop_solution(): 79 | def my_func(val): 80 | for i in range(val): 81 | print(i) 82 | 83 | x = int(input()) 84 | my_func(x) 85 | 86 | 87 | def multi_level_and_loop_solution(): 88 | def printer_inner(val): 89 | print(val) 90 | 91 | def printer(val): 92 | print("p") 93 | printer_inner(val) 94 | 95 | def my_func(val): 96 | for i in range(val): 97 | printer(i) 98 | 99 | x = int(input()) 100 | my_func(x) 101 | 102 | 103 | def recursion_solution(): 104 | def fibonacci(n): 105 | if n <= 0: 106 | return 0 107 | elif n == 1: 108 | return 1 109 | else: 110 | return fibonacci(n - 1) + fibonacci(n - 2) 111 | 112 | x = int(input()) 113 | fib = fibonacci(x) 114 | print(fib) 115 | 116 | 117 | def timeout_solution(): 118 | def sleeper(timeout): 119 | import time 120 | print(f"sleeping for {timeout + 1}") 121 | time.sleep(timeout + 1) 122 | 123 | timeout = int(input()) 124 | sleeper(timeout) 125 | 126 | 127 | def exception_solution(): 128 | def excepter(n): 129 | raise ValueError(f"test run cannot accept {n}") 130 | 131 | x = int(input()) 132 | excepter(x) 133 | 134 | 135 | def bad_import_solution(): 136 | print(math.sqrt(int(input()))) 137 | 138 | 139 | test_data = [ 140 | (io_solution, 'hello', 'hello'), # (function, input, expected output) 141 | (one_level_and_loop_solution, '4', '0\n1\n2\n3'), 142 | (multi_level_and_loop_solution, '4', 'p\n0\np\n1\np\n2\np\n3'), 143 | (recursion_solution, '4', '3'), 144 | ] 145 | 146 | run_types = ['regular', 'regular_with_tracing', 'as_string', 'as_string_with_tracing'] 147 | 148 | 149 | def data_id(test_case): 150 | f, input_, output_ = test_case 151 | return f"{f.__name__}-{hash(str(input_) + str(output_))}" 152 | 153 | sandbox_ids=["not-sandboxed", "sandboxed"] 154 | 155 | 156 | @pytest.mark.parametrize("run_type", run_types) 157 | @pytest.mark.parametrize("sandbox", [False, True], ids=sandbox_ids) 158 | @pytest.mark.parametrize("func, input, expected", test_data, ids=[data_id(case) for case in test_data]) 159 | def test_happy_paths(monkeypatch, func, input, expected, run_type, sandbox): 160 | def assert_passed_and_output(expected, result: MultiTestResult): 161 | assert len(result.test_results) == 1 162 | my_result = result.test_results[0] 163 | assert my_result.stdout == expected 164 | assert my_result.stderr == '' 165 | print("trace\n") 166 | print(my_result.trace) 167 | 168 | my_case = SandboxCaseContainer(func) 169 | if 'regular' in run_type: 170 | with mock_input_output(input) as captured_output: 171 | if 'regular_with_tracing' == run_type: 172 | with snoop(**snooper_kwargs): 173 | my_case.f() 174 | else: 175 | my_case.f() 176 | result = captured_output.getvalue().strip() 177 | assert expected == result 178 | 179 | elif run_type == 'as_string': 180 | res = my_case.execute_as_string(input, sandbox=sandbox) 181 | assert_passed_and_output(expected, res) 182 | 183 | elif run_type == 'as_string_with_tracing': 184 | res = my_case.execute_as_string_with_tracing(input, sandbox=sandbox) 185 | assert_passed_and_output(expected, res) 186 | 187 | 188 | test_exception_data = [ 189 | (timeout_solution, str(timeout), ProgramStatus.kTimeout, ''), 190 | (exception_solution, '1', ProgramStatus.kFailed, 'test run cannot accept 1'), 191 | (bad_import_solution, '1', ProgramStatus.kFailed, "NameError: name 'math' is not defined"), 192 | ] 193 | 194 | def exception_data_id(test_case): 195 | f, input_, status, _ = test_case 196 | return f"{f.__name__}-{str(status)}-{hash(input_)}" 197 | 198 | 
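# test_runtime_issues (below) feeds the failure cases defined above through execute_candidate_code:
# a solution that sleeps past the timeout (ProgramStatus.kTimeout), one that raises a ValueError,
# and one that uses `math` without importing it inside the candidate body (both ProgramStatus.kFailed).
# It asserts the reported program status and that the expected message appears in sandbox_result;
# only the string-based run types ('as_string', 'as_string_with_tracing') perform assertions here.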
@pytest.mark.parametrize("run_type", run_types) 199 | @pytest.mark.parametrize("sandbox", [False, True], ids=sandbox_ids) 200 | @pytest.mark.parametrize("func, input, status, error_string", test_exception_data, 201 | ids=[exception_data_id(case) for case in test_exception_data]) 202 | def test_runtime_issues(monkeypatch, func, input, status, error_string, run_type, sandbox): 203 | def assert_status_and_error(result: MultiTestResult, status, err): 204 | assert len(result.test_results) == 1 205 | my_result = result.test_results[0] 206 | assert my_result.program_status == status 207 | assert err in my_result.sandbox_result 208 | print("trace") 209 | print(my_result.trace) 210 | print("=============") 211 | print("stack trace") 212 | print(my_result.sandbox_result) 213 | 214 | my_case = SandboxCaseContainer(func) 215 | 216 | if run_type == 'as_string': 217 | res = my_case.execute_as_string(input) 218 | assert_status_and_error(res, status, error_string) 219 | 220 | elif run_type == 'as_string_with_tracing': 221 | res = my_case.execute_as_string_with_tracing(input) 222 | assert_status_and_error(res, status, error_string) 223 | 224 | 225 | if __name__ == '__main__': 226 | timeout_solution() 227 | --------------------------------------------------------------------------------