├── .gitignore ├── README.md ├── assets ├── logo.png └── overview.png ├── coder.py ├── configs └── config.yaml ├── data_cache ├── amp_pd.py ├── circle_packing.py ├── codepde.py ├── molecular_translation.py ├── molecule.py ├── nuclei_image.py ├── openvaccine.py ├── polymer.py └── usp_p2p.py ├── database.py ├── deepevolve.py ├── discoveries ├── burgers │ ├── README.md │ ├── best_program_info.json │ ├── deepevolve_interface.py │ ├── main.py │ └── solver.py ├── circle_packing │ ├── README.md │ ├── best_program_info.json │ ├── deepevolve_interface.py │ └── main.py ├── molecular_translation │ ├── README.md │ ├── best_program_info.json │ ├── deepevolve_interface.py │ └── main.py ├── molecule │ ├── README.md │ ├── best_program_info.json │ ├── conv.py │ ├── dataset.py │ ├── deepevolve_interface.py │ ├── main_pyg.py │ ├── model.py │ └── utils.py ├── nuclei_image │ ├── README.md │ ├── best_program_info.json │ ├── deepevolve_interface.py │ └── main.py ├── openvaccine │ ├── README.md │ ├── best_program_info.json │ ├── deepevolve_interface.py │ └── main.py ├── parkinson_disease │ ├── README.md │ ├── base_model.py │ ├── best_program_info.json │ ├── config.py │ ├── data_loader.py │ ├── deepevolve_interface.py │ ├── lightgbm_model.py │ ├── main.py │ ├── metrics.py │ ├── neural_network.py │ ├── preprocessing.py │ ├── public_timeseries_testing_util.py │ └── utils.py ├── polymer │ ├── README.md │ ├── best_program_info.json │ ├── conv.py │ ├── deepevolve_interface.py │ ├── main_pyg.py │ ├── model.py │ ├── preprocessing.py │ └── utils.py └── usp_p2p │ ├── README.md │ ├── best_program_info.json │ ├── deepevolve_interface.py │ └── main.py ├── examples ├── burgers │ ├── README.md │ ├── info.json │ ├── initial_code │ │ ├── deepevolve_interface.py │ │ ├── main.py │ │ └── solver.py │ ├── initial_idea.json │ └── initial_metrics.json ├── circle_packing │ ├── README.md │ ├── ckpt │ │ ├── best │ │ │ ├── best_program_concatenated.py │ │ │ ├── best_program_info.json │ │ │ ├── 
deepevolve_interface.py │ │ │ └── main.py │ │ └── checkpoint_50 │ │ │ ├── metadata.json │ │ │ └── programs │ │ │ ├── 06976df4-d5ce-469a-bacf-ce107c6a5b00.json │ │ │ ├── 094742ee-ec68-45f4-97e9-140b86fdc657.json │ │ │ ├── 09507cfc-3d17-4547-8664-dbca302803c2.json │ │ │ ├── 2bb60c45-489b-4e92-ac96-001e03788020.json │ │ │ ├── 2f3f5db2-7b0d-489e-9dc2-301b1f850d71.json │ │ │ ├── 3414c339-4428-47e4-97a6-4173d5c796b6.json │ │ │ ├── 3577ad71-c1a2-482d-88d3-8ce52ab8e670.json │ │ │ ├── 3c9ac271-200f-49d9-9bb9-55eb4884ce98.json │ │ │ ├── 453b9d57-b5f6-421c-84a1-93c58154165b.json │ │ │ ├── 461b048f-84f2-4027-b1c8-99ec5cfcfdb8.json │ │ │ ├── 58af2a81-381b-437a-9e13-e0a8fc29e4ed.json │ │ │ ├── 6483234a-a079-4c7d-aafa-92ff989573cb.json │ │ │ ├── 6d84c330-e329-4fe6-ae6f-70a514db7a60.json │ │ │ ├── 7aac803d-be83-4492-96f4-ee3af60e7cf9.json │ │ │ ├── 80a1d209-186a-4479-bb99-dedc3c1df2cc.json │ │ │ ├── 9df980dc-2c8f-4ece-871e-90486b4a7245.json │ │ │ ├── c410687e-6035-406c-9588-b0aa7b838945.json │ │ │ ├── c42f30e9-7ab7-4f5a-b78a-87db894e6971.json │ │ │ ├── e0e8bb8f-7f5b-4ff0-8877-607d16e7e904.json │ │ │ ├── e304e0fd-7bf3-4cbb-8fed-5f960f2aca78.json │ │ │ ├── e6ff1491-588d-45f2-9f29-7b407425b3b0.json │ │ │ ├── e7af8df5-7c88-4dd8-b299-8ef069b24062.json │ │ │ ├── f52bb9ba-cd8f-44e8-8978-d967cf55cfeb.json │ │ │ ├── f9fff391-dbbc-4a0b-a042-4ae56c977c72.json │ │ │ └── fc9390d8-5746-45f8-89bf-cc820674ff75.json │ ├── info.json │ ├── initial_code │ │ ├── deepevolve_interface.py │ │ ├── main.py │ │ └── requirements.txt │ ├── initial_idea.json │ └── initial_metrics.json ├── molecular_translation │ ├── README.md │ ├── info.json │ ├── initial_code │ │ ├── deepevolve_interface.py │ │ ├── main.py │ │ └── requirements.txt │ ├── initial_idea.json │ └── initial_metrics.json ├── molecule │ ├── README.md │ ├── info.json │ ├── initial_code │ │ ├── LICENSE │ │ ├── README.md │ │ ├── conv.py │ │ ├── dataset.py │ │ ├── deepevolve_interface.py │ │ ├── main_pyg.py │ │ ├── model.py │ │ ├── requirements.txt │ │ 
└── utils.py │ ├── initial_idea.json │ └── initial_metrics.json ├── nuclei_image │ ├── README.md │ ├── info.json │ ├── initial_code │ │ ├── deepevolve_interface.py │ │ ├── main.py │ │ ├── requirements.txt │ │ └── runtemp.sh │ ├── initial_idea.json │ └── initial_metrics.json ├── openvaccine │ ├── README.md │ ├── info.json │ ├── initial_code │ │ ├── deepevolve_interface.py │ │ ├── main.py │ │ └── requirements.txt │ ├── initial_idea.json │ └── initial_metrics.json ├── parkinson_disease │ ├── README.md │ ├── info.json │ ├── initial_code │ │ ├── base_model.py │ │ ├── config.py │ │ ├── data_loader.py │ │ ├── deepevolve_interface.py │ │ ├── lightgbm_model.py │ │ ├── main.py │ │ ├── metrics.py │ │ ├── neural_network.py │ │ ├── preprocessing.py │ │ ├── public_timeseries_testing_util.py │ │ ├── requirements.txt │ │ └── utils.py │ └── initial_metrics.json ├── polymer │ ├── README.md │ ├── info.json │ ├── initial_code │ │ ├── LICENSE │ │ ├── conv.py │ │ ├── deepevolve_interface.py │ │ ├── main_pyg.py │ │ ├── model.py │ │ ├── preprocessing.py │ │ ├── requirements.txt │ │ └── utils.py │ ├── initial_idea.json │ └── initial_metrics.json └── usp_p2p │ ├── README.md │ ├── info.json │ ├── initial_code │ ├── deepevolve_interface.py │ └── main.py │ ├── initial_idea.json │ ├── initial_metrics.json │ └── requirements.txt ├── problem.py ├── requirements-mini.txt ├── requirements.txt ├── researcher.py ├── run_example.sh └── utils ├── code.py ├── datatypes.py └── format.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 
29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .DS_Store 132 | 133 | 134 | data_cache/* 135 | !data_cache/*.py 136 | logs 137 | outputs/ 138 | **/tmp/ 139 | tmp/ 140 | examples/molecule/data 141 | **/*ckpt* 142 | **/data/ 143 | examples/*/data/ 144 | baseline 145 | examples/burgers/selection 146 | 147 | # examples/burgers 148 | # examples/molecular_translation 149 | # examples/openvaccine 150 | !examples/circle_packing/ckpt 151 | examples/create_readme_by_info.py 152 | discoveries/convert_to_output.py 153 | # discoveries/molecular_translation 154 | # discoveries/openvaccine 155 | -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liugangcode/deepevolve/5fd3d07a7f2aa91603c7bc94094f904829e5b675/assets/logo.png -------------------------------------------------------------------------------- /assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liugangcode/deepevolve/5fd3d07a7f2aa91603c7bc94094f904829e5b675/assets/overview.png -------------------------------------------------------------------------------- /configs/config.yaml: -------------------------------------------------------------------------------- 1 | researcher: 2 | planner: "o4-mini" 3 | searcher: "gpt-4o" 4 | writer: 
"o3-mini" 5 | reasoning_effort: "high" 6 | 7 | coder: 8 | developer: "o3-mini" 9 | debugger: "o4-mini" 10 | reasoning_effort: "high" 11 | 12 | # General Settings 13 | query: null 14 | max_iterations: 50 15 | checkpoint_interval: 10 16 | checkpoint: "ckpt" 17 | log_level: "INFO" 18 | log_dir: null 19 | 20 | workspace: examples 21 | problem: null 22 | search_time_bias: true 23 | max_research_reflect: 1 24 | max_coding_reflect: 1 25 | max_debug_retry: 5 26 | # Database Configuration 27 | database: 28 | random_seed: null 29 | db_path: null 30 | in_memory: true 31 | population_size: 25 # total maintained for random 32 | archive_size: 10 # elite for exploitation 33 | num_islands: 5 34 | migration_interval: 25 35 | migration_rate: 0.1 36 | elite_selection_ratio: 0.1 37 | exploration_ratio: 0.2 38 | exploitation_ratio: 0.7 39 | feature_dimensions: 40 | - "score" 41 | - "diversity" 42 | - "complexity" 43 | feature_bins: 10 44 | n_inspirations: 5 45 | 46 | 47 | hydra: 48 | job_logging: 49 | disable_existing_loggers: false 50 | job: 51 | chdir: false 52 | run: 53 | dir: . 
54 | output_subdir: null 55 | 56 | defaults: 57 | - _self_ -------------------------------------------------------------------------------- /data_cache/amp_pd.py: -------------------------------------------------------------------------------- 1 | # from: https://github.com/snap-stanford/MLAgentBench/blob/main/MLAgentBench/benchmarks/amp-parkinsons-disease-progression-prediction/scripts/prepare.py 2 | 3 | import subprocess 4 | import pandas as pd 5 | import random 6 | import os 7 | 8 | taskname = "amp-parkinsons-disease-progression-prediction" 9 | download_dir = "./amp_pd" 10 | os.makedirs(download_dir, exist_ok=True) 11 | 12 | input(f"Consent to the competition at https://www.kaggle.com/competitions/{taskname}/data; Press any key after you have accepted the rules online.") 13 | 14 | subprocess.run(["kaggle", "competitions", "download", "-c", taskname], cwd=download_dir) 15 | subprocess.run(["unzip", "-n", f"{taskname}.zip"], cwd=download_dir) 16 | subprocess.run(["rm", f"{taskname}.zip"], cwd=download_dir) 17 | subprocess.run(["rm", "-r", "amp_pd_peptide"], cwd=download_dir) 18 | subprocess.run(["rm", "-r", "amp_pd_peptide_310"], cwd=download_dir) 19 | 20 | # ## split train to train and test in env 21 | 22 | data_proteins = pd.read_csv(f'{download_dir}/train_proteins.csv') 23 | data_clinical = pd.read_csv(f'{download_dir}/train_clinical_data.csv') 24 | data_peptides = pd.read_csv(f'{download_dir}/train_peptides.csv') 25 | data_supplemental = pd.read_csv(f'{download_dir}/supplemental_clinical_data.csv') 26 | 27 | # raise Exception('stop here') 28 | 29 | random.seed(42) 30 | 31 | patient_id = data_clinical['patient_id'].unique() 32 | patiend_from_supplemental = data_supplemental['patient_id'].unique() 33 | 34 | total_test_patient = int(len(patient_id) * 0.2) 35 | patient_id_not_in_supplemental = [x for x in patient_id if x not in patiend_from_supplemental] 36 | test_patient_id = random.sample(patient_id_not_in_supplemental, total_test_patient) 37 | train_patient_id 
= [x for x in patient_id if x not in test_patient_id] 38 | 39 | print('train_patient_id', len(train_patient_id)) 40 | print('test_patient_id', len(test_patient_id), 'ratio', len(test_patient_id) / len(patient_id)) 41 | 42 | data_proteins[~data_proteins['patient_id'].isin(test_patient_id)].to_csv(f'{download_dir}/train_proteins.csv', index=False) 43 | data_clinical[~data_clinical['patient_id'].isin(test_patient_id)].to_csv(f'{download_dir}/train_clinical_data.csv', index=False) 44 | data_peptides[~data_peptides['patient_id'].isin(test_patient_id)].to_csv(f'{download_dir}/train_peptides.csv', index=False) 45 | data_supplemental[~data_supplemental['patient_id'].isin(test_patient_id)].to_csv(f'{download_dir}/supplemental_clinical_data.csv', index=False) 46 | 47 | data_proteins[data_proteins['patient_id'].isin(test_patient_id)].to_csv(f'{download_dir}/example_test_files/test_proteins.csv', index=False) 48 | data_peptides[data_peptides['patient_id'].isin(test_patient_id)].to_csv(f'{download_dir}/example_test_files/test_peptides.csv', index=False) 49 | test_clinical = data_clinical[data_clinical['patient_id'].isin(test_patient_id)] 50 | 51 | 52 | # Create test.csv 53 | temp_list = [] 54 | for i in range(1, 5): 55 | temp = test_clinical.copy() 56 | temp['level_3'] = i 57 | temp['updrs_test'] = f'updrs_{i}' 58 | temp_list.append(temp) 59 | mock_train = pd.concat(temp_list) 60 | mock_train['row_id'] = (mock_train[['patient_id', 'visit_month', 'level_3']] 61 | .apply((lambda r: f"{r.patient_id}_{int(r.visit_month)}_updrs_{r.level_3}"), axis=1)) 62 | mock_train[['visit_id', 'patient_id', 'visit_month','row_id', 'updrs_test']].to_csv(f'{download_dir}/example_test_files/test.csv', index=False) 63 | 64 | # Create sample_submission.csv 65 | temp_list = [] 66 | for wait in [0, 6, 12, 24]: 67 | temp = mock_train.copy() 68 | temp['wait'] = wait 69 | temp_list.append(temp) 70 | y = pd.concat(temp_list) 71 | y = y[y.visit_month + y.wait <= 108] 72 | y['prediction_id'] = 
(y[['patient_id', 'visit_month', 'wait', 'level_3']] 73 | .apply((lambda r: f"{r.patient_id}_{int(r.visit_month)}_updrs_{r.level_3}_plus_{r.wait}_months"), axis=1)) 74 | 75 | def get_rating(row): 76 | rating = test_clinical[test_clinical["visit_id"] == f'{row.patient_id}_{int(row.visit_month) + int(row.wait) }' ][f'updrs_{row.level_3}'] 77 | if len(rating) == 0: 78 | return None 79 | return rating.item() 80 | 81 | y['rating'] = (y[['patient_id', 'visit_month', 'wait', 'level_3']].apply(get_rating, axis=1)) 82 | y = y.dropna() 83 | y[['prediction_id', 'rating', 'visit_month']].to_csv(f'{download_dir}/example_test_files/answer.csv', index=False) 84 | 85 | y['rating'] = 0 86 | y[['prediction_id', 'rating', 'visit_month']].to_csv(f'{download_dir}/example_test_files/sample_submission.csv', index=False) -------------------------------------------------------------------------------- /data_cache/circle_packing.py: -------------------------------------------------------------------------------- 1 | print(""" 2 | The problem about packing circles does not require any dataset preparation. 3 | """) -------------------------------------------------------------------------------- /data_cache/codepde.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import os 3 | import argparse 4 | from pathlib import Path 5 | 6 | import pandas as pd 7 | from torchvision.datasets.utils import download_url 8 | from tqdm import tqdm 9 | import h5py 10 | import numpy as np 11 | 12 | # size info: https://github.com/pdebench/PDEBench/tree/main/pdebench/data_download 13 | 14 | def parse_metadata(pde_name: str) -> pd.DataFrame: 15 | """ 16 | Read the CSV of URLs and filter to the given PDE. 
17 | """ 18 | csv_path = Path(__file__).with_name("pdebench_data_urls.csv") 19 | meta_df = pd.read_csv(csv_path) 20 | meta_df["PDE"] = meta_df["PDE"].str.lower() 21 | 22 | valid = { 23 | "advection", "burgers", "1d_cfd", "diff_sorp", "1d_reacdiff", 24 | "2d_cfd", "darcy", "2d_reacdiff", "ns_incom", "swe", "3d_cfd", 25 | } 26 | pde = pde_name.lower() 27 | assert pde in valid, f"PDE name '{pde_name}' not recognized." 28 | 29 | return meta_df[meta_df["PDE"] == pde] 30 | 31 | def download_data(pde_name: str): 32 | """ 33 | Download all HDF5 files for a given PDE into root_folder/ directories. 34 | """ 35 | pde_df = parse_metadata(pde_name) 36 | target_dir = Path(pde_name) / "original" 37 | target_dir.mkdir(parents=True, exist_ok=True) 38 | 39 | # Check if all files already exist 40 | all_files_exist = True 41 | for _, row in pde_df.iterrows(): 42 | file_path = target_dir / row["Filename"] 43 | if not file_path.exists(): 44 | all_files_exist = False 45 | break 46 | 47 | if all_files_exist: 48 | print(f"All files for '{pde_name}' already exist. Skipping download.") 49 | return 50 | 51 | print(f"Downloading missing files for '{pde_name}'...") 52 | for _, row in tqdm(pde_df.iterrows(), total=len(pde_df), desc="Downloading"): 53 | file_path = target_dir / row["Filename"] 54 | if file_path.exists(): 55 | print(f"File {row['Filename']} already exists. Skipping.") 56 | continue 57 | download_url(row["URL"], str(target_dir), row["Filename"], md5=row["MD5"]) 58 | 59 | def work(dataset_path, subset_path, subset_selection): 60 | # Skip if subset file already exists 61 | if os.path.exists(subset_path): 62 | print(f"Subset file {subset_path} already exists. 
Skipping.") 63 | return 64 | 65 | # Load data from file 66 | with h5py.File(dataset_path, 'r') as f: 67 | # Load the data 68 | print(f"Available keys in {dataset_path}: {list(f.keys())}") 69 | t_coordinate = np.array(f['t-coordinate'])[:-1] # Keep as is 70 | x_coordinate = np.array(f['x-coordinate']) # Keep as is 71 | u = subset_selection(np.array(f['tensor'])) 72 | 73 | # Navier-Stokes data has different structure 74 | # Vx = subset_selection((f['Vx'])) 75 | # density = subset_selection(np.array(f['density'])) 76 | # pressure = subset_selection(np.array(f['pressure'])) 77 | 78 | # Verify shapes 79 | print(t_coordinate.shape, x_coordinate.shape, u.shape) 80 | # (201,) (1024,) (100, 201, 1024) for burgers equation 81 | 82 | # Save the subset to a new HDF5 file 83 | with h5py.File(subset_path, 'w') as f: 84 | # Create datasets in the new file 85 | f.create_dataset('t-coordinate', data=t_coordinate) 86 | f.create_dataset('tensor', data=u) 87 | f.create_dataset('x-coordinate', data=x_coordinate) 88 | 89 | # Uncomment if you want to save Navier-Stokes specific data 90 | # f.create_dataset('Vx', data=Vx) 91 | # f.create_dataset('density', data=density) 92 | # f.create_dataset('pressure', data=pressure) 93 | 94 | print(f"Subset data saved successfully at {subset_path}!") 95 | 96 | if __name__ == '__main__': 97 | pde_name = 'burgers' 98 | 99 | test_subset_size = 100 100 | dev_subset_size = 50 101 | 102 | download_data(pde_name) 103 | 104 | dataset_dir = Path(pde_name) / "original" 105 | for item in os.listdir(dataset_dir): 106 | full_path = os.path.join(dataset_dir, item) 107 | if os.path.isfile(full_path): 108 | print(full_path) 109 | 110 | subset_path = os.path.join(pde_name, item) 111 | work(full_path, subset_path, lambda x: x[:test_subset_size]) 112 | 113 | development_subset_path = subset_path.replace('.hdf5', '_development.hdf5') 114 | work(full_path, development_subset_path, lambda x: x[-dev_subset_size:]) 115 | 116 | print(f"Done. 
Subsets are in ./{pde_name}/") -------------------------------------------------------------------------------- /data_cache/molecule.py: -------------------------------------------------------------------------------- 1 | print(""" 2 | We don't need to manually download the dataset for the molecular property prediction example, 3 | it can be downloaded automatically while running the example 4 | """) -------------------------------------------------------------------------------- /data_cache/openvaccine.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import zipfile 4 | from kaggle.api.kaggle_api_extended import KaggleApi 5 | from time import time 6 | 7 | def download_raw_data(taskname: str, download_dir: str = "./openvaccine"): 8 | """ 9 | Download raw competition data for a given Kaggle competition. 10 | 11 | Args: 12 | taskname: The Kaggle competition slug. 13 | download_dir: Directory where the raw data will be stored. 14 | """ 15 | os.makedirs(download_dir, exist_ok=True) 16 | input( 17 | f"Consent to the competition at " 18 | f"https://www.kaggle.com/competitions/{taskname}/data; " 19 | "Press any key after you have accepted the rules online." 
20 | ) 21 | # download and unzip 22 | subprocess.run( 23 | ["kaggle", "competitions", "download", "-c", taskname], 24 | cwd=download_dir, 25 | check=True 26 | ) 27 | subprocess.run( 28 | ["unzip", "-n", f"{taskname}.zip"], 29 | cwd=download_dir, 30 | check=True 31 | ) 32 | os.remove(os.path.join(download_dir, f"{taskname}.zip")) 33 | 34 | def main(): 35 | # 1) Download raw competition data 36 | start_time = time() 37 | taskname = "stanford-covid-vaccine" 38 | download_dir = "./openvaccine" 39 | download_raw_data(taskname, download_dir) 40 | print(f"Raw competition data downloaded in {time() - start_time:.2f} seconds") 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /data_cache/polymer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import pandas as pd 4 | from sklearn.model_selection import train_test_split 5 | from time import time 6 | 7 | def download_raw_data(taskname: str, download_dir: str = "./polymer"): 8 | """ 9 | Download raw competition data for a given Kaggle competition. 10 | 11 | Args: 12 | taskname: The Kaggle competition slug. 13 | download_dir: Directory where the raw data will be stored. 14 | """ 15 | os.makedirs(download_dir, exist_ok=True) 16 | input( 17 | f"Consent to the competition at " 18 | f"https://www.kaggle.com/competitions/{taskname}/data; " 19 | "Press any key after you have accepted the rules online." 
20 | ) 21 | # download and unzip 22 | subprocess.run( 23 | ["kaggle", "competitions", "download", "-c", taskname], 24 | cwd=download_dir, 25 | check=True 26 | ) 27 | subprocess.run( 28 | ["unzip", "-n", f"{taskname}.zip"], 29 | cwd=download_dir, 30 | check=True 31 | ) 32 | os.remove(os.path.join(download_dir, f"{taskname}.zip")) 33 | 34 | def split_train_data(download_dir: str = "./polymer"): 35 | """ 36 | Split train.csv into train/valid/test sets with ratio 0.7/0.1/0.2 37 | and remove unnecessary files. 38 | 39 | Args: 40 | download_dir: Directory containing the downloaded data. 41 | """ 42 | train_path = os.path.join(download_dir, "train.csv") 43 | 44 | if not os.path.exists(train_path): 45 | print(f"train.csv not found in {download_dir}") 46 | return 47 | 48 | # Load the training data 49 | print("Loading train.csv...") 50 | df = pd.read_csv(train_path) 51 | print(f"Original training data shape: {df.shape}") 52 | 53 | # First split: 70% train, 30% temp (which will be split into 10% valid, 20% test) 54 | train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42) 55 | 56 | # Second split: from the 30%, create 10% valid (1/3 of temp) and 20% test (2/3 of temp) 57 | valid_df, test_df = train_test_split(temp_df, test_size=0.667, random_state=42) 58 | 59 | # Save the split datasets 60 | train_df.to_csv(os.path.join(download_dir, "train.csv"), index=False) 61 | valid_df.to_csv(os.path.join(download_dir, "valid.csv"), index=False) 62 | test_df.to_csv(os.path.join(download_dir, "test.csv"), index=False) 63 | 64 | print(f"Train set shape: {train_df.shape} (70%)") 65 | print(f"Valid set shape: {valid_df.shape} (10%)") 66 | print(f"Test set shape: {test_df.shape} (20%)") 67 | 68 | # Remove sample submission file 69 | sample_submission_path = os.path.join(download_dir, "sample_submission.csv") 70 | if os.path.exists(sample_submission_path): 71 | os.remove(sample_submission_path) 72 | print("Removed sample_submission.csv") 73 | 74 | def main(): 75 | # 1) 
Download raw competition data 76 | start_time = time() 77 | taskname = "neurips-open-polymer-prediction-2025" 78 | download_dir = "./polymer" 79 | download_raw_data(taskname, download_dir) 80 | print(f"Raw competition data downloaded in {time() - start_time:.2f} seconds") 81 | 82 | # 2) Split the training data and clean up files 83 | split_start = time() 84 | split_train_data(download_dir) 85 | print(f"Data splitting completed in {time() - split_start:.2f} seconds") 86 | 87 | if __name__ == "__main__": 88 | main() -------------------------------------------------------------------------------- /data_cache/usp_p2p.py: -------------------------------------------------------------------------------- 1 | # from: https://github.com/snap-stanford/MLAgentBench/blob/main/MLAgentBench/benchmarks/amp-parkinsons-disease-progression-prediction/scripts/prepare.py 2 | 3 | import subprocess 4 | import pandas as pd 5 | import random 6 | import os 7 | from sklearn.model_selection import train_test_split 8 | 9 | taskname = "us-patent-phrase-to-phrase-matching" 10 | download_dir = "./usp_p2p" 11 | os.makedirs(download_dir, exist_ok=True) 12 | 13 | input(f"Consent to the competition at https://www.kaggle.com/competitions/{taskname}/data; Press any key after you have accepted the rules online.") 14 | 15 | subprocess.run(["kaggle", "competitions", "download", "-c", taskname], cwd=download_dir) 16 | subprocess.run(["unzip", "-n", f"{taskname}.zip"], cwd=download_dir) 17 | subprocess.run(["rm", f"{taskname}.zip"], cwd=download_dir) 18 | 19 | data_dir = os.path.dirname(__file__) 20 | train_path = os.path.join(data_dir, download_dir, "train.csv") 21 | test_path = os.path.join(data_dir, download_dir, "test.csv") 22 | sample_submission_path = os.path.join(data_dir, download_dir, "sample_submission.csv") 23 | 24 | # 1. Remove the current test.csv if it exists 25 | if os.path.exists(test_path): 26 | os.remove(test_path) 27 | print(f"Removed existing {test_path}") 28 | 29 | # 2. 
Read train.csv 30 | df = pd.read_csv(train_path) 31 | 32 | # 3. Split into 90% train, 10% test 33 | train_df, test_df = train_test_split(df, test_size=0.1, random_state=42) 34 | 35 | # 4. Save the new splits 36 | train_df.to_csv(train_path, index=False) 37 | test_df.to_csv(test_path, index=False) 38 | print(f"Split complete. New train: {len(train_df)} rows, new test: {len(test_df)} rows.") 39 | 40 | # 5. Create sample_submission.csv with id from test.csv and score=0 41 | sample_submission = pd.DataFrame({ 42 | "id": test_df["id"], 43 | "score": 0 44 | }) 45 | sample_submission.to_csv(sample_submission_path, index=False) 46 | print(f"Created {sample_submission_path} with {len(sample_submission)} rows.") -------------------------------------------------------------------------------- /discoveries/circle_packing/deepevolve_interface.py: -------------------------------------------------------------------------------- 1 | from main import construct_packing, validate_packing 2 | from time import time 3 | import numpy as np 4 | import traceback 5 | import warnings # DEBUG: imported warnings for adaptive_bisection in main.py 6 | import warnings 7 | import signal 8 | from contextlib import contextmanager 9 | 10 | 11 | @contextmanager 12 | def timeout(duration): 13 | """Context manager for timing out function calls""" 14 | 15 | def timeout_handler(signum, frame): 16 | raise TimeoutError(f"Function call timed out after {duration} seconds") 17 | 18 | # Set the signal handler 19 | old_handler = signal.signal(signal.SIGALRM, timeout_handler) 20 | signal.alarm(duration) 21 | 22 | try: 23 | yield 24 | finally: 25 | # Restore the old signal handler 26 | signal.signal(signal.SIGALRM, old_handler) 27 | signal.alarm(0) 28 | 29 | 30 | def deepevolve_interface(): 31 | try: 32 | start_time = time() 33 | 34 | # SOTA values for comparison 35 | sota_values = { 36 | 26: 2.6358627564136983, 37 | 27: 2.685, 38 | 28: 2.737, 39 | 29: 2.790, 40 | 30: 2.842, 41 | 31: 2.889, 42 | 32: 
2.937944526205518, 43 | } 44 | 45 | all_results = {} 46 | all_sum_radii = [] 47 | 48 | # Run for n from 26 to 32 49 | for n in range(26, 33): 50 | # Apply 1-minute timeout to construct_packing 51 | try: 52 | with timeout(60): 53 | centers, radii, sum_radii = construct_packing(n=n) 54 | 55 | if not isinstance(centers, np.ndarray): 56 | centers = np.array(centers) 57 | if not isinstance(radii, np.ndarray): 58 | radii = np.array(radii) 59 | 60 | # Validate solution 61 | valid_packing, message_packing = validate_packing(centers, radii) 62 | 63 | if not valid_packing: 64 | print(f"Invalid packing for n={n}: {message_packing}") 65 | 66 | except TimeoutError as te: 67 | warnings.warn( 68 | f"Timeout occurred for n={n}: {te}. Setting sum_radii to 0." 69 | ) 70 | centers = np.array([]) 71 | radii = np.array([]) 72 | sum_radii = 0.0 73 | valid_packing = False 74 | message_packing = f"60s Timeout occurred for n={n}" 75 | 76 | # Store results 77 | all_results[n] = { 78 | "sum_radii": sum_radii if valid_packing else 0.0, 79 | "valid": valid_packing, 80 | "message": message_packing, 81 | } 82 | all_sum_radii.append(sum_radii if valid_packing else 0.0) 83 | 84 | # Calculate runtime in seconds 85 | runtime = time() - start_time 86 | runtime = round(runtime, 2) 87 | 88 | combined_score = np.mean(all_sum_radii) 89 | 90 | metrics = { 91 | "combined_score": combined_score, 92 | "runtime_seconds": runtime, 93 | } 94 | 95 | # Add individual sum_radii and ratios to SOTA for each n 96 | for n in range(26, 33): 97 | result = all_results[n] 98 | sum_radii = result["sum_radii"] 99 | valid = result["valid"] 100 | 101 | # Add sum_radii for this n 102 | metrics[f"sum_radii_for_n_{n}"] = sum_radii 103 | 104 | # Calculate ratio to SOTA 105 | if n in sota_values and valid: 106 | sota_value = sota_values[n] 107 | ratio_to_sota = sum_radii / sota_value 108 | metrics[f"ratio_to_sota_for_n_{n}"] = ratio_to_sota 109 | else: 110 | metrics[f"ratio_to_sota_for_n_{n}"] = 0.0 111 | 112 | # Add validity for 
this n 113 | metrics[f"validity_for_n_{n}"] = 1.0 if valid else 0.0 114 | if not valid: 115 | metrics[f"message_for_n_{n}"] = message_packing 116 | 117 | overall_validity = all(all_results[n]["valid"] for n in range(26, 33)) 118 | metrics["overall_validity"] = 1.0 if overall_validity else 0.0 119 | 120 | return True, metrics 121 | 122 | except Exception as e: 123 | # Capture full traceback information 124 | error_traceback = traceback.format_exc() 125 | error_info = f""" 126 | Error type: {type(e).__name__} 127 | Error message: {str(e)} 128 | Traceback: {error_traceback} 129 | """ 130 | return False, error_info 131 | 132 | 133 | def visualize(centers, radii): 134 | """ 135 | Visualize the circle packing 136 | 137 | Args: 138 | centers: np.array of shape (n, 2) with (x, y) coordinates 139 | radii: np.array of shape (n) with radius of each circle 140 | """ 141 | import matplotlib.pyplot as plt 142 | from matplotlib.patches import Circle 143 | 144 | fig, ax = plt.subplots(figsize=(8, 8)) 145 | 146 | # Draw unit square 147 | ax.set_xlim(0, 1) 148 | ax.set_ylim(0, 1) 149 | ax.set_aspect("equal") 150 | ax.grid(True) 151 | 152 | # Draw circles 153 | for i, (center, radius) in enumerate(zip(centers, radii)): 154 | circle = Circle(center, radius, alpha=0.5) 155 | ax.add_patch(circle) 156 | ax.text(center[0], center[1], str(i), ha="center", va="center") 157 | 158 | plt.title(f"Circle Packing (n={len(centers)}, sum={sum(radii):.6f})") 159 | plt.show() 160 | # plt.savefig('circle_packing.png') 161 | 162 | 163 | if __name__ == "__main__": 164 | status, metrics = deepevolve_interface() 165 | print(f"Status: {status}") 166 | print(f"Metrics: {metrics}") 167 | # AlphaEvolve improved this to 2.635 168 | 169 | 170 | -------------------------------------------------------------------------------- /discoveries/molecular_translation/deepevolve_interface.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import warnings 3 | from 
def run_main_with_timeout(base_dir, timeout_sec):
    """Run ``main(Config(base_dir=...))`` in a worker thread under a wall-clock limit.

    Args:
        base_dir: Dataset directory forwarded to ``Config``.
        timeout_sec: Maximum runtime in seconds before a TimeoutError is raised.

    Returns:
        Whatever ``main`` returns (the evaluation metric).

    Raises:
        TimeoutError: If the worker is still alive after ``timeout_sec``.
        Exception: Re-raised with the worker's full traceback text if it failed.
    """
    result = {"metrics": None, "error": None}

    def target():
        try:
            result["metrics"] = main(Config(base_dir=base_dir))
        except Exception:
            # Preserve the full traceback, not just str(e), so the caller can
            # see where inside main() the failure actually happened.
            result["error"] = traceback.format_exc()

    thread = threading.Thread(target=target)
    # NOTE(review): a daemon thread cannot be killed; on timeout it keeps
    # running in the background until the process exits.
    thread.daemon = True
    thread.start()
    thread.join(timeout_sec)

    if thread.is_alive():
        raise TimeoutError(
            f"The model runtime exceeded {timeout_sec/60:.2f} minutes and was terminated. Please reduce the runtime of the model."
        )

    if result["error"]:
        raise Exception(result["error"])

    return result["metrics"]


def deepevolve_interface():
    """Evaluate the molecular-translation pipeline and package DeepEvolve metrics.

    Returns:
        (True, metrics) on success, where ``combined_score`` = 1 - result
        (higher is better); (False, error_text) on failure.
    """
    # base_dir = "../../../data_cache/molecular_translation"
    base_dir = "data_cache/molecular_translation"
    try:
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            start_time = time()
            results = run_main_with_timeout(base_dir, 1800)
            runtime = time() - start_time

        warning_messages = [str(w.message) for w in caught]
        runtime_minutes = round(runtime / 60, 2)

        # The pipeline returns a normalized distance; invert so higher is better.
        scores = 1 - float(results)

        metrics = {
            "combined_score": scores,
            "runtime_minutes": runtime_minutes,
        }
        if warning_messages:
            warning_messages = list(set(warning_messages))
            if len(warning_messages) > 10:
                warning_messages = warning_messages[:10]
            metrics["program_warnings"] = warning_messages

        return True, metrics

    except Exception as e:
        error_traceback = traceback.format_exc()
        error_info = (
            f"Error type: {type(e).__name__}\n"
            f"Error message: {e}\n"
            f"Traceback:\n{error_traceback}"
        )
        return False, error_info
def deepevolve_interface():
    """Run the molecule property-prediction experiment and score it.

    The combined score averages the mean test AUC and (1 - AUC std) so that
    both accuracy and run-to-run stability are rewarded.
    """
    args = get_args()
    args.dataset = "ogbg-molsider"
    args.by_default = True
    args.trials = 3

    try:
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            started = time()
            results = config_and_run(args)
            elapsed = time() - started

        warning_messages = [str(w.message) for w in caught]

        runtime = round(elapsed / 60, 2)
        auc_mean = results["test_auc_mean"]
        auc_std = results["test_auc_std"]
        initial_combined_score = 0.7914562889678236
        current_combined_score = auc_mean * 0.5 + (1 - auc_std) * 0.5
        impr_pct = (
            (current_combined_score - initial_combined_score)
            / initial_combined_score
            * 100
        )
        metrics = {
            "combined_score": current_combined_score,
            "improvement_percentage_to_initial": impr_pct,
            "runtime_minutes": runtime,
            **results,
        }
        if warning_messages:
            unique_warnings = list(set(warning_messages))
            metrics["program_warnings"] = unique_warnings[:10]

        return True, metrics
    except Exception as e:
        # Capture full traceback information
        error_traceback = traceback.format_exc()
        error_info = f"""
        Error type: {type(e).__name__}
        Error message: {str(e)}
        Traceback: {error_traceback}
        """
        return False, error_info
{results}") 61 | 62 | 63 | -------------------------------------------------------------------------------- /discoveries/nuclei_image/deepevolve_interface.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | from main import main, Config 3 | from time import time 4 | import warnings 5 | import threading 6 | import signal 7 | 8 | 9 | # DEBUG: module-level worker function for spawn pickling compatibility 10 | ### >>> DEEPEVOLVE-BLOCK-START: Enhance error reporting in _worker_main 11 | def _worker_main(cfg, q): 12 | try: 13 | metrics = main(cfg) 14 | q.put(("metrics", metrics)) 15 | except Exception as e: 16 | import traceback 17 | 18 | q.put(("error", traceback.format_exc())) 19 | 20 | 21 | ### <<< DEEPEVOLVE-BLOCK-END 22 | 23 | 24 | def run_main_with_timeout(config, timeout_sec): 25 | # DEBUG: Use a separate process instead of thread to safely run GPU operations and allow termination 26 | import multiprocessing as mp 27 | 28 | ctx = mp.get_context("spawn") 29 | queue = ctx.Queue() 30 | 31 | # DEBUG: use module-level worker function for spawn pickling compatibility 32 | process = ctx.Process(target=_worker_main, args=(config, queue)) 33 | # DEBUG: Using 'spawn' start method via multiprocessing context to avoid CUDA reinitialization in forked subprocess 34 | process.start() 35 | process.join(timeout_sec) 36 | 37 | if process.is_alive(): 38 | process.terminate() 39 | raise TimeoutError( 40 | f"The model runtime exceeded {timeout_sec/60:.2f} minutes and was terminated. Please reduce the runtime of the model." 41 | ) 42 | 43 | if not queue.empty(): 44 | key, value = queue.get() 45 | if key == "error": 46 | raise Exception(value) 47 | return value 48 | else: 49 | raise Exception( 50 | "No result returned from the model run within the allotted time." 
def deepevolve_interface():
    """Train/evaluate the nuclei-image model and report DeepEvolve metrics.

    Returns:
        (True, metrics) with combined_score set to the test mAP, or
        (False, error_text) describing the failure.
    """
    config = Config()
    try:
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            started = time()
            # results = main(config)
            results = run_main_with_timeout(config, 1800)
            elapsed = time() - started

        warning_messages = [str(w.message) for w in caught]

        runtime = round(elapsed / 60, 2)

        metrics = {
            "combined_score": results["test_map"],
            "train_map": results["train_map"],
            "valid_map": results["valid_map"],
            "test_map": results["test_map"],
            "runtime_minutes": runtime,
        }

        if warning_messages:
            unique_warnings = list(set(warning_messages))
            metrics["program_warnings"] = unique_warnings[:10]

        return True, metrics

    except Exception as e:
        # Capture full traceback information
        error_traceback = traceback.format_exc()
        error_info = f"""
        Error type: {type(e).__name__}
        Error message: {str(e)}
        Traceback: {error_traceback}
        """
        return False, error_info
def deepevolve_interface():
    """Run the OpenVaccine pipeline and convert its scores into DeepEvolve metrics.

    combined_score is 1 / (1 + test MCRMSE), so lower error maps to a higher
    score in (0, 1]. A NaN test score is replaced by the sentinel 999.
    """
    base_dir = "data_cache/openvaccine"
    # base_dir = "../../../data_cache/openvaccine"
    try:
        started = time()
        raw_metrics, subprocess_warnings = run_main_with_timeout(base_dir, 1800)
        runtime_minutes = round((time() - started) / 60, 2)

        test_score = raw_metrics["test_MCRMSE"]
        if np.isnan(test_score):
            test_score = 999  # sentinel: NaN means the run effectively failed

        initial_score = 0.3914539605379105
        first_place_score = 0.34198
        improvement_to_initial = round(
            (initial_score - test_score) / initial_score * 100, 2
        )
        improvement_to_first_place = round(
            (first_place_score - test_score) / first_place_score * 100, 2
        )

        metrics = {
            "combined_score": 1 / (1 + test_score),
            "improvement_percentage_to_initial": improvement_to_initial,
            "improvement_percentage_to_first_place": improvement_to_first_place,
            "runtime_minutes": runtime_minutes,
            "test_MCRMSE_lower_is_better": test_score,
            "train_mean_loss_across_folds_lower_is_better": raw_metrics[
                "train_mean_loss_across_folds"
            ],
        }

        # Include warnings from subprocess
        if subprocess_warnings:
            unique_warnings = list(set(subprocess_warnings))
            metrics["program_warnings"] = unique_warnings[:10]

        return True, metrics

    except Exception as e:
        error_traceback = traceback.format_exc()
        error_info = (
            f"Error type: {type(e).__name__}\n"
            f"Error message: {e}\n"
            f"Traceback:\n{error_traceback}"
        )
        return False, error_info
device dynamically based on CUDA availability 39 | import torch 40 | 41 | cfg.device = "cuda" if torch.cuda.is_available() else "cpu" 42 | ### >>> DEEPEVOLVE-BLOCK-START: Enable cuDNN benchmark for performance if using CUDA 43 | if cfg.device == "cuda": 44 | torch.backends.cudnn.benchmark = True 45 | ### <<< DEEPEVOLVE-BLOCK-END 46 | ### <<< DEEPEVOLVE-BLOCK-END 47 | cfg.pretrained_transformer = ( 48 | None # path to pre-trained transformer encoder weights (if available) 49 | ) 50 | cfg.n_classes = 1 51 | cfg.batch_size = 128 52 | cfg.batch_size_val = 256 53 | cfg.n_hidden = 64 54 | cfg.n_layers = 2 55 | cfg.num_workers = 0 56 | cfg.drop_last = False 57 | cfg.gradient_clip = 1.0 58 | cfg.bag_size = 1 59 | cfg.bag_agg_function = "mean" 60 | cfg.lr = 2e-3 61 | cfg.warmup = 0 62 | cfg.epochs = 10 63 | # Added parameters for hybrid model enhancements 64 | cfg.use_cat = False # set to True to enable categorical covariate embedding 65 | cfg.use_protein = False # set to True to enable protein sequence embeddings 66 | cfg.use_transformer = ( 67 | True # enable transformer encoder for adaptive feature extraction 68 | ) 69 | cfg.use_transformer = ( 70 | True # enable transformer encoder for adaptive feature extraction 71 | ) 72 | cfg.interp_steps = 5 73 | cfg.cat_vocab_size = 10 74 | cfg.cat_embed_dim = 8 75 | cfg.protein_vocab_size = 1000 76 | cfg.protein_embed_dim = 32 77 | # Enable meta‐learning and MC dropout uncertainty estimation for rapid per‐patient adaptation. 
def preprocess_supplement_data(supplement_df):
    """Normalize supplemental clinical visits: visit month 5 is mapped to 6.

    Works on a copy so the caller's DataFrame is never mutated in place
    (the original version silently modified its argument).

    Args:
        supplement_df: DataFrame with a ``visit_month`` column.

    Returns:
        A new DataFrame with ``visit_month == 5`` rewritten to 6.
    """
    supplement_df = supplement_df.copy()
    supplement_df.loc[supplement_df["visit_month"] == 5, "visit_month"] = 6
    return supplement_df
def deepevolve_interface():
    """Run the Parkinson's progression pipeline and score it for DeepEvolve.

    combined_score = 1 - SMAPE/200, so zero error maps to 1.0 and the worst
    possible SMAPE (200) maps to 0.0; a NaN SMAPE scores 0.0.
    """
    base_dir = "data_cache/amp_pd"
    # base_dir = "../../../data_cache/amp_pd"
    try:
        # Run main_func inside a warnings-catching context
        with warnings.catch_warnings(record=True) as caught:
            # Always trigger all warnings
            warnings.simplefilter("always")

            started = time()
            smape = main_func(base_dir)
            elapsed = time() - started

        # Pull out warning messages
        warning_messages = [str(w.message) for w in caught]

        # Compute combined score
        if np.isnan(smape):
            combined_score = 0.0
            print("smape is nan, set combined_score to 0.0")
        else:
            combined_score = 1 - smape / 200

        # Compute runtime in minutes, rounded
        runtime_minutes = round(elapsed / 60, 2)

        # Improvement relative to the initial baseline SMAPE
        initial_smape = 93.54330168877686
        if np.isnan(smape):
            ratio = 0.0
        else:
            ratio = round((initial_smape - smape) / initial_smape * 100, 2)

        metrics = {
            "combined_score": combined_score,
            "symmetric_mean_absolute_percentage_error (lower is better)": smape,
            "improvement_percentage_to_initial": ratio,
            "runtime_minutes": runtime_minutes,
        }
        if warning_messages:
            unique_warnings = list(set(warning_messages))
            metrics["program_warnings"] = unique_warnings[:10]

        return True, metrics

    except Exception as e:
        error_traceback = traceback.format_exc()
        error_info = (
            f"Error type: {type(e).__name__}\n"
            f"Error message: {e}\n"
            f"Traceback:\n{error_traceback}"
        )
        return False, error_info
def smapep1(y_true, y_pred):
    """SMAPE of y+1, a nonnegative float, smaller is better.

    Parameters: y_true, y_pred: array-like

    Returns 100 for 100 % error.
    y_true may have missing values; those positions are dropped from both
    arrays before scoring.
    """
    # np.array(..., copy=False) raises under NumPy >= 2.0 whenever a copy is
    # required (e.g. list input); np.asarray is the supported equivalent.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    # Inline consistency check (previously sklearn's check_consistent_length).
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError(
            "Found input variables with inconsistent numbers of samples: "
            f"[{y_true.shape[0]}, {y_pred.shape[0]}]"
        )
    # Drop positions where the ground truth is missing (NaN/inf).
    finite = np.isfinite(y_true)
    y_true, y_pred = y_true[finite], y_pred[finite]
    if (y_true < 0).any():
        raise ValueError("y_true < 0")
    if (y_pred < 0).any():
        raise ValueError("y_pred < 0")
    denominator = (y_true + y_pred) / 2 + 1
    ape = np.abs(y_pred - y_true) / denominator
    return np.average(ape) * 100
def smape1p_ind(A, F):
    """Element-wise SMAPE+1 terms: 200 * |F - A| / (|A + 1| + |F + 1|)."""
    numerator = 200 * np.abs(F - A)
    denominator = np.abs(A + 1) + np.abs(F + 1)
    return numerator / denominator


def smape1p(A, F):
    """Mean SMAPE+1 over all elements of A (actuals) vs F (forecasts)."""
    return smape1p_ind(A, F).mean()
def smape1p_opt(x):
    """Integer target in [0, 60] that minimizes SMAPE+1 against observations x."""
    candidates = np.arange(0, 61)
    scores = [smape1p(x, c) for c in candidates]
    return candidates[np.argmin(scores)]


def single_smape1p(preds, tgt):
    """Expected SMAPE+1 of predicting the constant tgt under distributions preds.

    preds has one row per sample and one column per candidate class.
    """
    classes = np.tile(np.arange(preds.shape[1]), (preds.shape[0], 1))
    penalty = np.abs(classes - tgt) / (2 + classes + tgt)
    return (penalty * preds).sum(axis=1)


def opt_smape1p(preds):
    """Per-row class choice minimizing expected SMAPE+1 under preds."""
    costs = np.hstack(
        [single_smape1p(preds, c).reshape(-1, 1) for c in range(preds.shape[1])]
    )
    return costs.argmin(axis=1)


def max_dif(val, lst):
    """Distance from val to the largest element of lst below it, or -1 if none."""
    smaller = [x for x in lst if x < val]
    return val - max(smaller) if smaller else -1


def count_prev_visits(val, lst):
    """Number of elements of lst strictly below val."""
    return sum(1 for x in lst if x < val)
22 | A call to iter_test serves all rows of all dataframes with the current group ID value. 23 | export_group_id_column: if true, the dataframes iter_test serves will include the group_id_column values. 24 | """ 25 | # get the current directory 26 | self.input_paths: Sequence[str] = [ 27 | f"{base_dir}/example_test_files/test.csv", 28 | f"{base_dir}/example_test_files/test_peptides.csv", 29 | f"{base_dir}/example_test_files/test_proteins.csv", 30 | f"{base_dir}/example_test_files/sample_submission.csv", 31 | ] 32 | self.group_id_column: str = "visit_month" 33 | self.export_group_id_column: bool = True 34 | self.answer_path = f"{base_dir}/example_test_files/answer.csv" 35 | # iter_test is only designed to support at least two dataframes, such as test and sample_submission 36 | assert len(self.input_paths) >= 2 37 | 38 | self._status = "initialized" 39 | self.predictions = [] 40 | 41 | def iter_test(self) -> Tuple[pd.DataFrame]: 42 | """ 43 | Loads all of the dataframes specified in self.input_paths, 44 | then yields all rows in those dataframes that equal the current self.group_id_column value. 45 | """ 46 | if self._status != "initialized": 47 | 48 | raise Exception( 49 | "WARNING: the real API can only iterate over `iter_test()` once." 
50 | ) 51 | 52 | dataframes = [] 53 | for pth in self.input_paths: 54 | dataframes.append(pd.read_csv(pth, low_memory=False)) 55 | group_order = dataframes[0][self.group_id_column].drop_duplicates().tolist() 56 | dataframes = [df.set_index(self.group_id_column) for df in dataframes] 57 | 58 | for group_id in group_order: 59 | self._status = "prediction_needed" 60 | current_data = [] 61 | for df in dataframes: 62 | try: 63 | cur_df = df.loc[group_id].copy() 64 | # returning single line dataframes from df.loc requires special handling 65 | if not isinstance(cur_df, pd.DataFrame): 66 | cur_df = pd.DataFrame( 67 | {a: b for a, b in zip(cur_df.index.values, cur_df.values)}, 68 | index=[group_id], 69 | ) 70 | cur_df = cur_df.index.rename(self.group_id_column) 71 | except KeyError: 72 | cur_df = df.loc[[]].copy() 73 | cur_df = cur_df.reset_index(drop=not (self.export_group_id_column)) 74 | current_data.append(cur_df) 75 | yield tuple(current_data) 76 | 77 | while self._status != "prediction_received": 78 | print( 79 | "You must call `predict()` successfully before you can continue with `iter_test()`", 80 | flush=True, 81 | ) 82 | yield None 83 | 84 | # with open('submission.csv', 'w') as f_open: 85 | # pd.concat(self.predictions).to_csv(f_open, index=False) 86 | self._status = "finished" 87 | 88 | def predict(self, user_predictions: pd.DataFrame): 89 | """ 90 | Accepts and stores the user's predictions and unlocks iter_test once that is done 91 | """ 92 | if self._status == "finished": 93 | raise Exception("You have already made predictions for the full test set.") 94 | if self._status != "prediction_needed": 95 | raise Exception( 96 | "You must get the next test sample from `iter_test()` first." 
def repl(x1, x2, cond):
    """Return a copy of x1 whose entries come from x2 wherever cond is True.

    Neither input array is modified.
    """
    out = x1.copy()
    out[cond] = x2[cond]
    return out
warning_messages = warning_messages[:10] 34 | metrics["program_warnings"] = warning_messages 35 | 36 | return True, metrics 37 | except Exception as e: 38 | # Capture full traceback information 39 | error_traceback = traceback.format_exc() 40 | error_info = f""" 41 | Error type: {type(e).__name__} 42 | Error message: {str(e)} 43 | Traceback: {error_traceback} 44 | """ 45 | return False, error_info 46 | 47 | 48 | if __name__ == "__main__": 49 | status, results = deepevolve_interface() 50 | print(f"Status: {status}") 51 | print(f"Results: {results}") 52 | 53 | 54 | -------------------------------------------------------------------------------- /discoveries/usp_p2p/deepevolve_interface.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import warnings 3 | from main import main 4 | from time import time 5 | import numpy as np 6 | import multiprocessing 7 | 8 | 9 | def run_main_with_timeout(base_dir, timeout_sec): 10 | manager = multiprocessing.Manager() 11 | return_dict = manager.dict() 12 | 13 | ### >>> DEEPEVOLVE-BLOCK-START: Capture full traceback in exception handling within target function 14 | def target(): 15 | try: 16 | return_dict["metrics"] = main(base_dir) 17 | return_dict["error"] = None 18 | except Exception as e: 19 | import traceback 20 | 21 | return_dict["metrics"] = None 22 | return_dict["error"] = traceback.format_exc() 23 | 24 | ### <<< DEEPEVOLVE-BLOCK-END 25 | 26 | p = multiprocessing.Process(target=target) 27 | p.start() 28 | p.join(timeout_sec) 29 | if p.is_alive(): 30 | p.terminate() 31 | p.join() 32 | raise TimeoutError( 33 | f"The model runtime exceeded {timeout_sec/60:.2f} minutes and was terminated. Please reduce the runtime of the model." 
def deepevolve_interface():
    """Fine-tune/evaluate the USPTO patent-phrase model and report metrics.

    combined_score is the evaluation Pearson correlation; the improvement
    percentage is measured against a fixed baseline run.
    """
    base_dir = "data_cache/usp_p2p"
    # base_dir = "../../../data_cache/usp_p2p"
    try:
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            started = time()
            raw = run_main_with_timeout(base_dir, 1800)
            # raw = main(base_dir)
            elapsed = time() - started

        warning_messages = [str(w.message) for w in caught]
        runtime_minutes = round(elapsed / 60, 2)

        initial_score = 0.803648329426078
        ratio = round(
            (raw["eval_pearson"] - initial_score) / initial_score * 100, 2
        )

        metrics = {
            "combined_score": raw["eval_pearson"],
            "improvement_percentage_to_initial": ratio,
            "runtime_minutes": runtime_minutes,
            "eval_loss": raw["eval_loss"],
        }
        if warning_messages:
            unique_warnings = list(set(warning_messages))
            metrics["program_warnings"] = unique_warnings[:10]

        return True, metrics

    except Exception as e:
        error_traceback = traceback.format_exc()
        error_info = (
            f"Error type: {type(e).__name__}\n"
            f"Error message: {e}\n"
            f"Traceback:\n{error_traceback}"
        )
        return False, error_info
The solver is tailored for the specific case where the viscosity $\nu = \text{burgers\_nu}$, and it is optimized for this use. The code is implemented in PyTorch and leverages GPU acceleration when available. 4 | 5 | --- 6 | 7 | ## Problem Description 8 | 9 | We aim to solve the following partial differential equation (PDE): 10 | 11 | ```math 12 | \begin{cases} 13 | \partial_t u(x, t) + \partial_x \Bigl(\frac{u^2(x, t)}{2}\Bigr) = \nu\, \partial_{xx} u(x, t), & x \in (0,1), \; t \in (0,1] \\ 14 | u(x, 0) = u_0(x), & x \in (0,1) 15 | \end{cases} 16 | ``` 17 | 18 | with periodic boundary conditions. The initial condition $u_0(x)$ is provided as a discretized array with shape `[batch_size, N]`, where $N$ is the number of spatial points. The goal is to predict the evolution of $u(\cdot, t)$ at specified time steps $t = t_1, \dots, t_T$, producing an output of shape `[batch_size, T+1, N]` (including the initial condition). 19 | 20 | **Note:** To ensure numerical stability, the solver may use smaller internal time steps than those specified for the output. 21 | 22 | --- 23 | 24 | ## Evaluation Metrics 25 | 26 | The performance of the solver is measured using the following metrics: 27 | 28 | 1. **Scale-Independent Normalized Root Mean Squared Error (nRMSE):** 29 | 30 | For a set of $S$ PDE examples, the nRMSE is defined as: 31 | 32 | ```math 33 | \text{nRMSE} = \frac{1}{S} \sum_{s=1}^{S} \frac{\| u^{(s)}(x,t) - \hat{u}^{(s)}(x,t) \|_{2}}{\| u^{(s)}(x,t) \|_{2}} 34 | ``` 35 | 36 | where $u^{(s)}(x,t)$ is the ground truth and $\hat{u}^{(s)}(x,t)$ is the predicted solution. 37 | 38 | 2. **Convergence Rate:** 39 | 40 | The convergence test assesses if the solution error decreases as the grid is refined. Specifically, for a grid spacing $h$, the solver is considered convergent if: 41 | 42 | ```math 43 | \| u_{h} - u_{h/2} \|_{2} \rightarrow 0 \quad \text{as} \quad h \rightarrow 0. 
44 | ``` 45 | 46 | This ensures the numerical solution approaches the reference solution at the expected rate, confirming consistency and correctness. 47 | 48 | 3. **Computational Efficiency:** 49 | 50 | The execution time of the solver is recorded to measure its computational efficiency. 51 | 52 | --- 53 | 54 | ## Solver Interface 55 | 56 | The solver is implemented in the file `deepevolve_interface.py`. This interface defines the structure and methods for interacting with the solver and is designed to integrate with the broader system. 57 | 58 | --- 59 | 60 | ## Initial Idea 61 | 62 | The initial idea behind the solver is as follows: 63 | 64 | - **Equation:** The solver integrates the one-dimensional viscous Burgers equation: 65 | 66 | ```math 67 | u_t + \frac{1}{2}(u^2)_x = \nu\, u_{xx} 68 | ``` 69 | 70 | - **Spatial Discretization:** For each batch of $B$ initial states sampled on an evenly spaced grid of $N$ points (with $\Delta x = 1/N$): 71 | 72 | - Compute the convective flux $f = \frac{1}{2}u^2$. 73 | - Evaluate the spatial derivative of the convective flux using a centered finite-difference stencil implemented through `torch.roll`. 74 | - Compute the diffusion term $u_{xx}$ using the standard three-point Laplacian. 75 | 76 | - **Time Integration:** 77 | 78 | - The solver uses an explicit Euler method for time integration. 79 | - The time step for the inner loop is chosen adaptively but never exceeds $0.2\,\Delta x^2/\nu$, which satisfies the explicit stability criterion for the diffusive term. 80 | - The simulation is advanced on the GPU (when available), updating the solution tensor in place until the simulation time matches each requested output time provided by the user in the array $\{t_0, \dots, t_T\}$. 81 | - At each specified output time, the current field is stored, resulting in a final output tensor of shape `[B, T+1, N]` in single precision before conversion back to NumPy format. 
82 | 83 | For a more detailed implementation, please refer to the supplementary material: 84 | 85 | [Supplementary Implementation](https://github.com/LithiumDA/CodePDE/blob/main/solvers/burgers/nu_1.0/seeds/implementation_0.py) -------------------------------------------------------------------------------- /examples/burgers/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "problem": { 3 | "name": "pde_burgers", 4 | "description": "The PDE is the burgers equation, given by\n\n\\\\[\\n\\\\begin{{cases}}\\n\\\\partial_t u(x, t) + \\\\partial_x \\left( \\\\frac{{u^2(x, t)}}{{2}} \\right) = \\\\nu \\\\partial_{{xx}} u(x, t), & x \\\\in (0,1), \\; t \\\\in (0,1] \\\\\\\\u(x, 0) = u_0(x), & x \\\\in (0,1)\\n\\\\end{{cases}}\\n\\\\]\\n\\nwheer $\\\\nu$ is a constant representing the viscosity. In our task, we assume the periodic boundary condition.\\n\\nGiven the discretization of $u_0(x)$ of shape [batch_size, N] where $N$ is the number of spatial points, you need to implement a solver to predict u(\\\\cdot, t) for the specified subseqent time steps ($t=t_1, ..., t_T$). The solution is of shape [batch_size, T+1, N] (with the initial time frame and the subsequent steps). Note that although the required time steps are specified, you should consider using smaller time steps internally to obtain more stable simulation.\\n\\nIn particular, your code should be tailored to the case where $\\\\nu={burgers_nu}$, i.e., optimizing it particularly for this use case.", 5 | "metric": "nRMSE, convergence rate, and time: For executable solvers, we evaluate their performance by calling the solver, obtaining the predicted solution, and comparing it against reference solutions. We investigate three metrics. First, we compute the error with respect to the ground truth solution. 
We use the scale-independent normalized root mean squared error (nRMSE), defined as: nRMSE = \\frac{1}{S} \\sum_{s=1}^{S} \\frac{\\lVert u^{(s)}(x,t) - \\hat{u}^{(s)}(x,t)\\rVert_{2}}{\\lVert u^{(s)}(x,t)\\rVert_{2}} (2) where S denotes the number of examples in a PDE family. Second, we measure the quality of the solver using a convergence test, which assesses how the solution error decreases as the grid is refined. This test verifies that the numerical solution approaches the reference or exact solution at an expected rate, confirming the solver’s consistency and correctness. Mathematically, a solver is considered convergent if the difference between solutions at successive resolutions decreases with finer discretization. That is, for a grid spacing h, we test whether \\lVert u_{h} - u_{h/2}\\rVert_{2} → 0 as h → 0. This test makes sure that the numerical solution remains stable and consistent as resolution increases, even in the absence of an exact solution. Finally, we record code execution time as a measure of computational efficiency.", 6 | "interface": "deepevolve_interface.py" 7 | }, 8 | "initial_idea": { 9 | "title": "The initial idea", 10 | "content": "The solver integrates the one-dimensional viscous Burgers equation $u_t + \\tfrac{1}{2}(u^2)_x = \\nu\\,u_{xx}$ on a periodic domain with an explicit Euler time integrator written in PyTorch. For each batch of $B$ initial states sampled on an evenly spaced grid of $N$ points ($\\Delta x = 1/N$), the code first computes the convective flux $f=\\tfrac{1}{2}u^{2}$, evaluates its spatial derivative with a centered finite-difference stencil implemented through `torch.roll`, and obtains the diffusion term $u_{xx}$ with the standard three-point Laplacian. The time step for the inner loop is chosen adaptively but never exceeds $0.2\\,\\Delta x^{2}/\\nu$, satisfying the explicit stability criterion for the diffusive term. 
Integration proceeds on the GPU when available, updating the solution tensor in place until the simulation time matches each requested output time in the user-supplied array $\\{t_0,\\dots,t_T\\}$. At every such moment the current field is stored, producing an output tensor of shape $[B,\\,T+1,\\,N]$ that contains the initial conditions and all subsequent states in single precision before conversion back to NumPy.", 11 | "supplement": "https://github.com/LithiumDA/CodePDE/blob/main/solvers/burgers/nu_1.0/seeds/implementation_0.py" 12 | } 13 | } -------------------------------------------------------------------------------- /examples/burgers/initial_code/solver.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import torch 4 | 5 | def solve_burgers_step(u, dt, dx, nu): 6 | """ 7 | Computes one time step update using explicit Euler for the Burgers' equation. 8 | 9 | Args: 10 | u (torch.Tensor): Current solution of shape [batch_size, N]. 11 | dt (float): Time step size. 12 | dx (float): Spatial grid spacing. 13 | nu (float): Viscosity. 14 | 15 | Returns: 16 | u_new (torch.Tensor): Updated solution of shape [batch_size, N]. 17 | """ 18 | # Compute the flux f = 0.5*u^2 19 | flux = 0.5 * u * u 20 | 21 | # Compute the spatial derivative of the flux using central differences. 22 | # Using torch.roll to account for periodic boundary conditions. 23 | flux_x = (torch.roll(flux, shifts=-1, dims=1) - torch.roll(flux, shifts=1, dims=1)) / (2 * dx) 24 | 25 | # Compute the second derivative u_xx for the diffusion term. 26 | u_xx = (torch.roll(u, shifts=-1, dims=1) - 2*u + torch.roll(u, shifts=1, dims=1)) / (dx*dx) 27 | 28 | # Explicit Euler update: u_new = u - dt*(flux derivative) + dt*nu*(u_xx) 29 | u_new = u - dt * flux_x + dt * nu * u_xx 30 | 31 | return u_new 32 | 33 | 34 | def solver(u0_batch, t_coordinate, nu): 35 | """Solves the Burgers' equation for all times in t_coordinate. 
36 | 37 | Args: 38 | u0_batch (np.ndarray): Initial condition [batch_size, N], 39 | where batch_size is the number of different initial conditions, 40 | and N is the number of spatial grid points. 41 | t_coordinate (np.ndarray): Time coordinates of shape [T+1]. 42 | It begins with t_0=0 and follows the time steps t_1, ..., t_T. 43 | nu (float): Viscosity coefficient. 44 | 45 | Returns: 46 | solutions (np.ndarray): Shape [batch_size, T+1, N]. 47 | solutions[:, 0, :] contains the initial conditions (u0_batch), 48 | solutions[:, i, :] contains the solutions at time t_coordinate[i]. 49 | """ 50 | # Print initial debug info. 51 | # print("Starting solver for Burgers' equation") 52 | 53 | # Determine device: use GPU if available. 54 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 55 | # print("Using device:", device) 56 | 57 | # Convert the initial condition to a torch tensor with float type. 58 | # u0_batch: shape [batch_size, N] 59 | u = torch.tensor(u0_batch, dtype=torch.float32, device=device) 60 | 61 | batch_size, N = u.shape 62 | 63 | # The spatial grid spacing. 64 | dx = 1.0 / N 65 | 66 | # Set a reasonable internal time step dt_internal based on diffusive stability condition. 67 | # For explicit Euler, a sufficient condition is dt < C * dx^2/nu. Use C=0.2 for safety. 68 | dt_internal = 0.2 * dx * dx / nu 69 | # print("Internal time step dt_internal =", dt_internal) 70 | 71 | # Total number of output time steps provided in t_coordinate. 72 | T_plus_one = len(t_coordinate) 73 | 74 | # Preallocate a tensor (on device, then later convert) for the final solution. 75 | solution_tensor = torch.empty((batch_size, T_plus_one, N), dtype=torch.float32, device=device) 76 | 77 | # Set the initial condition 78 | solution_tensor[:, 0, :] = u 79 | 80 | # current simulation time starts from the initial time. 81 | current_time = t_coordinate[0] 82 | output_index = 1 # next output index to fill from time coordinate. 
83 | 84 | # Get the final simulation time we need to compute until. 85 | final_time = t_coordinate[-1] 86 | 87 | internal_step = 0 # counter for debugging 88 | 89 | # Continue integration until we reach the final output time. 90 | while current_time < final_time: 91 | # Take one internal time step. 92 | # Note: We make sure not to overshoot the next required output time. 93 | next_output_time = t_coordinate[output_index] if output_index < T_plus_one else final_time 94 | # Determine time step dt: if the next internal step would overshoot the next output time, 95 | # set dt to exactly reach it. 96 | dt = dt_internal 97 | if current_time + dt > next_output_time: 98 | dt = next_output_time - current_time 99 | 100 | # Update the solution by one time step. 101 | u = solve_burgers_step(u, dt, dx, nu) 102 | current_time += dt 103 | 104 | internal_step += 1 105 | # if internal_step % 1000 == 0: 106 | # print("Internal step:", internal_step, "Current time:", current_time.item()) 107 | 108 | # If we have reached or passed the next required time, store the result. 109 | # (Due to our dt adjustment, we should hit it exactly.) 110 | if abs(current_time - next_output_time) < 1e-10: 111 | solution_tensor[:, output_index, :] = u 112 | # print("Recorded solution at t =", current_time.item(), "for output index", output_index) 113 | output_index += 1 114 | 115 | # If we have recorded all outputs, we can exit. 116 | if output_index >= T_plus_one: 117 | break 118 | 119 | # Convert the solution to numpy before returning. 120 | solutions = solution_tensor.cpu().numpy() 121 | return solutions -------------------------------------------------------------------------------- /examples/burgers/initial_idea.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The idea involves solving the one-dimensional viscous Burgers equation using an explicit Euler time integrator implemented in PyTorch. 
The solver operates on a periodic domain and processes batches of initial states on a grid. Convective flux is calculated and its spatial derivative is evaluated using a finite-difference stencil. The diffusion term is obtained via a three-point Laplacian. The time-stepping is adaptive with a ceiling set by stability criteria, and computations are optimized for GPU execution. The method outputs a tensor recording the initial and subsequent states of the system.", 3 | "motivation": "To efficiently solve the Burgers equation using advanced computing techniques, leveraging PyTorch and GPU capabilities for optimized performance.", 4 | "implementation_notes": "Implemented in PyTorch, with GPU optimization. The code computes on a batch of states, using finite-difference methods for derivatives, and adaptive time-stepping to enhance stability.", 5 | "pseudocode": "1. Initialize batch of states on grid\\n2. FOR each time step until final time:\\n a. Compute convective flux \\n b. Calculate spatial derivative using `torch.roll`\\n c. Compute diffusion term with three-point Laplacian\\n d. Update solution using explicit Euler step \\n e. Adjust time step according to stability criteria\\n3. Record states at specified times", 6 | "originality": { 7 | "score": 3, 8 | "positive": "Combines traditional numerical methods with modern machine learning frameworks for efficient computation.", 9 | "negative": "Uses well-known methods in the context of the Burgers equation, with advancements mainly in implementation rather than new theory." 10 | }, 11 | "future_potential": { 12 | "score": 4, 13 | "positive": "Potential for application to other PDEs and in contexts requiring efficient computation on large datasets, especially with GPU optimization.", 14 | "negative": "Limited to problems suitable for explicit methods and specific boundary conditions." 
15 | }, 16 | "code_difficulty": { 17 | "score": 4, 18 | "positive": "The implementation requires knowledge of differential equations, numerical stability, and PyTorch.", 19 | "negative": "The adaptive time-stepping and GPU optimization add complexity to the code." 20 | } 21 | } -------------------------------------------------------------------------------- /examples/burgers/initial_metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "nu_1.0_combined_score": 0.6638293548348706, 3 | "nu_1.0_nrmse": 0.001506411237642169, 4 | "nu_1.0_convergence_rate": -3.015332898611762, 5 | "nu_1.0_runtime_minutes": 12.771473093827565, 6 | "combined_score": 0.6638293548348706 7 | } -------------------------------------------------------------------------------- /examples/circle_packing/README.md: -------------------------------------------------------------------------------- 1 | # Circle Packing 2 | 3 | ## Problem Description 4 | 5 | Given a positive integer *n*, the goal is to pack *n* disjoint circles inside a unit square in such a way as to maximize the sum of their radii. This problem focuses on discovering a new algorithm applicable to cases where *n* ranges from 26 to 32. 
6 | 7 | **Metric:** Sum of radii 8 | **Interface:** deepevolve_interface.py 9 | 10 | ### Mathematical Formulation 11 | 12 | The objective is to maximize: 13 | 14 | $$ 15 | \text{Objective} = \sum_{i=1}^{n} r_i 16 | $$ 17 | 18 | subject to the following constraints: 19 | 20 | - **Non-overlapping circles:** 21 | For each pair of circles, the distance between their centers must be at least as large as the sum of their radii: 22 | 23 | ```math 24 | (x_i - x_j)^2 + (y_i - y_j)^2 \geq (r_i + r_j)^2 \quad \forall\, i \neq j 25 | ``` 26 | 27 | - **Boundary constraints:** 28 | Each circle must lie entirely within the unit square: 29 | 30 | ```math 31 | r_i \leq x_i \leq 1 - r_i \quad \text{and} \quad r_i \leq y_i \leq 1 - r_i \quad \forall\, i 32 | ``` 33 | 34 | Here, $x_i$ and $y_i$ represent the center coordinates of the $i$-th circle, and $r_i$ its radius. 35 | 36 | ## Initial Idea 37 | > The initial idea is adapted from the output from [OpenEvolve](https://github.com/codelion/openevolve/tree/main/examples/circle_packing) 38 | 39 | The proposed method leverages `scipy.optimize.minimize` with the Sequential Least Squares Programming (SLSQP) algorithm. The problem is modeled as a constrained optimization task where both the center coordinates \((x_i, y_i)\) and the radius \(r_i\) of each circle are treated as decision variables. 40 | 41 | Inequality constraints are formulated to: 42 | - Prevent any pair of circles from overlapping. 43 | - Ensure that all circles remain within the boundaries of the unit square. 44 | 45 | Since SLSQP enforces constraints only within a numerical tolerance, it is important to note that the solution may occasionally permit slight violations (e.g., minor overlapping or circles slightly outside the unit square). 46 | 47 | ## Dependencies 48 | 49 | - numpy 50 | - scipy 51 | - shapely 52 | 53 | *Note:* No computational geometry libraries other than the ones listed above are to be used. 
54 | 55 | ## Supplementary Material 56 | 57 | For further details and insights on circle packing, please refer to the following resource: 58 | 59 | [Circle Packing Supplementary Material](https://erich-friedman.github.io/packing/cirRsqu/) -------------------------------------------------------------------------------- /examples/circle_packing/ckpt/checkpoint_50/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature_map": { 3 | "8-0-4": "e0e8bb8f-7f5b-4ff0-8877-607d16e7e904", 4 | "0-6-1": "6483234a-a079-4c7d-aafa-92ff989573cb", 5 | "1-4-8": "fc9390d8-5746-45f8-89bf-cc820674ff75", 6 | "6-0-4": "06976df4-d5ce-469a-bacf-ce107c6a5b00", 7 | "8-0-5": "e6ff1491-588d-45f2-9f29-7b407425b3b0", 8 | "3-6-4": "2bb60c45-489b-4e92-ac96-001e03788020", 9 | "3-7-6": "2f3f5db2-7b0d-489e-9dc2-301b1f850d71", 10 | "5-9-7": "6d84c330-e329-4fe6-ae6f-70a514db7a60", 11 | "9-0-5": "461b048f-84f2-4027-b1c8-99ec5cfcfdb8", 12 | "1-9-7": "c410687e-6035-406c-9588-b0aa7b838945", 13 | "8-1-5": "9df980dc-2c8f-4ece-871e-90486b4a7245", 14 | "7-1-5": "f52bb9ba-cd8f-44e8-8978-d967cf55cfeb", 15 | "1-9-0": "58af2a81-381b-437a-9e13-e0a8fc29e4ed", 16 | "1-8-0": "3577ad71-c1a2-482d-88d3-8ce52ab8e670", 17 | "4-1-7": "c42f30e9-7ab7-4f5a-b78a-87db894e6971", 18 | "4-2-7": "f9fff391-dbbc-4a0b-a042-4ae56c977c72", 19 | "4-5-9": "453b9d57-b5f6-421c-84a1-93c58154165b", 20 | "4-7-9": "09507cfc-3d17-4547-8664-dbca302803c2", 21 | "4-2-5": "e7af8df5-7c88-4dd8-b299-8ef069b24062", 22 | "7-3-7": "80a1d209-186a-4479-bb99-dedc3c1df2cc", 23 | "3-4-8": "e304e0fd-7bf3-4cbb-8fed-5f960f2aca78" 24 | }, 25 | "islands": [ 26 | [ 27 | "6d84c330-e329-4fe6-ae6f-70a514db7a60", 28 | "2f3f5db2-7b0d-489e-9dc2-301b1f850d71" 29 | ], 30 | [ 31 | "9df980dc-2c8f-4ece-871e-90486b4a7245", 32 | "c410687e-6035-406c-9588-b0aa7b838945", 33 | "461b048f-84f2-4027-b1c8-99ec5cfcfdb8" 34 | ], 35 | [ 36 | "f52bb9ba-cd8f-44e8-8978-d967cf55cfeb", 37 | "3577ad71-c1a2-482d-88d3-8ce52ab8e670", 38 | 
"e0e8bb8f-7f5b-4ff0-8877-607d16e7e904", 39 | "6483234a-a079-4c7d-aafa-92ff989573cb", 40 | "58af2a81-381b-437a-9e13-e0a8fc29e4ed", 41 | "3414c339-4428-47e4-97a6-4173d5c796b6" 42 | ], 43 | [ 44 | "09507cfc-3d17-4547-8664-dbca302803c2", 45 | "e7af8df5-7c88-4dd8-b299-8ef069b24062", 46 | "fc9390d8-5746-45f8-89bf-cc820674ff75", 47 | "c42f30e9-7ab7-4f5a-b78a-87db894e6971", 48 | "453b9d57-b5f6-421c-84a1-93c58154165b", 49 | "f9fff391-dbbc-4a0b-a042-4ae56c977c72" 50 | ], 51 | [ 52 | "094742ee-ec68-45f4-97e9-140b86fdc657", 53 | "e6ff1491-588d-45f2-9f29-7b407425b3b0", 54 | "80a1d209-186a-4479-bb99-dedc3c1df2cc", 55 | "3c9ac271-200f-49d9-9bb9-55eb4884ce98", 56 | "06976df4-d5ce-469a-bacf-ce107c6a5b00", 57 | "2bb60c45-489b-4e92-ac96-001e03788020", 58 | "7aac803d-be83-4492-96f4-ee3af60e7cf9", 59 | "e304e0fd-7bf3-4cbb-8fed-5f960f2aca78" 60 | ] 61 | ], 62 | "archive": [ 63 | "094742ee-ec68-45f4-97e9-140b86fdc657", 64 | "f52bb9ba-cd8f-44e8-8978-d967cf55cfeb", 65 | "80a1d209-186a-4479-bb99-dedc3c1df2cc", 66 | "9df980dc-2c8f-4ece-871e-90486b4a7245", 67 | "3414c339-4428-47e4-97a6-4173d5c796b6", 68 | "e6ff1491-588d-45f2-9f29-7b407425b3b0", 69 | "e0e8bb8f-7f5b-4ff0-8877-607d16e7e904", 70 | "461b048f-84f2-4027-b1c8-99ec5cfcfdb8", 71 | "3c9ac271-200f-49d9-9bb9-55eb4884ce98", 72 | "7aac803d-be83-4492-96f4-ee3af60e7cf9" 73 | ], 74 | "best_program_id": "461b048f-84f2-4027-b1c8-99ec5cfcfdb8", 75 | "last_iteration": 50, 76 | "current_island": 4, 77 | "island_generations": [ 78 | 10, 79 | 10, 80 | 10, 81 | 10, 82 | 10 83 | ], 84 | "last_migration_generation": 0 85 | } -------------------------------------------------------------------------------- /examples/circle_packing/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "problem": { 3 | "name": "circle_packing", 4 | "description": "Given a positive integer n, the problem is to pack n disjoint circles inside a unit square so as to maximize the sum of their radii. 
"""Constructor-based circle packing for n=26 circles"""

import numpy as np
from time import time
import traceback
from scipy.optimize import minimize


def _repair_packing(centers, radii):
    """Uniformly shrink radii until the packing is strictly feasible.

    SLSQP enforces inequality constraints only to within a numerical
    tolerance, so its raw solution may leave circles slightly overlapping or
    marginally outside the unit square (exactly the failure mode recorded in
    the initial metrics).  Scaling every radius by the worst violation ratio
    restores feasibility while reducing the objective only marginally.

    Args:
        centers: np.array of shape (n, 2) with circle centers in [0, 1]^2.
        radii: np.array of shape (n,) with nonnegative radii.

    Returns:
        np.array of shape (n,) with repaired (possibly smaller) radii.
    """
    n = len(radii)
    scale = 1.0
    # Strict-inequality margin so downstream validation with "<" passes.
    margin = 1.0 - 1e-9

    # Boundary: a circle at (x, y) may have radius at most min(x, 1-x, y, 1-y).
    for i in range(n):
        r = radii[i]
        if r <= 0.0:
            continue
        x, y = centers[i]
        wall = min(x, 1.0 - x, y, 1.0 - y)
        scale = min(scale, max(wall, 0.0) / r)

    # Overlap: scaling both radii by s turns r_i + r_j into s * (r_i + r_j),
    # so s = dist / (r_i + r_j) removes that pair's violation.
    for i in range(n):
        for j in range(i + 1, n):
            rr = radii[i] + radii[j]
            if rr <= 0.0:
                continue
            dist = np.hypot(
                centers[i][0] - centers[j][0], centers[i][1] - centers[j][1]
            )
            scale = min(scale, dist / rr)

    return np.maximum(radii * scale * margin, 0.0)


def construct_packing(n=26):
    """
    Compute a circle packing for n circles in the unit square via SLSQP.

    A single seeded SLSQP run optimizes centers and radii jointly; the
    result is then post-processed with `_repair_packing` so the returned
    packing is always feasible, even when SLSQP reports failure or satisfies
    the constraints only within tolerance.

    Returns:
        centers: array of shape (n, 2)
        radii: array of shape (n,)
        sum_radii: float
    """
    # Decision vector layout:
    # [x_0, y_0, ..., x_{n-1}, y_{n-1}, r_0, ..., r_{n-1}]
    bounds = [(0.0, 1.0)] * (2 * n) + [(0.0, 0.5)] * n
    constraints = []

    # Non-overlap constraints: ||c_i - c_j|| - (r_i + r_j) >= 0
    for i in range(n):
        for j in range(i + 1, n):

            def overlap(x, i=i, j=j):
                xi, yi = x[2 * i], x[2 * i + 1]
                xj, yj = x[2 * j], x[2 * j + 1]
                ri = x[2 * n + i]
                rj = x[2 * n + j]
                dist = np.hypot(xi - xj, yi - yj)
                return dist - (ri + rj)

            constraints.append({"type": "ineq", "fun": overlap})

    # Boundary constraints keep each circle inside the unit square.
    for i in range(n):

        def left(x, i=i):
            return x[2 * i] - x[2 * n + i]

        def right(x, i=i):
            return 1 - (x[2 * i] + x[2 * n + i])

        def bottom(x, i=i):
            return x[2 * i + 1] - x[2 * n + i]

        def top(x, i=i):
            return 1 - (x[2 * i + 1] + x[2 * n + i])

        constraints.extend(
            [
                {"type": "ineq", "fun": left},
                {"type": "ineq", "fun": right},
                {"type": "ineq", "fun": bottom},
                {"type": "ineq", "fun": top},
            ]
        )

    # Reproducible random initial centers, small uniform initial radii.
    rng = np.random.default_rng(42)
    centers0 = rng.uniform(0.1, 0.9, size=(n, 2))
    radii0 = np.full(n, 0.05)
    x0 = np.hstack((centers0.flatten(), radii0))

    def objective(x):
        # Maximize the sum of radii == minimize its negation.
        return -np.sum(x[2 * n :])

    result = minimize(
        objective,
        x0,
        method="SLSQP",
        bounds=bounds,
        constraints=constraints,
        options={"maxiter": 1000, "ftol": 1e-6},
    )

    # Even an "unsuccessful" SLSQP run usually ends at a near-feasible point,
    # so repair and use it rather than discarding the attempt (the previous
    # behavior of returning empty lists broke downstream shape-based code).
    x = result.x if result.x is not None else x0
    centers = x[: 2 * n].reshape(n, 2)
    radii = _repair_packing(centers, x[2 * n :])
    return centers, radii, float(np.sum(radii))


def validate_packing(centers, radii):
    """
    Validate that circles don't overlap and are inside the unit square

    Args:
        centers: np.array of shape (n, 2) with (x, y) coordinates
        radii: np.array of shape (n) with radius of each circle

    Returns:
        (True, "success") if valid, otherwise (False, reason_message)
    """
    n = centers.shape[0]

    # Check if circles are inside the unit square
    for i in range(n):
        x, y = centers[i]
        r = radii[i]
        if x - r < 0 or x + r > 1 or y - r < 0 or y + r > 1:
            message = (
                f"Circle {i} at ({x}, {y}) with radius {r} is outside the unit square"
            )
            return False, message

    # Check for overlaps (strict: touching circles are allowed)
    for i in range(n):
        for j in range(i + 1, n):
            dist = np.sqrt(np.sum((centers[i] - centers[j]) ** 2))
            if dist < radii[i] + radii[j]:
                message = f"Circles {i} and {j} overlap: dist={dist}, r1+r2={radii[i]+radii[j]}"
                return False, message

    return True, "success"


def visualize(centers, radii):
    """
    Visualize the circle packing

    Args:
        centers: np.array of shape (n, 2) with (x, y) coordinates
        radii: np.array of shape (n) with radius of each circle
    """
    import matplotlib.pyplot as plt
    from matplotlib.patches import Circle

    fig, ax = plt.subplots(figsize=(8, 8))

    # Draw unit square
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_aspect("equal")
    ax.grid(True)

    # Draw circles
    for i, (center, radius) in enumerate(zip(centers, radii)):
        circle = Circle(center, radius, alpha=0.5)
        ax.add_patch(circle)
        ax.text(center[0], center[1], str(i), ha="center", va="center")

    plt.title(f"Circle Packing (n={len(centers)}, sum={sum(radii):.6f})")
    # Save before show(): show() hands the figure to the GUI loop and may
    # leave a blank canvas for a subsequent savefig().
    plt.savefig('circle_packing.png')
    plt.show()


if __name__ == "__main__":
    centers, radii, sum_radii = construct_packing(n=28)
    print('centers', centers)
    print('radii', radii)
    print('sum_radii', sum_radii)

    valid_packing, message_packing = validate_packing(centers, radii)
    print('valid_packing', valid_packing)
    print('message_packing', message_packing)

    # visualize(centers, radii)
Use scipy.optimize.minimize with SLSQP.\n a. Objective: Minimize overlap and out-of-bound placements.\n b. Constraints: Non-overlapping and boundary conditions.\n4. Execute optimization and adjust if solutions fall outside tolerance.", 6 | "originality": { 7 | "score": 3, 8 | "positive": "Utilizes a well-known optimization method in a specific application scenario.", 9 | "negative": "Circle-packing is a classic problem with numerous existing approaches." 10 | }, 11 | "future_potential": { 12 | "score": 4, 13 | "positive": "Could lead to more efficient packing solutions in practical applications.", 14 | "negative": "Limited improvement scope unless coupled with other advanced algorithms." 15 | }, 16 | "code_difficulty": { 17 | "score": 3, 18 | "positive": "Relies on well-documented libraries and standard optimization techniques.", 19 | "negative": "Requires understanding of numerical optimization and constraint handling." 20 | } 21 | } -------------------------------------------------------------------------------- /examples/circle_packing/initial_metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "combined_score": 0.38910315149024705, 3 | "runtime_seconds": 87.78, 4 | "sum_radii_for_n_26": 0.0, 5 | "ratio_to_sota_for_n_26": 0.0, 6 | "validity_for_n_26": 0.0, 7 | "message_for_n_26": "Circle 1 at (0.8946439270602085, 0.7208676227176816) with radius 0.10535607294020073 is outside the unit square", 8 | "sum_radii_for_n_27": 0.0, 9 | "ratio_to_sota_for_n_27": 0.0, 10 | "validity_for_n_27": 0.0, 11 | "message_for_n_27": "Circle 1 at (0.8946439270602085, 0.7208676227176816) with radius 0.10535607294020073 is outside the unit square", 12 | "sum_radii_for_n_28": 0.0, 13 | "ratio_to_sota_for_n_28": 0.0, 14 | "validity_for_n_28": 0.0, 15 | "message_for_n_28": "Circle 1 at (0.8946439270602085, 0.7208676227176816) with radius 0.10535607294020073 is outside the unit square", 16 | "sum_radii_for_n_29": 2.7237220604317294, 17 | 
"ratio_to_sota_for_n_29": 0.9762444661045625, 18 | "validity_for_n_29": 1.0, 19 | "sum_radii_for_n_30": 0.0, 20 | "ratio_to_sota_for_n_30": 0.0, 21 | "validity_for_n_30": 0.0, 22 | "message_for_n_30": "Circle 1 at (0.8946439270602085, 0.7208676227176816) with radius 0.10535607294020073 is outside the unit square", 23 | "sum_radii_for_n_31": 0.0, 24 | "ratio_to_sota_for_n_31": 0.0, 25 | "validity_for_n_31": 0.0, 26 | "message_for_n_31": "Circle 1 at (0.8946439270602085, 0.7208676227176816) with radius 0.10535607294020073 is outside the unit square", 27 | "sum_radii_for_n_32": 0.0, 28 | "ratio_to_sota_for_n_32": 0.0, 29 | "validity_for_n_32": 0.0, 30 | "message_for_n_32": "Circle 1 at (0.8946439270602085, 0.7208676227176816) with radius 0.10535607294020073 is outside the unit square", 31 | "overall_validity": 0.0 32 | } -------------------------------------------------------------------------------- /examples/molecular_translation/README.md: -------------------------------------------------------------------------------- 1 | # Molecular Translation 2 | 3 | In this competition, you'll interpret old chemical images. With access to a large set of synthetic image data generated by Bristol-Myers Squibb, your task is to convert these images back to the underlying chemical structure annotated as InChI text. The submissions are evaluated using the mean Levenshtein distance between your output InChI strings and the ground truth, with the final score computed as: 4 | 5 | ```math 6 | 1 - \text{mean Levenshtein distance} 7 | ``` 8 | 9 | A higher score indicates a better performance. 10 | 11 | --- 12 | 13 | ## Problem Overview 14 | 15 | The challenge is centered on converting structural chemical images into their corresponding InChI textual representations. Key details include: 16 | 17 | - **Data Files:** 18 | - Files: `train.csv`, `valid.csv`, and `test.csv` 19 | - Contents: Each CSV contains three columns: `image_id`, `InChI`, and `SMILES`. 
20 | 21 | - **Image Repository:** 22 | - All images are stored in the `images` folder, and filenames match the `image_id` values present in the CSV files. 23 | 24 | - **Interface File:** 25 | - The entry point for your solution is defined in `deepevolve_interface.py`. 26 | 27 | --- 28 | 29 | ## Evaluation Metric 30 | 31 | The performance of your solution is measured by: 32 | 33 | $$ 34 | \text{Score} = 1 - \text{mean Levenshtein distance} 35 | $$ 36 | 37 | This metric rewards submissions that minimize the average edit distance between the predicted and actual InChI strings. 38 | 39 | --- 40 | 41 | ## Initial Method Proposal: ResNet+GRU 42 | 43 | ### Overview 44 | 45 | The proposed method, titled **ResNet+GRU**, formulates molecular translation as an image-to-sequence problem. The approach involves leveraging a deep convolutional network and a recurrent decoder to accurately convert chemical imagery into InChI strings. 46 | 47 | ### Method Details 48 | 49 | 1. **Visual Feature Extraction:** 50 | - A deep convolutional backbone (e.g., ResNet) processes each image to extract a fixed-length feature vector. 51 | 52 | 2. **Recurrent Decoding:** 53 | - This feature vector initializes a GRU (Gated Recurrent Unit) decoder. 54 | - The decoder generates the InChI string one character at a time. 55 | 56 | 3. **Character-Level Vocabulary:** 57 | - The model constructs a character-level vocabulary that includes special tokens for start, end, and padding. 58 | 59 | 4. **Training Objective:** 60 | - The network is trained end-to-end using cross-entropy loss, aligning the predicted sequence with the true InChI token sequence. 
61 | - The loss function is defined as: 62 | 63 | $$ 64 | \mathcal{L} = - \sum_{t=1}^{T} \log P(y_t | y_{ 10: 52 | warning_messages = warning_messages[:10] 53 | metrics["program_warnings"] = warning_messages 54 | 55 | return True, metrics 56 | 57 | except Exception as e: 58 | error_traceback = traceback.format_exc() 59 | error_info = ( 60 | f"Error type: {type(e).__name__}\n" 61 | f"Error message: {e}\n" 62 | f"Traceback:\n{error_traceback}" 63 | ) 64 | return False, error_info 65 | 66 | 67 | if __name__ == "__main__": 68 | status, results = deepevolve_interface() 69 | print(f"Status: {status}") 70 | print(f"Results: {results}") -------------------------------------------------------------------------------- /examples/molecular_translation/initial_code/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | albumentations 3 | timm 4 | pandas 5 | numpy 6 | tqdm -------------------------------------------------------------------------------- /examples/molecular_translation/initial_idea.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The \"ResNet+GRU\" method converts molecular images into InChI (International Chemical Identifier) strings, treating this as an image-to-sequence task. A deep convolutional network (like ResNet) extracts features from the images, which are then used to initialize a recurrent network (GRU) to sequentially generate the InChI string. The method uses a character-level vocabulary with special markers (for start, end, and padding), and the training optimizes the cross-entropy loss between the predicted sequences and the ground truth. Techniques like decoder dropout, gradient clipping, and a cosine learning-rate schedule ensure stable training. The model is evaluated based on the edit distance metric during validation. At test phase, a greedy decoding strategy is employed to produce the final chemical identifiers. 
This method effectively integrates visual processing and sequential modeling to directly derive chemical identifiers from images.", 3 | "motivation": "The main motivation is to facilitate the automatic generation of InChI strings from molecular structure images, enhancing the accuracy and efficiency of chemical identification and documentation processes.", 4 | "implementation_notes": "The implementation employs techniques such as decoder dropout, gradient clipping, and cosine learning-rate scheduling to ensure stability and efficacy during model training. Evaluation is centered on minimizing edit distance between the predicted and true sequences.", 5 | "pseudocode": "1. Extract features using ResNet from the input image. \\n2. Initialize the GRU decoder with the ResNet features. \\n3. At each timestep, predict the next character of the InChI using GRU. \\n4. Use a character-level vocabulary with start, end, and padding markers. \\n5. Train the model end-to-end using cross-entropy loss. \\n6. Apply decoder dropout and gradient clipping. \\n7. Adjust learning rate using cosine schedule. \\n8. During inference, perform greedy decoding to generate the complete InChI string.", 6 | "originality": { 7 | "score": 4, 8 | "positive": "Integrates convolutional and recurrent neural networks to address image-to-sequence conversion specifically for chemical identifiers.", 9 | "negative": "Combines established techniques from computer vision and sequence modeling, thus originality is in application rather than fundamental design." 10 | }, 11 | "future_potential": { 12 | "score": 4, 13 | "positive": "Could enhance applications in chemistry and pharmacology by providing tools for automated chemical identification from images.", 14 | "negative": "Potential improvements in model accuracy and efficiency are needed to address complex molecular structures." 
15 | }, 16 | "code_difficulty": { 17 | "score": 3, 18 | "positive": "The use of ResNet and GRU is manageable with modern deep learning frameworks.", 19 | "negative": "Requires understanding of both convolutional and sequential neural networks, as well as the specific domain (chemical informatics)." 20 | } 21 | } -------------------------------------------------------------------------------- /examples/molecular_translation/initial_metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "combined_score": 0.18847274998679453, 3 | "runtime_minutes": 21.42, 4 | "program_warnings": [ 5 | "This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 1, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary." 6 | ] 7 | } -------------------------------------------------------------------------------- /examples/molecule/README.md: -------------------------------------------------------------------------------- 1 | # Molecule 2 | 3 | ## Overview 4 | This repository addresses the "molecule" problem, focusing on general molecular property prediction using the Side Effect Resource (SIDER) as a proxy dataset for algorithm development. The primary goal is to design algorithms that exhibit strong generalization across various molecular property prediction tasks. The dataset is scaffold-split to assess the algorithm's ability to generalize to novel chemical structures. 5 | 6 | ## Problem Description 7 | - **Task**: General molecular property prediction. 8 | - **Dataset**: Side Effect Resource (SIDER), with a scaffold split to evaluate generalization. 9 | - **Evaluation Metric**: Area Under the Curve (AUC), denoted as $auc$. 
10 | - **Interface Implementation**: The main interface is implemented in the [`deepevolve_interface.py`](./deepevolve_interface.py) file. 11 | 12 | ## Initial Idea 13 | ### Graph Rationalization with Environment-based Augmentations 14 | - **Reference Paper**: [Graph Rationalization with Environment-based Augmentations](https://arxiv.org/abs/2206.02886) 15 | - **Supplementary Material**: Available at the [GREA GitHub repository](https://github.com/liugangcode/GREA) -------------------------------------------------------------------------------- /examples/molecule/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "problem": { 3 | "name": "molecule", 4 | "description": "This task focuses on general molecular property prediction, with Side Effect Resource (SIDER) used as a proxy dataset for algorithm development. The primary goal is to design algorithms that generalize across molecular property prediction tasks. The dataset is scaffold-split to assess generalization to novel chemical structures", 5 | "metric": "auc", 6 | "interface": "deepevolve_interface.py" 7 | }, 8 | "initial_idea": { 9 | "title": "Graph Rationalization with Environment-based Augmentations", 10 | "content": "https://arxiv.org/abs/2206.02886", 11 | "supplement": "https://github.com/liugangcode/GREA" 12 | } 13 | } -------------------------------------------------------------------------------- /examples/molecule/initial_code/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Gang Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the 
Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/molecule/initial_code/README.md: -------------------------------------------------------------------------------- 1 | Graph Rationalization with Environment-based Augmentations 2 | ==== 3 | 4 | This is the source code for the KDD'22 paper: 5 | 6 | [Graph Rationalization with Environment-based Augmentations](https://arxiv.org/pdf/2206.02886.pdf) 7 | 8 | by [Gang Liu](https://liugangcode.github.io/) ([gliu7@nd.edu](mailto:gliu7@nd.edu)), [Tong Zhao](https://tzhao.io/), Jiaxin Xu, [Tengfei Luo](https://monsterlab.nd.edu/), [Meng Jiang](http://www.meng-jiang.com/) 9 | 10 | ## Requirements 11 | 12 | This code package was developed and tested with Python 3.9.9 and PyTorch 1.10.1. All dependencies specified in the ```requirements.txt``` file. The packages can be installed by 13 | ``` 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | ## Usage 18 | 19 | Following are the commands to run experiments on polymer or molecule datasets using default settings. 
20 | 21 | ``` 22 | # OGBG-HIV for example 23 | python main_pyg.py --dataset ogbg-molhiv --by_default 24 | 25 | # Polymer Oxygen Permeability 26 | python main_pyg.py --dataset plym-o2_prop --by_default 27 | ``` 28 | 29 | ## Datasets 30 | 31 | We provide the oxygen permeability dataset (.csv) for polymer graph regression. It can be found in the ``` data/'name'/raw ``` folder. 32 | 33 | > Update March 26, 2025: We deleted the polymer datasets for GlassTemp, MeltingTemp, and PolyDensity as requested by the NIMS Materials Database, MatNavi. 34 | 35 | Binary classification tasks for the OGBG dataset (i.e., HIV, ToxCast, Tox21, BBBP, BACE, ClinTox and SIDER) can be directly implemented using commands such as ``` --dataset ogbg-molhiv ``` following the [instructions](https://github.com/snap-stanford/ogb/tree/master/examples/graphproppred/mol) of the official OGBG dataset implementations. 36 | 37 | ## Reference 38 | 39 | If you find this repository useful in your research, please cite our paper: 40 | 41 | ```bibtex 42 | @inproceedings{liu2022graph, 43 | title={Graph Rationalization with Environment-based Augmentations}, 44 | author={Liu, Gang and Zhao, Tong and Xu, Jiaxin and Luo, Tengfei and Jiang, Meng}, 45 | booktitle = {Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining}, 46 | publisher = {Association for Computing Machinery}, 47 | pages = {1069–1078}, 48 | numpages = {10}, 49 | year={2022} 50 | } 51 | ``` 52 | -------------------------------------------------------------------------------- /examples/molecule/initial_code/deepevolve_interface.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | from main_pyg import config_and_run 3 | from utils import get_args 4 | from time import time 5 | import warnings 6 | 7 | def deepevolve_interface(): 8 | args = get_args() 9 | args.dataset = "ogbg-molsider" 10 | args.by_default = True 11 | args.trials = 3 12 | 13 | try: 14 | with 
warnings.catch_warnings(record=True) as caught: 15 | warnings.simplefilter("always") 16 | start_time = time() 17 | results = config_and_run(args) 18 | runtime = time() - start_time 19 | 20 | warning_messages = [str(w.message) for w in caught] 21 | 22 | runtime = round(runtime / 60, 2) 23 | auc_mean = results["test_auc_mean"] 24 | auc_std = results["test_auc_std"] 25 | initial_combined_score = 0.7914562889678236 26 | current_combined_score = auc_mean * 0.5 + (1 - auc_std) * 0.5 27 | impr_pct = (current_combined_score - initial_combined_score) / initial_combined_score * 100 28 | metrics = { 29 | "combined_score": current_combined_score, 30 | "improvement_percentage_to_initial": impr_pct, 31 | "runtime_minutes": runtime, 32 | **results 33 | } 34 | if warning_messages: 35 | warning_messages = list(set(warning_messages)) 36 | if len(warning_messages) > 10: 37 | warning_messages = warning_messages[:10] 38 | metrics["program_warnings"] = warning_messages 39 | 40 | return True, metrics 41 | except Exception as e: 42 | # Capture full traceback information 43 | error_traceback = traceback.format_exc() 44 | error_info = f""" 45 | Error type: {type(e).__name__} 46 | Error message: {str(e)} 47 | Traceback: {error_traceback} 48 | """ 49 | return False, error_info 50 | 51 | if __name__ == "__main__": 52 | status, results = deepevolve_interface() 53 | print(f"Status: {status}") 54 | print(f"Results: {results}") 55 | -------------------------------------------------------------------------------- /examples/molecule/initial_code/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch_geometric.nn.inits import reset 4 | 5 | from conv import GNN_node, GNN_node_Virtualnode 6 | from utils import scatter_add 7 | 8 | nn_act = torch.nn.ReLU() 9 | F_act = F.relu 10 | 11 | class GraphEnvAug(torch.nn.Module): 12 | def __init__( 13 | self, 14 | num_tasks, 15 | num_layer=5, 16 | emb_dim=300, 
17 | gnn_type="gin", 18 | drop_ratio=0.5, 19 | gamma=0.4, 20 | use_linear_predictor=False, 21 | ): 22 | """ 23 | num_tasks (int): number of labels to be predicted 24 | """ 25 | 26 | super(GraphEnvAug, self).__init__() 27 | 28 | self.num_layer = num_layer 29 | self.drop_ratio = drop_ratio 30 | self.emb_dim = emb_dim 31 | self.num_tasks = num_tasks 32 | self.gamma = gamma 33 | 34 | if self.num_layer < 2: 35 | raise ValueError("Number of GNN layers must be greater than 1.") 36 | 37 | ### GNN to generate node embeddings 38 | gnn_name = gnn_type.split("-")[0] 39 | emb_dim_rat = emb_dim 40 | if "virtual" in gnn_type: 41 | rationale_gnn_node = GNN_node_Virtualnode( 42 | 2, 43 | emb_dim_rat, 44 | JK="last", 45 | drop_ratio=drop_ratio, 46 | residual=True, 47 | gnn_name=gnn_name, 48 | ) 49 | self.graph_encoder = GNN_node_Virtualnode( 50 | num_layer, 51 | emb_dim, 52 | JK="last", 53 | drop_ratio=drop_ratio, 54 | residual=True, 55 | gnn_name=gnn_name, 56 | ) 57 | else: 58 | rationale_gnn_node = GNN_node( 59 | 2, 60 | emb_dim_rat, 61 | JK="last", 62 | drop_ratio=drop_ratio, 63 | residual=True, 64 | gnn_name=gnn_name, 65 | ) 66 | self.graph_encoder = GNN_node( 67 | num_layer, 68 | emb_dim, 69 | JK="last", 70 | drop_ratio=drop_ratio, 71 | residual=True, 72 | gnn_name=gnn_name, 73 | ) 74 | self.separator = Separator( 75 | rationale_gnn_node=rationale_gnn_node, 76 | gate_nn=torch.nn.Sequential( 77 | torch.nn.Linear(emb_dim_rat, 2 * emb_dim_rat), 78 | torch.nn.BatchNorm1d(2 * emb_dim_rat), 79 | nn_act, 80 | torch.nn.Dropout(), 81 | torch.nn.Linear(2 * emb_dim_rat, 1), 82 | ), 83 | nn=None, 84 | ) 85 | rep_dim = emb_dim 86 | if use_linear_predictor: 87 | self.predictor = torch.nn.Linear(rep_dim, self.num_tasks) 88 | else: 89 | self.predictor = torch.nn.Sequential( 90 | torch.nn.Linear(rep_dim, 2 * emb_dim), 91 | torch.nn.BatchNorm1d(2 * emb_dim), 92 | nn_act, 93 | torch.nn.Dropout(), 94 | torch.nn.Linear(2 * emb_dim, self.num_tasks), 95 | ) 96 | 97 | def forward(self, batched_data): 
98 | h_node = self.graph_encoder(batched_data) 99 | h_r, h_env, r_node_num, env_node_num = self.separator(batched_data, h_node) 100 | h_rep = (h_r.unsqueeze(1) + h_env.unsqueeze(0)).view(-1, self.emb_dim) 101 | pred_rem = self.predictor(h_r) 102 | pred_rep = self.predictor(h_rep) 103 | loss_reg = torch.abs( 104 | r_node_num / (r_node_num + env_node_num) 105 | - self.gamma * torch.ones_like(r_node_num) 106 | ).mean() 107 | output = {"pred_rep": pred_rep, "pred_rem": pred_rem, "loss_reg": loss_reg} 108 | return output 109 | 110 | def eval_forward(self, batched_data): 111 | h_node = self.graph_encoder(batched_data) 112 | h_r, _, _, _ = self.separator(batched_data, h_node) 113 | pred_rem = self.predictor(h_r) 114 | return pred_rem 115 | 116 | 117 | class Separator(torch.nn.Module): 118 | def __init__(self, rationale_gnn_node, gate_nn, nn=None): 119 | super(Separator, self).__init__() 120 | self.rationale_gnn_node = rationale_gnn_node 121 | self.gate_nn = gate_nn 122 | self.nn = nn 123 | self.reset_parameters() 124 | 125 | def reset_parameters(self): 126 | reset(self.rationale_gnn_node) 127 | reset(self.gate_nn) 128 | reset(self.nn) 129 | 130 | def forward(self, batched_data, h_node, size=None): 131 | x = self.rationale_gnn_node(batched_data) 132 | batch = batched_data.batch 133 | x = x.unsqueeze(-1) if x.dim() == 1 else x 134 | size = batch[-1].item() + 1 if size is None else size 135 | 136 | gate = self.gate_nn(x).view(-1, 1) 137 | h_node = self.nn(h_node) if self.nn is not None else h_node 138 | assert gate.dim() == h_node.dim() and gate.size(0) == h_node.size(0) 139 | gate = torch.sigmoid(gate) 140 | 141 | h_out = scatter_add(gate * h_node, batch, dim=0, dim_size=size) 142 | c_out = scatter_add((1 - gate) * h_node, batch, dim=0, dim_size=size) 143 | 144 | r_node_num = scatter_add(gate, batch, dim=0, dim_size=size) 145 | env_node_num = scatter_add((1 - gate), batch, dim=0, dim_size=size) 146 | 147 | return h_out, c_out, r_node_num + 1e-8, env_node_num + 1e-8 148 | 
-------------------------------------------------------------------------------- /examples/molecule/initial_code/requirements.txt: -------------------------------------------------------------------------------- 1 | -f https://download.pytorch.org/whl/cu118/torch_stable.html 2 | torch==2.2.0+cu118 3 | 4 | # Install PyTorch Geometric and related packages 5 | -f https://data.pyg.org/whl/torch-2.2.0+cu118.html 6 | torch_geometric==2.6.1 7 | 8 | ogb==1.3.2 9 | rdkit==2023.9.5 -------------------------------------------------------------------------------- /examples/molecule/initial_idea.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The paper introduces a novel approach to graph rationalization by identifying subgraph structures, termed 'graph rationales,' that are most influential in a Graph Neural Network's (GNN) predictions. To enhance the identification of these rationales, the authors propose an 'environment replacement' augmentation technique, which generates virtual data examples by substituting parts of the graph's environment. This method aims to improve the generalizability and interpretability of GNNs, particularly in applications like molecular and polymer property prediction.", 3 | "motivation": "In graph-based applications, especially in chemistry and materials science, understanding which substructures (subgraphs) significantly influence a model's predictions is crucial. Existing methods often struggle due to limited data and the complexity of graph structures. By introducing environment-based augmentations, the authors seek to create more diverse training examples, thereby improving the model's ability to identify and learn from important subgraph patterns.", 4 | "implementation_notes": "The proposed framework involves separating the graph into rationale and environment subgraphs. 
The environment replacement augmentation then generates new training examples by replacing the environment subgraphs with alternative structures. This process is conducted in latent spaces to avoid the computational complexity associated with explicit graph decoding and encoding. The approach is evaluated on seven molecular and four polymer datasets, demonstrating its effectiveness and efficiency compared to recent techniques.", 5 | "pseudocode": "1. Input: Graph dataset G\n2. For each graph g in G:\n a. Identify rationale subgraph R and environment subgraph E\n b. Generate augmented environment subgraph E' through environment replacement\n c. Combine R and E' to form augmented graph g'\n3. Train GNN on both original and augmented graphs\n4. Evaluate model performance on test data", 6 | "originality": { 7 | "score": 8, 8 | "positive": "The introduction of environment replacement as a data augmentation technique for graph rationalization is innovative and addresses a significant gap in existing methods.", 9 | "negative": "While the approach is novel, it builds upon existing concepts of data augmentation and rationale identification, which are not entirely new." 10 | }, 11 | "future_potential": { 12 | "score": 7, 13 | "positive": "This method has the potential to significantly improve the interpretability and generalizability of GNNs in various applications, particularly in chemistry and materials science.", 14 | "negative": "The effectiveness of the approach may vary depending on the complexity and nature of the graph data, and further validation across diverse datasets is needed." 
15 | }, 16 | "code_difficulty": { 17 | "score": 6, 18 | "positive": "The framework is designed to be efficient by operating in latent spaces, which simplifies the implementation compared to explicit graph manipulations.", 19 | "negative": "Implementing the separation of rationale and environment subgraphs and the augmentation process may still require a solid understanding of graph theory and neural network architectures." 20 | } 21 | } -------------------------------------------------------------------------------- /examples/molecule/initial_metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "combined_score": 0.7914562889678236, 3 | "train_bce_loss_mean": 0.8592897733052572, 4 | "train_bce_loss_std": 0.05240924925925277, 5 | "train_auc_mean": 0.8043603550557327, 6 | "train_auc_std": 0.040930618247002906, 7 | "valid_auc_mean": 0.6497507955764487, 8 | "valid_auc_std": 0.005306585239214318, 9 | "test_auc_mean": 0.5925327307033416, 10 | "test_auc_std": 0.00962015276769418 11 | } -------------------------------------------------------------------------------- /examples/nuclei_image/README.md: -------------------------------------------------------------------------------- 1 | # Nuclei Image Segmentation 2 | 3 | ## Overview 4 | 5 | Identifying the cells' nuclei is the starting point for most analyses because most of the human body's 30 trillion cells contain a nucleus full of DNA, the genetic code that programs each cell. By isolating nuclei, researchers can identify individual cells within a sample and analyze how they respond to various treatments. This is crucial for understanding the underlying biological processes at work. 6 | 7 | By participating in this challenge, teams will work on automating the process of nuclei identification, which has the potential to significantly accelerate drug testing, thereby reducing the time required to bring new drugs to market. 
8 | 9 | ## Evaluation 10 | 11 | The competition is evaluated on the mean average precision across a range of intersection over union (IoU) thresholds. For any two sets, the IoU is defined as: 12 | 13 | ```math 14 | \mathrm{IoU}(A, B) = \frac{\lvert A \cap B\rvert}{\lvert A \cup B\rvert}. 15 | ``` 16 | 17 | The competition metric sweeps over threshold values from 0.5 to 0.95 with a step size of 0.05 (i.e., 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95). At each threshold \(t\), precision is calculated as: 18 | 19 | ```math 20 | \mathrm{Precision}(t) = \frac{\mathrm{TP}(t)}{\mathrm{TP}(t) + \mathrm{FP}(t) + \mathrm{FN}(t)}, 21 | ``` 22 | 23 | where: 24 | - A true positive (TP) is counted when a predicted object matches a ground truth object with an IoU above the threshold. 25 | - A false positive (FP) indicates a predicted object that has no associated ground truth object. 26 | - A false negative (FN) indicates a ground truth object that has no associated predicted object. 27 | 28 | The average precision for a single image is then calculated as: 29 | 30 | ```math 31 | \text{Average Precision} = \frac{1}{\lvert \text{thresholds} \rvert} \sum_{t} \frac{\mathrm{TP}(t)}{\mathrm{TP}(t) + \mathrm{FP}(t) + \mathrm{FN}(t)}. 32 | ``` 33 | 34 | Finally, the competition metric score is the mean of the individual average precisions computed for each image in the test dataset. 35 | 36 | ## Dataset Description 37 | 38 | This dataset comprises a large number of segmented nuclei images acquired under various conditions. Images vary by cell type, magnification, and imaging modality (brightfield vs. fluorescence), posing a significant challenge for algorithms to generalize across different conditions. 39 | 40 | Each image is represented by an associated `ImageId`. Files for a given image reside in a folder named after its `ImageId`. Within each folder, there are two subfolders: 41 | 42 | - **images**: Contains the raw image file. 
43 | - **masks**: Contains the segmented masks, where each mask delineates a single nucleus. Note that: 44 | - Masks in the training set are provided, and each nucleus is given a unique integer label through connected-components analysis. 45 | - Masks do not overlap; no pixel belongs to two masks. 46 | 47 | For the second stage, the dataset will include images from unseen experimental conditions. Some images in this stage will be ignored in scoring to prevent hand labeling, and submissions are required to be in run-length encoded format. Please refer to the evaluation page for submission details. 48 | 49 | Data directories overview: 50 | - `/stage1_train/*` - Training set images (includes images and annotated masks) 51 | - `/stage1_test/*` - Test set images (images and annotated masks) 52 | 53 | ## Interface 54 | 55 | The project leverages the provided `deepevolve_interface.py` to interact with the dataset and evaluation process. 56 | 57 | ## Initial Approach: Nucleus Detection with U-Net 58 | 59 | ### Methodology 60 | 61 | The initial strategy employs a U-Net segmentation network to identify nuclei in microscopy images: 62 | 63 | - **Preprocessing**: 64 | - Resize raw images to 256×256 pixels. 65 | - Normalize images to have zero mean and unit variance. 66 | - Convert ground-truth masks into unique integer labels using connected-component analysis. 67 | 68 | - **Training**: 69 | - Use the Adam optimizer and train the network for up to 100 epochs. 70 | - Apply a soft Dice loss function. 71 | - Implement early stopping based on the validation Dice coefficient. 72 | 73 | - **Inference**: 74 | - The trained model outputs probability maps. 75 | - Threshold the probability maps at 0.5. 76 | - Extract connected components from the thresholded maps to obtain individual nucleus predictions. 
-------------------------------------------------------------------------------- /examples/nuclei_image/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "problem": { 3 | "name": "nuclei_image", 4 | "description": "# Overview\n\nWhy nuclei?\nIdentifying the cells' nuclei is the starting point for most analyses because most of the human body's 30 trillion cells contain a nucleus full of DNA, the genetic code that programs each cell. Identifying nuclei allows researchers to identify each individual cell in a sample, and by measuring how cells react to various treatments, the researcher can understand the underlying biological processes at work.\n\nBy participating, teams will work to automate the process of identifying nuclei, which will allow for more efficient drug testing, shortening the 10 years it takes for each new drug to come to market. Check out this video overview to find out more.\n\nWhat will participants do?\nTeams will create a computer model that can identify a range of nuclei across varied conditions. By observing patterns, asking questions, and building a model, participants will have a chance to push state-of-the-art technology farther.\n\n# Evaluation\n\nThis competition is evaluated on the mean average precision at different intersection over union (IoU) thresholds. The IoU of a proposed set of object pixels and a set of true object pixels is calculated as:\n\n$$\n\\mathrm{IoU}(A, B) = \\frac{\\lvert A \\cap B\\rvert}{\\lvert A \\cup B\\rvert}.\n$$\n\nThe metric sweeps over a range of IoU thresholds, at each point calculating an average precision value. The threshold values range from 0.5 to 0.95 with a step size of 0.05:\n(0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95). 
In other words, at a threshold of 0.5, a predicted object is considered a “hit” if its intersection over union with a ground truth object is greater than 0.5.\n\nAt each threshold value t, a precision value is calculated based on the number of true positives (TP), false negatives (FN), and false positives (FP) resulting from comparing the predicted object to all ground truth objects:\n\n$$\n\\mathrm{Precision}(t) = \\frac{\\mathrm{TP}(t)}{\\mathrm{TP}(t) + \\mathrm{FP}(t) + \\mathrm{FN}(t)}.\n$$\n\nA true positive is counted when a single predicted object matches a ground truth object with an IoU above the threshold. A false positive indicates a predicted object had no associated ground truth object. A false negative indicates a ground truth object had no associated predicted object. The average precision of a single image is then calculated as the mean of the above precision values at each IoU threshold:\n\n$$\n\\text{Average Precision} = \\frac{1}{\\lvert \\text{thresholds}\\rvert} \\sum_{t} \\frac{\\mathrm{TP}(t)}{\\mathrm{TP}(t) + \\mathrm{FP}(t) + \\mathrm{FN}(t)}.\n$$\n\nLastly, the score returned by the competition metric is the mean of the individual average precisions of each image in the test dataset.\n\n## Dataset Description\nThis dataset contains a large number of segmented nuclei images. The images were acquired under a variety of conditions and vary in the cell type, magnification, and imaging modality (brightfield vs. fluorescence). The dataset is designed to challenge an algorithm's ability to generalize across these variations.\n\nEach image is represented by an associated ImageId. Files belonging to an image are contained in a folder with this ImageId. Within this folder are two subfolders:\n\n- **images** contains the image file.\n- **masks** contains the segmented masks of each nucleus. This folder is only included in the training set. Each mask contains one nucleus. 
Masks are not allowed to overlap (no pixel belongs to two masks).\n\nThe second stage dataset will contain images from unseen experimental conditions. To deter hand labeling, it will also contain images that are ignored in scoring. The metric used to score this competition requires that your submissions are in run-length encoded format. Please see the evaluation page for details.\n\nAs with any human-annotated dataset, you may find various forms of errors in the data. You may manually correct errors you find in the training set. The dataset will not be updated/re-released unless it is determined that there are a large number of systematic errors. The masks of the stage 1 test set will be released with the release of the stage 2 test set.\n\n## File descriptions\n\n- `/stage1_train/*` - training set images (images and annotated masks)\n- `/stage1_test/*` - test set images (images only; masks are withheld and released with the stage 2 test set)\n", 5 | "metric": "mean average precision across intersection-over-union thresholds from 0.5 to 0.95", 6 | "interface": "deepevolve_interface.py" 7 | }, 8 | "initial_idea": { 9 | "title": "nucleus detection with UNet", 10 | "content": "The initial approach applies a U-Net segmentation network to identify nuclei in microscopy images. We resize raw images to 256×256 pixels, normalize them to zero mean and unit variance, and convert ground-truth masks into unique integer labels via connected-component analysis. The network is trained with the Adam optimizer over up to 100 epochs using a soft Dice loss, with early stopping triggered when the validation Dice coefficient stops improving. 
At inference, the model produces probability maps that are thresholded at 0.5, and connected components are extracted as individual nucleus predictions.", 11 | "supplement": "https://www.kaggle.com/code/cloudfall/pytorch-tutorials-on-dsb2018" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/nuclei_image/initial_code/deepevolve_interface.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | from main import main, Config 3 | from time import time 4 | import warnings 5 | import threading 6 | import signal 7 | 8 | def run_main_with_timeout(config, timeout_sec): 9 | result = {"metrics": None, "error": None} 10 | 11 | def target(): 12 | try: 13 | result["metrics"] = main(config) 14 | except Exception as e: 15 | result["error"] = str(e) 16 | 17 | thread = threading.Thread(target=target) 18 | thread.daemon = True 19 | thread.start() 20 | thread.join(timeout_sec) 21 | 22 | if thread.is_alive(): 23 | # Note: Cannot forcefully kill a thread in Python, but the daemon thread will exit when main exits 24 | raise TimeoutError(f"The model runtime exceeded {timeout_sec/60:.2f} minutes and was terminated. 
Please reduce the runtime of the model.") 25 | 26 | if result["error"]: 27 | raise Exception(result["error"]) 28 | 29 | return result["metrics"] 30 | 31 | def deepevolve_interface(): 32 | config = Config() 33 | try: 34 | with warnings.catch_warnings(record=True) as caught: 35 | warnings.simplefilter("always") 36 | start_time = time() 37 | # results = main(config) 38 | results = run_main_with_timeout(config, 1800) 39 | runtime = time() - start_time 40 | 41 | warning_messages = [str(w.message) for w in caught] 42 | 43 | runtime = round(runtime / 60, 2) 44 | 45 | train_map = results["train_map"] 46 | valid_map = results["valid_map"] 47 | test_map = results["test_map"] 48 | 49 | metrics = { 50 | "combined_score": test_map, 51 | "train_map": train_map, 52 | "valid_map": valid_map, 53 | "test_map": test_map, 54 | "runtime_minutes": runtime, 55 | } 56 | 57 | if warning_messages: 58 | warning_messages = list(set(warning_messages)) 59 | if len(warning_messages) > 10: 60 | warning_messages = warning_messages[:10] 61 | metrics["program_warnings"] = warning_messages 62 | 63 | return True, metrics 64 | 65 | except Exception as e: 66 | # Capture full traceback information 67 | error_traceback = traceback.format_exc() 68 | error_info = f""" 69 | Error type: {type(e).__name__} 70 | Error message: {str(e)} 71 | Traceback: {error_traceback} 72 | """ 73 | return False, error_info 74 | 75 | 76 | if __name__ == "__main__": 77 | status, results = deepevolve_interface() 78 | print(f"Status: {status}") 79 | print(f"Results: {results}") -------------------------------------------------------------------------------- /examples/nuclei_image/initial_code/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-image 2 | torchvision -------------------------------------------------------------------------------- /examples/nuclei_image/initial_code/runtemp.sh: -------------------------------------------------------------------------------- 1 
| #!/bin/bash 2 | #$ -M liugangswu@outlook.com 3 | #$ -m be 4 | #$ -q gpu@ta-a6k-002 5 | #$ -l gpu=1 6 | #$ -N tmp_run_nuclei_image 7 | 8 | # gpu@qa-titanx-001.crc.nd.edu 9 | ###### gpu@qa-titanx-001.crc.nd.edu 10 | ###### gpu@qa-2080ti-006.crc.nd.edu 11 | ###### gpu@ta-a6k-003 12 | ###### gpu@qa-h100-001.crc.nd.edu 13 | ###### qrsh -q gpu@qa-h100-001.crc.nd.edu -l gpu_card=1 14 | 15 | conda activate aplus 16 | 17 | fsync $SGE_STDOUT_PATH & 18 | 19 | 20 | cd /afs/crc.nd.edu/group/dmsquare/vol2/gliu7/a-plus-dev/examples/nuclei_image/initial_code 21 | 22 | python main.py 23 | -------------------------------------------------------------------------------- /examples/nuclei_image/initial_idea.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The approach uses a U-Net architecture for segmenting nuclei in microscopy images. Key steps include resizing images to 256x256 pixels, normalizing them, and converting masks to unique integers using connected-component analysis. The model is trained using the Adam optimizer for a maximum of 100 epochs with the soft Dice loss function. Early stopping is employed based on the validation Dice coefficient. During inference, the model outputs probability maps which are thresholded and processed to extract individual nuclei as connected components.", 3 | "motivation": "The motivation is to accurately identify and segment nuclei in microscopy images, which can be crucial for biological research, diagnostics, and treatment planning.", 4 | "implementation_notes": "Utilizes common preprocessing steps (resizing, normalization) followed by U-Net training. Employs connected-component analysis for mask conversion.", 5 | "pseudocode": "1. Resize images to 256x256\\n2. Normalize images to zero mean, unit variance\\n3. Convert masks to unique integer labels\\n4. Train U-Net with Adam optimizer, soft Dice loss up to 100 epochs\\n5. Apply early stopping based on validation Dice\\n6. 
At inference, generate probability maps\\n7. Threshold maps at 0.5\\n8. Extract connected components as nuclei predictions", 6 | "originality": { 7 | "score": 5, 8 | "positive": "Combines standard image processing techniques with U-Net for segmentation.", 9 | "negative": "Uses well-known methods without introducing novel techniques." 10 | }, 11 | "future_potential": { 12 | "score": 6, 13 | "positive": "Could be applied to various microscopy imaging problems.", 14 | "negative": "Limited by scope to nuclei detection, may need adjustments for broader applications." 15 | }, 16 | "code_difficulty": { 17 | "score": 4, 18 | "positive": "Leverages popular libraries and techniques, making implementation feasible.", 19 | "negative": "Requires understanding of image processing and neural network training." 20 | } 21 | } -------------------------------------------------------------------------------- /examples/nuclei_image/initial_metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "combined_score": 0.31848411850033215, 3 | "train_map": 0.5330009856990411, 4 | "valid_map": 0.49809356905982716, 5 | "test_map": 0.31848411850033215, 6 | "runtime_minutes": 11.37 7 | } -------------------------------------------------------------------------------- /examples/openvaccine/README.md: -------------------------------------------------------------------------------- 1 | # OpenVaccine Competition 2 | 3 | Welcome to the OpenVaccine competition repository. This competition challenges participants to develop models to predict RNA degradation rates at the base level, a critical step towards designing stable mRNA vaccines for COVID-19. 4 | 5 | --- 6 | 7 | ## Overview 8 | 9 | ### Competition Background 10 | 11 | Winning the fight against the COVID-19 pandemic requires an effective vaccine that can be equitably and widely distributed. mRNA vaccines show significant promise but face challenges related to stability. 
RNA molecules are prone to degradation and even a single cut can render the vaccine ineffective. Understanding the degradation mechanisms at the base level is therefore essential. 12 | 13 | The Eterna community, in collaboration with researchers from Stanford University, has embraced a novel approach. By combining scientific expertise with crowdsourced insights from gamers, they tackle challenges in RNA design. This competition leverages data science to design models predicting degradation rates at each RNA base position, aiding the acceleration of mRNA vaccine development. 14 | 15 | ### Data Overview 16 | 17 | Participants will work with a dataset comprised of over 3000 RNA molecules, where each molecule is annotated with a series of experimental measurements. The dataset includes: 18 | 19 | - **train.json** – Contains the training data. 20 | - **test.json** – Test data without any ground truth values. 21 | - **sample_submission.csv** – A sample submission file demonstrating the required format. 22 | 23 | Each sample includes the following columns: 24 | 25 | - **id** – Unique identifier for each sample. 26 | - **seq_scored** – Number of positions used in scoring. In Train and Public Test the value is 68, while it is 91 in the Private Test. 27 | - **seq_length** – Length of the RNA sequence. This is 107 for Train and Public Test, and 130 for Private Test. 28 | - **sequence** – RNA sequence comprising the characters A, G, U, and C. 29 | - **structure** – Dot-bracket notation representing the secondary structure of the RNA. 30 | - **reactivity, deg_pH10, deg_Mg_pH10, deg_50C, deg_Mg_50C** – Vectors of experimental measurements (reactivity and different degradation rates) for positions specified by **seq_scored**. 31 | - **\*_error_\*** – Estimated errors corresponding to the experimental measurements. 32 | - **predicted_loop_type** – Structural context (or loop type) assigned to each base. 
The loop types include: 33 | - S: Stem (paired) 34 | - M: Multiloop 35 | - I: Internal loop 36 | - B: Bulge 37 | - H: Hairpin loop 38 | - E: Dangling end 39 | - X: External loop 40 | - **S/N_filter** – Indicator if the sample passed additional quality filters. 41 | 42 | **Test Set Filtering:** 43 | The 629 RNA sequences in the test set were selected based on the following criteria: 44 | - A minimum value greater than -0.5 across all 5 conditions. 45 | - A signal-to-noise ratio (mean measurement over 68 nts divided by mean statistical error) greater than 1.0. 46 | - Less than 50% sequence similarity within clusters containing at most 3 similar sequences. 47 | 48 | *Note: The public training data contains additional noisy measurements to allow competitors to extract further insights.* 49 | 50 | --- 51 | 52 | ## Evaluation Metric 53 | 54 | Submissions are scored using the mean columnwise root mean squared error (MCRMSE): 55 | 56 | ```math 57 | \mathrm{MCRMSE} = \frac{1}{N_t}\sum_{j=1}^{N_t}\sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_{ij}-\hat{y}_{ij})^2}, 58 | ``` 59 | 60 | where: 61 | - $N_t$ is the number of ground-truth target columns that are scored. 62 | - $n$ is the number of samples. 63 | - $y_{ij}$ and $\hat{y}_{ij}$ are the actual and predicted values, respectively. 64 | 65 | Although the training data provides five ground-truth measurements, only the following three are used for scoring: 66 | - **reactivity** 67 | - **deg_Mg_pH10** 68 | - **deg_Mg_50C** 69 | 70 | --- 71 | 72 | ## Proposed Approach 73 | 74 | ### Model: GraphSAGE (with GCN) + GRU + KFold 75 | 76 | This repository presents a model that integrates GraphSAGE-based graph convolution with a GRU and k-fold cross-validation framework. The approach is designed as follows: 77 | 78 | 1. **Feature Embedding:** 79 | Each nucleotide is embedded along with its predicted secondary structure and loop-type context. 80 | 81 | 2. 
**Graph Construction:** 82 | A graph is constructed where: 83 | - Nodes represent individual RNA bases. 84 | - Edges connect adjacent bases and bases that are paired in the structure. 85 | 86 | 3. **Graph Convolution:** 87 | A GraphSAGE-based convolution network aggregates information from neighboring nodes, yielding enriched base-level features. 88 | 89 | 4. **Sequence Modeling:** 90 | The enriched features are passed through a bidirectional GRU to capture sequential patterns along the RNA chain. 91 | 92 | 5. **Prediction:** 93 | A linear output layer predicts the three scored targets (reactivity, deg_Mg_pH10, and deg_Mg_50C) for each position in the RNA sequence. 94 | 95 | 6. **Cross-Validation:** 96 | Training employs k-fold cross-validation to ensure robust performance across the public dataset. 97 | 98 | ### Optional Enhancements 99 | 100 | An optional enhancement involves the incorporation of precomputed base-pair probability (bpps) matrices. These matrices provide a richer view of the RNA folding ensemble and can be used as additional node features. Participants can choose to integrate them into the graph or disregard them. 
101 | 102 | For more details and a complete implementation, please refer to the accompanying Kaggle notebook: 103 | [GraphSAGE (Graph Convolution) & GRU with KFold Implementation](https://www.kaggle.com/code/vudangthinh/openvaccine-gcn-graphsage-gru-kfold/notebook#Pytorch-model-based-on-GCN-(GraphSAGE)-and-GRU) -------------------------------------------------------------------------------- /examples/openvaccine/initial_code/deepevolve_interface.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import warnings 3 | from main import main 4 | from time import time 5 | import numpy as np 6 | import multiprocessing 7 | 8 | 9 | def run_main_with_timeout(base_dir, timeout_sec): 10 | manager = multiprocessing.Manager() 11 | return_dict = manager.dict() 12 | 13 | def target(): 14 | try: 15 | # Capture warnings in the subprocess 16 | with warnings.catch_warnings(record=True) as caught: 17 | warnings.simplefilter("always") 18 | metrics = main(base_dir) 19 | 20 | warning_messages = [str(w.message) for w in caught] 21 | return_dict["metrics"] = metrics 22 | if len(warning_messages) > 10: 23 | warning_messages = warning_messages[:10] 24 | return_dict["warnings"] = warning_messages 25 | return_dict["error"] = None 26 | except Exception as e: 27 | return_dict["metrics"] = None 28 | return_dict["warnings"] = [] 29 | return_dict["error"] = str(e) 30 | 31 | p = multiprocessing.Process(target=target) 32 | p.start() 33 | p.join(timeout_sec) 34 | if p.is_alive(): 35 | p.terminate() 36 | p.join() 37 | raise TimeoutError(f"The model runtime exceeded {timeout_sec/60:.2f} minutes and was terminated. 
Please reduce the runtime of the model.") 38 | 39 | if return_dict["error"]: 40 | raise Exception(return_dict["error"]) 41 | 42 | return return_dict["metrics"], return_dict.get("warnings", []) 43 | 44 | def deepevolve_interface(): 45 | base_dir = "data_cache/openvaccine" 46 | # base_dir = "../../../data_cache/openvaccine" 47 | try: 48 | start_time = time() 49 | metrics, subprocess_warnings = run_main_with_timeout(base_dir, 1800) 50 | runtime = time() - start_time 51 | 52 | runtime_minutes = round(runtime / 60, 2) 53 | 54 | test_score = metrics["test_MCRMSE"] 55 | if np.isnan(test_score): 56 | test_score = 999 57 | 58 | initial_score = 0.3914539605379105 59 | first_place_score = 0.34198 60 | improvement_to_initial = round((initial_score - test_score) / initial_score * 100, 2) 61 | improvement_to_first_place = round((first_place_score - test_score) / first_place_score * 100, 2) 62 | 63 | metrics = { 64 | "combined_score": 1 / (1 + test_score), 65 | "improvement_percentage_to_initial": improvement_to_initial, 66 | "improvement_percentage_to_first_place": improvement_to_first_place, 67 | "runtime_minutes": runtime_minutes, 68 | "test_MCRMSE_lower_is_better": test_score, 69 | "train_mean_loss_across_folds_lower_is_better": metrics["train_mean_loss_across_folds"], 70 | } 71 | 72 | # Include warnings from subprocess 73 | if subprocess_warnings: 74 | warning_messages = list(set(subprocess_warnings)) 75 | if len(warning_messages) > 10: 76 | warning_messages = warning_messages[:10] 77 | metrics["program_warnings"] = warning_messages 78 | 79 | return True, metrics 80 | 81 | except Exception as e: 82 | error_traceback = traceback.format_exc() 83 | error_info = ( 84 | f"Error type: {type(e).__name__}\n" 85 | f"Error message: {e}\n" 86 | f"Traceback:\n{error_traceback}" 87 | ) 88 | return False, error_info 89 | 90 | 91 | if __name__ == "__main__": 92 | status, results = deepevolve_interface() 93 | print(f"Status: {status}") 94 | print(f"Results: {results}") 95 | 
-------------------------------------------------------------------------------- /examples/openvaccine/initial_code/requirements.txt: -------------------------------------------------------------------------------- 1 | torch -------------------------------------------------------------------------------- /examples/openvaccine/initial_idea.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The proposed model, GraphSAGE(with GCN)+GRU+KFold, is designed to predict specific RNA properties using a hybrid approach. It begins by embedding each nucleotide with associated features like predicted secondary structure and loop-type context. A graph is then constructed linking adjacent and paired bases. To process this graph, a GraphSAGE-based Graph Convolution Network (GCN) is utilized to enrich the base-level features. These features are then passed through a bidirectional GRU, capturing sequential patterns in the RNA chain. The final predictions for reactivity and two different degradation metrics (deg_Mg_pH10 and deg_Mg_50C) at each nucleotide position are made using a linear output layer. The model is trained using k-fold cross-validation for robustness across datasets. \n\nAn optional component involves the use of precomputed base-pair probability (bpps) matrices, which provide detailed insights into nucleotide pairings. These matrices can enhance the node features by offering a richer depiction of the RNA's folding structure, supplementing or replacing simpler models like dot-bracket notation.", 3 | "motivation": "The primary motivation behind this model is to improve the prediction of RNA structure-related properties utilizing both structural and sequential information. 
By combining graph and network approaches, the model aims to capture complex interactions within RNA sequences more effectively.", 4 | "implementation_notes": "The implementation uses a combination of GraphSAGE for graph convolution, GRU for sequence modeling, and k-fold cross-validation for robust training. Incorporating base-pair probability matrices is optional, allowing flexibility depending on computational resources and desired accuracy.", 5 | "pseudocode": "1. Embed nucleotides with predicted secondary-structure and loop-type data.\n2. Construct a graph linking adjacent and paired nucleotides.\n3. Apply GraphSAGE-based GCN to enrich base-level features.\n4. Feed features into a bidirectional GRU to capture sequential patterns.\n5. Use a linear layer for positional predictions of reactivity and degradation targets.\n6. Train the model with k-fold cross-validation.", 6 | "originality": { 7 | "score": 4, 8 | "positive": "Combines graph-based and sequential models innovatively. Uses optional bpps matrices for detailed structure insights.", 9 | "negative": "Utilizes well-known models (GraphSAGE, GRU) in a new combination, which might limit novelty." 10 | }, 11 | "future_potential": { 12 | "score": 5, 13 | "positive": "Can be adapted for other RNA-related prediction tasks or extended with more complex graph features.", 14 | "negative": "Complexity may increase with additional features, requiring more computational resources." 15 | }, 16 | "code_difficulty": { 17 | "score": 3, 18 | "positive": "Uses established frameworks like PyTorch and existing models, aiding in implementation.", 19 | "negative": "Integration of optional bpps matrices and tuning for optimal performance could require extra effort." 
20 | } 21 | } -------------------------------------------------------------------------------- /examples/openvaccine/initial_metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "combined_score": 0.7186727181497392, 3 | "improvement_percentage_to_initial": 0.0, 4 | "improvement_percentage_to_first_place": -14.47, 5 | "runtime_minutes": 26.68, 6 | "test_MCRMSE_lower_is_better": 0.3914539605379105, 7 | "train_mean_loss_across_folds_lower_is_better": 0.3197553661678519 8 | } -------------------------------------------------------------------------------- /examples/parkinson_disease/initial_code/config.py: -------------------------------------------------------------------------------- 1 | from types import SimpleNamespace 2 | 3 | # Data configuration 4 | DATA_DIR = "" 5 | TARGET_HORIZONS = [0, 6, 12, 24] 6 | TEST_VMONTHS = [0, 6, 12, 18, 24, 36, 48, 60, 72, 84] 7 | 8 | # LightGBM parameters 9 | LGB_PARAMS = { 10 | "boosting_type": "gbdt", 11 | "objective": "multiclass", 12 | "num_class": 87, 13 | "n_estimators": 300, 14 | "learning_rate": 0.019673004699536346, 15 | "num_leaves": 208, 16 | "max_depth": 14, 17 | "min_data_in_leaf": 850, 18 | "feature_fraction": 0.5190632906197453, 19 | "lambda_l1": 7.405660751699475e-08, 20 | "lambda_l2": 0.14583961675675494, 21 | "max_bin": 240, 22 | "verbose": -1, 23 | "force_col_wise": True, 24 | "n_jobs": -1, 25 | } 26 | 27 | 28 | # Neural Network configuration 29 | def get_nn_config(): 30 | cfg = SimpleNamespace(**{}) 31 | cfg.tr_collate_fn = None 32 | cfg.val_collate_fn = None 33 | cfg.target_column = "target_norm" 34 | cfg.output_dir = "results/nn_temp" 35 | cfg.seed = -1 36 | cfg.eval_epochs = 1 37 | cfg.mixed_precision = False 38 | cfg.device = "cpu" 39 | cfg.n_classes = 1 40 | cfg.batch_size = 128 41 | cfg.batch_size_val = 256 42 | cfg.n_hidden = 64 43 | cfg.n_layers = 2 44 | cfg.num_workers = 0 45 | cfg.drop_last = False 46 | cfg.gradient_clip = 1.0 47 | cfg.bag_size = 1 48 
| cfg.bag_agg_function = "mean" 49 | cfg.lr = 2e-3 50 | cfg.warmup = 0 51 | cfg.epochs = 10 52 | return cfg 53 | 54 | 55 | # Feature configuration 56 | def get_lgb_features(): 57 | features = [ 58 | "target_i", 59 | "target_month", 60 | "horizon", 61 | "visit_month", 62 | "visit_6m", 63 | "blood_taken", 64 | ] 65 | features += ["visit_18m", "is_suppl"] 66 | features += ["count_non12_visits"] 67 | features += ["visit_48m"] 68 | return features 69 | 70 | 71 | def get_nn_features(sample_df): 72 | features = ["visit_6m"] 73 | features += [c for c in sample_df.columns if c.startswith("t_month_eq_")] 74 | features += [c for c in sample_df.columns if c.startswith("v_month_eq_")] 75 | features += [c for c in sample_df.columns if c.startswith("hor_eq_")] 76 | features += [c for c in sample_df.columns if c.startswith("target_n_")] 77 | features += ["visit_18m"] 78 | features += ["visit_48m"] 79 | features += ["is_suppl"] 80 | features += ["horizon_scaled"] 81 | return features 82 | -------------------------------------------------------------------------------- /examples/parkinson_disease/initial_code/data_loader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def load_data(base_path="data"): 5 | """Load training data from CSV files.""" 6 | proteins = pd.read_csv(f"{base_path}/train_proteins.csv") 7 | peptides = pd.read_csv(f"{base_path}/train_peptides.csv") 8 | clinical = pd.read_csv(f"{base_path}/train_clinical_data.csv") 9 | supplement = pd.read_csv(f"{base_path}/supplemental_clinical_data.csv") 10 | return proteins, peptides, clinical, supplement 11 | 12 | 13 | def preprocess_supplement_data(supplement_df): 14 | """Preprocess supplement data.""" 15 | supplement_df.loc[supplement_df["visit_month"] == 5, "visit_month"] = 6 16 | return supplement_df 17 | -------------------------------------------------------------------------------- /examples/parkinson_disease/initial_code/deepevolve_interface.py: 
-------------------------------------------------------------------------------- 1 | import traceback 2 | import warnings 3 | from main import main_func 4 | from time import time 5 | import numpy as np 6 | 7 | 8 | def deepevolve_interface(): 9 | base_dir = "data_cache/amp_pd" 10 | # base_dir = "../../../data_cache/amp_pd" 11 | try: 12 | # Run main_func inside a warnings-catching context 13 | with warnings.catch_warnings(record=True) as caught: 14 | warnings.simplefilter("always") 15 | start_time = time() 16 | smape = main_func(base_dir) 17 | runtime = time() - start_time 18 | 19 | warning_messages = [str(w.message) for w in caught] 20 | 21 | # Compute combined score 22 | if np.isnan(smape): 23 | combined_score = 0.0 24 | print("smape is nan, set combined_score to 0.0") 25 | else: 26 | combined_score = 1 - smape / 200 27 | 28 | # Compute runtime in minutes, rounded 29 | runtime_minutes = round(runtime / 60, 2) 30 | 31 | # Compute improvement ratio 32 | initial_smape = 93.54330168877686 33 | ratio = ( 34 | round((initial_smape - smape) / initial_smape * 100, 2) 35 | if not np.isnan(smape) 36 | else 0.0 37 | ) 38 | 39 | # Build metrics dict 40 | metrics = { 41 | "combined_score": combined_score, 42 | "symmetric_mean_absolute_percentage_error (lower is better)": smape, 43 | "improvement_percentage_to_initial": ratio, 44 | "runtime_minutes": runtime_minutes, 45 | } 46 | if warning_messages: 47 | warning_messages = list(set(warning_messages)) 48 | if len(warning_messages) > 10: 49 | warning_messages = warning_messages[:10] 50 | metrics["program_warnings"] = warning_messages 51 | 52 | return True, metrics 53 | 54 | except Exception as e: 55 | error_traceback = traceback.format_exc() 56 | error_info = ( 57 | f"Error type: {type(e).__name__}\n" 58 | f"Error message: {e}\n" 59 | f"Traceback:\n{error_traceback}" 60 | ) 61 | return False, error_info 62 | 63 | 64 | if __name__ == "__main__": 65 | status, results = deepevolve_interface() 66 | print(f"Status: {status}") 67 | 
print(f"Results: {results}") 68 | -------------------------------------------------------------------------------- /examples/parkinson_disease/initial_code/lightgbm_model.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | from base_model import BaseModel 3 | from metrics import opt_smape1p 4 | 5 | 6 | class LGBClassModel1(BaseModel): 7 | """LightGBM classification model.""" 8 | 9 | def __init__(self, params, features): 10 | self.params = params 11 | self.features = features 12 | 13 | def fit(self, df_train): 14 | if self.features is None: 15 | self.features = [col for col in df_train.columns if col.startswith("v_")] 16 | lgb_train = lgb.Dataset(df_train[self.features], df_train["target"]) 17 | params0 = {k: v for k, v in self.params.items() if k not in ["n_estimators"]} 18 | self.m_gbm = lgb.train( 19 | params0, lgb_train, num_boost_round=self.params["n_estimators"] 20 | ) 21 | return self 22 | 23 | def predict_proba(self, df_valid): 24 | return self.m_gbm.predict(df_valid[self.features]) 25 | 26 | def predict(self, df_valid): 27 | return opt_smape1p(self.predict_proba(df_valid)) 28 | -------------------------------------------------------------------------------- /examples/parkinson_disease/initial_code/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import sys 4 | from sklearn.utils.validation import check_consistent_length 5 | from data_loader import load_data, preprocess_supplement_data 6 | from preprocessing import DataPrep 7 | from config import LGB_PARAMS, get_nn_config, get_lgb_features, get_nn_features 8 | from lightgbm_model import LGBClassModel1 9 | from neural_network import NNRegModel1 10 | from utils import repl 11 | from public_timeseries_testing_util import MockApi 12 | 13 | 14 | def smapep1(y_true, y_pred): 15 | """SMAPE of y+1, a nonnegative float, smaller is better 16 | 17 | Parameters: 
y_true, y_pred: array-like 18 | 19 | Returns 100 for 100 % error. 20 | y_true may have missing values. 21 | """ 22 | check_consistent_length(y_true, y_pred) 23 | y_true = np.array(y_true, copy=False).ravel() 24 | y_pred = np.array(y_pred, copy=False).ravel() 25 | y_true, y_pred = y_true[np.isfinite(y_true)], y_pred[np.isfinite(y_true)] 26 | if (y_true < 0).any(): 27 | raise ValueError("y_true < 0") 28 | if (y_pred < 0).any(): 29 | raise ValueError("y_pred < 0") 30 | denominator = (y_true + y_pred) / 2 + 1 31 | ape = np.abs(y_pred - y_true) / denominator 32 | return np.average(ape) * 100 33 | 34 | 35 | def main_func(base_dir): 36 | proteins, peptides, clinical, supplement = load_data(base_dir) 37 | supplement = preprocess_supplement_data(supplement) 38 | 39 | # Initialize data preprocessor 40 | dp3 = DataPrep() 41 | dp3.fit(proteins, peptides, clinical) 42 | 43 | # Prepare training samples 44 | sample3 = dp3.transform_train(proteins, peptides, clinical) 45 | sample3 = sample3[~sample3["target"].isnull()] 46 | sample3["is_suppl"] = 0 47 | 48 | sup_sample3 = dp3.transform_train(proteins, peptides, supplement) 49 | sup_sample3 = sup_sample3[~sup_sample3["target"].isnull()] 50 | sup_sample3["is_suppl"] = 1 51 | 52 | # Train LightGBM model 53 | lgb_features = get_lgb_features() 54 | model_lgb = LGBClassModel1(LGB_PARAMS, lgb_features) 55 | model_lgb = model_lgb.fit(pd.concat([sample3, sup_sample3], axis=0)) 56 | 57 | # Train Neural Network model 58 | cfg = get_nn_config() 59 | cfg.features = get_nn_features(sample3) 60 | model_nn = NNRegModel1(cfg) 61 | model_nn = model_nn.fit(pd.concat([sample3, sup_sample3], axis=0)) 62 | 63 | # Load test environment (if available) 64 | env = MockApi(base_dir) 65 | iter_test = env.iter_test() 66 | 67 | all_test_peptides = None 68 | all_test_proteins = None 69 | all_test_df = None 70 | 71 | for test_df, test_peptides, test_proteins, sample_submission in iter_test: 72 | 73 | all_test_df = pd.concat([all_test_df, test_df], axis=0) 74 | 
def smape1p_ind(A, F):
    """Element-wise SMAPE+1 between actuals ``A`` and forecasts ``F``."""
    return 200 * np.abs(F - A) / (np.abs(A + 1) + np.abs(F + 1))


def smape1p(A, F):
    """Mean SMAPE+1 over all elements."""
    return smape1p_ind(A, F).mean()


def smape1p_opt(x):
    """Integer in [0, 60] that minimises SMAPE+1 against the sample ``x``."""
    candidates = np.arange(0, 61)
    scores = [smape1p(x, candidate) for candidate in candidates]
    return candidates[np.argmin(scores)]


def single_smape1p(preds, tgt):
    """Expected SMAPE+1 of predicting ``tgt`` under each row's distribution.

    ``preds`` is an (n_rows, n_classes) matrix of class probabilities; the
    per-class cost |c - tgt| / (2 + c + tgt) is averaged under each row.
    """
    support = np.tile(np.arange(preds.shape[1]), (preds.shape[0], 1))
    costs = np.abs(support - tgt) / (2 + support + tgt)
    return (costs * preds).sum(axis=1)
def opt_smape1p(preds):
    """Row-wise SMAPE+1-optimal class for a matrix of probability rows."""
    costs = np.column_stack(
        [single_smape1p(preds, candidate) for candidate in range(preds.shape[1])]
    )
    return costs.argmin(axis=1)


def max_dif(val, lst):
    """Distance from ``val`` to the largest strictly-smaller value in ``lst``.

    Returns -1 when no element of ``lst`` is smaller than ``val``.
    """
    earlier = [v for v in lst if v < val]
    return val - max(earlier) if earlier else -1


def count_prev_visits(val, lst):
    """Number of values in ``lst`` strictly smaller than ``val``."""
    return sum(1 for v in lst if v < val)
class MockApi:
    """Offline stand-in for Kaggle's timeseries submission API.

    Serves the rows of several csv files group by group (grouped on
    ``visit_month``) and enforces the same protocol as the real API: every
    group yielded by ``iter_test`` must be answered with exactly one
    ``predict`` call before the next group is served.
    """

    def __init__(self, base_dir: str):
        """Configure the csv sources under ``base_dir/example_test_files``.

        Attributes:
            input_paths: paths of the csv files to serve; the first one
                defines the group order and all must share the group column.
            group_id_column: the column identifying which rows the API
                serves together.
            export_group_id_column: if True, served frames keep the
                group_id_column values.
        """
        self.input_paths: Sequence[str] = [
            f"{base_dir}/example_test_files/test.csv",
            f"{base_dir}/example_test_files/test_peptides.csv",
            f"{base_dir}/example_test_files/test_proteins.csv",
            f"{base_dir}/example_test_files/sample_submission.csv",
        ]
        self.group_id_column: str = "visit_month"
        self.export_group_id_column: bool = True
        self.answer_path = f"{base_dir}/example_test_files/answer.csv"
        # iter_test is only designed to support two or more dataframes,
        # such as test and sample_submission
        assert len(self.input_paths) >= 2

        self._status = "initialized"
        self.predictions = []

    def iter_test(self) -> Tuple[pd.DataFrame]:
        """Yield, for each group id in turn, a tuple with the matching rows
        of every dataframe listed in self.input_paths."""
        if self._status != "initialized":

            raise Exception(
                "WARNING: the real API can only iterate over `iter_test()` once."
            )

        dataframes = []
        for pth in self.input_paths:
            dataframes.append(pd.read_csv(pth, low_memory=False))
        group_order = dataframes[0][self.group_id_column].drop_duplicates().tolist()
        dataframes = [df.set_index(self.group_id_column) for df in dataframes]

        for group_id in group_order:
            self._status = "prediction_needed"
            current_data = []
            for df in dataframes:
                try:
                    cur_df = df.loc[group_id].copy()
                    # df.loc collapses a single-row selection to a Series;
                    # rebuild a one-row DataFrame in that case.
                    if not isinstance(cur_df, pd.DataFrame):
                        cur_df = pd.DataFrame(
                            {a: b for a, b in zip(cur_df.index.values, cur_df.values)},
                            index=[group_id],
                        )
                        # Bug fix: the original assigned the renamed Index
                        # object back to cur_df (`cur_df = cur_df.index
                        # .rename(...)`), replacing the DataFrame with an
                        # Index and breaking the reset_index call below.
                        cur_df.index.name = self.group_id_column
                except KeyError:
                    cur_df = df.loc[[]].copy()
                cur_df = cur_df.reset_index(drop=not (self.export_group_id_column))
                current_data.append(cur_df)
            yield tuple(current_data)

            while self._status != "prediction_received":
                print(
                    "You must call `predict()` successfully before you can continue with `iter_test()`",
                    flush=True,
                )
                yield None

        # with open('submission.csv', 'w') as f_open:
        #     pd.concat(self.predictions).to_csv(f_open, index=False)
        self._status = "finished"

    def predict(self, user_predictions: pd.DataFrame):
        """
        Accepts and stores the user's predictions and unlocks iter_test once that is done
        """
        if self._status == "finished":
            raise Exception("You have already made predictions for the full test set.")
        if self._status != "prediction_needed":
            raise Exception(
                "You must get the next test sample from `iter_test()` first."
            )
        if not isinstance(user_predictions, pd.DataFrame):
            raise Exception("You must provide a DataFrame.")

        self.predictions.append(user_predictions)
        self._status = "prediction_received"

    def get_predictions(self):
        """Return every stored prediction frame concatenated in serve order."""
        return pd.concat(self.predictions)

    def get_answer(self):
        """Return the ground-truth answer frame used for scoring."""
        return pd.read_csv(self.answer_path)


def make_env(base_dir: str):
    """Factory mirroring Kaggle's make_env.

    Bug fix: MockApi.__init__ requires base_dir, so the original zero-arg
    call `MockApi()` always raised TypeError.
    """
    return MockApi(base_dir)
Your work will accelerate the discovery of innovative, sustainable, and biocompatible materials with wide-ranging applications. 4 | 5 | --- 6 | 7 | ## Overview 8 | 9 | Polymers are the essential building blocks of our world—from the DNA within our bodies to everyday plastics. They drive innovation in medicine, electronics, and sustainability. However, the discovery of new, eco-friendly polymer materials has been slow due to a lack of high-quality, accessible data. 10 | 11 | The Open Polymer Prediction 2025 challenge introduces a game-changing, large-scale open-source dataset that is ten times larger than any previous resource. In this competition, your mission is to predict a polymer’s real-world performance based solely on its chemical structure (provided in SMILES format). By accurately forecasting five key properties, your model will help scientists accelerate the design and virtual screening of new polymers. 12 | 13 | --- 14 | 15 | ## Problem Description 16 | 17 | You are provided with polymer data in CSV files (`train.csv`, `valid.csv`, and `test.csv`). Each file includes the following columns: 18 | 19 | | Column | Description | 20 | |-----------|-----------------------------------------------------------| 21 | | `id` | A unique identifier for each polymer. | 22 | | `SMILES` | A sequence-like chemical notation representing the polymer structure. | 23 | | `Tg` | Glass transition temperature (°C). | 24 | | `FFV` | Fractional free volume. | 25 | | `Tc` | Thermal conductivity (W/m·K). | 26 | | `Density` | Polymer density (g/cm³). | 27 | | `Rg` | Radius of gyration (Å). | 28 | 29 | Your task is to accurately predict the following properties from the SMILES representation: 30 | 31 | - **Glass transition temperature (Tg)** 32 | - **Fractional free volume (FFV)** 33 | - **Thermal conductivity (Tc)** 34 | - **Polymer density (Density)** 35 | - **Radius of gyration (Rg)** 36 | 37 | These target variables are averaged from multiple runs of molecular dynamics simulations. 
38 | 39 | --- 40 | 41 | ## Evaluation Metric 42 | 43 | The predictions will be evaluated using a weighted Mean Absolute Error (wMAE) across the five properties. The wMAE is defined as: 44 | 45 | ```math 46 | \mathrm{wMAE} = \frac{1}{\lvert \mathcal{X} \rvert} \sum_{X \in \mathcal{X}} \sum_{i \in I(X)} w_{i}\,\bigl| \hat{y}_{i}(X) - y_{i}(X)\bigr| 47 | ``` 48 | 49 | Each property is given a weight $w_i$ to ensure equal contribution regardless of scale or frequency. The weight for property $i$ is calculated as: 50 | 51 | ```math 52 | w_{i} = \frac{1}{r_{i}} \;\times\; \frac{K\,\sqrt{\tfrac{1}{n_{i}}}}{\displaystyle\sum_{j=1}^{K}\sqrt{\tfrac{1}{n_{j}}}} 53 | ``` 54 | 55 | In addition to wMAE, the final evaluation metric is a combination of the weighted MAE and the $R^2$ score. 56 | 57 | --- 58 | 59 | ## Task 60 | 61 | Your challenge is to build a predictive model that estimates the five key polymer properties from the provided SMILES strings. By doing so, you will play a vital role in enabling rapid, in-silico screening of polymers, ultimately expediting the development of targeted and sustainable materials. 62 | 63 | --- 64 | 65 | ## Interface 66 | 67 | The competition interface is provided in the file: `deepevolve_interface.py`. 68 | 69 | --- 70 | 71 | ## Initial Idea 72 | 73 | ### Graph Rationalization with Environment-based Augmentations 74 | 75 | For an innovative approach to modeling, consider exploring ideas from the paper [Graph Rationalization with Environment-based Augmentations](https://arxiv.org/abs/2206.02886). Additional resources and code are available in the [GREA GitHub repository](https://github.com/liugangcode/GREA). -------------------------------------------------------------------------------- /examples/polymer/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "problem": { 3 | "name": "polymer", 4 | "description": "# Overview\n\nCan your model unlock the secrets of polymers? 
In this competition, you're tasked with predicting the fundamental properties of polymers to speed up the development of new materials. Your contributions will help researchers innovate faster, paving the way for more sustainable and biocompatible materials that can positively impact our planet.\n\n# Description\nPolymers are the essential building blocks of our world, from the DNA within our bodies to the plastics we use every day. They are key to innovation in critical fields like medicine, electronics, and sustainability. The search for the next generation of groundbreaking, eco-friendly materials is on, and machine learning can be the solution. However, progress has been stalled by one major hurdle: a critical lack of accessible, high-quality data.\n\nOur Open Polymer Prediction 2025 introduces a game-changing, large-scale open-source dataset - ten times larger than any existing resource. We invite you to piece together the missing links and unlock the vast potential of sustainable materials.\n\nYour mission is to predict a polymer's real-world performance directly from its chemical structure. You'll be provided with a polymer's structure as a simple text string (SMILES), and your challenge is to build a model that can accurately forecast five key metrics that determine how it will behave. This includes predicting its density, its response to heat (thermal conductivity, Tc) and glass transition temperature (Tg), and its fundamental molecular size and packing efficiency (radius of gyration, Rg, and fractional free volume, FFV). 
The ground truth for this competition is averaged from multiple runs of molecular dynamics simulation.\n\nYour contributions have the potential to redefine polymer discovery, accelerating sustainable polymer research through virtual screening and driving significant advancements in materials science.\n\n# Evaluation Metric\n\nThe evaluation metric for this contest is a weighted Mean Absolute Error (wMAE) across five polymer properties, defined as:\n\n$$\n\\\\mathrm{wMAE}\n=\n\\\\frac{1}{\\\\lvert \\\\mathcal{X} \\\\rvert}\n\\\\sum_{X \\\\in \\\\mathcal{X}}\n\\\\sum_{i \\\\in I(X)}\nw_{i}\\\\,\\\\bigl\\\\lvert \\\\hat{y}_{i}(X) \\\\;-\\\\; y_{i}(X)\\\\bigr\\\\rvert\n$$\n\nTo ensure that all property types contribute equally regardless of their scale or frequency, each property is given a weight $w_{i}$:\n\n$$\n w_{i}\n=\n\\\\frac{1}{r_{i}}\n\\\\;\\\\times\\\\;\n\\\\frac{\n K\\\\,\\\\sqrt{\\\\tfrac{1}{n_{i}}}\n }{\n \\\\displaystyle\\\\sum_{j=1}^{K}\\\\sqrt{\\\\tfrac{1}{n_{j}}}\n }\n$$\n\n# Task\n\nIn this competition, your task is to use polymer structure data (SMILES) to predict five key chemical properties derived from molecular-dynamics simulation:\n\n- **Glass transition temperature** (`Tg`)\n- **Fractional free volume** (`FFV`)\n- **Thermal conductivity** (`Tc`)\n- **Polymer density** (`Density`)\n- **Radius of gyration** (`Rg`)\n\nSuccessfully predicting these properties is crucial for scientists to accelerate the design of novel polymers with targeted characteristics, which can be used in various applications.\n\n# Data Files\n\n### `train/valid/test.csv`\n\n| Column | Description |\n|-----------|----------------------------------------------------------|\n| `id` | Unique identifier for each polymer. |\n| `SMILES` | Sequence-like chemical notation of polymer structures. |\n| `Tg` | Glass transition temperature (°C). |\n| `FFV` | Fractional free volume. |\n| `Tc` | Thermal conductivity (W/m·K). |\n| `Density` | Polymer density (g/cm³). 
|\n| `Rg` | Radius of gyration (Å). |\n", 5 | "metric": "Combination of weighted MAE and R^2", 6 | "interface": "deepevolve_interface.py" 7 | }, 8 | "initial_idea": { 9 | "title": "Graph Rationalization with Environment-based Augmentations", 10 | "content": "https://arxiv.org/abs/2206.02886", 11 | "supplement": "https://github.com/liugangcode/GREA" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/polymer/initial_code/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Gang Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
def deepevolve_interface():
    """Run the polymer experiment once and report its metrics to DeepEvolve.

    Returns:
        tuple: ``(True, metrics_dict)`` on success, where ``combined_score``
        blends the inverse weighted MAE and the average R^2 with equal
        weight, or ``(False, error_string)`` with the full traceback on
        failure.
    """
    args = get_args()
    # args.base_dir = "../../../data_cache/polymer"
    try:
        # Record every warning raised during the run so they can be surfaced
        # to the evolution loop alongside the metrics.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            start_time = time()
            results, wmae, r2 = config_and_run(args)
            runtime = time() - start_time

        warning_messages = [str(w.message) for w in caught]

        # Report runtime in minutes, rounded to two decimals.
        runtime = round(runtime / 60, 2)

        # 1/(1+wmae) maps the error into (0, 1] so both terms share a scale.
        current_combined_score = 1 / (1 + wmae) * 0.5 + r2 * 0.5
        metrics = {
            "combined_score": current_combined_score,
            "wmae_inverse": 1 / (1 + wmae),
            "r2_avg": r2,
            "runtime_minutes": runtime,
            **results,
        }
        if warning_messages:
            # Deduplicate, then cap at 10 to keep the report compact.
            warning_messages = list(set(warning_messages))
            if len(warning_messages) > 10:
                warning_messages = warning_messages[:10]
            metrics["program_warnings"] = warning_messages

        return True, metrics
    except Exception as e:
        # Capture full traceback information
        error_traceback = traceback.format_exc()
        error_info = f"""
        Error type: {type(e).__name__}
        Error message: {str(e)}
        Traceback: {error_traceback}
        """
        return False, error_info
class GraphEnvAug(torch.nn.Module):
    def __init__(
        self,
        num_tasks,
        num_layer=5,
        emb_dim=300,
        gnn_type="gin",
        drop_ratio=0.5,
        gamma=0.4,
        use_linear_predictor=False,
    ):
        """GREA model: rationale/environment separation with latent-space
        environment-replacement augmentation.

        Args:
            num_tasks (int): number of labels to be predicted
            num_layer (int): depth of the main GNN encoder; must be >= 2
            emb_dim (int): node/graph embedding width
            gnn_type (str): base GNN name, with an optional "-virtual"
                suffix selecting the virtual-node encoder variant
            drop_ratio (float): dropout ratio used in the encoders
            gamma (float): target fraction of nodes in the rationale,
                enforced via the loss_reg term in forward()
            use_linear_predictor (bool): use a single linear layer instead
                of the two-layer MLP head
        """

        super(GraphEnvAug, self).__init__()

        self.num_layer = num_layer
        self.drop_ratio = drop_ratio
        self.emb_dim = emb_dim
        self.num_tasks = num_tasks
        self.gamma = gamma

        if self.num_layer < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")

        ### GNN to generate node embeddings
        # The part before an optional "-virtual" suffix names the base GNN.
        gnn_name = gnn_type.split("-")[0]
        emb_dim_rat = emb_dim
        if "virtual" in gnn_type:
            # A shallow (2-layer) encoder feeds the rationale separator;
            # the full-depth encoder produces the node embeddings.
            rationale_gnn_node = GNN_node_Virtualnode(
                2,
                emb_dim_rat,
                JK="last",
                drop_ratio=drop_ratio,
                residual=True,
                gnn_name=gnn_name,
            )
            self.graph_encoder = GNN_node_Virtualnode(
                num_layer,
                emb_dim,
                JK="last",
                drop_ratio=drop_ratio,
                residual=True,
                gnn_name=gnn_name,
            )
        else:
            rationale_gnn_node = GNN_node(
                2,
                emb_dim_rat,
                JK="last",
                drop_ratio=drop_ratio,
                residual=True,
                gnn_name=gnn_name,
            )
            self.graph_encoder = GNN_node(
                num_layer,
                emb_dim,
                JK="last",
                drop_ratio=drop_ratio,
                residual=True,
                gnn_name=gnn_name,
            )
        # Separator scores each node (via gate_nn) to split every graph into
        # a rationale part and an environment part.
        self.separator = Separator(
            rationale_gnn_node=rationale_gnn_node,
            gate_nn=torch.nn.Sequential(
                torch.nn.Linear(emb_dim_rat, 2 * emb_dim_rat),
                torch.nn.BatchNorm1d(2 * emb_dim_rat),
                nn_act,
                torch.nn.Dropout(),
                torch.nn.Linear(2 * emb_dim_rat, 1),
            ),
            nn=None,
        )
        rep_dim = emb_dim
        if use_linear_predictor:
            self.predictor = torch.nn.Linear(rep_dim, self.num_tasks)
        else:
            self.predictor = torch.nn.Sequential(
                torch.nn.Linear(rep_dim, 2 * emb_dim),
                torch.nn.BatchNorm1d(2 * emb_dim),
                nn_act,
                torch.nn.Dropout(),
                torch.nn.Linear(2 * emb_dim, self.num_tasks),
            )

    def forward(self, batched_data):
        """Training forward pass.

        Returns a dict with:
            pred_rem: predictions from the rationale embeddings alone
                (one row per graph);
            pred_rep: predictions for every rationale paired with every
                environment in the batch (the environment-replacement
                augmentation, B*B rows for a batch of B graphs);
            loss_reg: regularizer pushing the rationale node fraction
                toward gamma.
        """
        h_node = self.graph_encoder(batched_data)
        h_r, h_env, r_node_num, env_node_num = self.separator(batched_data, h_node)
        # (B, 1, D) + (1, B, D) broadcasts to all rationale/environment
        # pairings, flattened to (B*B, D).
        h_rep = (h_r.unsqueeze(1) + h_env.unsqueeze(0)).view(-1, self.emb_dim)
        pred_rem = self.predictor(h_r)
        pred_rep = self.predictor(h_rep)
        # Mean absolute deviation of the per-graph rationale fraction
        # from the target gamma.
        loss_reg = torch.abs(
            r_node_num / (r_node_num + env_node_num)
            - self.gamma * torch.ones_like(r_node_num)
        ).mean()
        output = {"pred_rep": pred_rep, "pred_rem": pred_rem, "loss_reg": loss_reg}
        return output

    def eval_forward(self, batched_data):
        """Inference pass: predict from the rationale embeddings only."""
        h_node = self.graph_encoder(batched_data)
        h_r, _, _, _ = self.separator(batched_data, h_node)
        pred_rem = self.predictor(h_r)
        return pred_rem


class Separator(torch.nn.Module):
    """Soft rationale/environment splitter.

    Scores every node with a sigmoid gate and pools the gated node
    embeddings into per-graph rationale and environment vectors.
    """

    def __init__(self, rationale_gnn_node, gate_nn, nn=None):
        """
        Args:
            rationale_gnn_node: GNN producing the node features the gate
                is computed from.
            gate_nn: module mapping those features to one logit per node.
            nn: optional transform applied to h_node before pooling.
        """
        super(Separator, self).__init__()
        self.rationale_gnn_node = rationale_gnn_node
        self.gate_nn = gate_nn
        self.nn = nn
        self.reset_parameters()

    def reset_parameters(self):
        # torch_geometric's reset is a no-op for None, so self.nn is safe.
        reset(self.rationale_gnn_node)
        reset(self.gate_nn)
        reset(self.nn)

    def forward(self, batched_data, h_node, size=None):
        """Split the batch into rationale/environment embeddings.

        Returns:
            (h_out, c_out, r_node_num, env_node_num): pooled rationale and
            environment embeddings plus the (soft) node counts of each,
            offset by 1e-8 to keep later ratios finite.
        """
        x = self.rationale_gnn_node(batched_data)
        # batched_data is assumed to be a PyG Batch; .batch maps each node
        # to its graph index.
        batch = batched_data.batch
        x = x.unsqueeze(-1) if x.dim() == 1 else x
        # Number of graphs in the batch, inferred from the last batch index.
        size = batch[-1].item() + 1 if size is None else size

        gate = self.gate_nn(x).view(-1, 1)
        h_node = self.nn(h_node) if self.nn is not None else h_node
        assert gate.dim() == h_node.dim() and gate.size(0) == h_node.size(0)
        gate = torch.sigmoid(gate)

        # gate weights a node's contribution to the rationale pool,
        # (1 - gate) its contribution to the environment pool.
        h_out = scatter_add(gate * h_node, batch, dim=0, dim_size=size)
        c_out = scatter_add((1 - gate) * h_node, batch, dim=0, dim_size=size)

        r_node_num = scatter_add(gate, batch, dim=0, dim_size=size)
        env_node_num = scatter_add((1 - gate), batch, dim=0, dim_size=size)

        return h_out, c_out, r_node_num + 1e-8, env_node_num + 1e-8
Existing methods often lack sufficient examples to effectively learn these optimal subgraph structures. The proposed environment replacement augmentation addresses this gap by generating additional virtual examples, thereby enhancing the model's ability to identify and utilize graph rationales.", 4 | "implementation_notes": "The implementation involves the following steps:\n1. **Rationale-Environment Separation**: Decompose the original graph into rationale and environment subgraphs.\n2. **Environment Replacement Augmentation**: Generate virtual data examples by replacing the environment subgraphs with alternative structures.\n3. **Representation Learning in Latent Space**: Perform learning on both real and augmented examples within a latent space to circumvent the high complexity associated with explicit graph decoding and encoding.\n\nThis approach ensures that the model can effectively learn from augmented data without the computational overhead of processing explicit graph structures.", 5 | "pseudocode": "```python\n# Pseudocode for the proposed framework\n\n# Step 1: Rationale-Environment Separation\ndef separate_graph(graph):\n rationale_subgraph = extract_rationale(graph)\n environment_subgraph = extract_environment(graph)\n return rationale_subgraph, environment_subgraph\n\n# Step 2: Environment Replacement Augmentation\ndef augment_environment(environment_subgraph):\n augmented_environment = generate_alternative_structures(environment_subgraph)\n return augmented_environment\n\n# Step 3: Representation Learning in Latent Space\ndef learn_representation(rationale_subgraph, augmented_environment):\n combined_representation = combine_in_latent_space(rationale_subgraph, augmented_environment)\n model = train_model(combined_representation)\n return model\n\n# Main function\ndef main(graph):\n rationale_subgraph, environment_subgraph = separate_graph(graph)\n augmented_environment = augment_environment(environment_subgraph)\n model = 
learn_representation(rationale_subgraph, augmented_environment)\n return model\n```", 6 | "originality": { 7 | "score": 8, 8 | "positive": "The introduction of environment replacement as an augmentation technique is a novel approach to improving graph rationalization.", 9 | "negative": "While innovative, the concept of data augmentation is not entirely new in machine learning, though its application in graph rationalization is unique." 10 | }, 11 | "future_potential": { 12 | "score": 7, 13 | "positive": "This method has the potential to significantly enhance the interpretability and performance of GNNs in various applications, including drug discovery and material science.", 14 | "negative": "The effectiveness of this approach may vary depending on the complexity and nature of the graphs, and further research is needed to generalize its applicability." 15 | }, 16 | "code_difficulty": { 17 | "score": 6, 18 | "positive": "The framework is designed to be efficient by operating in latent spaces, reducing computational complexity.", 19 | "negative": "Implementing the separation of graphs into rationale and environment subgraphs and generating meaningful augmented environments may require domain-specific knowledge and careful tuning." 
20 | } 21 | } -------------------------------------------------------------------------------- /examples/polymer/initial_metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "combined_score": 0.6769716813345132, 3 | "wmae_inverse": 0.9282183258095142, 4 | "r2_avg": 0.42572503685951235, 5 | "runtime_minutes": 9.37, 6 | "train_wmae": "0.0659 \u00b1 0.0025", 7 | "valid_wmae": "0.0772 \u00b1 0.0004", 8 | "test_wmae": "0.0773 \u00b1 0.0001", 9 | "test_r2_avg": "0.4257 \u00b1 0.0082", 10 | "test_r2_Tg": "0.5135 \u00b1 0.0264", 11 | "test_r2_FFV": "-0.1531 \u00b1 0.0040", 12 | "test_r2_Tc": "0.7762 \u00b1 0.0109", 13 | "test_r2_Density": "0.6586 \u00b1 0.0183", 14 | "test_r2_Rg": "0.3335 \u00b1 0.0260" 15 | } -------------------------------------------------------------------------------- /examples/usp_p2p/README.md: -------------------------------------------------------------------------------- 1 | # USP-P2P Semantic Similarity Challenge 2 | 3 | ## Overview 4 | 5 | In this competition, participants are tasked with building a model to determine the semantic similarity between pairs of phrases extracted from patent documents. The goal is to assist patent attorneys and examiners in identifying whether an invention has been described before. This is achieved by matching key phrases and their contexts within patent documents using the Cooperative Patent Classification (CPC) system. 6 | 7 | ## Problem Description 8 | 9 | Patent documents contain rich technical content and the phrasing used can vary significantly. For example, a model should be able to recognize that the phrases "television set" and "TV set" refer to the same device. Moreover, the model must account for context provided by CPC codes (version 2021.05) which indicate the technical domain. Thus, the task extends beyond simple paraphrase identification to include cases such as matching "strong material" with "steel", where the interpretation can vary by domain. 
10 | 11 | ### Technical Challenge 12 | 13 | Given pairs of phrases (an anchor and a target), alongside a contextual feature defined by the CPC code, your model must predict a similarity score between 0 and 1: 14 | - **0.0**: Unrelated 15 | - **0.25**: Somewhat related (e.g., same high-level domain or even antonyms) 16 | - **0.5**: Synonyms with different breadth (hyponym/hypernym matches) 17 | - **0.75**: Close synonym or abbreviation (e.g., "mobile phone" vs. "cellphone", "TCP" vs. "transmission control protocol") 18 | - **1.0**: Very close match (usually an almost exact match, barring minor differences) 19 | 20 | The model’s performance is evaluated using the Pearson correlation coefficient between the predicted and actual similarity scores. 21 | 22 | ## Data Description 23 | 24 | The dataset provided for this challenge consists of the following files: 25 | 26 | - **train.csv**: The training set containing the phrases, contextual CPC classification, and their similarity scores. 27 | - **test.csv**: The test set, which mirrors the structure of the training set and includes true scores for evaluation. 28 | 29 | ### Data Columns 30 | 31 | Each entry in the dataset consists of: 32 | - **id**: Unique identifier for a phrase pair. 33 | - **anchor**: The first phrase. 34 | - **target**: The second phrase. 35 | - **context**: The CPC classification (version 2021.05) indicating the subject area within which similarity is scored. 36 | - **score**: The similarity score (floating point number between 0 and 1) obtained from manual expert ratings. 37 | 38 | ## Evaluation Metric 39 | 40 | The submission will be evaluated based on the Pearson correlation coefficient between the predicted similarity scores and the actual scores. 
Mathematically, this is represented as: 41 | 42 | ```math 43 | r = \frac{\sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_{i=1}^{n} (x_i - \bar{x})^2} \sqrt{\sum_{i=1}^{n} (y_i - \bar{y})^2}} 44 | ``` 45 | 46 | where $x_i$ and $y_i$ are the predicted and actual scores respectively, and $\bar{x}$ and $\bar{y}$ are their means. 47 | 48 | ## Initial Idea 49 | 50 | ### Fine-tuning the Patent BERT Model 51 | 52 | An initial approach to address this challenge is to fine-tune the [BERT for Patents](https://huggingface.co/anferico/bert-for-patents) model. The following steps outline the proposed methodology: 53 | 54 | 1. **Model Selection**: Start with the `anferico/bert-for-patents` model. 55 | 2. **Architecture Modification**: Attach a single-label regression head on the model to predict the similarity score. 56 | 3. **Data Tokenization**: Tokenize each example by concatenating the anchor phrase, target phrase, and context with a `[SEP]` token. This results in an input format similar to: 57 | ``` 58 | anchor [SEP] target [SEP] context 59 | ``` 60 | 4. **Training**: 61 | - Fine-tune for one epoch. 62 | - Use a batch size of 160. 63 | - Set a learning rate of $2 \times 10^{-5}$. 64 | - Training is conducted without checkpointing or logging. 65 | 5. **Evaluation**: Evaluate the fine-tuned model on the test set by computing the Pearson correlation between the predictions and the provided similarity scores. 66 | 67 | ## Competition Details 68 | 69 | - **Interface**: The competition code should implement the interface defined in `deepevolve_interface.py`. 70 | - **Test Set**: The unseen test set contains approximately 12,000 phrase pairs. Note that a small public test set has been provided for preliminary testing, but it is not used for scoring. 
71 | 72 | ## Resources 73 | 74 | - **Patent BERT Model**: [anferico/bert-for-patents](https://huggingface.co/anferico/bert-for-patents) 75 | - **CPC Codes Information**: Detailed information on CPC codes can be found on the USPTO website and the [CPC archive website](https://www.cooperativepatentclassification.org/). -------------------------------------------------------------------------------- /examples/usp_p2p/info.json: -------------------------------------------------------------------------------- 1 | { 2 | "problem": { 3 | "name": "usp_p2p", 4 | "description": "In this competition, you will train your models on a novel semantic similarity dataset to extract relevant information by matching key phrases in patent documents. Determining the semantic similarity between phrases is critically important during the patent search and examination process to determine if an invention has been described before. For example, if one invention claims \"television set\" and a prior publication describes \"TV set\", a model would ideally recognize these are the same and assist a patent attorney or examiner in retrieving relevant documents. This extends beyond paraphrase identification; if one invention claims a \"strong material\" and another uses \"steel\", that may also be a match. What counts as a \"strong material\" varies per domain (it may be steel in one domain and ripstop fabric in another, but you wouldn't want your parachute made of steel). We have included the Cooperative Patent Classification as the technical domain context as an additional feature to help you disambiguate these situations. \n Can you build a model to match phrases in order to extract contextual information, thereby helping the patent community connect the dots between millions of patent documents? \n Models are evaluated on the Pearson correlation coefficient between the predicted and actual similarity scores. 
\n In the dataset, you are presented pairs of phrases (an anchor and a target phrase) and asked to rate how similar they are on a scale from 0 (not at all similar) to 1 (identical in meaning). This challenge differs from a standard semantic similarity task in that similarity has been scored here within a patent's context, specifically its CPC classification (version 2021.05), which indicates the subject to which the patent relates. For example, while the phrases \"bird\" and \"Cape Cod\" may have low semantic similarity in normal language, the likeness of their meaning is much closer if considered in the context of \"house\".\n\nThis is a code competition in which you will submit code that will be run against an unseen test set. The unseen test set contains approximately 12 000 pairs of phrases. A small public test set has been provided for testing purposes but is not used in scoring.\n\nInformation on the meaning of CPC codes may be found on the USPTO website. The CPC version 2021.05 can be found on the CPC archive website.\n\nScore meanings:\n- 1.0: Very close match (usually exact match except for minor changes in conjugation, quantity, or stopwords).\n- 0.75: Close synonym or abbreviation (for example, \"mobile phone\" vs. 
\"cellphone\" or \"TCP\" → \"transmission control protocol\").\n- 0.5: Synonyms with different breadth (hyponym/hypernym matches).\n- 0.25: Somewhat related (same high‐level domain or antonyms).\n- 0.0: Unrelated.\n\nFiles:\n- train.csv: the training set, containing phrases, contexts, and their similarity scores\n- test.csv: the test set, identical in structure to the training set but including true scores\n\nColumns:\n- id: unique identifier for a phrase pair\n- anchor: the first phrase\n- target: the second phrase\n- context: the CPC classification (version 2021.05) indicating the subject within which similarity is scored\n- score: the similarity value, sourced from one or more manual expert ratings", 5 | "metric": "pearson_correlation", 6 | "interface": "deepevolve_interface.py" 7 | }, 8 | "initial_idea": { 9 | "title": "Fine-tune the Patent BERT model on the USP-P2P dataset", 10 | "content": "The idea first uses the `anferico/bert-for-patents` model with a single-label regression head. 
It then tokenizes each example by joining the anchor, target, and context with `[SEP]`, fine-tunes for one epoch (batch size = 160, learning rate = 2e-5) without checkpointing or logging, and finally evaluates on the test set by computing the Pearson correlation between predicted and actual scores.", 11 | "supplement": "BERT for Patents: https://huggingface.co/anferico/bert-for-patents" 12 | } 13 | } -------------------------------------------------------------------------------- /examples/usp_p2p/initial_code/deepevolve_interface.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import warnings 3 | from main import main 4 | from time import time 5 | import numpy as np 6 | import multiprocessing 7 | 8 | 9 | def run_main_with_timeout(base_dir, timeout_sec): 10 | manager = multiprocessing.Manager() 11 | return_dict = manager.dict() 12 | def target(): 13 | try: 14 | return_dict["metrics"] = main(base_dir) 15 | return_dict["error"] = None 16 | except Exception as e: 17 | return_dict["metrics"] = None 18 | return_dict["error"] = str(e) 19 | p = multiprocessing.Process(target=target) 20 | p.start() 21 | p.join(timeout_sec) 22 | if p.is_alive(): 23 | p.terminate() 24 | p.join() 25 | raise TimeoutError(f"The model runtime exceeded {timeout_sec/60:.2f} minutes and was terminated. 
def deepevolve_interface():
    """Run the USP-P2P training pipeline and package its metrics for DeepEvolve.

    Returns:
        tuple: (True, metrics_dict) on success, where metrics_dict holds the
        combined score (test Pearson), the percentage improvement over the
        initial baseline, the runtime in minutes, the eval loss, and any
        captured warnings; or (False, formatted_error_string) on failure.
    """
    base_dir = "data_cache/usp_p2p"
    # base_dir = "../../../data_cache/usp_p2p"
    try:
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            start_time = time()
            # 1800 s (30 min) hard timeout enforced by a subprocess.
            metrics = run_main_with_timeout(base_dir, 1800)
            # metrics = main(base_dir)
            runtime = time() - start_time

        warning_messages = [str(w.message) for w in caught]
        runtime_minutes = round(runtime / 60, 2)

        # Sanitize NaNs BEFORE computing the improvement ratio; otherwise a
        # NaN eval_pearson would propagate into the reported ratio even
        # though combined_score is reset to 0.
        if np.isnan(metrics["eval_pearson"]):
            metrics["eval_pearson"] = 0
        if np.isnan(metrics["eval_loss"]):
            metrics["eval_loss"] = "nan"

        # Baseline Pearson correlation of the unmodified initial program
        # (see examples/usp_p2p/initial_metrics.json).
        initial_score = 0.803648329426078
        ratio = round(
            (metrics["eval_pearson"] - initial_score) / initial_score * 100, 2
        )

        metrics = {
            "combined_score": metrics["eval_pearson"],
            "improvement_percentage_to_initial": ratio,
            "runtime_minutes": runtime_minutes,
            "eval_loss": metrics["eval_loss"],
        }
        if warning_messages:
            # Deduplicate while preserving first-seen order so the reported
            # subset is deterministic (a plain set() would reorder randomly),
            # then cap the list at 10 entries.
            warning_messages = list(dict.fromkeys(warning_messages))
            if len(warning_messages) > 10:
                warning_messages = warning_messages[:10]
            metrics["program_warnings"] = warning_messages

        return True, metrics

    except Exception as e:
        error_traceback = traceback.format_exc()
        error_info = (
            f"Error type: {type(e).__name__}\n"
            f"Error message: {e}\n"
            f"Traceback:\n{error_traceback}"
        )
        return False, error_info
def compute_metrics(eval_pred):
    """Return the Pearson correlation between model predictions and labels.

    Args:
        eval_pred: pair ``(predictions, labels)`` as supplied by the HF
            ``Trainer``; predictions arrive with a trailing singleton
            dimension from the regression head.

    Returns:
        dict: ``{"pearson": r}`` where ``r`` is the correlation coefficient.
    """
    predictions, labels = eval_pred
    flat_predictions = predictions.reshape(-1)
    # Off-diagonal entry of the 2x2 correlation matrix is the coefficient.
    pearson = np.corrcoef(labels, flat_predictions)[0, 1]
    return {"pearson": pearson}
| "train": split["train"], 67 | "validation": split["test"], 68 | "test": raw["test"] 69 | } 70 | 71 | # load tokenizer and model 72 | tokenizer = AutoTokenizer.from_pretrained(cfg.model_name) 73 | model = AutoModelForSequenceClassification.from_pretrained( 74 | cfg.model_name, 75 | num_labels=1, 76 | problem_type="regression", 77 | ) 78 | 79 | # tokenize and attach labels for regression 80 | tokenized = {} 81 | for split in ["train", "validation", "test"]: 82 | tokenized[split] = data[split].map( 83 | lambda batch: preprocess_batch(batch, tokenizer, cfg.max_length), 84 | batched=True, 85 | remove_columns=["id", "anchor", "target", "context", "score"], 86 | load_from_cache_file=False, 87 | ) 88 | tokenized[split] = tokenized[split].add_column( 89 | "labels", data[split]["score"] 90 | ) 91 | 92 | # training arguments: no saving or logging 93 | args = TrainingArguments( 94 | per_device_train_batch_size=cfg.train_batch_size, 95 | per_device_eval_batch_size=cfg.eval_batch_size, 96 | num_train_epochs=cfg.epochs, 97 | learning_rate=cfg.learning_rate, 98 | seed=cfg.seed, 99 | logging_strategy="no", 100 | save_strategy="no", 101 | report_to=[], 102 | output_dir="." 
103 | ) 104 | 105 | trainer = Trainer( 106 | model=model, 107 | args=args, 108 | train_dataset=tokenized["train"], 109 | eval_dataset=tokenized["validation"], 110 | data_collator=DataCollatorWithPadding(tokenizer), 111 | compute_metrics=compute_metrics, 112 | ) 113 | 114 | trainer.train() 115 | 116 | test_metrics = trainer.evaluate(eval_dataset=tokenized["test"]) 117 | 118 | if test_metrics.get("eval_pearson") is None: 119 | raise ValueError("Test set metrics don't have the key 'eval_pearson'") 120 | 121 | return test_metrics 122 | 123 | if __name__ == "__main__": 124 | base_dir = "../../../data_cache/usp_p2p" 125 | test_metrics = main(base_dir) 126 | print("Test set metrics:", test_metrics) -------------------------------------------------------------------------------- /examples/usp_p2p/initial_idea.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The idea involves fine-tuning the BERT model specifically trained for patent data (BERT for Patents) on the USP-P2P dataset. This process involves using a single-label regression head for the task. The dataset examples are tokenized by concatenating anchor, target, and context elements using `[SEP]` as a separator. The model is trained for one epoch using a batch size of 160 and a learning rate of 2e-5. No checkpointing or logging is applied during training. Evaluation on the test set is performed by calculating the Pearson correlation between the model's predicted scores and the actual scores.", 3 | "motivation": "The motivation is to leverage a pretrained BERT model tailored for patents to enhance performance on a specific patent paragraph-to-paragraph (USP-P2P) similarity task, potentially improving the accuracy and efficiency of patent-related text processing.", 4 | "implementation_notes": "1. Use the 'anferico/bert-for-patents' as the base model.\n2. Add a regression head to the model output.\n3. 
Tokenize input by concatenating anchor, target, and context text with `[SEP]`.\n4. Set training parameters: batch size = 160, learning rate = 2e-5.\n5. Limit fine-tuning to one epoch, ignoring checkpointing and logging.\n6. Evaluate using Pearson correlation between predictions and true scores.", 5 | "pseudocode": "1. Load pre-trained `anferico/bert-for-patents` model with regression head.\n2. Prepare input data by joining anchor, target, and context with `[SEP]`.\n3. Fine-tune model for 1 epoch:\n - Set batch size = 160\n - Set learning rate = 2e-5\n4. Calculate Pearson correlation on test set predictions.", 6 | "originality": { 7 | "score": 3, 8 | "positive": "Combines existing pretrained model with a new dataset for evaluation, which is a common approach but applied to a specialized use case.", 9 | "negative": "Utilizes well-established methods and tools (BERT, fine-tuning) with limited innovation in methodology." 10 | }, 11 | "future_potential": { 12 | "score": 4, 13 | "positive": "Successfully applying this approach could improve patent text analysis tools, aiding legal, research, and corporate sectors.", 14 | "negative": "Specific to patent datasets and may have limited use outside this domain without further adaptation." 15 | }, 16 | "code_difficulty": { 17 | "score": 2, 18 | "positive": "Implementation leverages existing libraries and frameworks for BERT, making it accessible.", 19 | "negative": "Requires understanding of BERT architectures and fine-tuning processes to execute effectively." 
20 | } 21 | } -------------------------------------------------------------------------------- /examples/usp_p2p/initial_metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "combined_score": 0.803648329426078, 3 | "improvement_percentage_to_initial": 0.0, 4 | "runtime_minutes": 14.36, 5 | "eval_loss": 0.02417738549411297 6 | } -------------------------------------------------------------------------------- /examples/usp_p2p/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | transformers 3 | datasets 4 | accelerate>=0.26.0 -------------------------------------------------------------------------------- /requirements-mini.txt: -------------------------------------------------------------------------------- 1 | openai-agents 2 | rich 3 | GitPython 4 | PyYAML 5 | # format code 6 | black 7 | # code distance 8 | rapidfuzz 9 | # config 10 | hydra-core 11 | omegaconf 12 | # kaggle for downloading datasets 13 | kaggle 14 | # for circle packing example 15 | matplotlib 16 | shapely 17 | scipy -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | agents==1.4.0 2 | albumentations==2.0.8 3 | black==25.1.0 4 | datasets==3.6.0 5 | GitPython==3.1.44 6 | h5py==3.14.0 7 | hydra-core==1.3.2 8 | imageio==2.37.0 9 | joblib==1.5.0 10 | kaggle==1.7.4.5 11 | lightgbm==4.6.0 12 | matplotlib==3.10.3 13 | numpy==2.3.1 14 | ogb==1.3.6 15 | omegaconf==2.3.0 16 | opencv_python==4.11.0.86 17 | opencv_python_headless==4.11.0.86 18 | pandas==2.3.0 19 | Pillow==11.2.1 20 | pydantic==2.11.7 21 | rapidfuzz==3.13.0 22 | rdkit==2023.9.5 23 | rich==14.0.0 24 | scikit_learn==1.7.0 25 | scipy==1.16.0 26 | Shapely==2.1.1 27 | skimage==0.0 28 | timm==1.0.15 29 | torch==2.7.1 30 | torch_geometric==2.6.1 31 | torchvision==0.22.1 32 | tqdm==4.67.1 33 | 
transformers==4.52.4 -------------------------------------------------------------------------------- /run_example.sh: -------------------------------------------------------------------------------- 1 | python deepevolve.py \ 2 | query="'You are an expert mathematician. Your task is to improve an algorithm that maximizes the sum of circle radii in the circle-packing problem within a unit square, using between 26 and 32 circles. Do not develop neural-network-based models. The algorithm must produce exact, valid packings that satisfy these constraints: circles not overlap and must remain entirely within the square.'" \ 3 | problem="circle_packing" \ 4 | checkpoint="ckpt" \ 5 | checkpoint_interval=20 6 | 7 | python deepevolve.py \ 8 | query="Your task is to improve the graph rationalization method for more accurate and interpretable molecular property prediction" \ 9 | problem="molecule" \ 10 | max_iterations=100 11 | 12 | python deepevolve.py \ 13 | query="'Your task is to improve the nucleus detection models in a Kaggle competition within a compute budget of an A6k GPU with a maximum runtime of 30 minutes. You should significantly improve both the performance of the initial idea and its efficiency.'" \ 14 | problem="nuclei_image" 15 | 16 | 17 | python deepevolve.py \ 18 | query="Your task is to improve the performance of the winning solution for the Kaggle competition on Parkinson disease progression prediction. You may propose a completely new approach that differs from the winning solution if you believe it will perform better." \ 19 | problem="parkinson_disease" 20 | 21 | python deepevolve.py \ 22 | query="'Your task is to significantly improve polymer property prediction for five properties in the competition. The input SMILES strings are the monomer structures of polymers, using asterisks (*) to mark the polymerization points. 
You should improve the initial idea by focusing on how to better incorporate polymerization inductive bias into the models to improve the weighted mean absolute error and the R-squared value for each property. You should explore different ways to exploit polymer structures or properties and find the best. Your time budget is 30 minutes. Make sure you implement your idea within the time limit rather than create a placeholder.'" \ 23 | problem="polymer" 24 | 25 | python deepevolve.py \ 26 | query="'Your task is to fine-tune Patent BERT to predict semantic similarity between phrase pairs from U.S. patents. Improve model performance, optimize training time and inference latency, and ensure the fixed three-epoch run finishes in thirty minutes. Focus solely on technical model and algorithm development. No legal-style assistance in your response.'" \ 27 | problem="usp_p2p" 28 | -------------------------------------------------------------------------------- /utils/datatypes.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | # Used in Researcher 4 | 5 | reasoning_models = ["o4-mini", "o3-mini", "o1-mini", "o1", "o3", "o1-pro"] 6 | 7 | class ResearchWork(BaseModel): 8 | title: str 9 | "The title of the research paper." 10 | 11 | link: str 12 | "The link to the research paper." 13 | 14 | contributions: list[str] 15 | "A list of contributions of the research paper." 16 | 17 | limitations: list[str] 18 | "A list of limitations of the research paper." 19 | 20 | 21 | class EvaluationData(BaseModel): 22 | score: int 23 | "The score of the idea between 0 and 10. Higher is better." 24 | 25 | positive: str 26 | "A positive reason for the evaluation." 27 | 28 | negative: str 29 | "A negative reason for the evaluation." 
30 | 31 | 32 | class IdeaData(BaseModel): 33 | description: str 34 | "One or two sentences describing the new idea including (1) the problem the idea solves, (2) how the idea solves it, and (3) what makes the idea new." 35 | 36 | motivation: str 37 | "The motivation for the new idea on why it is different from existing methods and why it can improve the existing methods for the target problem." 38 | 39 | implementation_notes: str 40 | "Notes on how to implement the new idea (e.g. pseudocode, logic, etc.)." 41 | 42 | pseudocode: str 43 | "A pseudocode implementation of the new idea if available." 44 | 45 | originality: EvaluationData 46 | "Self-assessment of the originality of the new idea." 47 | 48 | future_potential: EvaluationData 49 | "Self-assessment of the future potential of the new idea." 50 | 51 | code_difficulty: EvaluationData 52 | "Self-assessment of the difficulty of implementing the new idea." 53 | 54 | 55 | class ReportData(BaseModel): 56 | markdown_report: str 57 | """The final report""" 58 | 59 | idea: IdeaData 60 | """The new idea from the research report.""" 61 | 62 | related_work: list[ResearchWork] 63 | """A list of existing research works that are relevant to the query.""" 64 | 65 | class WebSearchItem(BaseModel): 66 | reason: str 67 | "Your reasoning for why this search is important to the query." 68 | 69 | query: str 70 | "The search term to use for the web search." 71 | 72 | 73 | class WebSearchPlan(BaseModel): 74 | searches: list[WebSearchItem] 75 | """A list of web searches to perform to best answer the query.""" 76 | 77 | 78 | class ReflectionPlan(BaseModel): 79 | is_sufficient: bool 80 | "Whether the report is sufficient to answer the query." 81 | 82 | knowledge_gaps: list[str] 83 | "The information that the report lacks. If is_sufficient is true, this should be empty." 84 | 85 | follow_up_queries: list[WebSearchItem] 86 | "A list of follow-up queries to perform to best answer the query. If is_sufficient is true, this should be empty." 
def format_metrics_safe(metrics: Dict[str, Any]) -> str:
    """
    Safely format metrics dictionary for logging, handling both numeric and string values.

    Args:
        metrics: Dictionary of metric names to values

    Returns:
        Formatted string representation of metrics (empty string for an
        empty or missing dictionary)
    """
    if not metrics:
        return ""

    formatted_parts = []
    for name, value in metrics.items():
        # bool is a subclass of int, so it must be excluded explicitly;
        # otherwise flags would be rendered as "1.0000"/"0.0000" instead
        # of "True"/"False".
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            try:
                # Only apply float formatting to genuinely numeric values
                formatted_parts.append(f"{name}={value:.4f}")
            except (ValueError, TypeError):
                # Fallback to string representation if formatting fails
                formatted_parts.append(f"{name}={value}")
        else:
            # For non-numeric values (strings, bools, etc.), just convert to string
            formatted_parts.append(f"{name}={value}")

    return ", ".join(formatted_parts)
41 | 42 | Args: 43 | parent_metrics: Parent program metrics 44 | child_metrics: Child program metrics 45 | 46 | Returns: 47 | Formatted string representation of improvements 48 | """ 49 | if not parent_metrics or not child_metrics: 50 | return "" 51 | 52 | improvement_parts = [] 53 | for metric, child_value in child_metrics.items(): 54 | if metric in parent_metrics: 55 | parent_value = parent_metrics[metric] 56 | # Only calculate improvement for numeric values 57 | if isinstance(child_value, (int, float)) and isinstance(parent_value, (int, float)): 58 | try: 59 | diff = child_value - parent_value 60 | improvement_parts.append(f"{metric}={diff:+.4f}") 61 | except (ValueError, TypeError): 62 | # Skip non-numeric comparisons 63 | continue 64 | 65 | return ", ".join(improvement_parts) --------------------------------------------------------------------------------