├── README.md ├── SIRLTrain ├── README.md ├── partial_kl │ ├── kl_ppo │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── core_algos.cpython-310.pyc │ │ │ ├── metric_utils.cpython-310.pyc │ │ │ ├── ray_trainer.cpython-310.pyc │ │ │ └── reward.cpython-310.pyc │ │ ├── core_algos.py │ │ ├── metric_utils.py │ │ ├── ray_trainer.py │ │ └── reward.py │ └── partialKL_ppo.py ├── reward_func │ ├── batch_score_gurobi.py │ ├── content_utils.py │ ├── executor.py │ ├── stage1_reward_gurobi.py │ ├── stage2_reward_gurobi.py │ └── utils.py ├── training_scripts │ ├── run_partialKL.sh │ ├── run_withKL.sh │ └── run_withoutKL.sh └── trainset │ ├── gurobi_examples_OR_test.parquet │ └── gurobi_examples_OR_train.parquet ├── pic ├── answer.png ├── prompt.png └── question.png ├── reproduce_copt.ipynb ├── reproduce_gurobi.ipynb ├── requirements.txt ├── rule_prompt_utils.py ├── test_data ├── IndustryOR_fixedV2.json ├── MAMO_ComplexLP.json ├── MAMO_ComplexLP_fixed.jsonl ├── MAMO_EasyLP.json ├── MAMO_EasyLP_fixed.jsonl ├── NL4OPT.jsonl ├── OptMATH_Bench_166.jsonl ├── OptMATH_Bench_193.jsonl ├── OptiBench.jsonl └── README.md └── utils.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/README.md -------------------------------------------------------------------------------- /SIRLTrain/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/README.md -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/__init__.py -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/__pycache__/core_algos.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/__pycache__/core_algos.cpython-310.pyc -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/__pycache__/metric_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/__pycache__/metric_utils.cpython-310.pyc -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/__pycache__/ray_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/__pycache__/ray_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/__pycache__/reward.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/__pycache__/reward.cpython-310.pyc -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/core_algos.py -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/metric_utils.py -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/ray_trainer.py -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/kl_ppo/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/kl_ppo/reward.py -------------------------------------------------------------------------------- /SIRLTrain/partial_kl/partialKL_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/partial_kl/partialKL_ppo.py -------------------------------------------------------------------------------- /SIRLTrain/reward_func/batch_score_gurobi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/reward_func/batch_score_gurobi.py -------------------------------------------------------------------------------- /SIRLTrain/reward_func/content_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/reward_func/content_utils.py -------------------------------------------------------------------------------- /SIRLTrain/reward_func/executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/reward_func/executor.py -------------------------------------------------------------------------------- /SIRLTrain/reward_func/stage1_reward_gurobi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/reward_func/stage1_reward_gurobi.py -------------------------------------------------------------------------------- /SIRLTrain/reward_func/stage2_reward_gurobi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/reward_func/stage2_reward_gurobi.py -------------------------------------------------------------------------------- /SIRLTrain/reward_func/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/reward_func/utils.py -------------------------------------------------------------------------------- /SIRLTrain/training_scripts/run_partialKL.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/training_scripts/run_partialKL.sh -------------------------------------------------------------------------------- /SIRLTrain/training_scripts/run_withKL.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/training_scripts/run_withKL.sh -------------------------------------------------------------------------------- /SIRLTrain/training_scripts/run_withoutKL.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/training_scripts/run_withoutKL.sh -------------------------------------------------------------------------------- /SIRLTrain/trainset/gurobi_examples_OR_test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/trainset/gurobi_examples_OR_test.parquet -------------------------------------------------------------------------------- /SIRLTrain/trainset/gurobi_examples_OR_train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/SIRLTrain/trainset/gurobi_examples_OR_train.parquet -------------------------------------------------------------------------------- /pic/answer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/pic/answer.png -------------------------------------------------------------------------------- /pic/prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/pic/prompt.png -------------------------------------------------------------------------------- /pic/question.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/pic/question.png -------------------------------------------------------------------------------- /reproduce_copt.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/reproduce_copt.ipynb -------------------------------------------------------------------------------- /reproduce_gurobi.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/reproduce_gurobi.ipynb -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/requirements.txt -------------------------------------------------------------------------------- /rule_prompt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/rule_prompt_utils.py -------------------------------------------------------------------------------- /test_data/IndustryOR_fixedV2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/IndustryOR_fixedV2.json -------------------------------------------------------------------------------- /test_data/MAMO_ComplexLP.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/MAMO_ComplexLP.json -------------------------------------------------------------------------------- /test_data/MAMO_ComplexLP_fixed.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/MAMO_ComplexLP_fixed.jsonl -------------------------------------------------------------------------------- /test_data/MAMO_EasyLP.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/MAMO_EasyLP.json -------------------------------------------------------------------------------- /test_data/MAMO_EasyLP_fixed.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/MAMO_EasyLP_fixed.jsonl -------------------------------------------------------------------------------- /test_data/NL4OPT.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/NL4OPT.jsonl -------------------------------------------------------------------------------- /test_data/OptMATH_Bench_166.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/OptMATH_Bench_166.jsonl -------------------------------------------------------------------------------- /test_data/OptMATH_Bench_193.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/OptMATH_Bench_193.jsonl -------------------------------------------------------------------------------- /test_data/OptiBench.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/OptiBench.jsonl -------------------------------------------------------------------------------- /test_data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/test_data/README.md -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cardinal-Operations/SIRL/HEAD/utils.py --------------------------------------------------------------------------------