├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── README_rewardbench.md ├── analysis ├── calculate_ldi.ipynb ├── figure1_2_nocaps.pdf ├── plot_scores.ipynb └── sample_data │ ├── gpt-4-scores.csv │ ├── prometheus-scores.csv │ └── skyworks-scores.csv ├── rewardbench ├── .DS_Store ├── __init__.py ├── chattemplates.py ├── constants.py ├── dpo.py ├── generative.py ├── models │ ├── README.md │ ├── __init__.py │ ├── armorm.py │ ├── beaver.py │ ├── betterpairrm.py │ ├── cohere.py │ ├── grm.py │ ├── internlm.py │ ├── openassistant.py │ ├── openbmb.py │ ├── pairrm.py │ ├── pipeline.py │ ├── shp.py │ ├── slicpairpm.py │ ├── starling.py │ └── ziya.py ├── rewardbench.py └── utils.py ├── scripts ├── __init__.py ├── configs │ ├── README.md │ ├── beaker_eval.yaml │ ├── beaker_train.yaml │ ├── eval_bon_configs.yaml │ ├── eval_configs.yaml │ ├── stage3_no_offloading.conf │ └── train_configs.yaml ├── model.py ├── run.sh ├── run_bon.py ├── run_dpo.py ├── run_generative.py ├── run_generative.sh ├── run_lang_pairs.py ├── run_rm.py ├── submit_eval_jobs.py ├── submit_train_jobs.py └── train_rm.py ├── setup.py └── tests ├── __init__.py ├── test_data.py ├── test_package.py └── test_utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/README.md -------------------------------------------------------------------------------- /README_rewardbench.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/README_rewardbench.md -------------------------------------------------------------------------------- /analysis/calculate_ldi.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/analysis/calculate_ldi.ipynb -------------------------------------------------------------------------------- /analysis/figure1_2_nocaps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/analysis/figure1_2_nocaps.pdf -------------------------------------------------------------------------------- /analysis/plot_scores.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/analysis/plot_scores.ipynb -------------------------------------------------------------------------------- /analysis/sample_data/gpt-4-scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/analysis/sample_data/gpt-4-scores.csv -------------------------------------------------------------------------------- /analysis/sample_data/prometheus-scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/analysis/sample_data/prometheus-scores.csv -------------------------------------------------------------------------------- /analysis/sample_data/skyworks-scores.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/analysis/sample_data/skyworks-scores.csv -------------------------------------------------------------------------------- /rewardbench/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/.DS_Store -------------------------------------------------------------------------------- /rewardbench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/__init__.py -------------------------------------------------------------------------------- /rewardbench/chattemplates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/chattemplates.py -------------------------------------------------------------------------------- /rewardbench/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/constants.py -------------------------------------------------------------------------------- /rewardbench/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/dpo.py -------------------------------------------------------------------------------- /rewardbench/generative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/generative.py -------------------------------------------------------------------------------- /rewardbench/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/README.md -------------------------------------------------------------------------------- /rewardbench/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/__init__.py -------------------------------------------------------------------------------- /rewardbench/models/armorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/armorm.py -------------------------------------------------------------------------------- /rewardbench/models/beaver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/beaver.py -------------------------------------------------------------------------------- /rewardbench/models/betterpairrm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/betterpairrm.py -------------------------------------------------------------------------------- /rewardbench/models/cohere.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/cohere.py -------------------------------------------------------------------------------- /rewardbench/models/grm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/grm.py -------------------------------------------------------------------------------- /rewardbench/models/internlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/internlm.py -------------------------------------------------------------------------------- /rewardbench/models/openassistant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/openassistant.py -------------------------------------------------------------------------------- /rewardbench/models/openbmb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/openbmb.py -------------------------------------------------------------------------------- /rewardbench/models/pairrm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/pairrm.py -------------------------------------------------------------------------------- /rewardbench/models/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/pipeline.py -------------------------------------------------------------------------------- /rewardbench/models/shp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/shp.py -------------------------------------------------------------------------------- /rewardbench/models/slicpairpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/slicpairpm.py -------------------------------------------------------------------------------- /rewardbench/models/starling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/starling.py -------------------------------------------------------------------------------- /rewardbench/models/ziya.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/models/ziya.py -------------------------------------------------------------------------------- /rewardbench/rewardbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/rewardbench.py -------------------------------------------------------------------------------- /rewardbench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/rewardbench/utils.py -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/configs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/configs/README.md -------------------------------------------------------------------------------- /scripts/configs/beaker_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/configs/beaker_eval.yaml -------------------------------------------------------------------------------- /scripts/configs/beaker_train.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/configs/beaker_train.yaml -------------------------------------------------------------------------------- /scripts/configs/eval_bon_configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/configs/eval_bon_configs.yaml -------------------------------------------------------------------------------- /scripts/configs/eval_configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/configs/eval_configs.yaml -------------------------------------------------------------------------------- /scripts/configs/stage3_no_offloading.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/configs/stage3_no_offloading.conf -------------------------------------------------------------------------------- /scripts/configs/train_configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/configs/train_configs.yaml -------------------------------------------------------------------------------- /scripts/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/model.py -------------------------------------------------------------------------------- /scripts/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/run.sh -------------------------------------------------------------------------------- /scripts/run_bon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/run_bon.py -------------------------------------------------------------------------------- /scripts/run_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/run_dpo.py -------------------------------------------------------------------------------- /scripts/run_generative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/run_generative.py -------------------------------------------------------------------------------- /scripts/run_generative.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/run_generative.sh -------------------------------------------------------------------------------- /scripts/run_lang_pairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/run_lang_pairs.py -------------------------------------------------------------------------------- /scripts/run_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/run_rm.py -------------------------------------------------------------------------------- /scripts/submit_eval_jobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/submit_eval_jobs.py -------------------------------------------------------------------------------- /scripts/submit_train_jobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/submit_train_jobs.py -------------------------------------------------------------------------------- /scripts/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/scripts/train_rm.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/tests/test_data.py -------------------------------------------------------------------------------- /tests/test_package.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/tests/test_package.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guijinSON/MM-Eval/HEAD/tests/test_utils.py --------------------------------------------------------------------------------