├── .gitignore ├── LICENSE ├── README.md ├── accelerate_config.yaml ├── assets ├── case.gif ├── daedal_logo.png ├── method.png └── teaser.png ├── dllm_eval ├── __init__.py ├── __main__.py ├── api │ ├── __init__.py │ ├── filter.py │ ├── group.py │ ├── instance.py │ ├── metrics.py │ ├── model.py │ ├── registry.py │ ├── samplers.py │ └── task.py ├── caching │ ├── __init__.py │ └── cache.py ├── decontamination │ ├── __init__.py │ ├── archiver.py │ ├── decontaminate.py │ └── janitor.py ├── evaluator.py ├── evaluator_utils.py ├── filters │ ├── __init__.py │ ├── custom.py │ ├── decontamination.py │ ├── extraction.py │ ├── selection.py │ └── transformation.py ├── loggers │ ├── __init__.py │ ├── evaluation_tracker.py │ ├── utils.py │ └── wandb_logger.py ├── models │ ├── __init__.py │ ├── configuration_llada.py │ ├── dummy.py │ ├── huggingface.py │ ├── modeling_llada.py │ └── utils.py ├── prompts │ └── __init__.py ├── tasks │ ├── __init__.py │ ├── gsm8k │ │ ├── gsm8k.yaml │ │ └── utils.py │ ├── humaneval │ │ ├── humaneval.yaml │ │ └── utils.py │ ├── math500 │ │ ├── math500.yaml │ │ └── utils.py │ └── mbpp │ │ ├── mbpp.yaml │ │ └── utils.py └── utils.py ├── evaluation_script.py ├── metrics ├── gsm8k.py ├── humaneval.py ├── math500.py └── mbpp.py ├── models ├── LLaDA.py ├── LLaDA_DAEDAL.py └── __init__.py ├── requirements.txt └── scripts ├── eval_LLaDA_1p5_Baseline.sh ├── eval_LLaDA_1p5_DAEDAL.sh ├── eval_LLaDA_Baseline.sh └── eval_LLaDA_DAEDAL.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/README.md -------------------------------------------------------------------------------- /accelerate_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/accelerate_config.yaml -------------------------------------------------------------------------------- /assets/case.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/assets/case.gif -------------------------------------------------------------------------------- /assets/daedal_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/assets/daedal_logo.png -------------------------------------------------------------------------------- /assets/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/assets/method.png -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /dllm_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/__init__.py -------------------------------------------------------------------------------- /dllm_eval/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/__main__.py -------------------------------------------------------------------------------- /dllm_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dllm_eval/api/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/api/filter.py -------------------------------------------------------------------------------- /dllm_eval/api/group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/api/group.py -------------------------------------------------------------------------------- /dllm_eval/api/instance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/api/instance.py -------------------------------------------------------------------------------- /dllm_eval/api/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/api/metrics.py -------------------------------------------------------------------------------- /dllm_eval/api/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/api/model.py -------------------------------------------------------------------------------- /dllm_eval/api/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/api/registry.py -------------------------------------------------------------------------------- /dllm_eval/api/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/api/samplers.py -------------------------------------------------------------------------------- /dllm_eval/api/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/api/task.py -------------------------------------------------------------------------------- /dllm_eval/caching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dllm_eval/caching/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/caching/cache.py -------------------------------------------------------------------------------- /dllm_eval/decontamination/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dllm_eval/decontamination/archiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/decontamination/archiver.py -------------------------------------------------------------------------------- /dllm_eval/decontamination/decontaminate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/decontamination/decontaminate.py -------------------------------------------------------------------------------- /dllm_eval/decontamination/janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/decontamination/janitor.py -------------------------------------------------------------------------------- /dllm_eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/evaluator.py -------------------------------------------------------------------------------- /dllm_eval/evaluator_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/evaluator_utils.py -------------------------------------------------------------------------------- /dllm_eval/filters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/filters/__init__.py -------------------------------------------------------------------------------- /dllm_eval/filters/custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/filters/custom.py -------------------------------------------------------------------------------- /dllm_eval/filters/decontamination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/filters/decontamination.py -------------------------------------------------------------------------------- /dllm_eval/filters/extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/filters/extraction.py -------------------------------------------------------------------------------- /dllm_eval/filters/selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/filters/selection.py -------------------------------------------------------------------------------- /dllm_eval/filters/transformation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/filters/transformation.py -------------------------------------------------------------------------------- /dllm_eval/loggers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/loggers/__init__.py -------------------------------------------------------------------------------- /dllm_eval/loggers/evaluation_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/loggers/evaluation_tracker.py -------------------------------------------------------------------------------- /dllm_eval/loggers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/loggers/utils.py -------------------------------------------------------------------------------- /dllm_eval/loggers/wandb_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/loggers/wandb_logger.py -------------------------------------------------------------------------------- /dllm_eval/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/models/__init__.py -------------------------------------------------------------------------------- /dllm_eval/models/configuration_llada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/models/configuration_llada.py -------------------------------------------------------------------------------- /dllm_eval/models/dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/models/dummy.py -------------------------------------------------------------------------------- /dllm_eval/models/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/models/huggingface.py -------------------------------------------------------------------------------- /dllm_eval/models/modeling_llada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/models/modeling_llada.py -------------------------------------------------------------------------------- /dllm_eval/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/models/utils.py -------------------------------------------------------------------------------- /dllm_eval/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/prompts/__init__.py -------------------------------------------------------------------------------- /dllm_eval/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/__init__.py -------------------------------------------------------------------------------- /dllm_eval/tasks/gsm8k/gsm8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/gsm8k/gsm8k.yaml -------------------------------------------------------------------------------- /dllm_eval/tasks/gsm8k/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/gsm8k/utils.py -------------------------------------------------------------------------------- /dllm_eval/tasks/humaneval/humaneval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/humaneval/humaneval.yaml -------------------------------------------------------------------------------- /dllm_eval/tasks/humaneval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/humaneval/utils.py -------------------------------------------------------------------------------- /dllm_eval/tasks/math500/math500.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/math500/math500.yaml -------------------------------------------------------------------------------- /dllm_eval/tasks/math500/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/math500/utils.py -------------------------------------------------------------------------------- /dllm_eval/tasks/mbpp/mbpp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/mbpp/mbpp.yaml -------------------------------------------------------------------------------- /dllm_eval/tasks/mbpp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/tasks/mbpp/utils.py -------------------------------------------------------------------------------- /dllm_eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/dllm_eval/utils.py -------------------------------------------------------------------------------- /evaluation_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/evaluation_script.py -------------------------------------------------------------------------------- /metrics/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/metrics/gsm8k.py -------------------------------------------------------------------------------- /metrics/humaneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/metrics/humaneval.py -------------------------------------------------------------------------------- /metrics/math500.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/metrics/math500.py -------------------------------------------------------------------------------- /metrics/mbpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/metrics/mbpp.py -------------------------------------------------------------------------------- /models/LLaDA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/models/LLaDA.py -------------------------------------------------------------------------------- /models/LLaDA_DAEDAL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/models/LLaDA_DAEDAL.py -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/models/__init__.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/eval_LLaDA_1p5_Baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/scripts/eval_LLaDA_1p5_Baseline.sh -------------------------------------------------------------------------------- /scripts/eval_LLaDA_1p5_DAEDAL.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/scripts/eval_LLaDA_1p5_DAEDAL.sh -------------------------------------------------------------------------------- /scripts/eval_LLaDA_Baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/scripts/eval_LLaDA_Baseline.sh -------------------------------------------------------------------------------- /scripts/eval_LLaDA_DAEDAL.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Li-Jinsong/DAEDAL/HEAD/scripts/eval_LLaDA_DAEDAL.sh --------------------------------------------------------------------------------