├── .gitignore ├── README.md ├── dataset ├── GSM8K.json ├── GSM8K_dataset_embedding.json └── description.json ├── examples ├── aug_gen.yaml ├── data_prepare.py ├── eval_config.yaml ├── eval_generation.yaml ├── eval_judge.yaml ├── generation_config_MMLU.yaml ├── generation_config_example.yaml └── llama-factory_config │ └── train.yaml ├── images ├── architecture.png ├── overview_UniGen.png ├── unigen_icon.png └── unigen_logo.png ├── inference ├── evaluation_results.csv ├── test_data │ └── combined_dataset_20240619_100140.json └── test_res │ ├── chatgpt │ └── evaluated_combined_dataset_20240619_100140.json │ └── evaluation_results.csv ├── setup.py └── unigen ├── augmentation.py ├── cli.py ├── eval.py ├── generation.py ├── inference.py ├── model_config.yaml ├── readme.md └── utils ├── IO.py ├── LLM_model.py ├── RAG_eval.py ├── __init__.py ├── attribute.py ├── challenge.py ├── configuration.py ├── data_format.py ├── diversity.py ├── embedding.py ├── eval_utils.py ├── file_process.py ├── generation_utils.py ├── group_check.py ├── knowledge.py ├── math_eval.py ├── prompt.py └── self_reflection.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/README.md -------------------------------------------------------------------------------- /dataset/GSM8K.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/dataset/GSM8K.json -------------------------------------------------------------------------------- /dataset/GSM8K_dataset_embedding.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/dataset/GSM8K_dataset_embedding.json -------------------------------------------------------------------------------- /dataset/description.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/dataset/description.json -------------------------------------------------------------------------------- /examples/aug_gen.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/examples/aug_gen.yaml -------------------------------------------------------------------------------- /examples/data_prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/examples/data_prepare.py -------------------------------------------------------------------------------- /examples/eval_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/examples/eval_config.yaml -------------------------------------------------------------------------------- /examples/eval_generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/examples/eval_generation.yaml -------------------------------------------------------------------------------- /examples/eval_judge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/examples/eval_judge.yaml -------------------------------------------------------------------------------- /examples/generation_config_MMLU.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/examples/generation_config_MMLU.yaml -------------------------------------------------------------------------------- /examples/generation_config_example.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/examples/generation_config_example.yaml -------------------------------------------------------------------------------- /examples/llama-factory_config/train.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/examples/llama-factory_config/train.yaml -------------------------------------------------------------------------------- /images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/images/architecture.png -------------------------------------------------------------------------------- /images/overview_UniGen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/images/overview_UniGen.png -------------------------------------------------------------------------------- /images/unigen_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/images/unigen_icon.png -------------------------------------------------------------------------------- /images/unigen_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/images/unigen_logo.png -------------------------------------------------------------------------------- /inference/evaluation_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/inference/evaluation_results.csv -------------------------------------------------------------------------------- /inference/test_data/combined_dataset_20240619_100140.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/inference/test_data/combined_dataset_20240619_100140.json -------------------------------------------------------------------------------- /inference/test_res/chatgpt/evaluated_combined_dataset_20240619_100140.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/inference/test_res/chatgpt/evaluated_combined_dataset_20240619_100140.json -------------------------------------------------------------------------------- /inference/test_res/evaluation_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/inference/test_res/evaluation_results.csv -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/setup.py -------------------------------------------------------------------------------- /unigen/augmentation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/augmentation.py -------------------------------------------------------------------------------- /unigen/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/cli.py -------------------------------------------------------------------------------- /unigen/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/eval.py -------------------------------------------------------------------------------- /unigen/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/generation.py -------------------------------------------------------------------------------- /unigen/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/inference.py -------------------------------------------------------------------------------- /unigen/model_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/model_config.yaml -------------------------------------------------------------------------------- /unigen/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/readme.md -------------------------------------------------------------------------------- /unigen/utils/IO.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/IO.py -------------------------------------------------------------------------------- /unigen/utils/LLM_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/LLM_model.py -------------------------------------------------------------------------------- /unigen/utils/RAG_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/RAG_eval.py -------------------------------------------------------------------------------- /unigen/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /unigen/utils/attribute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/attribute.py -------------------------------------------------------------------------------- /unigen/utils/challenge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/challenge.py -------------------------------------------------------------------------------- /unigen/utils/configuration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/configuration.py -------------------------------------------------------------------------------- /unigen/utils/data_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/data_format.py -------------------------------------------------------------------------------- /unigen/utils/diversity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/diversity.py -------------------------------------------------------------------------------- /unigen/utils/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/embedding.py -------------------------------------------------------------------------------- /unigen/utils/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/eval_utils.py -------------------------------------------------------------------------------- /unigen/utils/file_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/file_process.py -------------------------------------------------------------------------------- /unigen/utils/generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/generation_utils.py -------------------------------------------------------------------------------- /unigen/utils/group_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/group_check.py -------------------------------------------------------------------------------- /unigen/utils/knowledge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/knowledge.py -------------------------------------------------------------------------------- /unigen/utils/math_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/math_eval.py -------------------------------------------------------------------------------- /unigen/utils/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/prompt.py -------------------------------------------------------------------------------- /unigen/utils/self_reflection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HowieHwong/DataGen/HEAD/unigen/utils/self_reflection.py --------------------------------------------------------------------------------