├── .all-contributorsrc ├── .gitignore ├── LICENSE ├── README-EN.md ├── README.md ├── datas ├── agent_intent │ ├── embeddings_examples.json │ └── examples.json ├── cdss_datas │ └── examples.jsonl └── wizard_instruction │ └── example.jsonl ├── docs ├── logo.png └── 数据合成工具总览.png ├── examples ├── __init__.py ├── cdss_agent │ └── cdss_agent_pipline.py ├── data_sample_and_quality_evaluate │ └── data_sampling_and_evaluate.py ├── dpo_synth │ ├── dpo_synth.py │ └── model_args_parser.py ├── magpie_synth │ └── magpie_ins_res_synth.py ├── model_votes │ └── model_votes_and_pipline.py └── wizard_synth │ └── wizard_query_synth.py ├── poetry.lock ├── pyproject.toml ├── results ├── cdss_datas │ ├── cdss_agent_data_synth.json │ ├── cdss_agent_data_synth.json_post_binary.json │ ├── test_dataset_v_0_2_middle.xlsx │ ├── train_dataset_v_0_2_middle.json │ ├── 孕期孕周.json │ └── 药品.json ├── cdss_datas_with_votes_post_processing │ └── cdss_agent_data_synth.json_cdss_multi_model_vote_and_post_labeling.json ├── data_select_and_evaluation │ ├── evaluation_res.json │ ├── k_center_selected_10.json │ └── random_selected_10.json └── wizard_instruction │ └── wizard_evolution_data_synth.json_wizard.json └── src └── joydataforge ├── __init__.py ├── components ├── __init__.py ├── filter │ ├── __init__.py │ ├── k_center_greedy.py │ ├── mini_hash.py │ └── sampling_def.py ├── loader │ ├── __init__.py │ └── data_load_and_process.py ├── score │ ├── __init__.py │ ├── data_evaluation.py │ ├── data_sampling.py │ ├── entity_diversity.py │ └── vendi_scores.py └── synth │ ├── __init__.py │ ├── joy_synth │ ├── data_generate.py │ └── wizard.py │ └── magpie_synth │ └── data_generate.py ├── config ├── __init__.py ├── magpie_model_templates.json ├── model.yaml └── prompt.yaml ├── memory ├── __init__.py └── cache │ ├── __init__.py │ └── data_cache.py ├── models ├── __init__.py └── llm.py └── utils ├── __init__.py ├── dialog_sample_by_round.py ├── file.py ├── magpie_utils └── str_utils.py └── parse_response.py /.all-contributorsrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/.all-contributorsrc -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/LICENSE -------------------------------------------------------------------------------- /README-EN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/README-EN.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/README.md -------------------------------------------------------------------------------- /datas/agent_intent/embeddings_examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/datas/agent_intent/embeddings_examples.json -------------------------------------------------------------------------------- /datas/agent_intent/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/datas/agent_intent/examples.json -------------------------------------------------------------------------------- /datas/cdss_datas/examples.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/datas/cdss_datas/examples.jsonl -------------------------------------------------------------------------------- /datas/wizard_instruction/example.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/datas/wizard_instruction/example.jsonl -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/数据合成工具总览.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/docs/数据合成工具总览.png -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/cdss_agent/cdss_agent_pipline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/examples/cdss_agent/cdss_agent_pipline.py -------------------------------------------------------------------------------- /examples/data_sample_and_quality_evaluate/data_sampling_and_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/examples/data_sample_and_quality_evaluate/data_sampling_and_evaluate.py -------------------------------------------------------------------------------- /examples/dpo_synth/dpo_synth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/examples/dpo_synth/dpo_synth.py -------------------------------------------------------------------------------- /examples/dpo_synth/model_args_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/examples/dpo_synth/model_args_parser.py -------------------------------------------------------------------------------- /examples/magpie_synth/magpie_ins_res_synth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/examples/magpie_synth/magpie_ins_res_synth.py -------------------------------------------------------------------------------- /examples/model_votes/model_votes_and_pipline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/examples/model_votes/model_votes_and_pipline.py -------------------------------------------------------------------------------- /examples/wizard_synth/wizard_query_synth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/examples/wizard_synth/wizard_query_synth.py -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/pyproject.toml -------------------------------------------------------------------------------- /results/cdss_datas/cdss_agent_data_synth.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/cdss_datas/cdss_agent_data_synth.json -------------------------------------------------------------------------------- /results/cdss_datas/cdss_agent_data_synth.json_post_binary.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/cdss_datas/cdss_agent_data_synth.json_post_binary.json -------------------------------------------------------------------------------- /results/cdss_datas/test_dataset_v_0_2_middle.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/cdss_datas/test_dataset_v_0_2_middle.xlsx -------------------------------------------------------------------------------- /results/cdss_datas/train_dataset_v_0_2_middle.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/cdss_datas/train_dataset_v_0_2_middle.json -------------------------------------------------------------------------------- /results/cdss_datas/孕期孕周.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/cdss_datas/孕期孕周.json -------------------------------------------------------------------------------- /results/cdss_datas/药品.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/cdss_datas/药品.json -------------------------------------------------------------------------------- /results/cdss_datas_with_votes_post_processing/cdss_agent_data_synth.json_cdss_multi_model_vote_and_post_labeling.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/cdss_datas_with_votes_post_processing/cdss_agent_data_synth.json_cdss_multi_model_vote_and_post_labeling.json -------------------------------------------------------------------------------- /results/data_select_and_evaluation/evaluation_res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/data_select_and_evaluation/evaluation_res.json -------------------------------------------------------------------------------- /results/data_select_and_evaluation/k_center_selected_10.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/data_select_and_evaluation/k_center_selected_10.json -------------------------------------------------------------------------------- /results/data_select_and_evaluation/random_selected_10.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/data_select_and_evaluation/random_selected_10.json -------------------------------------------------------------------------------- /results/wizard_instruction/wizard_evolution_data_synth.json_wizard.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/results/wizard_instruction/wizard_evolution_data_synth.json_wizard.json -------------------------------------------------------------------------------- /src/joydataforge/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/components/filter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/components/filter/k_center_greedy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/filter/k_center_greedy.py -------------------------------------------------------------------------------- /src/joydataforge/components/filter/mini_hash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/filter/mini_hash.py -------------------------------------------------------------------------------- /src/joydataforge/components/filter/sampling_def.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/filter/sampling_def.py -------------------------------------------------------------------------------- /src/joydataforge/components/loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/components/loader/data_load_and_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/loader/data_load_and_process.py -------------------------------------------------------------------------------- /src/joydataforge/components/score/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/components/score/data_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/score/data_evaluation.py -------------------------------------------------------------------------------- /src/joydataforge/components/score/data_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/score/data_sampling.py -------------------------------------------------------------------------------- /src/joydataforge/components/score/entity_diversity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/score/entity_diversity.py -------------------------------------------------------------------------------- /src/joydataforge/components/score/vendi_scores.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/score/vendi_scores.py -------------------------------------------------------------------------------- /src/joydataforge/components/synth/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/components/synth/joy_synth/data_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/synth/joy_synth/data_generate.py -------------------------------------------------------------------------------- /src/joydataforge/components/synth/joy_synth/wizard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/synth/joy_synth/wizard.py -------------------------------------------------------------------------------- /src/joydataforge/components/synth/magpie_synth/data_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/components/synth/magpie_synth/data_generate.py -------------------------------------------------------------------------------- /src/joydataforge/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/config/__init__.py -------------------------------------------------------------------------------- /src/joydataforge/config/magpie_model_templates.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/config/magpie_model_templates.json -------------------------------------------------------------------------------- /src/joydataforge/config/model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/config/model.yaml -------------------------------------------------------------------------------- /src/joydataforge/config/prompt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/config/prompt.yaml -------------------------------------------------------------------------------- /src/joydataforge/memory/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/memory/cache/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/memory/cache/data_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/memory/cache/data_cache.py -------------------------------------------------------------------------------- /src/joydataforge/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/models/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/models/llm.py -------------------------------------------------------------------------------- /src/joydataforge/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/joydataforge/utils/dialog_sample_by_round.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/utils/dialog_sample_by_round.py -------------------------------------------------------------------------------- /src/joydataforge/utils/file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/utils/file.py -------------------------------------------------------------------------------- /src/joydataforge/utils/magpie_utils/str_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/utils/magpie_utils/str_utils.py -------------------------------------------------------------------------------- /src/joydataforge/utils/parse_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdh-algo/JoyDataForge/HEAD/src/joydataforge/utils/parse_response.py --------------------------------------------------------------------------------