├── .DS_Store ├── LICENSE ├── README.md ├── assets ├── .DS_Store └── hdee.jpg ├── configs ├── .DS_Store ├── hdee_3_iterations │ ├── eval │ │ ├── dataset_all_trained_domains.yaml │ │ ├── directory.yaml │ │ ├── eval_ensemble_mhe_iho_iter3.yaml │ │ ├── eval_ensemble_mho_ihe_iter3.yaml │ │ └── eval_ensemble_mho_iho_iter3.yaml │ └── train_domains │ │ ├── dataset_Caselaw_Access_Project.yaml │ │ ├── dataset_History_and_events.yaml │ │ ├── dataset_Human_activites.yaml │ │ ├── dataset_Philosophy_and_thinking.yaml │ │ ├── dataset_TinyStories.yaml │ │ ├── dataset_cs_l1.yaml │ │ ├── dataset_math_l1.yaml │ │ ├── dataset_physics_l1.yaml │ │ ├── dataset_simple_wikipedia_LM.yaml │ │ ├── directory.yaml │ │ ├── train_domain_hyperparam_lr6e5.yaml │ │ ├── train_iter1_expert_l_domain_math_l1.yaml │ │ ├── train_iter1_expert_m_domain_Caselaw_Access_Project.yaml │ │ ├── train_iter1_expert_m_domain_History_and_events.yaml │ │ ├── train_iter1_expert_m_domain_math_l1.yaml │ │ ├── train_iter1_expert_s_domain_Caselaw_Access_Project.yaml │ │ ├── train_iter2_MHC_expert_l_domain_physics_l1.yaml │ │ ├── train_iter2_MHC_expert_m_domain_Human_activites.yaml │ │ ├── train_iter2_MHC_expert_m_domain_physics_l1.yaml │ │ ├── train_iter2_MHC_expert_m_domain_simple_wikipedia_LM.yaml │ │ ├── train_iter2_MHC_expert_s_domain_simple_wikipedia_LM.yaml │ │ ├── train_iter3_MHC_PHS_expert_l_domain_cs_l1.yaml │ │ ├── train_iter3_MHC_PHS_expert_m_domain_Philosophy_and_thinking.yaml │ │ ├── train_iter3_MHC_PHS_expert_m_domain_TinyStories.yaml │ │ ├── train_iter3_MHC_PHS_expert_m_domain_cs_l1.yaml │ │ └── train_iter3_MHC_PHS_expert_s_domain_TinyStories.yaml └── hdee_seed_models │ ├── train_seed_expert_l.yaml │ ├── train_seed_expert_m.yaml │ ├── train_seed_expert_s.yaml │ └── train_seed_hyperparam_small_close_experts.yaml ├── pdm.lock ├── pyproject.toml ├── scripts ├── eval_ensembles.sh ├── load_openwebtext.sh ├── load_single_M2D2_domain_dataset.sh ├── load_trained_M2D2_domains.sh ├── load_trained_other_domains.sh ├── train_domain_experts.sh └── train_seed_models.sh └── src ├── .DS_Store ├── gensyn_dataprep ├── __init__.py └── dataprep │ ├── pretokenize_data.py │ └── shuffle_parquet.py └── hdee ├── __init__.py ├── btm_utils.py ├── data_loaders.py ├── data_parallel_optimizer.py ├── eval.py ├── eval_ensemble.py ├── llama.py ├── schedulers.py ├── train.py ├── train_domain.py └── weight_strategies.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/.DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/README.md -------------------------------------------------------------------------------- /assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/assets/.DS_Store -------------------------------------------------------------------------------- /assets/hdee.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/assets/hdee.jpg -------------------------------------------------------------------------------- /configs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/.DS_Store -------------------------------------------------------------------------------- /configs/hdee_3_iterations/eval/dataset_all_trained_domains.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/eval/dataset_all_trained_domains.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/eval/directory.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | 4 | log_dir: hdee/experiments/hdee_3_iterations 5 | 6 | -------------------------------------------------------------------------------- /configs/hdee_3_iterations/eval/eval_ensemble_mhe_iho_iter3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/eval/eval_ensemble_mhe_iho_iter3.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/eval/eval_ensemble_mho_ihe_iter3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/eval/eval_ensemble_mho_ihe_iter3.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/eval/eval_ensemble_mho_iho_iter3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/eval/eval_ensemble_mho_iho_iter3.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_Caselaw_Access_Project.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_Caselaw_Access_Project.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_History_and_events.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_History_and_events.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_Human_activites.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_Human_activites.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_Philosophy_and_thinking.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_Philosophy_and_thinking.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_TinyStories.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_TinyStories.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_cs_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_cs_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_math_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_math_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_physics_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_physics_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/dataset_simple_wikipedia_LM.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/dataset_simple_wikipedia_LM.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/directory.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/directory.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_domain_hyperparam_lr6e5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_domain_hyperparam_lr6e5.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter1_expert_l_domain_math_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter1_expert_l_domain_math_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter1_expert_m_domain_Caselaw_Access_Project.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter1_expert_m_domain_Caselaw_Access_Project.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter1_expert_m_domain_History_and_events.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter1_expert_m_domain_History_and_events.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter1_expert_m_domain_math_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter1_expert_m_domain_math_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter1_expert_s_domain_Caselaw_Access_Project.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter1_expert_s_domain_Caselaw_Access_Project.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_l_domain_physics_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_l_domain_physics_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_m_domain_Human_activites.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_m_domain_Human_activites.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_m_domain_physics_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_m_domain_physics_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_m_domain_simple_wikipedia_LM.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_m_domain_simple_wikipedia_LM.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_s_domain_simple_wikipedia_LM.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter2_MHC_expert_s_domain_simple_wikipedia_LM.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_l_domain_cs_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_l_domain_cs_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_m_domain_Philosophy_and_thinking.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_m_domain_Philosophy_and_thinking.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_m_domain_TinyStories.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_m_domain_TinyStories.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_m_domain_cs_l1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_m_domain_cs_l1.yaml -------------------------------------------------------------------------------- /configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_s_domain_TinyStories.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_3_iterations/train_domains/train_iter3_MHC_PHS_expert_s_domain_TinyStories.yaml -------------------------------------------------------------------------------- /configs/hdee_seed_models/train_seed_expert_l.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_seed_models/train_seed_expert_l.yaml -------------------------------------------------------------------------------- /configs/hdee_seed_models/train_seed_expert_m.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_seed_models/train_seed_expert_m.yaml -------------------------------------------------------------------------------- /configs/hdee_seed_models/train_seed_expert_s.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_seed_models/train_seed_expert_s.yaml -------------------------------------------------------------------------------- /configs/hdee_seed_models/train_seed_hyperparam_small_close_experts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/configs/hdee_seed_models/train_seed_hyperparam_small_close_experts.yaml -------------------------------------------------------------------------------- /pdm.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/pdm.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/eval_ensembles.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/scripts/eval_ensembles.sh -------------------------------------------------------------------------------- /scripts/load_openwebtext.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/scripts/load_openwebtext.sh -------------------------------------------------------------------------------- /scripts/load_single_M2D2_domain_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/scripts/load_single_M2D2_domain_dataset.sh -------------------------------------------------------------------------------- /scripts/load_trained_M2D2_domains.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/scripts/load_trained_M2D2_domains.sh -------------------------------------------------------------------------------- /scripts/load_trained_other_domains.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/scripts/load_trained_other_domains.sh -------------------------------------------------------------------------------- /scripts/train_domain_experts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/scripts/train_domain_experts.sh -------------------------------------------------------------------------------- /scripts/train_seed_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/scripts/train_seed_models.sh -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/.DS_Store -------------------------------------------------------------------------------- /src/gensyn_dataprep/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/gensyn_dataprep/dataprep/pretokenize_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/gensyn_dataprep/dataprep/pretokenize_data.py -------------------------------------------------------------------------------- /src/gensyn_dataprep/dataprep/shuffle_parquet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/gensyn_dataprep/dataprep/shuffle_parquet.py -------------------------------------------------------------------------------- /src/hdee/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/hdee/btm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/btm_utils.py -------------------------------------------------------------------------------- /src/hdee/data_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/data_loaders.py -------------------------------------------------------------------------------- /src/hdee/data_parallel_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/data_parallel_optimizer.py -------------------------------------------------------------------------------- /src/hdee/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/eval.py -------------------------------------------------------------------------------- /src/hdee/eval_ensemble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/eval_ensemble.py -------------------------------------------------------------------------------- /src/hdee/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/llama.py -------------------------------------------------------------------------------- /src/hdee/schedulers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/schedulers.py -------------------------------------------------------------------------------- /src/hdee/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/train.py -------------------------------------------------------------------------------- /src/hdee/train_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/train_domain.py -------------------------------------------------------------------------------- /src/hdee/weight_strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gensyn-ai/hdee/HEAD/src/hdee/weight_strategies.py --------------------------------------------------------------------------------