├── .gitignore ├── LICENSE ├── README.md ├── assets └── logo-final.png ├── examples ├── pipelines │ ├── combined_filter.py │ ├── embed_datasets.py │ ├── embed_datasets.sh │ ├── filter_datasets.sh │ ├── score_complexity.py │ ├── score_complexity_dataset.py │ ├── score_complexity_dataset.sh │ ├── score_quality.py │ ├── score_quality_dataset.sh │ ├── score_vllm.py │ └── utils.py └── train │ ├── dpo.sh │ ├── sft.sh │ └── train_scorers.sh ├── requirements.txt ├── sample_little.py ├── setup.py └── src └── deita ├── __init__.py ├── alignment ├── __init__.py ├── constants.py ├── conversation.py ├── dpo_train.py ├── flash_attn │ ├── bloom_flash_attention.py │ └── triton_flash_attention.py ├── train.py └── train_scorers.py ├── data └── sample_ultrafeedback.py ├── ds_configs ├── deepspeed_config_zero2_no_offload.json ├── deepspped_llama_x.json └── stage3_no_offloading_accelerate.json ├── pipeline ├── __init__.py ├── base.py ├── embed_pipeline.py ├── filter_pipeline.py ├── score_pipeline.py └── utils.py └── selection ├── __init__.py ├── embedder ├── __init__.py ├── base.py ├── clm_embedder.py ├── conversation.py └── utils.py ├── filter ├── __init__.py ├── base.py ├── combined_filter.py └── utils.py └── scorer ├── __init__.py ├── base.py ├── llama_scorer.py └── mistral_scorer.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/README.md -------------------------------------------------------------------------------- /assets/logo-final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/assets/logo-final.png -------------------------------------------------------------------------------- /examples/pipelines/combined_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/combined_filter.py -------------------------------------------------------------------------------- /examples/pipelines/embed_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/embed_datasets.py -------------------------------------------------------------------------------- /examples/pipelines/embed_datasets.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/embed_datasets.sh -------------------------------------------------------------------------------- /examples/pipelines/filter_datasets.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/filter_datasets.sh -------------------------------------------------------------------------------- /examples/pipelines/score_complexity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/score_complexity.py -------------------------------------------------------------------------------- /examples/pipelines/score_complexity_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/score_complexity_dataset.py -------------------------------------------------------------------------------- /examples/pipelines/score_complexity_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/score_complexity_dataset.sh -------------------------------------------------------------------------------- /examples/pipelines/score_quality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/score_quality.py -------------------------------------------------------------------------------- /examples/pipelines/score_quality_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/score_quality_dataset.sh -------------------------------------------------------------------------------- /examples/pipelines/score_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/score_vllm.py -------------------------------------------------------------------------------- /examples/pipelines/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/pipelines/utils.py -------------------------------------------------------------------------------- /examples/train/dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/train/dpo.sh -------------------------------------------------------------------------------- /examples/train/sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/train/sft.sh -------------------------------------------------------------------------------- /examples/train/train_scorers.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/examples/train/train_scorers.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/requirements.txt -------------------------------------------------------------------------------- /sample_little.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/sample_little.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/setup.py -------------------------------------------------------------------------------- /src/deita/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' -------------------------------------------------------------------------------- /src/deita/alignment/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.19" 2 | -------------------------------------------------------------------------------- /src/deita/alignment/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/alignment/constants.py -------------------------------------------------------------------------------- /src/deita/alignment/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/alignment/conversation.py -------------------------------------------------------------------------------- /src/deita/alignment/dpo_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/alignment/dpo_train.py -------------------------------------------------------------------------------- /src/deita/alignment/flash_attn/bloom_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/alignment/flash_attn/bloom_flash_attention.py -------------------------------------------------------------------------------- /src/deita/alignment/flash_attn/triton_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/alignment/flash_attn/triton_flash_attention.py -------------------------------------------------------------------------------- /src/deita/alignment/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/alignment/train.py -------------------------------------------------------------------------------- /src/deita/alignment/train_scorers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/alignment/train_scorers.py -------------------------------------------------------------------------------- /src/deita/data/sample_ultrafeedback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/data/sample_ultrafeedback.py -------------------------------------------------------------------------------- /src/deita/ds_configs/deepspeed_config_zero2_no_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/ds_configs/deepspeed_config_zero2_no_offload.json -------------------------------------------------------------------------------- /src/deita/ds_configs/deepspped_llama_x.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/ds_configs/deepspped_llama_x.json -------------------------------------------------------------------------------- /src/deita/ds_configs/stage3_no_offloading_accelerate.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/ds_configs/stage3_no_offloading_accelerate.json -------------------------------------------------------------------------------- /src/deita/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/pipeline/__init__.py -------------------------------------------------------------------------------- /src/deita/pipeline/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/pipeline/base.py -------------------------------------------------------------------------------- /src/deita/pipeline/embed_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/pipeline/embed_pipeline.py -------------------------------------------------------------------------------- /src/deita/pipeline/filter_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/pipeline/filter_pipeline.py -------------------------------------------------------------------------------- /src/deita/pipeline/score_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/pipeline/score_pipeline.py -------------------------------------------------------------------------------- /src/deita/pipeline/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/pipeline/utils.py -------------------------------------------------------------------------------- /src/deita/selection/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/deita/selection/embedder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/embedder/__init__.py -------------------------------------------------------------------------------- /src/deita/selection/embedder/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/embedder/base.py -------------------------------------------------------------------------------- /src/deita/selection/embedder/clm_embedder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/embedder/clm_embedder.py -------------------------------------------------------------------------------- /src/deita/selection/embedder/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/embedder/conversation.py -------------------------------------------------------------------------------- /src/deita/selection/embedder/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/embedder/utils.py -------------------------------------------------------------------------------- /src/deita/selection/filter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/filter/__init__.py -------------------------------------------------------------------------------- /src/deita/selection/filter/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/filter/base.py -------------------------------------------------------------------------------- /src/deita/selection/filter/combined_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/filter/combined_filter.py -------------------------------------------------------------------------------- /src/deita/selection/filter/utils.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/deita/selection/scorer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/scorer/__init__.py -------------------------------------------------------------------------------- /src/deita/selection/scorer/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/scorer/base.py -------------------------------------------------------------------------------- /src/deita/selection/scorer/llama_scorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/scorer/llama_scorer.py -------------------------------------------------------------------------------- /src/deita/selection/scorer/mistral_scorer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkust-nlp/deita/HEAD/src/deita/selection/scorer/mistral_scorer.py --------------------------------------------------------------------------------