├── .gitignore ├── LICENSE ├── README.md ├── assets └── DCMHA_arch.jpg ├── data └── synthetic_dataset.jsonl ├── jax ├── MaxText │ ├── __init__.py │ ├── accelerator_to_spec_map.py │ ├── checkpointing.py │ ├── common_types.py │ ├── configs │ │ ├── dcformer_pp_2b8.yml │ │ ├── dcformer_pp_405m.yml │ │ └── transformer_pp_405m.yml │ ├── convert_gemma_chkpt.py │ ├── convert_gpt3_ckpt_from_paxml.py │ ├── decode.py │ ├── generate_param_only_checkpoint.py │ ├── inference_utils.py │ ├── input_pipeline │ │ ├── _grain_data_processing.py │ │ ├── _grain_operations.py │ │ ├── _grain_tokenizer.py │ │ ├── _pile_data_processing.py │ │ ├── _tfds_data_processing.py │ │ ├── _tfds_data_processing_c4_mlperf.py │ │ └── input_pipeline_interface.py │ ├── layers │ │ ├── attentions.py │ │ ├── dc_attentions.py │ │ ├── dcformer.py │ │ ├── embeddings.py │ │ ├── gemma.py │ │ ├── gpt3.py │ │ ├── initializers.py │ │ ├── linears.py │ │ ├── llama2.py │ │ ├── mistral.py │ │ ├── models.py │ │ ├── normalizations.py │ │ └── quantizations.py │ ├── llama_or_mistral_ckpt.py │ ├── max_logging.py │ ├── max_utils.py │ ├── maxtext_utils.py │ ├── multihost_dataloading.py │ ├── optimizers.py │ ├── pyconfig.py │ ├── pytest.ini │ ├── sequence_packing.py │ ├── standalone_checkpointer.py │ ├── standalone_dataloader.py │ ├── tokenizer.py │ ├── train.py │ ├── train_compile.py │ └── train_tokenizer.py ├── README.md ├── assets │ ├── tokenizer │ ├── tokenizer.gemma │ └── tokenizer.llama2 ├── images │ └── 405m_dcformer_pp_vs_transformer_pp_loss.png ├── requirements_gpu.txt └── requirements_tpu.txt └── pytorch ├── README.md ├── dcformer ├── README.md ├── __init__.py ├── build_dataset.py ├── config.json ├── configuration_dcformer.py ├── ds_zero0.json ├── ds_zero1_no_offload.json ├── ds_zero2_no_offload.json ├── ds_zero3_no_offload.json ├── generation_demo.py ├── img │ ├── dcformer和llama3单步运行时间对比.png │ └── test.jpg ├── maxtext2torch.py ├── modeling_dcformer.py ├── requirements.txt ├── run_clm_pt_dcformer.py ├── run_clm_sft_dcformer.py ├── run_pt.sh └── run_sft.sh ├── dcpythia ├── __init__.py ├── config.json ├── configuration_dcpythia.py ├── generation_demo.py └── modeling_dcpythia.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/README.md -------------------------------------------------------------------------------- /assets/DCMHA_arch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/assets/DCMHA_arch.jpg -------------------------------------------------------------------------------- /data/synthetic_dataset.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/data/synthetic_dataset.jsonl -------------------------------------------------------------------------------- /jax/MaxText/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/__init__.py -------------------------------------------------------------------------------- /jax/MaxText/accelerator_to_spec_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/accelerator_to_spec_map.py -------------------------------------------------------------------------------- /jax/MaxText/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/checkpointing.py -------------------------------------------------------------------------------- /jax/MaxText/common_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/common_types.py -------------------------------------------------------------------------------- /jax/MaxText/configs/dcformer_pp_2b8.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/configs/dcformer_pp_2b8.yml -------------------------------------------------------------------------------- /jax/MaxText/configs/dcformer_pp_405m.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/configs/dcformer_pp_405m.yml -------------------------------------------------------------------------------- /jax/MaxText/configs/transformer_pp_405m.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/configs/transformer_pp_405m.yml -------------------------------------------------------------------------------- /jax/MaxText/convert_gemma_chkpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/convert_gemma_chkpt.py -------------------------------------------------------------------------------- /jax/MaxText/convert_gpt3_ckpt_from_paxml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/convert_gpt3_ckpt_from_paxml.py -------------------------------------------------------------------------------- /jax/MaxText/decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/decode.py -------------------------------------------------------------------------------- /jax/MaxText/generate_param_only_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/generate_param_only_checkpoint.py -------------------------------------------------------------------------------- /jax/MaxText/inference_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/inference_utils.py -------------------------------------------------------------------------------- /jax/MaxText/input_pipeline/_grain_data_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_grain_data_processing.py -------------------------------------------------------------------------------- /jax/MaxText/input_pipeline/_grain_operations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_grain_operations.py -------------------------------------------------------------------------------- /jax/MaxText/input_pipeline/_grain_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_grain_tokenizer.py -------------------------------------------------------------------------------- /jax/MaxText/input_pipeline/_pile_data_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_pile_data_processing.py -------------------------------------------------------------------------------- /jax/MaxText/input_pipeline/_tfds_data_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_tfds_data_processing.py -------------------------------------------------------------------------------- /jax/MaxText/input_pipeline/_tfds_data_processing_c4_mlperf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_tfds_data_processing_c4_mlperf.py -------------------------------------------------------------------------------- /jax/MaxText/input_pipeline/input_pipeline_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/input_pipeline_interface.py -------------------------------------------------------------------------------- /jax/MaxText/layers/attentions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/attentions.py -------------------------------------------------------------------------------- /jax/MaxText/layers/dc_attentions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/dc_attentions.py -------------------------------------------------------------------------------- /jax/MaxText/layers/dcformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/dcformer.py -------------------------------------------------------------------------------- /jax/MaxText/layers/embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/embeddings.py -------------------------------------------------------------------------------- /jax/MaxText/layers/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/gemma.py -------------------------------------------------------------------------------- /jax/MaxText/layers/gpt3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/gpt3.py -------------------------------------------------------------------------------- /jax/MaxText/layers/initializers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/initializers.py -------------------------------------------------------------------------------- /jax/MaxText/layers/linears.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/linears.py -------------------------------------------------------------------------------- /jax/MaxText/layers/llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/llama2.py -------------------------------------------------------------------------------- /jax/MaxText/layers/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/mistral.py -------------------------------------------------------------------------------- /jax/MaxText/layers/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/models.py -------------------------------------------------------------------------------- /jax/MaxText/layers/normalizations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/normalizations.py -------------------------------------------------------------------------------- /jax/MaxText/layers/quantizations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/quantizations.py -------------------------------------------------------------------------------- /jax/MaxText/llama_or_mistral_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/llama_or_mistral_ckpt.py -------------------------------------------------------------------------------- /jax/MaxText/max_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/max_logging.py -------------------------------------------------------------------------------- /jax/MaxText/max_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/max_utils.py -------------------------------------------------------------------------------- /jax/MaxText/maxtext_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/maxtext_utils.py -------------------------------------------------------------------------------- /jax/MaxText/multihost_dataloading.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/multihost_dataloading.py -------------------------------------------------------------------------------- /jax/MaxText/optimizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/optimizers.py -------------------------------------------------------------------------------- /jax/MaxText/pyconfig.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/pyconfig.py -------------------------------------------------------------------------------- /jax/MaxText/pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/pytest.ini -------------------------------------------------------------------------------- /jax/MaxText/sequence_packing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/sequence_packing.py -------------------------------------------------------------------------------- /jax/MaxText/standalone_checkpointer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/standalone_checkpointer.py -------------------------------------------------------------------------------- /jax/MaxText/standalone_dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/standalone_dataloader.py -------------------------------------------------------------------------------- /jax/MaxText/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/tokenizer.py -------------------------------------------------------------------------------- /jax/MaxText/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/train.py -------------------------------------------------------------------------------- /jax/MaxText/train_compile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/train_compile.py -------------------------------------------------------------------------------- /jax/MaxText/train_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/train_tokenizer.py -------------------------------------------------------------------------------- /jax/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/README.md -------------------------------------------------------------------------------- /jax/assets/tokenizer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/assets/tokenizer -------------------------------------------------------------------------------- /jax/assets/tokenizer.gemma: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/assets/tokenizer.gemma -------------------------------------------------------------------------------- /jax/assets/tokenizer.llama2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/assets/tokenizer.llama2 -------------------------------------------------------------------------------- /jax/images/405m_dcformer_pp_vs_transformer_pp_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/images/405m_dcformer_pp_vs_transformer_pp_loss.png -------------------------------------------------------------------------------- /jax/requirements_gpu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/requirements_gpu.txt -------------------------------------------------------------------------------- /jax/requirements_tpu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/requirements_tpu.txt -------------------------------------------------------------------------------- /pytorch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/README.md -------------------------------------------------------------------------------- /pytorch/dcformer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/README.md -------------------------------------------------------------------------------- /pytorch/dcformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytorch/dcformer/build_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/build_dataset.py -------------------------------------------------------------------------------- /pytorch/dcformer/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/config.json -------------------------------------------------------------------------------- /pytorch/dcformer/configuration_dcformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/configuration_dcformer.py -------------------------------------------------------------------------------- /pytorch/dcformer/ds_zero0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/ds_zero0.json -------------------------------------------------------------------------------- /pytorch/dcformer/ds_zero1_no_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/ds_zero1_no_offload.json -------------------------------------------------------------------------------- /pytorch/dcformer/ds_zero2_no_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/ds_zero2_no_offload.json -------------------------------------------------------------------------------- /pytorch/dcformer/ds_zero3_no_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/ds_zero3_no_offload.json -------------------------------------------------------------------------------- /pytorch/dcformer/generation_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/generation_demo.py -------------------------------------------------------------------------------- /pytorch/dcformer/img/dcformer和llama3单步运行时间对比.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/img/dcformer和llama3单步运行时间对比.png -------------------------------------------------------------------------------- /pytorch/dcformer/img/test.jpg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pytorch/dcformer/maxtext2torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/maxtext2torch.py -------------------------------------------------------------------------------- /pytorch/dcformer/modeling_dcformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/modeling_dcformer.py -------------------------------------------------------------------------------- /pytorch/dcformer/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/requirements.txt -------------------------------------------------------------------------------- /pytorch/dcformer/run_clm_pt_dcformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/run_clm_pt_dcformer.py -------------------------------------------------------------------------------- /pytorch/dcformer/run_clm_sft_dcformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/run_clm_sft_dcformer.py -------------------------------------------------------------------------------- /pytorch/dcformer/run_pt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/run_pt.sh -------------------------------------------------------------------------------- /pytorch/dcformer/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/run_sft.sh -------------------------------------------------------------------------------- /pytorch/dcpythia/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytorch/dcpythia/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcpythia/config.json -------------------------------------------------------------------------------- /pytorch/dcpythia/configuration_dcpythia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcpythia/configuration_dcpythia.py -------------------------------------------------------------------------------- /pytorch/dcpythia/generation_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcpythia/generation_demo.py -------------------------------------------------------------------------------- /pytorch/dcpythia/modeling_dcpythia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcpythia/modeling_dcpythia.py -------------------------------------------------------------------------------- /pytorch/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/requirements.txt --------------------------------------------------------------------------------