├── .gitignore
├── LICENSE
├── README.md
├── assets
    └── DCMHA_arch.jpg
├── data
    └── synthetic_dataset.jsonl
├── jax
    ├── MaxText
    │   ├── __init__.py
    │   ├── accelerator_to_spec_map.py
    │   ├── checkpointing.py
    │   ├── common_types.py
    │   ├── configs
    │   │   ├── dcformer_pp_2b8.yml
    │   │   ├── dcformer_pp_405m.yml
    │   │   └── transformer_pp_405m.yml
    │   ├── convert_gemma_chkpt.py
    │   ├── convert_gpt3_ckpt_from_paxml.py
    │   ├── decode.py
    │   ├── generate_param_only_checkpoint.py
    │   ├── inference_utils.py
    │   ├── input_pipeline
    │   │   ├── _grain_data_processing.py
    │   │   ├── _grain_operations.py
    │   │   ├── _grain_tokenizer.py
    │   │   ├── _pile_data_processing.py
    │   │   ├── _tfds_data_processing.py
    │   │   ├── _tfds_data_processing_c4_mlperf.py
    │   │   └── input_pipeline_interface.py
    │   ├── layers
    │   │   ├── attentions.py
    │   │   ├── dc_attentions.py
    │   │   ├── dcformer.py
    │   │   ├── embeddings.py
    │   │   ├── gemma.py
    │   │   ├── gpt3.py
    │   │   ├── initializers.py
    │   │   ├── linears.py
    │   │   ├── llama2.py
    │   │   ├── mistral.py
    │   │   ├── models.py
    │   │   ├── normalizations.py
    │   │   └── quantizations.py
    │   ├── llama_or_mistral_ckpt.py
    │   ├── max_logging.py
    │   ├── max_utils.py
    │   ├── maxtext_utils.py
    │   ├── multihost_dataloading.py
    │   ├── optimizers.py
    │   ├── pyconfig.py
    │   ├── pytest.ini
    │   ├── sequence_packing.py
    │   ├── standalone_checkpointer.py
    │   ├── standalone_dataloader.py
    │   ├── tokenizer.py
    │   ├── train.py
    │   ├── train_compile.py
    │   └── train_tokenizer.py
    ├── README.md
    ├── assets
    │   ├── tokenizer
    │   ├── tokenizer.gemma
    │   └── tokenizer.llama2
    ├── images
    │   └── 405m_dcformer_pp_vs_transformer_pp_loss.png
    ├── requirements_gpu.txt
    └── requirements_tpu.txt
└── pytorch
    ├── README.md
    ├── dcformer
        ├── README.md
        ├── __init__.py
        ├── build_dataset.py
        ├── config.json
        ├── configuration_dcformer.py
        ├── ds_zero0.json
        ├── ds_zero1_no_offload.json
        ├── ds_zero2_no_offload.json
        ├── ds_zero3_no_offload.json
        ├── generation_demo.py
        ├── img
        │   ├── dcformer和llama3单步运行时间对比.png
        │   └── test.jpg
        ├── maxtext2torch.py
        ├── modeling_dcformer.py
        ├── requirements.txt
        ├── run_clm_pt_dcformer.py
        ├── run_clm_sft_dcformer.py
        ├── run_pt.sh
        └── run_sft.sh
    ├── dcpythia
        ├── __init__.py
        ├── config.json
        ├── configuration_dcpythia.py
        ├── generation_demo.py
        └── modeling_dcpythia.py
    └── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/.gitignore


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/README.md


--------------------------------------------------------------------------------
/assets/DCMHA_arch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/assets/DCMHA_arch.jpg


--------------------------------------------------------------------------------
/data/synthetic_dataset.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/data/synthetic_dataset.jsonl


--------------------------------------------------------------------------------
/jax/MaxText/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/__init__.py


--------------------------------------------------------------------------------
/jax/MaxText/accelerator_to_spec_map.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/accelerator_to_spec_map.py


--------------------------------------------------------------------------------
/jax/MaxText/checkpointing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/checkpointing.py


--------------------------------------------------------------------------------
/jax/MaxText/common_types.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/common_types.py


--------------------------------------------------------------------------------
/jax/MaxText/configs/dcformer_pp_2b8.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/configs/dcformer_pp_2b8.yml


--------------------------------------------------------------------------------
/jax/MaxText/configs/dcformer_pp_405m.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/configs/dcformer_pp_405m.yml


--------------------------------------------------------------------------------
/jax/MaxText/configs/transformer_pp_405m.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/configs/transformer_pp_405m.yml


--------------------------------------------------------------------------------
/jax/MaxText/convert_gemma_chkpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/convert_gemma_chkpt.py


--------------------------------------------------------------------------------
/jax/MaxText/convert_gpt3_ckpt_from_paxml.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/convert_gpt3_ckpt_from_paxml.py


--------------------------------------------------------------------------------
/jax/MaxText/decode.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/decode.py


--------------------------------------------------------------------------------
/jax/MaxText/generate_param_only_checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/generate_param_only_checkpoint.py


--------------------------------------------------------------------------------
/jax/MaxText/inference_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/inference_utils.py


--------------------------------------------------------------------------------
/jax/MaxText/input_pipeline/_grain_data_processing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_grain_data_processing.py


--------------------------------------------------------------------------------
/jax/MaxText/input_pipeline/_grain_operations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_grain_operations.py


--------------------------------------------------------------------------------
/jax/MaxText/input_pipeline/_grain_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_grain_tokenizer.py


--------------------------------------------------------------------------------
/jax/MaxText/input_pipeline/_pile_data_processing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_pile_data_processing.py


--------------------------------------------------------------------------------
/jax/MaxText/input_pipeline/_tfds_data_processing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_tfds_data_processing.py


--------------------------------------------------------------------------------
/jax/MaxText/input_pipeline/_tfds_data_processing_c4_mlperf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/_tfds_data_processing_c4_mlperf.py


--------------------------------------------------------------------------------
/jax/MaxText/input_pipeline/input_pipeline_interface.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/input_pipeline/input_pipeline_interface.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/attentions.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/attentions.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/dc_attentions.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/dc_attentions.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/dcformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/dcformer.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/embeddings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/embeddings.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/gemma.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/gemma.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/gpt3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/gpt3.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/initializers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/initializers.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/linears.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/linears.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/llama2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/llama2.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/mistral.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/mistral.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/models.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/normalizations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/normalizations.py


--------------------------------------------------------------------------------
/jax/MaxText/layers/quantizations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/layers/quantizations.py


--------------------------------------------------------------------------------
/jax/MaxText/llama_or_mistral_ckpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/llama_or_mistral_ckpt.py


--------------------------------------------------------------------------------
/jax/MaxText/max_logging.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/max_logging.py


--------------------------------------------------------------------------------
/jax/MaxText/max_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/max_utils.py


--------------------------------------------------------------------------------
/jax/MaxText/maxtext_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/maxtext_utils.py


--------------------------------------------------------------------------------
/jax/MaxText/multihost_dataloading.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/multihost_dataloading.py


--------------------------------------------------------------------------------
/jax/MaxText/optimizers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/optimizers.py


--------------------------------------------------------------------------------
/jax/MaxText/pyconfig.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/pyconfig.py


--------------------------------------------------------------------------------
/jax/MaxText/pytest.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/pytest.ini


--------------------------------------------------------------------------------
/jax/MaxText/sequence_packing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/sequence_packing.py


--------------------------------------------------------------------------------
/jax/MaxText/standalone_checkpointer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/standalone_checkpointer.py


--------------------------------------------------------------------------------
/jax/MaxText/standalone_dataloader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/standalone_dataloader.py


--------------------------------------------------------------------------------
/jax/MaxText/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/tokenizer.py


--------------------------------------------------------------------------------
/jax/MaxText/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/train.py


--------------------------------------------------------------------------------
/jax/MaxText/train_compile.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/train_compile.py


--------------------------------------------------------------------------------
/jax/MaxText/train_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/MaxText/train_tokenizer.py


--------------------------------------------------------------------------------
/jax/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/README.md


--------------------------------------------------------------------------------
/jax/assets/tokenizer:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/assets/tokenizer


--------------------------------------------------------------------------------
/jax/assets/tokenizer.gemma:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/assets/tokenizer.gemma


--------------------------------------------------------------------------------
/jax/assets/tokenizer.llama2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/assets/tokenizer.llama2


--------------------------------------------------------------------------------
/jax/images/405m_dcformer_pp_vs_transformer_pp_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/images/405m_dcformer_pp_vs_transformer_pp_loss.png


--------------------------------------------------------------------------------
/jax/requirements_gpu.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/requirements_gpu.txt


--------------------------------------------------------------------------------
/jax/requirements_tpu.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/jax/requirements_tpu.txt


--------------------------------------------------------------------------------
/pytorch/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/README.md


--------------------------------------------------------------------------------
/pytorch/dcformer/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/README.md


--------------------------------------------------------------------------------
/pytorch/dcformer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytorch/dcformer/build_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/build_dataset.py


--------------------------------------------------------------------------------
/pytorch/dcformer/config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/config.json


--------------------------------------------------------------------------------
/pytorch/dcformer/configuration_dcformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/configuration_dcformer.py


--------------------------------------------------------------------------------
/pytorch/dcformer/ds_zero0.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/ds_zero0.json


--------------------------------------------------------------------------------
/pytorch/dcformer/ds_zero1_no_offload.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/ds_zero1_no_offload.json


--------------------------------------------------------------------------------
/pytorch/dcformer/ds_zero2_no_offload.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/ds_zero2_no_offload.json


--------------------------------------------------------------------------------
/pytorch/dcformer/ds_zero3_no_offload.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/ds_zero3_no_offload.json


--------------------------------------------------------------------------------
/pytorch/dcformer/generation_demo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/generation_demo.py


--------------------------------------------------------------------------------
/pytorch/dcformer/img/dcformer和llama3单步运行时间对比.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/img/dcformer和llama3单步运行时间对比.png


--------------------------------------------------------------------------------
/pytorch/dcformer/img/test.jpg:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/pytorch/dcformer/maxtext2torch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/maxtext2torch.py


--------------------------------------------------------------------------------
/pytorch/dcformer/modeling_dcformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/modeling_dcformer.py


--------------------------------------------------------------------------------
/pytorch/dcformer/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/requirements.txt


--------------------------------------------------------------------------------
/pytorch/dcformer/run_clm_pt_dcformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/run_clm_pt_dcformer.py


--------------------------------------------------------------------------------
/pytorch/dcformer/run_clm_sft_dcformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/run_clm_sft_dcformer.py


--------------------------------------------------------------------------------
/pytorch/dcformer/run_pt.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/run_pt.sh


--------------------------------------------------------------------------------
/pytorch/dcformer/run_sft.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcformer/run_sft.sh


--------------------------------------------------------------------------------
/pytorch/dcpythia/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytorch/dcpythia/config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcpythia/config.json


--------------------------------------------------------------------------------
/pytorch/dcpythia/configuration_dcpythia.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcpythia/configuration_dcpythia.py


--------------------------------------------------------------------------------
/pytorch/dcpythia/generation_demo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcpythia/generation_demo.py


--------------------------------------------------------------------------------
/pytorch/dcpythia/modeling_dcpythia.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/dcpythia/modeling_dcpythia.py


--------------------------------------------------------------------------------
/pytorch/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Caiyun-AI/DCFormer/HEAD/pytorch/requirements.txt


--------------------------------------------------------------------------------