├── README.md ├── README_en.md ├── src ├── .flake8 ├── build_dataset.py ├── config │ ├── cpm-bee-10b.json │ └── cpm-bee-3b.json ├── cpm_live │ ├── __init__.py │ ├── arguments.py │ ├── dataset │ │ ├── __init__.py │ │ ├── distributed_dataset.py │ │ ├── serializer.py │ │ └── utils.py │ ├── generation │ │ ├── __init__.py │ │ ├── ant.py │ │ ├── bee.py │ │ └── generation_utils.py │ ├── layers │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── blocks.py │ │ ├── embedding.py │ │ ├── feedforward.py │ │ ├── layernorm.py │ │ ├── linear.py │ │ ├── position_embedding.py │ │ └── transformer.py │ ├── models │ │ ├── __init__.py │ │ ├── ant.py │ │ ├── ant_torch.py │ │ ├── bee.py │ │ └── bee_torch.py │ ├── native_layers │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── blocks.py │ │ ├── embedding.py │ │ ├── feedforward.py │ │ ├── layernorm.py │ │ ├── linear.py │ │ ├── position_embedding.py │ │ └── transformer.py │ ├── tokenizers │ │ ├── __init__.py │ │ ├── ant.py │ │ └── bee.py │ ├── training_tasks │ │ ├── __init__.py │ │ ├── ant │ │ │ ├── __init__.py │ │ │ └── pretrain.py │ │ └── bee │ │ │ ├── __init__.py │ │ │ ├── finetune.py │ │ │ └── pretrain.py │ ├── utils │ │ ├── __init__.py │ │ ├── config.py │ │ ├── data_utils.py │ │ ├── export.py │ │ ├── gradient_shrink.py │ │ ├── log.py │ │ └── object.py │ └── vocabs │ │ ├── ant.txt │ │ └── bee.txt ├── datasets.json ├── finetune_cpm_bee.py ├── preprocess_dataset.py ├── pretrain_cpm_bee.py ├── pyproject.toml ├── requirements.txt ├── scripts │ ├── finetune_cpm_bee.sh │ ├── preprocess_dataset.sh │ └── pretrain_cpm_bee.sh ├── setup.py ├── text_generation.py └── text_generation_hf.py └── tutorials ├── basic_task_finetune ├── README.md ├── bee_data │ ├── eval.jsonl │ └── train.jsonl ├── bin_data │ ├── eval │ │ ├── ccpm_data │ │ └── meta.bin │ └── train │ │ ├── ccpm_data │ │ └── meta.bin ├── data_reformat.py └── raw_data │ ├── eval.jsonl │ └── train.jsonl └── decoder_tuning └── decoder_tuning.ipynb /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/README.md -------------------------------------------------------------------------------- /README_en.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/README_en.md -------------------------------------------------------------------------------- /src/.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/.flake8 -------------------------------------------------------------------------------- /src/build_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/build_dataset.py -------------------------------------------------------------------------------- /src/config/cpm-bee-10b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/config/cpm-bee-10b.json -------------------------------------------------------------------------------- /src/config/cpm-bee-3b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/config/cpm-bee-3b.json -------------------------------------------------------------------------------- /src/cpm_live/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/cpm_live/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/arguments.py -------------------------------------------------------------------------------- /src/cpm_live/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/dataset/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/dataset/distributed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/dataset/distributed_dataset.py -------------------------------------------------------------------------------- /src/cpm_live/dataset/serializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/dataset/serializer.py -------------------------------------------------------------------------------- /src/cpm_live/dataset/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/dataset/utils.py -------------------------------------------------------------------------------- /src/cpm_live/generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/generation/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/generation/ant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/generation/ant.py -------------------------------------------------------------------------------- /src/cpm_live/generation/bee.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/generation/bee.py -------------------------------------------------------------------------------- /src/cpm_live/generation/generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/generation/generation_utils.py -------------------------------------------------------------------------------- /src/cpm_live/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/layers/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/attention.py -------------------------------------------------------------------------------- /src/cpm_live/layers/blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/blocks.py -------------------------------------------------------------------------------- /src/cpm_live/layers/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/embedding.py -------------------------------------------------------------------------------- /src/cpm_live/layers/feedforward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/feedforward.py -------------------------------------------------------------------------------- /src/cpm_live/layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/layernorm.py -------------------------------------------------------------------------------- /src/cpm_live/layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/linear.py -------------------------------------------------------------------------------- /src/cpm_live/layers/position_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/position_embedding.py -------------------------------------------------------------------------------- /src/cpm_live/layers/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/layers/transformer.py -------------------------------------------------------------------------------- /src/cpm_live/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/models/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/models/ant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/models/ant.py -------------------------------------------------------------------------------- /src/cpm_live/models/ant_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/models/ant_torch.py -------------------------------------------------------------------------------- /src/cpm_live/models/bee.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/models/bee.py -------------------------------------------------------------------------------- /src/cpm_live/models/bee_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/models/bee_torch.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/attention.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/blocks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/blocks.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/embedding.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/feedforward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/feedforward.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/layernorm.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/linear.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/position_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/position_embedding.py -------------------------------------------------------------------------------- /src/cpm_live/native_layers/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/native_layers/transformer.py -------------------------------------------------------------------------------- /src/cpm_live/tokenizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/tokenizers/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/tokenizers/ant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/tokenizers/ant.py -------------------------------------------------------------------------------- /src/cpm_live/tokenizers/bee.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/tokenizers/bee.py -------------------------------------------------------------------------------- /src/cpm_live/training_tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/training_tasks/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/training_tasks/ant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/training_tasks/ant/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/training_tasks/ant/pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/training_tasks/ant/pretrain.py -------------------------------------------------------------------------------- /src/cpm_live/training_tasks/bee/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/training_tasks/bee/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/training_tasks/bee/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/training_tasks/bee/finetune.py -------------------------------------------------------------------------------- /src/cpm_live/training_tasks/bee/pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/training_tasks/bee/pretrain.py -------------------------------------------------------------------------------- /src/cpm_live/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/utils/__init__.py -------------------------------------------------------------------------------- /src/cpm_live/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/utils/config.py -------------------------------------------------------------------------------- /src/cpm_live/utils/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/utils/data_utils.py -------------------------------------------------------------------------------- /src/cpm_live/utils/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/utils/export.py -------------------------------------------------------------------------------- /src/cpm_live/utils/gradient_shrink.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/utils/gradient_shrink.py -------------------------------------------------------------------------------- /src/cpm_live/utils/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/utils/log.py -------------------------------------------------------------------------------- /src/cpm_live/utils/object.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/utils/object.py -------------------------------------------------------------------------------- /src/cpm_live/vocabs/ant.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/vocabs/ant.txt -------------------------------------------------------------------------------- /src/cpm_live/vocabs/bee.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/cpm_live/vocabs/bee.txt -------------------------------------------------------------------------------- /src/datasets.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/datasets.json -------------------------------------------------------------------------------- /src/finetune_cpm_bee.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/finetune_cpm_bee.py -------------------------------------------------------------------------------- /src/preprocess_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/preprocess_dataset.py -------------------------------------------------------------------------------- /src/pretrain_cpm_bee.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/pretrain_cpm_bee.py -------------------------------------------------------------------------------- /src/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/pyproject.toml -------------------------------------------------------------------------------- /src/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/requirements.txt -------------------------------------------------------------------------------- /src/scripts/finetune_cpm_bee.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/scripts/finetune_cpm_bee.sh -------------------------------------------------------------------------------- /src/scripts/preprocess_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/scripts/preprocess_dataset.sh -------------------------------------------------------------------------------- /src/scripts/pretrain_cpm_bee.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/scripts/pretrain_cpm_bee.sh -------------------------------------------------------------------------------- /src/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/setup.py -------------------------------------------------------------------------------- /src/text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/text_generation.py -------------------------------------------------------------------------------- /src/text_generation_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/src/text_generation_hf.py -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/README.md -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/bee_data/eval.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/bee_data/eval.jsonl -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/bee_data/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/bee_data/train.jsonl -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/bin_data/eval/ccpm_data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/bin_data/eval/ccpm_data -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/bin_data/eval/meta.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/bin_data/eval/meta.bin -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/bin_data/train/ccpm_data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/bin_data/train/ccpm_data -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/bin_data/train/meta.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/bin_data/train/meta.bin -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/data_reformat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/data_reformat.py -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/raw_data/eval.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/raw_data/eval.jsonl -------------------------------------------------------------------------------- /tutorials/basic_task_finetune/raw_data/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/basic_task_finetune/raw_data/train.jsonl -------------------------------------------------------------------------------- /tutorials/decoder_tuning/decoder_tuning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/CPM-Bee/HEAD/tutorials/decoder_tuning/decoder_tuning.ipynb --------------------------------------------------------------------------------