├── .github
└── workflows
│ ├── lint.yml
│ └── typecheck.yml
├── .gitignore
├── README-ZH.md
├── README.md
├── cpm-live
├── .flake8
├── config
│ ├── cpm-ant-10b.json
│ ├── cpm-bee-10b.json
│ └── cpm-bee-3b.json
├── cpm_live
│ ├── __init__.py
│ ├── arguments.py
│ ├── dataset
│ │ ├── __init__.py
│ │ ├── distributed_dataset.py
│ │ ├── serializer.py
│ │ └── utils.py
│ ├── generation
│ │ ├── __init__.py
│ │ ├── ant.py
│ │ ├── bee.py
│ │ └── generation_utils.py
│ ├── layers
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── blocks.py
│ │ ├── embedding.py
│ │ ├── feedforward.py
│ │ ├── layernorm.py
│ │ ├── linear.py
│ │ ├── position_embedding.py
│ │ └── transformer.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── ant.py
│ │ ├── ant_torch.py
│ │ ├── bee.py
│ │ └── bee_torch.py
│ ├── native_layers
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── blocks.py
│ │ ├── embedding.py
│ │ ├── feedforward.py
│ │ ├── layernorm.py
│ │ ├── linear.py
│ │ ├── position_embedding.py
│ │ └── transformer.py
│ ├── tokenizers
│ │ ├── __init__.py
│ │ ├── ant.py
│ │ └── bee.py
│ ├── training_tasks
│ │ ├── __init__.py
│ │ ├── ant
│ │ │ ├── __init__.py
│ │ │ └── pretrain.py
│ │ └── bee
│ │ │ ├── __init__.py
│ │ │ ├── finetune.py
│ │ │ └── pretrain.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── data_utils.py
│ │ ├── export.py
│ │ ├── gradient_shrink.py
│ │ ├── log.py
│ │ └── object.py
│ └── vocabs
│ │ ├── ant.txt
│ │ └── bee.txt
├── cpmbee_translator.py
├── datasets.json
├── finetune_cpm_bee.py
├── preprocess_dataset.py
├── pretrain_cpm_bee.py
├── pyproject.toml
├── requirements.txt
├── scripts
│ ├── finetune_cpm_bee.sh
│ ├── preprocess_dataset.sh
│ └── pretrain_cpm_bee.sh
├── setup.py
└── text_generation.py
├── logs
├── 2022-05-30.md
├── 2022-05-31.md
├── 2022-06-01.md
├── 2022-06-02.md
├── 2022-06-03.md
├── 2022-06-04.md
├── 2022-06-05.md
├── 2022-06-06.md
├── 2022-06-08.md
├── 2022-06-09.md
├── 2022-06-10.md
├── 2022-06-12.md
├── 2022-06-13.md
├── 2022-06-14.md
├── 2022-06-15.md
├── 2022-06-16.md
├── 2022-06-17.md
├── 2022-06-18.md
├── 2022-06-19.md
├── 2022-06-20.md
├── 2022-06-21.md
├── 2022-06-22.md
├── 2022-06-23.md
├── 2022-06-24.md
├── 2022-06-25.md
├── 2022-06-26.md
├── 2022-06-27.md
├── 2022-06-28.md
├── 2022-06-29.md
├── 2022-06-30.md
├── 2022-07-01.md
├── 2022-07-02.md
├── 2022-07-03.md
├── 2022-07-04.md
├── 2022-07-05.md
├── 2022-07-06.md
├── 2022-07-07.md
├── 2022-07-08.md
├── 2022-07-09.md
├── 2022-07-10.md
├── 2022-07-11.md
├── 2022-07-12.md
├── 2022-07-13.md
├── 2022-07-14.md
├── 2022-07-15.md
├── 2022-07-16.md
├── 2022-07-17.md
├── 2022-07-18.md
├── 2022-07-19.md
├── 2022-07-20.md
├── 2022-07-21.md
├── 2022-07-22.md
├── 2022-07-23.md
├── 2022-07-24.md
├── 2022-07-25.md
├── 2022-07-26.md
├── 2022-07-27.md
├── 2022-07-28.md
├── 2022-07-29.md
├── 2022-07-30.md
├── 2022-07-31.md
├── 2022-08-01.md
├── 2022-08-02.md
├── 2022-08-03.md
├── 2022-08-04.md
├── 2022-08-05.md
├── 2022-08-06.md
├── 2022-08-07.md
├── 2022-08-08.md
├── 2022-08-09.md
├── 2022-08-10.md
├── 2022-08-11.md
├── 2022-08-12.md
├── 2022-08-13.md
├── 2022-08-14.md
├── 2022-08-15.md
├── 2022-08-16.md
├── 2022-08-17.md
├── 2022-08-18.md
├── 2022-08-19.md
├── 2022-08-20.md
├── 2022-08-21.md
├── 2022-08-22.md
├── 2022-08-23.md
├── 2022-08-24.md
├── 2022-08-25.md
├── 2022-08-26.md
├── 2022-08-27.md
├── 2022-08-28.md
├── 2022-08-29.md
├── 2022-08-30.md
├── 2022-08-31.md
├── 2022-09-01.md
├── 2022-09-02.md
├── 2022-09-03.md
├── 2022-09-04.md
├── 2022-09-05.md
├── 2022-09-06.md
├── 2022-09-07.md
├── 2022-09-08.md
├── 2022-09-10.md
├── 2022-09-11.md
├── 2022-09-12.md
├── 2022-09-13.md
├── 2022-09-14.md
├── 2022-09-15.md
├── 2022-09-16.md
├── 2022-09-17.md
├── 2022-09-18.md
├── 2022-09-19.md
├── 2022-09-20.md
├── 2022-09-21.md
├── 2022-09-22.md
├── 2022-09-23.md
├── 2022-09-24.md
├── 2022-09-25.md
├── 2022-09-26.md
├── 2022-09-27.md
├── 2022-09-28.md
├── 2022-09-29.md
├── 2022-09-30.md
├── 2022-10-01.md
├── 2022-10-02.md
├── 2022-10-03.md
├── 2022-10-04.md
├── 2022-10-05.md
├── 2022-10-06.md
├── 2022-10-07.md
├── 2022-10-08.md
├── 2022-10-09.md
├── 2022-10-10.md
├── 2022-10-11.md
├── 2022-10-12.md
├── 2022-10-13.md
├── 2022-10-14.md
├── 2022-10-15.md
├── 2022-10-16.md
├── 2022-10-17.md
├── 2022-10-18.md
├── 2022-10-19.md
├── 2022-10-20.md
├── 2022-10-21.md
├── 2022-10-22.md
├── 2022-10-23.md
├── 2022-10-24.md
├── 2022-10-25.md
├── 2022-10-26.md
├── 2022-10-27.md
├── 2022-10-28.md
├── 2022-10-29.md
├── 2022-10-30.md
├── 2022-10-31.md
├── 2022-11-01.md
├── 2022-11-02.md
├── 2022-11-03.md
├── 2022-11-04.md
├── 2022-11-05.md
├── 2022-11-06.md
├── 2022-11-07.md
├── 2022-11-08.md
├── 2022-11-09.md
├── 2022-11-10.md
├── 2022-11-11.md
├── 2022-11-12.md
├── 2022-11-13.md
├── 2022-11-14.md
├── 2022-11-15.md
├── 2022-11-16.md
├── 2022-11-17.md
├── 2022-11-18.md
├── 2022-11-19.md
├── 2022-11-2.md
├── 2022-11-20.md
├── 2022-11-21.md
├── 2022-11-22.md
├── 2022-11-23.md
├── 2022-11-24.md
├── 2022-11-25.md
├── 2022-11-26.md
├── 2022-11-27.md
├── 2022-11-28.md
├── 2022-11-29.md
├── 2022-11-30.md
├── 2022-12-01.md
├── 2022-12-02.md
├── 2022-12-03.md
├── 2022-12-04.md
├── 2022-12-05.md
├── 2022-12-06.md
├── 2022-12-07.md
├── 2022-12-08.md
├── 2022-12-09.md
├── 2022-12-10.md
├── 2022-12-11.md
├── 2022-12-12.md
├── 2022-12-13.md
├── 2022-12-14.md
├── 2022-12-15.md
├── 2022-12-16.md
├── 2022-12-17.md
├── 2022-12-18.md
├── 2022-12-19.md
├── 2022-12-20.md
├── 2022-12-21.md
├── 2022-12-22.md
├── 2022-12-23.md
├── 2022-12-24.md
├── 2022-12-25.md
├── 2022-12-26.md
├── 2022-12-27.md
├── 2022-12-28.md
├── 2022-12-29.md
├── 2022-12-30.md
├── 2022-12-31.md
├── 2023-01-01.md
├── 2023-01-02.md
├── 2023-01-03.md
├── 2023-01-04.md
├── 2023-01-05.md
├── 2023-01-06.md
├── 2023-01-07.md
├── 2023-01-08.md
├── 2023-01-09.md
├── 2023-01-10.md
├── 2023-01-11.md
├── 2023-01-12.md
├── 2023-01-13.md
├── 2023-01-14.md
├── 2023-01-15.md
├── 2023-01-16.md
├── 2023-01-17.md
├── 2023-01-18.md
├── 2023-01-19.md
├── 2023-01-20.md
├── 2023-01-21.md
├── 2023-01-22.md
├── 2023-01-23.md
├── 2023-01-24.md
├── 2023-01-25.md
├── 2023-01-26.md
├── 2023-01-27.md
├── 2023-01-28.md
├── 2023-01-29.md
├── 2023-01-30.md
├── 2023-01-31.md
├── 2023-02-01.md
├── 2023-02-02.md
├── 2023-02-03.md
├── 2023-02-04.md
├── 2023-02-05.md
├── 2023-02-06.md
├── 2023-02-07.md
├── 2023-02-08.md
├── 2023-02-09.md
├── 2023-02-10.md
├── 2023-02-11.md
├── 2023-02-12.md
├── 2023-02-13.md
├── 2023-02-14.md
├── 2023-02-15.md
├── 2023-02-16.md
├── 2023-02-17.md
├── 2023-02-18.md
├── 2023-02-19.md
├── 2023-02-20.md
├── 2023-02-21.md
├── 2023-02-22.md
├── 2023-02-23.md
├── 2023-02-24.md
├── 2023-02-25.md
├── 2023-02-26.md
├── 2023-02-27.md
├── 2023-02-28.md
├── 2023-03-01.md
├── 2023-03-02.md
├── 2023-03-03.md
├── 2023-03-04.md
├── 2023-03-05.md
├── 2023-03-06.md
├── 2023-03-07.md
├── 2023-03-08.md
├── 2023-03-09.md
├── 2023-03-10.md
├── 2023-03-11.md
├── 2023-03-12.md
├── 2023-03-13.md
├── 2023-03-14.md
├── 2023-03-15.md
├── 2023-03-16.md
├── 2023-03-17.md
├── 2023-03-18.md
├── 2023-03-19.md
├── 2023-03-20.md
├── 2023-03-21.md
├── 2023-03-22.md
├── 2023-03-23.md
└── 2023-03-24.md
└── plans
├── CPM-Ant训练计划书.md
├── CPM-Bee训练计划书.md
├── CPM-Live训练计划书.md
└── pics
└── framework.png
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: "lint"
2 | on:
3 | pull_request:
4 | branches:
5 | - "**"
6 | push:
7 | branches:
8 | - "**"
9 | jobs:
10 | lint:
11 | runs-on: ubuntu-latest
12 | strategy:
13 | matrix:
14 | python-version: ["3.7", "3.8", "3.9", "3.10"]
15 | steps:
16 | - uses: actions/checkout@v3
17 | - uses: actions/setup-python@v3
18 | with:
19 | python-version: ${{ matrix.python-version }}
20 | - name: Install flake8
21 | run: pip install flake8
22 | - name: Run flake8
23 | run: |
24 | cd cpm-live
25 | flake8
26 |
27 |
--------------------------------------------------------------------------------
/.github/workflows/typecheck.yml:
--------------------------------------------------------------------------------
name: "type check"
on:
  pull_request:
    branches:
      - "**"
  push:
    branches:
      - "**"
jobs:
  pyright:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.7", "3.8", "3.9", "3.10"]
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install pyright
        run: pip install pyright
      - name: Run pyright
        run: |
          cd cpm-live
          # Quote the requirement specifier: unquoted, the shell parses the
          # ">" in "torch>=1.9" as output redirection, so the version bound
          # is silently dropped and a stray file named "=1.9" is created.
          pip3 install "torch>=1.9"
          pip3 install -r requirements.txt
          pyright
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | **/__pycache__/
4 | *.py[cod]
5 | *$py.class
6 | .DS_STORE
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | db.sqlite3-journal
65 |
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 |
70 | # Scrapy stuff:
71 | .scrapy
72 |
73 | # Sphinx documentation
74 | docs/build/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | .python-version
88 |
89 | # pipenv
90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 | # install all needed dependencies.
94 | #Pipfile.lock
95 |
96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
97 | __pypackages__/
98 |
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 |
103 | # SageMath parsed files
104 | *.sage.py
105 |
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
133 | .vscode/
134 |
135 | *.bin
136 | *.idx
137 | *.pt
138 |
139 | data
140 | data_raw
141 | results
142 | pretrain_data
--------------------------------------------------------------------------------
/README-ZH.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
CPM-Live
4 |
5 | **直播训练开源大模型**
6 |
7 |
8 | 官方网站 • 计划书 • 讨论区 • English
9 |
10 |
11 |
12 |
13 |
14 | ## 动态
15 | - 2023/05/27 [CPM-Bee](https://github.com/OpenBMB/CPM-Bee) 发布了!
16 | - 2023/04/12 CPM-Ant 可以在[HuggingFace Transformers](https://huggingface.co/openbmb/cpm-ant-10b)中使用了!
17 | - 2022/10/12 中英双语模型 [CPM-Ant+](https://github.com/OpenBMB/CPM-Live/tree/cpm-ant-plus/cpm-live) 已经发布!除了能够生成中文/英文文本,现在模型还可以处理问答、摘要和翻译任务!
18 | - 2022/09/16 [CPM-Ant](https://github.com/OpenBMB/CPM-Live/tree/cpm-ant/cpm-live) 已经发布!
19 | - 2022/05/29 CPM-Live的训练今天启动! 详情请查看[训练动态](https://live.openbmb.org/home)。
20 | - 2022/05/25 CPM-Live的[训练计划](./plans/CPM-Live训练计划书.md)现已公布。期待训练开始!
21 |
22 |
23 | ## 里程碑
24 | - **CPM-Bee** (2022/10/13-2023/05/27) [[代码](https://github.com/OpenBMB/CPM-Bee)][[模型](https://github.com/OpenBMB/CPM-Bee#%E6%A8%A1%E5%9E%8B)][[计划书](./plans/CPM-Bee训练计划书.md)]
25 | - **CPM-Ant+** (2022/08/05-2022/10/12) [[代码](https://github.com/OpenBMB/CPM-Live/tree/cpm-ant-plus/cpm-live)][[模型](https://github.com/OpenBMB/CPM-Live/tree/cpm-ant-plus/cpm-live#model-checkpoints)]
26 | - **CPM-Ant** (2022/05/29-2022/08/05) [[代码](https://github.com/OpenBMB/CPM-Live/tree/cpm-ant/cpm-live)][[模型](https://github.com/OpenBMB/CPM-Live/tree/cpm-ant/cpm-live#model-checkpoints)][[网站](https://live.openbmb.org/ant)][[博客](https://www.openbmb.org/en/community/blogs/blogpage?id=98afef2ce45f4fe9a4bc15a66d7ccb92)][[计划书](./plans/CPM-Ant训练计划书.md)]
27 |
28 | ## 训练计划
29 | 考虑到数据和计算资源的规模,CPM-Live将从10B模型开始训练并持续学习。
30 |
31 | ### 在训练过程中,我们将进行:
32 |
33 | - **实时**:显示模型训练指标
34 | - **每天**:发布模型训练日志
35 | - **每周**:处理社区的讨论和反馈
36 | - **不定期**:在模型训练期间发布允许公开下载的检查点
37 |
38 |
39 | ### 在训练期间你可以:
40 |
41 | - **提出你的模型倡议**:对模型架构、训练方法或数据源有好的想法?你可以在社区里提出你的模型倡议。如果该倡议得到更多的支持并且实际可行,我们将把它添加到我们正在训练的模型中,这样CPM-Live就可以在大家的帮助下不断学习和进步。
42 |
43 | - **开发你的应用程序**:基于CPM-Live,你可以向社区提交你的初期想法、原型、开发代码或完成的应用程序。我们将在网站上展示最受欢迎的应用程序。
44 |
45 | - **在论坛上聊天**:你可以在我们的论坛上谈论任何与大模型有关的话题,如学术研究、工程实现、工具使用、应用设计等。无论你是否有经验,我们相信每个人都可以从积极和开放的讨论中受益。
46 |
47 | - **下载资源**:模型训练完成后,你可以在开放使用许可下自由下载模型参数。CPM-Live使用的是包括商业化许可的开放许可。通过模型压缩和推理加速工具,你可以在自己的电脑上体验大模型的威力!
48 |
49 |
50 |
51 | ## 社区
52 |
53 | 我们的[社区](https://github.com/OpenBMB/CPM-Live/discussions) 基于GitHub Discussions。
54 |
55 | 阅读[第一篇帖子](https://github.com/OpenBMB/CPM-Live/discussions/1),开始你对CPM-Live的探索吧!
56 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/cpm-live/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | per-file-ignores =
3 | # imported but unused
4 | __init__.py: F401
5 | max-line-length = 100
6 | extend-ignore = E712, E203
7 | exclude = examples/*.py
--------------------------------------------------------------------------------
/cpm-live/config/cpm-ant-10b.json:
--------------------------------------------------------------------------------
1 | {
2 | "vocab_size": 30720,
3 | "dim_model": 4096,
4 | "dim_ff" : 10240,
5 | "num_layers" : 48,
6 | "num_heads": 32,
7 | "dim_head" : 128,
8 | "dropout_p" : 0.0,
9 | "position_bias_num_buckets" : 512,
10 | "position_bias_max_distance" : 2048,
11 | "eps" : 1e-6,
12 | "half" : true,
13 | "prompt_types": 32,
14 | "prompt_length": 32,
15 | "segment_types": 32
16 | }
17 |
--------------------------------------------------------------------------------
/cpm-live/config/cpm-bee-10b.json:
--------------------------------------------------------------------------------
1 | {
2 | "vocab_size": 86583,
3 | "dim_model": 4096,
4 | "dim_ff" : 10240,
5 | "num_layers" : 48,
6 | "num_heads": 32,
7 | "dim_head" : 128,
8 | "dropout_p" : 0.0,
9 | "position_bias_num_buckets" : 256,
10 | "position_bias_num_segment_buckets": 256,
11 | "position_bias_max_distance" : 2048,
12 | "eps" : 1e-6,
13 | "half" : true
14 | }
15 |
--------------------------------------------------------------------------------
/cpm-live/config/cpm-bee-3b.json:
--------------------------------------------------------------------------------
1 | {
2 | "vocab_size": 86580,
3 | "dim_model": 2560,
4 | "dim_ff" : 3072,
5 | "num_layers" : 32,
6 | "num_heads": 32,
7 | "dim_head" : 80,
8 | "dropout_p" : 0.0,
9 | "position_bias_num_buckets" : 256,
10 | "position_bias_num_segment_buckets": 256,
11 | "position_bias_max_distance" : 2048,
12 | "eps" : 1e-6,
13 | "half" : true
14 | }
15 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenBMB/CPM-Live/8ad7aa69143fbb41753013aac98744bf87abe0b1/cpm-live/cpm_live/__init__.py
--------------------------------------------------------------------------------
/cpm-live/cpm_live/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed_dataset import DistributedDataset, SimpleDataset, build_dataset
2 | from .utils import shuffle_dataset, compact_dataset, mask_dataset, merge_dataset
3 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/dataset/serializer.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2020 The OpenBMB team. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import pickle
17 | import json
18 |
19 |
class Serializer:
    """Abstract interface for converting objects to and from ``bytes``.

    Subclasses must implement both :meth:`serialize` and :meth:`deserialize`
    such that ``deserialize(serialize(obj))`` round-trips ``obj``.
    """

    def serialize(self, obj) -> bytes:
        """Encode ``obj`` as bytes. Must be overridden by subclasses."""
        raise NotImplementedError()

    def deserialize(self, data: bytes):
        """Decode ``data`` back into an object. Must be overridden by subclasses."""
        raise NotImplementedError()


class PickleSerializer(Serializer):
    """Serializer backed by Python's :mod:`pickle` module.

    NOTE(review): pickle is unsafe on untrusted input; this is acceptable only
    because the dataset files are produced by this codebase itself.
    """

    def serialize(self, obj) -> bytes:
        """Pickle ``obj`` into a byte string."""
        return pickle.dumps(obj)

    def deserialize(self, data: bytes):
        """Unpickle an object from ``data``."""
        return pickle.loads(data)


class JsonSerializer(Serializer):
    """Serializer that stores objects as UTF-8 encoded JSON."""

    def serialize(self, obj) -> bytes:
        """Dump ``obj`` to JSON text and encode it as UTF-8.

        ``ensure_ascii=False`` keeps non-ASCII text (e.g. Chinese) as raw
        UTF-8 instead of ``\\uXXXX`` escapes, halving the payload size for
        CJK-heavy data.
        """
        return json.dumps(obj, ensure_ascii=False).encode("utf-8")

    def deserialize(self, data: bytes):
        """Decode UTF-8 bytes and parse them as JSON."""
        return json.loads(data.decode("utf-8"))


class RawSerializer(Serializer):
    """Identity serializer: the object is already raw bytes and is passed through."""

    def serialize(self, obj) -> bytes:
        """Return ``obj`` unchanged (it must already be ``bytes``)."""
        return obj

    def deserialize(self, data: bytes):
        """Return ``data`` unchanged."""
        return data
62 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/generation/__init__.py:
--------------------------------------------------------------------------------
1 | from .ant import CPMAntBeamSearch, CPMAntRandomSampling, CPMAntGeneration
2 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .embedding import Embedding, EmbeddingExt
2 | from .position_embedding import SegmentPositionEmbedding, BucketPositionBias, RotaryEmbedding
3 | from .linear import Linear
4 | from .layernorm import LayerNorm
5 | from .attention import Attention
6 | from .feedforward import FeedForward
7 | from .blocks import TransformerBlock
8 | from .transformer import Encoder
9 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/layers/layernorm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import bmtrain as bmt
3 |
4 |
@torch.jit.script  # type: ignore
def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float):
    # RMS normalization over the last dimension: no mean subtraction, no bias.
    # The statistic is computed in float32 for numerical stability, then the
    # normalized activations are cast back to the input dtype before the gain.
    input_dtype = hidden.dtype
    mean_square = hidden.to(torch.float32).pow(2).mean(dim=-1, keepdim=True)
    normalized = (hidden * torch.rsqrt(mean_square + eps)).to(input_dtype)
    return normalized * weight
11 |
12 |
class LayerNorm(bmt.DistributedModule):
    """RMS LayerNorm

    Root-mean-square layer normalization over the last dimension (no mean
    subtraction, no bias) with a single learnable per-dimension gain stored
    as a bmtrain distributed parameter.
    """

    def __init__(
        self,
        dim_norm: int,
        dtype: torch.dtype = torch.half,
        eps: float = 1e-6,
        init_var: float = 1.0,
    ):
        # dim_norm: size of the last (normalized) dimension.
        # dtype: parameter dtype; defaults to fp16, matching the model configs.
        # eps: small constant added to the mean-square before rsqrt for stability.
        # init_var: initial value of every element of the gain vector.

        super().__init__()

        self.eps = eps
        self.dim_norm = dim_norm
        # Gain vector of shape (dim_norm,); sharded across ranks by bmtrain.
        self.weight = bmt.DistributedParameter(torch.full((dim_norm,), init_var, dtype=dtype))

    def forward(self, x: torch.Tensor):
        """
        Args:
            x (:obj:`torch.Tensor` of shape ``(batch_size, seq_len, dim_norm)``): Input tensor that need to be normalized.
        Return:
            :obj:`torch.Tensor` of shape ``(batch_size, seq_len, dim_norm)``: The layernorm output.
        """  # noqa: E501
        assert x.size(-1) == self.dim_norm
        return rms_layernorm(x, self.weight, self.eps)
39 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/layers/linear.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2022 The OpenBMB team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import torch
17 | import bmtrain as bmt
18 | import math
19 | import torch.nn.functional as F
20 |
21 |
class Linear(bmt.DistributedModule):
    """Bias-free linear projection whose input (or output) is rescaled by
    ``1/sqrt(dim_in)``; the weight is stored as a bmtrain distributed parameter."""

    def __init__(
        self,
        dim_in: int,
        dim_out: int,
        dtype: torch.dtype = torch.half,
        init_mean: float = 0.0,
        init_std: float = 1,
        scale_before: bool = False,
    ):
        # dim_in / dim_out: input and output feature sizes.
        # init_mean / init_std: parameters of the normal weight initializer below.
        # scale_before: if True, divide the input by sqrt(dim_in) before the
        # matmul; otherwise divide the output afterwards (mathematically equal
        # but numerically different in fp16).
        super().__init__()
        # in_features/out_features aliases — presumably for compatibility with
        # code expecting torch.nn.Linear's attribute names; verify against callers.
        self.dim_in = self.in_features = dim_in
        self.dim_out = self.out_features = dim_out
        self.scale_before = scale_before

        self.weight = bmt.DistributedParameter(
            torch.empty((dim_out, dim_in), dtype=dtype),
            init_method=bmt.ParameterInitializer(
                torch.nn.init.normal_, mean=init_mean, std=init_std
            ),
        )

    def forward(self, x: torch.Tensor):
        """
        Args:
            x (:obj:`torch.Tensor` of shape ``(batch, seq_len, dim_in)``): The input of linear layer
        Returns:
            :obj:`torch.Tensor` of shape ``(batch, seq_len, dim_out)``: The output of the linear transform y.
        """  # noqa: E501
        if self.scale_before:
            x = x / math.sqrt(self.dim_in)
            x = F.linear(x, self.weight)
        else:
            x = F.linear(x, self.weight)
            x = x / math.sqrt(self.dim_in)
        return x
58 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .ant import CPMAntConfig, CPMAnt
2 | from .bee import CPMBeeConfig, CPMBee
3 | from .ant_torch import CPMAntTorch
4 | from .bee_torch import CPMBeeTorch
5 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/native_layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .embedding import Embedding, EmbeddingExt
2 | from .position_embedding import SegmentPositionEmbedding, BucketPositionBias, RotaryEmbedding
3 | from .linear import Linear
4 | from .layernorm import LayerNorm
5 | from .attention import Attention
6 | from .feedforward import FeedForward
7 | from .blocks import TransformerBlock
8 | from .transformer import Encoder
9 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/native_layers/layernorm.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
@torch.jit.script  # type: ignore
def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float):
    # RMS normalization over the last dimension. The statistic is computed in
    # float32 for stability, then the normalized activations are cast back to
    # the input dtype before applying the learned gain.
    input_dtype = hidden.dtype
    mean_square = hidden.to(torch.float32).pow(2).mean(dim=-1, keepdim=True)
    normalized = (hidden * torch.rsqrt(mean_square + eps)).to(input_dtype)
    return normalized * weight


class LayerNorm(torch.nn.Module):
    """RMS LayerNorm: normalizes by the root mean square of the last dimension
    and applies a learnable per-dimension gain (no mean subtraction, no bias)."""

    def __init__(
        self,
        dim_norm: int,
        dtype: torch.dtype = torch.half,
        eps: float = 1e-6,
        init_var: float = 1.0,
    ):
        # dim_norm: size of the last (normalized) dimension.
        # eps: small constant added to the mean-square before rsqrt.
        # init_var: initial value of every element of the gain vector.
        super().__init__()

        self.eps = eps
        self.dim_norm = dim_norm
        # Learnable gain vector of shape (dim_norm,).
        self.weight = torch.nn.parameter.Parameter(torch.full((dim_norm,), init_var, dtype=dtype))

    def forward(self, x: torch.Tensor):
        """
        Args:
            x (:obj:`torch.Tensor` of shape ``(batch_size, seq_len, dim_norm)``): Input tensor that need to be normalized.
        Return:
            :obj:`torch.Tensor` of shape ``(batch_size, seq_len, dim_norm)``: The layernorm output.
        """  # noqa: E501
        assert x.size(-1) == self.dim_norm
        return rms_layernorm(x, self.weight, self.eps)
38 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/native_layers/linear.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2022 The OpenBMB team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import torch
17 | import math
18 | import torch.nn.functional as F
19 |
20 |
class Linear(torch.nn.Module):
    """Bias-free linear layer that rescales by ``1/sqrt(dim_in)`` either
    before or after the matrix multiplication."""

    def __init__(
        self,
        dim_in: int,
        dim_out: int,
        dtype: torch.dtype = torch.half,
        init_mean: float = 0.0,
        init_std: float = 1,
        scale_before: bool = False,
    ):
        # scale_before: if True, divide the input by sqrt(dim_in) before the
        # matmul; otherwise divide the output afterwards.
        super().__init__()
        # in_features/out_features aliases — presumably for compatibility with
        # code expecting torch.nn.Linear's attribute names; verify against callers.
        self.dim_in = self.in_features = dim_in
        self.dim_out = self.out_features = dim_out
        self.scale_before = scale_before

        # NOTE(review): the weight is uninitialized here; init_mean/init_std are
        # accepted for interface parity with the bmtrain variant but not applied —
        # callers are expected to load weights before use. TODO confirm.
        self.weight = torch.nn.parameter.Parameter(torch.empty((dim_out, dim_in), dtype=dtype))

    def forward(self, x: torch.Tensor):
        """
        Args:
            x (:obj:`torch.Tensor` of shape ``(batch, seq_len, dim_in)``): The input of linear layer
        Returns:
            :obj:`torch.Tensor` of shape ``(batch, seq_len, dim_out)``: The output of the linear transform y.
        """  # noqa: E501
        scale = math.sqrt(self.dim_in)
        if self.scale_before:
            return F.linear(x / scale, self.weight)
        projected = F.linear(x, self.weight)
        return projected / scale
52 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/tokenizers/__init__.py:
--------------------------------------------------------------------------------
1 | from .ant import CPMAntTokenizer
2 | from .bee import CPMBeeTokenizer
3 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/training_tasks/__init__.py:
--------------------------------------------------------------------------------
1 | from . import ant
2 | from . import bee
3 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/training_tasks/ant/__init__.py:
--------------------------------------------------------------------------------
1 | from .pretrain import CPMAntPretrainDataset
2 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/training_tasks/bee/__init__.py:
--------------------------------------------------------------------------------
1 | from .pretrain import MixedDataset
2 | from .finetune import FinetuneDataset
3 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/training_tasks/bee/finetune.py:
--------------------------------------------------------------------------------
1 | from ...tokenizers import CPMBeeTokenizer
2 | from .pretrain import _MixedDatasetBatchPacker, _MixedDatasetConfig, CPMBeeBatch
3 | from ...dataset import SimpleDataset
4 | import bmtrain as bmt
5 |
6 |
class FinetuneDataset:
    """Iterable fine-tuning data source for CPM-Bee.

    Wraps a single on-disk dataset and reuses the pretraining batch packer to
    assemble fixed-size global batches; each rank then slices out its own
    shard of every batch in ``__iter__``.
    """

    def __init__(
        self,
        dataset_path: str,
        batch_size: int,
        max_length: int,
        tokenizer: CPMBeeTokenizer,
        max_depth: int = 16,
        task_name: str = "task",
        drop_last: bool = False,
    ) -> None:
        # batch_size is per-rank; the packer below builds global batches of
        # batch_size * world_size, which __iter__ shards by rank.
        self._world_size = bmt.world_size()
        self._rank = bmt.rank()
        self._batch_size = batch_size

        self._packer = _MixedDatasetBatchPacker(
            batch_size * self._world_size, max_length, tokenizer, max_depth
        )
        # drop_last: if True, discard the final partially-filled batch.
        self._drop_last = drop_last

        # shuffle=False: fine-tuning reads the file in its original order.
        ds = SimpleDataset(dataset_path, shuffle=False)
        # A single-dataset config in the same shape the mixed pretraining
        # pipeline expects, with trivial weights and no transforms.
        self._ds_cfg: _MixedDatasetConfig = {
            "weight": 1.0,
            "path": dataset_path,
            "transforms": [],
            "task_name": task_name,
            "dataset_name": "finetune",
            "incontext_weight": [1.0],
            "lines": len(ds),
            "dataset": ds,
        }

    def __batch_iter(self):
        # Yield globally-sized packed batches until the dataset is exhausted
        # (add_data raises EOFError at end of data).
        while True:
            try:
                batch = self._packer.add_data(self._ds_cfg)
            except EOFError:
                break
            if batch is None:
                # Not enough data accumulated to fill a batch yet.
                continue
            yield batch
        # Flush whatever is left in the packer as a final (smaller) batch.
        if len(self._packer) > 0:
            batch = self._packer.pack_batch(force=True)
            if not self._drop_last:
                yield batch
        # Reset the dataset's repeat counter so it can be iterated again
        # on the next epoch. NOTE(review): reaches into a private attribute
        # of SimpleDataset — confirm this is the intended reset mechanism.
        self._ds_cfg["dataset"]._repeat_times = 0

    def __iter__(self):
        # This rank's slice [batch_st, batch_end) of each global batch.
        batch_st = self._batch_size * self._rank
        batch_end = self._batch_size * (self._rank + 1)
        for batch in self.__batch_iter():
            batch_size = batch["inputs"].shape[0]
            if batch_size <= batch_st:
                # The forced final batch may be smaller than the global batch
                # size and contain nothing for this rank; yield None —
                # presumably so all ranks still step in lockstep. Verify.
                yield None
            else:
                # Slice the per-sample tensors for this rank...
                ret: CPMBeeBatch = {
                    kw: val[batch_st:batch_end]  # type: ignore
                    for kw, val in batch.items()
                    if kw not in ["task_names", "raw_data", "ext_ids", "ext_sub"]
                }  # type: ignore
                # ...and pass the remaining fields through unsliced.
                ret["task_names"] = batch["task_names"]
                ret["raw_data"] = batch["raw_data"]
                ret["ext_ids"] = batch["ext_ids"]
                ret["ext_sub"] = batch["ext_sub"]
                yield ret
72 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import Config
2 | from .data_utils import pad
3 | from .object import allgather_objects
4 | from .log import LogManager, logger
5 | from .config import load_dataset_config
6 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/utils/config.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2022 The OpenBMB team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import json
17 | import os
18 | import copy
19 | from typing import Any, Dict, Union
20 | from .log import logger
21 |
22 |
def load_dataset_config(dataset_path: str):
    """Load a dataset configuration list from ``dataset_path``.

    If the ``PLATFORM_CONFIG_PATH`` environment variable is set, it must point
    to a JSON file with a ``"dataset_map"`` object mapping dataset names to
    base directories; each entry's ``path`` and ``transforms`` are then
    rewritten to absolute locations under that base directory.

    Both files are opened with context managers so the handles are closed
    promptly (the previous ``json.load(open(...))`` form leaked them).
    """
    with open(dataset_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    platform_config_path = os.getenv("PLATFORM_CONFIG_PATH")
    if platform_config_path is None:
        logger.info(
            "no platform_config_path. Directly load dataset_path({dataset_path})"
            .format(dataset_path=dataset_path)
        )
        return cfg

    with open(platform_config_path, "r", encoding="utf-8") as f:
        path_dict = json.load(f)["dataset_map"]
    logger.info(
        "load dataset_path({dataset_path}) with platform_config_path({platform_config_path})"
        .format(dataset_path=dataset_path, platform_config_path=platform_config_path)
    )
    # Rebase each dataset's relative paths onto its platform directory.
    for dataset in cfg:
        dataset["path"] = os.path.join(path_dict[dataset["dataset_name"]], dataset["path"])
        dataset["transforms"] = os.path.join(
            path_dict[dataset["dataset_name"]], dataset["transforms"]
        )
    return cfg
45 |
46 |
class Config(object):
    """Base class for model configurations.

    Subclasses declare their own ``__init__`` keyword parameters; instances
    can be round-tripped through JSON with the helpers below.
    """

    def __init__(self):
        super().__init__()

    @classmethod
    def from_json_file(cls, json_file: Union[str, os.PathLike], **args):
        """Construct a config from a JSON file; ``args`` override file values."""
        config_dict = cls._dict_from_json_file(json_file, **args)
        return cls(**config_dict)

    @classmethod
    def _dict_from_json_file(cls, json_file: Union[str, os.PathLike], **args):
        """Read ``json_file`` into a dict, then apply keyword overrides on top."""
        with open(json_file, "r", encoding="utf-8") as reader:
            res = json.loads(reader.read())
        res.update(args)
        return res

    def to_json_file(self, json_file_path: Union[str, os.PathLike]):
        """Serialize this config to ``json_file_path`` as UTF-8 JSON."""
        with open(json_file_path, "w", encoding="utf-8") as writer:
            writer.write(self.to_json_string())

    def to_json_string(self) -> str:
        """Return the config as a pretty-printed, key-sorted JSON string."""
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"

    def to_dict(self) -> Dict[str, Any]:
        """Return a deep copy of this instance's attributes."""
        return copy.deepcopy(self.__dict__)
78 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/utils/data_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
def pad(orig_items, key, padding_value=0, padding_side="left"):
    """Pad a batch of tensors stored under ``key`` to a common sequence length.

    Each element of ``orig_items`` maps ``key`` to either a tensor of shape
    ``(1, L)`` / ``(1, L, D)`` or a list of such tensors (which is flattened).

    :param orig_items: list of dicts holding the tensors to batch.
    :param key: dict key whose tensors are padded and stacked.
    :param padding_value: fill value for the padded positions.
    :param padding_side: ``"left"`` pads at the front, anything else at the back.
    :return: a single batched tensor.
    """
    items = []
    if isinstance(orig_items[0][key], list):
        assert isinstance(orig_items[0][key][0], torch.Tensor)
        # Flatten list-valued entries into one item per tensor.
        for it in orig_items:
            for tr in it[key]:
                items.append({key: tr})
    else:
        assert isinstance(orig_items[0][key], torch.Tensor)
        items = orig_items

    batch_size = len(items)
    shape = items[0][key].shape
    dim = len(shape)
    assert dim <= 3
    # The padded axis is the sequence axis: the last axis for 1-D/2-D tensors,
    # but the second-to-last for 3-D tensors of shape (1, L, D).  The original
    # code used shape[-1] unconditionally, which measured the feature dim D
    # for 3-D inputs and produced wrongly sized buffers.
    length_axis = -2 if dim == 3 else -1
    max_length = max(item[key].shape[length_axis] for item in items)
    min_length = min(item[key].shape[length_axis] for item in items)
    dtype = items[0][key].dtype

    if dim == 1:
        return torch.cat([item[key] for item in items], dim=0)
    elif dim == 2:
        if max_length == min_length:
            # All sequences already equal length: no padding buffer needed.
            return torch.cat([item[key] for item in items], dim=0)
        tensor = torch.full((batch_size, max_length), padding_value, dtype=dtype)
    else:
        tensor = torch.full((batch_size, max_length, shape[-1]), padding_value, dtype=dtype)

    for i, item in enumerate(items):
        seq = item[key][0]  # drop the leading batch axis of size 1
        if padding_side == "left":
            tensor[i, -len(seq):] = seq.clone()
        else:
            tensor[i, : len(seq)] = seq.clone()

    return tensor
45 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/utils/export.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import functools
4 | import torch
5 | import bmtrain as bmt
6 | import json
7 | from cpm_live.models import CPMBee
8 | from .log import logger
9 | from typing import List, Optional
10 |
11 |
def rename_if_exists(file_path):
    """Back up an existing file by renaming it with a timestamp suffix.

    If ``file_path`` does not exist this is a no-op.  Rename failures are
    logged but never raised, so callers can always proceed to (re)write
    ``file_path``.
    """
    if not os.path.exists(file_path):
        return
    timestamp = time.strftime('%Y%m%d%H%M%S')
    file_dir, file_name = os.path.split(file_path)
    file_root, file_ext = os.path.splitext(file_name)
    new_file_name = f"{file_root}_bak_{timestamp}{file_ext}"
    new_file_path = os.path.join(file_dir, new_file_name)
    try:
        os.rename(file_path, new_file_path)
        logger.info(f"File '{file_name}' already exists. Renamed to '{new_file_name}'")
    except Exception as e:
        # logger.warn is deprecated in the stdlib logging API; use warning().
        logger.warning(
            "rename file failed,file_path={file_path}, new_file_path={new_file_path},err={err}"
            .format(file_path=file_path, new_file_path=new_file_path, err=str(e)))
27 |
28 |
def rename_if_exists_decorator(func):
    """Wrap ``func`` so any file already present at its first argument
    (``file_path``) is backed up via ``rename_if_exists`` before ``func`` runs.
    """
    @functools.wraps(func)
    def inner(file_path, *args, **kwargs):
        rename_if_exists(file_path)
        return func(file_path, *args, **kwargs)
    return inner
35 |
36 |
@rename_if_exists_decorator
def bmt_save(file_path: str, model: CPMBee, export_files: Optional[List[str]] = None):
    """Persist a CPMBee model via ``bmt.save``, recording ``file_path`` in
    ``export_files`` when a list is supplied."""
    bmt.save(model, file_path)
    if export_files is None:
        return
    export_files.append(file_path)
42 |
43 |
@rename_if_exists_decorator
def torch_save(file_path: str, obj: object, export_files: Optional[List[str]] = None):
    """Save ``obj`` with ``torch.save``, recording ``file_path`` in
    ``export_files`` when a list is supplied."""
    torch.save(obj, file_path)
    if export_files is None:
        return
    export_files.append(file_path)
49 |
50 |
@rename_if_exists_decorator
def json_save(file_path: str, obj: object, export_files: Optional[List[str]] = None):
    """Dump ``obj`` as JSON to ``file_path``, recording the path in
    ``export_files`` when a list is supplied."""
    with open(file_path, "w") as fp:
        json.dump(obj, fp)
    if export_files is None:
        return
    export_files.append(file_path)
57 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/utils/gradient_shrink.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
class OpGradientShrink(torch.autograd.Function):
    """Identity in the forward pass; scales the gradient by ``alpha`` in the
    backward pass (a "gradient shrink" op)."""

    @staticmethod
    def forward(ctx, x: torch.Tensor, alpha: float):
        # Stash the scale factor for the backward pass; x flows through as-is.
        ctx.alpha = alpha
        return x

    @staticmethod
    def backward(ctx, grad_output):
        # Gradient of the identity, shrunk by alpha; no gradient w.r.t. alpha.
        return ctx.alpha * grad_output, None
13 |
14 |
def gradient_shrink(x: torch.Tensor, alpha: float = 0.1):
    """Return ``x`` unchanged while scaling its backward gradient by ``alpha``."""
    return OpGradientShrink.apply(x, alpha)
17 |
--------------------------------------------------------------------------------
/cpm-live/cpm_live/utils/object.py:
--------------------------------------------------------------------------------
1 | import bmtrain as bmt
2 | import pickle
3 | import torch
4 |
5 |
def allgather_objects(obj):
    """All-gather an arbitrary picklable object across all bmtrain ranks.

    :param obj: any picklable Python object.
    :return: a list with one entry per rank (this rank's object included),
        ordered by rank index.
    """
    if bmt.world_size() == 1:
        return [obj]

    with torch.no_grad():
        data_bytes: bytes = pickle.dumps(obj)
        data_length: int = len(data_bytes)

        # Exchange payload sizes first so every rank can pad to a common length.
        gpu_data_length = torch.tensor([data_length], device="cuda", dtype=torch.long)
        gathered_length = bmt.distributed.all_gather(gpu_data_length).view(-1).cpu()
        max_data_length = gathered_length.max().item()

        # Zero-pad the local payload up to the maximum length.
        gpu_data_bytes = torch.zeros(max_data_length, dtype=torch.uint8, device="cuda")
        # torch.frombuffer replaces the deprecated ByteStorage.from_buffer /
        # ByteTensor pair; the bytearray copy is needed because frombuffer
        # requires a writable buffer (bytes is immutable).
        gpu_data_bytes[:data_length] = torch.frombuffer(bytearray(data_bytes), dtype=torch.uint8)

        gathered_data = bmt.distributed.all_gather(gpu_data_bytes).cpu()

        ret = []
        for i in range(gathered_data.size(0)):
            # Trim each rank's padding back to its true length before unpickling.
            rank_bytes = gathered_data[i, : gathered_length[i].item()].numpy().tobytes()
            ret.append(pickle.loads(rank_bytes))
        return ret
29 |
--------------------------------------------------------------------------------
/cpm-live/datasets.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "dataset_name": "pretrain",
4 | "task_name": "mlm",
5 | "weight": 1.0,
6 | "path": "/path/to/dataset",
7 | "transforms": [
8 | {
9 | "answer": "$answer",
10 | "document": "$source"
11 | },
12 | {
13 | "answer": "$answer",
14 | "query": "$source"
15 | },
16 | {
17 | "answer": "$answer",
18 | "input": "$source"
19 | }
20 | ]
21 | }
22 | ]
--------------------------------------------------------------------------------
/cpm-live/preprocess_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | from cpm_live.dataset import build_dataset, shuffle_dataset
3 | import shutil
4 | from tqdm import tqdm
5 | import json
6 | import argparse
7 |
8 |
def get_args():
    """Parse the command-line arguments for dataset preprocessing."""
    parser = argparse.ArgumentParser()
    for flag, desc in (
        ("--input", "raw dataset path"),
        ("--output_path", "output dataset path"),
        ("--output_name", "output dataset name"),
    ):
        parser.add_argument(flag, type=str, help=desc, required=True)
    return parser.parse_args()
17 |
18 |
def reformat_data(data):
    """Hook for converting one raw record into the training format.

    Identity by default; override the body to match your own data layout.
    """
    return data
22 |
23 |
def main():
    """Convert every raw JSON-lines file under ``--input`` into a shuffled
    binary dataset under ``--output_path``.

    Each input file is first written into a temporary dataset directory
    (``tmp``), then shuffled into its final location, after which ``tmp``
    is removed.
    """
    args = get_args()
    files = os.listdir(args.input)
    for ds in files:
        src_path = os.path.join(args.input, ds)
        with build_dataset("tmp", "data") as dataset:
            with open(src_path, "r", encoding="utf-8") as fin:
                # Stream line by line instead of fin.readlines(): the old
                # form loaded the entire raw file into memory at once.
                for line in tqdm(fin, desc=src_path):
                    dataset.write(reformat_data(json.loads(line)))
        shuffle_dataset(
            "tmp",
            os.path.join(args.output_path, ds.split(".")[0]),
            progress_bar=True,
            output_name=args.output_name,
        )
        shutil.rmtree("tmp")
41 |
42 |
43 | if __name__ == "__main__":
44 | main()
45 |
--------------------------------------------------------------------------------
/cpm-live/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.pyright]
2 | include = ["cpm_live"]
3 | useLibraryCodeForTypes = false
4 |
5 | [tool.black]
6 | line-length = 100
7 | target-version = ['py37']
8 | include = '\.pyi?$'
9 |
--------------------------------------------------------------------------------
/cpm-live/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.10
2 | bmtrain==0.1.8.post1
3 | jieba
4 | tqdm
5 | tensorboard
6 | numpy>=1.21.0
7 | spacy
8 |
--------------------------------------------------------------------------------
/cpm-live/scripts/finetune_cpm_bee.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | export CUDA_VISIBLE_DEVICES=0,1,2,3
3 | GPUS_PER_NODE=4
4 |
5 | NNODES=1
6 | MASTER_ADDR="localhost"
7 | MASTER_PORT=12345
8 |
9 | OPTS=""
10 | OPTS+=" --use-delta"
11 | OPTS+=" --model-config config/cpm-bee-10b.json"
12 | OPTS+=" --dataset path/to/dataset"
13 | OPTS+=" --eval_dataset path/to/eval/dataset"
14 | OPTS+=" --epoch 100"
15 | OPTS+=" --batch-size 5"
16 | OPTS+=" --train-iters 100"
17 | OPTS+=" --save-name cpm_bee_finetune"
18 | OPTS+=" --max-length 2048"
19 | OPTS+=" --save results/"
20 | OPTS+=" --lr 0.0001"
21 | OPTS+=" --inspect-iters 100"
22 | OPTS+=" --warmup-iters 1"
23 | OPTS+=" --eval-interval 1000"
24 | OPTS+=" --early-stop-patience 5"
25 | OPTS+=" --lr-decay-style noam"
26 | OPTS+=" --weight-decay 0.01"
27 | OPTS+=" --clip-grad 1.0"
28 | OPTS+=" --loss-scale 32768"
29 | OPTS+=" --start-step 0"
30 | OPTS+=" --load model.pt"
31 |
32 | CMD="torchrun --nnodes=${NNODES} --nproc_per_node=${GPUS_PER_NODE} --rdzv_id=1 --rdzv_backend=c10d --rdzv_endpoint=${MASTER_ADDR}:${MASTER_PORT} finetune_cpm_bee.py ${OPTS}"
33 |
34 | echo ${CMD}
35 | $CMD
36 |
37 |
--------------------------------------------------------------------------------
/cpm-live/scripts/preprocess_dataset.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 | CMD="python preprocess_dataset.py --input data_sample.txt --output_path path/to/binary/file --output_name data"
4 |
5 | echo $CMD
6 | $CMD
--------------------------------------------------------------------------------
/cpm-live/scripts/pretrain_cpm_bee.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
3 | GPUS_PER_NODE=8
4 |
5 | NNODES=1
6 | MASTER_ADDR="localhost"
7 | MASTER_PORT=12345
8 | MODEL_STEPS="0"
9 |
10 | OPTS=""
11 | OPTS+=" --model-config config/cpm-bee-10b.json"
12 | OPTS+=" --dataset=datasets.json"
13 | OPTS+=" --batch-size 8"
14 | OPTS+=" --train-iters 200000"
15 | OPTS+=" --save-iters 500"
16 | OPTS+=" --save-name cpm_live_checkpoint"
17 | OPTS+=" --max-length 2048"
18 | OPTS+=" --save results/"
19 | OPTS+=" --lr 0.1"
20 | OPTS+=" --inspect-iters 100"
21 | OPTS+=" --warmup-iters 2000"
22 | OPTS+=" --lr-decay-style noam"
23 | OPTS+=" --weight-decay 0.01"
24 | OPTS+=" --clip-grad 1.0"
25 | OPTS+=" --loss-scale 1048576"
26 | OPTS+=" --start-step ${MODEL_STEPS}"
27 | OPTS+=" --log-dir logs/train/"
28 | OPTS+=" --tensorboard logs/tensorboard/cpm_live_48_4096/"
29 | OPTS+=" --load results/cpm_live_checkpoint-${MODEL_STEPS}.pt"
30 |
31 | CMD="torchrun --nnodes=${NNODES} --nproc_per_node=${GPUS_PER_NODE} --rdzv_id=1 --rdzv_backend=c10d --rdzv_endpoint=${MASTER_ADDR}:${MASTER_PORT} pretrain_cpm_bee.py ${OPTS}"
32 |
33 | echo ${CMD}
34 | $CMD
35 |
36 |
--------------------------------------------------------------------------------
/cpm-live/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

# Packaging metadata for the cpm_live toolkit.
setup(
    name="cpm_live",
    version="0.1.0",
    author="OpenBMB",
    author_email="openbmb@gmail.com",
    description="Toolkit for CPM-Live",
    packages=find_packages(),
    install_requires=[
        # "numpy" was previously listed twice (bare and version-pinned);
        # keep only the pinned requirement.
        "numpy>=1.21.0",
        "torch>=1.10",
        "bmtrain>=0.1.8",
        "jieba",
        "tqdm",
        "tensorboard",
    ],
    # Ship the bundled vocab files alongside the package.
    package_data={"cpm_live": ["vocabs/*.txt"]},
)
21 |
--------------------------------------------------------------------------------
/cpm-live/text_generation.py:
--------------------------------------------------------------------------------
from cpm_live.generation.bee import CPMBeeBeamSearch
from cpm_live.models import CPMBeeTorch, CPMBeeConfig
from cpm_live.tokenizers import CPMBeeTokenizer
import torch

if __name__ == "__main__":

    # One CPM-Bee input record: a "document" field plus an empty-key slot.
    # NOTE(review): the {"": {"": ""}} entry presumably marks the span to be
    # generated — confirm against CPMBeeBeamSearch's expected input schema.
    data_list = [
        {"document": "今天天气是真的", "": {"": ""}},
    ]

    # Build the model from the 10B config and load a trained checkpoint.
    config = CPMBeeConfig.from_json_file("config/cpm-bee-10b.json")
    ckpt_path = "path/to/checkpoint.pt"  # replace with a real checkpoint path
    tokenizer = CPMBeeTokenizer()
    model = CPMBeeTorch(config=config)

    model.load_state_dict(torch.load(ckpt_path))
    model.cuda()

    # use beam search
    beam_search = CPMBeeBeamSearch(
        model=model,
        tokenizer=tokenizer,
    )
    # Generate up to 100 tokens per record and print each result.
    inference_results = beam_search.generate(data_list, max_length=100)
    for res in inference_results:
        print(res)
28 |
--------------------------------------------------------------------------------
/logs/2022-05-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (May, 30)
3 |
4 | Time: May, 30 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 10.33
10 | - End: 3.76
11 |
12 | ## Completed Data
13 | - $\approx$ 4.37GB
14 |
15 | ## Average Grad Norm
16 | - 0.56
17 |
18 | ## Progress
19 | - 0.47%
20 |
21 | ## Comment
22 |
23 | CPM-Ant is starting to move forward! The training loss has dropped significantly compared to yesterday!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-05-31.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (May, 31)
3 |
4 | Time: May, 31 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 3.76
10 | - End: 3.29
11 |
12 | ## Completed Data
13 | - $\approx$ 8.79GB
14 |
15 | ## Average Grad Norm
16 | - 0.34
17 |
18 | ## Progress
19 | - 0.94%
20 |
21 | ## Comment
22 |
23 | The training process looks quite stable. Go ahead!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-06-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 1)
3 |
4 | Time: June, 1 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 3.29
10 | - End: 3.12
11 |
12 | ## Completed Data
13 | - $\approx$ 13.23GB
14 |
15 | ## Average Grad Norm
16 | - 0.30
17 |
18 | ## Progress
19 | - 1.42%
20 |
21 | ## Comment
22 |
23 | Goodbye May, hello June!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-06-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 2)
3 |
4 | Time: June, 2 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 3.12
10 | - End: 2.98
11 |
12 | ## Completed Data
13 | - $\approx$ 17.65GB
14 |
15 | ## Average Grad Norm
16 | - 0.29
17 |
18 | ## Progress
19 | - 1.90%
20 |
21 | ## Comment
22 |
23 | The training loss has dropped below 3.0!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-06-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 3)
3 |
4 | Time: June, 3 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.98
10 | - End: 2.94
11 |
12 | ## Completed Data
13 | - $\approx$ 22.08GB
14 |
15 | ## Average Grad Norm
16 | - 0.28
17 |
18 | ## Progress
19 | - 2.37%
20 |
21 | ## Comment
22 |
23 | CPM-Ant has been trained stably for five days!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-06-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 4)
3 |
4 | Time: June, 4 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.94
10 | - End: 3.04
11 |
12 | ## Completed Data
13 | - $\approx$ 26.51GB
14 |
15 | ## Average Grad Norm
16 | - 0.27
17 |
18 | ## Progress
19 | - 2.85%
20 |
21 | ## Comment
22 |
23 | The training loss fluctuates slightly and is back above 3.0 today🤔
24 |
--------------------------------------------------------------------------------
/logs/2022-06-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 5)
3 |
4 | Time: June, 5 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 3.04
10 | - End: 2.96
11 |
12 | ## Completed Data
13 | - $\approx$ 30.94GB
14 |
15 | ## Average Grad Norm
16 | - 0.27
17 |
18 | ## Progress
19 | - 3.32%
20 |
21 | ## Comment
22 |
23 | The training loss has dropped below 3.0 once again. Let's keep an eye on it!
24 |
--------------------------------------------------------------------------------
/logs/2022-06-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 6)
3 |
4 | Time: June, 6 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.96
10 | - End: 2.79
11 |
12 | ## Completed Data
13 | - $\approx$ 35.37GB
14 |
15 | ## Average Grad Norm
16 | - 0.27
17 |
18 | ## Progress
19 | - 3.80%
20 |
21 | ## Comment
22 |
23 | It looks like the loss is dropping steadily. Besides, the gradient is also quite stable these days.
24 |
--------------------------------------------------------------------------------
/logs/2022-06-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 8)
3 |
4 | Time: June, 8 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.79
10 | - End: 2.98
11 |
12 | ## Completed Data
13 | - $\approx$ 56.20GB
14 |
15 | ## Average Grad Norm
16 | - 0.74
17 |
18 | ## Progress
19 | - 6.03%
20 |
21 | ## Comment
22 |
23 | Due to computing center maintenance, we suspended the training for around 17 hours yesterday. Then we have resumed the training on a cluster with 32 A100 GPUs. The throughput is quadrupled now!
24 |
--------------------------------------------------------------------------------
/logs/2022-06-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 9)
3 |
4 | Time: June, 9 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.98
10 | - End: 2.92
11 |
12 | ## Completed Data
13 | - $\approx$ 76.70GB
14 |
15 | ## Average Grad Norm
16 | - 0.82
17 |
18 | ## Progress
19 | - 8.24%
20 |
21 | ## Comment
22 |
23 | After the training resumed, the loss jumped above 3.0 and then started to drop slowly these two days. We will pay close attention to whether it can come back to the level it was before the training suspension.
24 |
--------------------------------------------------------------------------------
/logs/2022-06-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 10)
3 |
4 | Time: June, 10 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.92
10 | - End: 2.90
11 |
12 | ## Completed Data
13 | - $\approx$ 97.24GB
14 |
15 | ## Average Grad Norm
16 | - 0.86
17 |
18 | ## Progress
19 | - 10.44%
20 |
21 | ## Comment
22 |
23 | We have found that the training loss drops very slowly, and we will try to figure out the problem🤔.
24 |
--------------------------------------------------------------------------------
/logs/2022-06-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 12)
3 |
4 | Time: June, 12 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.90
10 | - End: 2.68
11 |
12 | ## Completed Data
13 | - $\approx$ 108.38GB
14 |
15 | ## Average Grad Norm
16 | - 2.28
17 |
18 | ## Progress
19 | - 11.64%
20 |
21 | ## Comment
22 |
23 | The training has been resumed at 2 a.m. Today is a big day, as we have made the following changes to the model:
24 |
25 | - According to our training plan, we increased the maximum input length from 512 to 1024, expecting the model to gradually learn to handle longer sequences.
26 | - Optimizer states will also be saved to facilitate continual training from a checkpoint.
27 | - We implemented a dynamic data expansion mechanism, so data can be added without interrupting training now!
28 |
29 | It looks like the loss is dropping rapidly this time.
30 |
31 |
--------------------------------------------------------------------------------
/logs/2022-06-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 13)
3 |
4 | Time: June, 13 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.68
10 | - End: 2.59
11 |
12 | ## Completed Data
13 | - $\approx$ 125.11GB
14 |
15 | ## Average Grad Norm
16 | - 0.71
17 |
18 | ## Progress
19 | - 13.43%
20 |
21 | ## Comment
22 |
23 | We can see that the training loss drops below 2.6 for the first time. Keep going!
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-06-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 14)
3 |
4 | Time: June, 14 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.59
10 | - End: 2.65
11 |
12 | ## Completed Data
13 | - $\approx$ 141.75GB
14 |
15 | ## Average Grad Norm
16 | - 0.68
17 |
18 | ## Progress
19 | - 15.22%
20 |
21 | ## Comment
22 |
23 | The training loss is slightly higher than yesterday, but we believe it's not a big problem😉
24 |
--------------------------------------------------------------------------------
/logs/2022-06-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 15)
3 |
4 | Time: June, 15 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.65
10 | - End: 2.58
11 |
12 | ## Completed Data
13 | - $\approx$ 148.17GB
14 |
15 | ## Average Grad Norm
16 | - 0.70
17 |
18 | ## Progress
19 | - 15.91%
20 |
21 | ## Comment
22 |
23 | We encountered an OOM error yesterday, which caused a break in the training. After a restart, the training returned to normal.
24 |
--------------------------------------------------------------------------------
/logs/2022-06-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 16)
3 |
4 | Time: June, 16 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.58
10 | - End: 2.47
11 |
12 | ## Completed Data
13 | - $\approx$ 162.17GB
14 |
15 | ## Average Grad Norm
16 | - 0.77
17 |
18 | ## Progress
19 | - 17.41%
20 |
21 | ## Comment
22 |
23 | Around 8:30 this morning, one of the nodes in our computing cluster lost its GPUs. We restarted that node and resumed the training at 12:10.
24 |
--------------------------------------------------------------------------------
/logs/2022-06-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 17)
3 |
4 | Time: June, 17 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.47
10 | - End: 2.49
11 |
12 | ## Completed Data
13 | - $\approx$ 178.73GB
14 |
15 | ## Average Grad Norm
16 | - 0.75
17 |
18 | ## Progress
19 | - 19.19%
20 |
21 | ## Comment
22 |
23 | Thank goodness our cluster finally has no downtime for the past day. Our model can finally be trained for a full day🤣. By the way, you can find the **detailed** training dynamics on our website now!
24 |
--------------------------------------------------------------------------------
/logs/2022-06-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 18)
3 |
4 | Time: June, 18 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.49
10 | - End: 2.50
11 |
12 | ## Completed Data
13 | - $\approx$ 195.25GB
14 |
15 | ## Average Grad Norm
16 | - 0.75
17 |
18 | ## Progress
19 | - 20.97%
20 |
21 | ## Comment
22 |
23 | The training is still quite stable today. Recently, a Google engineer reported that a chatting model LaMDA was sentient. Maybe later we could talk to CPM-Ant and find out some interesting results!
24 |
--------------------------------------------------------------------------------
/logs/2022-06-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 19)
3 |
4 | Time: June, 19 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.50
10 | - End: 2.48
11 |
12 | ## Completed Data
13 | - $\approx$ 207.08GB
14 |
15 | ## Average Grad Norm
16 | - 0.76
17 |
18 | ## Progress
19 | - 22.24%
20 |
21 | ## Comment
22 |
23 | At 9:10 this morning, we encountered the OOM issue once again. It's weird! We just resumed the training at 16:08. As this problem occurs frequently these days, we will try to find out the cause in the coming days🤔. If you have any ideas, please post them in the community!
24 |
--------------------------------------------------------------------------------
/logs/2022-06-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 20)
3 |
4 | Time: June, 20 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.48
10 | - End: 2.46
11 |
12 | ## Completed Data
13 | - $\approx$ 223.67GB
14 |
15 | ## Average Grad Norm
16 | - 0.79
17 |
18 | ## Progress
19 | - 24.02%
20 |
21 | ## Comment
22 |
23 | After the restart, our model worked fine and was trained steadily for a whole day. We suspect that the CUDA OOM issue yesterday may be related to GPU memory fragmentation in PyTorch.
24 |
25 | It's also worth mentioning that our WeChat official account (OpenBMB) posted an [article](https://mp.weixin.qq.com/s/ugvIrUGVSqSXnW-2A3bLjA) today about the technical principles of the [BMTrain](https://github.com/OpenBMB/BMTrain) toolkit, which is used to train CPM-Live efficiently. Read it if you are interested and any discussions are welcomed!
26 |
--------------------------------------------------------------------------------
/logs/2022-06-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 21)
3 |
4 | Time: June, 21 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.46
10 | - End: 2.39
11 |
12 | ## Completed Data
13 | - $\approx$ 240.33GB
14 |
15 | ## Average Grad Norm
16 | - 0.81
17 |
18 | ## Progress
19 | - 25.81%
20 |
21 | ## Comment
22 |
23 | Today is a peaceful day! I notice a [paper](https://arxiv.org/pdf/2206.08896.pdf) published by OpenAI, in which researchers use diffs and commit messages from GitHub to teach a large language model how to modify the code. I just wonder whether it is possible to teach our model to write some comments, giving it the training logs of two consecutive days. What do you think?😁
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-06-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 22)
3 |
4 | Time: June, 22 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.39
10 | - End: 2.53
11 |
12 | ## Completed Data
13 | - $\approx$ 256.99GB
14 |
15 | ## Average Grad Norm
16 | - 0.81
17 |
18 | ## Progress
19 | - 27.59%
20 |
21 | ## Comment
22 |
23 | We have noticed that the training loss has been gradually increasing over the past day. In order to figure out the reason, we checked the code and found a bug in the dataloader🧐. The bug caused our model to learn from the same data after each resumption of training. Since the model has seen new data in the past day, it is acceptable for the loss to increase.
24 |
--------------------------------------------------------------------------------
/logs/2022-06-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 23)
3 |
4 | Time: June, 23 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.53
10 | - End: 2.73
11 |
12 | ## Completed Data
13 | - $\approx$ 272.89GB
14 |
15 | ## Average Grad Norm
16 | - 0.83
17 |
18 | ## Progress
19 | - 29.30%
20 |
21 | ## Comment
22 |
23 | Well, the loss is still increasing... If you don't know what happened, see our log yesterday. It seems that our model is a little confused with the new data. Looking at the loss curve, when do you think the loss will start to decrease🤔?
24 |
--------------------------------------------------------------------------------
/logs/2022-06-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 24)
3 |
4 | Time: June, 24 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.73
10 | - End: 2.70
11 |
12 | ## Completed Data
13 | - $\approx$ 288.10GB
14 |
15 | ## Average Grad Norm
16 | - 0.85
17 |
18 | ## Progress
19 | - 30.93%
20 |
21 | ## Comment
22 |
23 | We are glad to see the training loss is going down now😃. It looks like our model is on the right track. CPM-Ant Go! Go! Go!
24 |
--------------------------------------------------------------------------------
/logs/2022-06-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 25)
3 |
4 | Time: June, 25 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.70
10 | - End: 2.65
11 |
12 | ## Completed Data
13 | - $\approx$ 303.29GB
14 |
15 | ## Average Grad Norm
16 | - 0.87
17 |
18 | ## Progress
19 | - 32.57%
20 |
21 | ## Comment
22 |
23 | Today is a peaceful Saturday. We are happy to see that the training loss is still decreasing. Hopefully the model continues to run steadily so that we can also have a restful Sunday😊.
24 |
--------------------------------------------------------------------------------
/logs/2022-06-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 26)
3 |
4 | Time: June, 26 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.65
10 | - End: 2.72
11 |
12 | ## Completed Data
13 | - $\approx$ 318.51GB
14 |
15 | ## Average Grad Norm
16 | - 0.88
17 |
18 | ## Progress
19 | - 34.20%
20 |
21 | ## Comment
22 |
23 | Well, again, the loss went up. Is this just a little fluctuation or a portent of something wrong🤔? Just calm down and come to see our log tomorrow.
24 |
--------------------------------------------------------------------------------
/logs/2022-06-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 27)
3 |
4 | Time: June, 27 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.72
10 | - End: 2.62
11 |
12 | ## Completed Data
13 | - $\approx$ 333.70GB
14 |
15 | ## Average Grad Norm
16 | - 0.89
17 |
18 | ## Progress
19 | - 35.83%
20 |
21 | ## Comment
22 |
23 | It seems that what we observed yesterday is a normal fluctuation of loss. What's more, we have released a new feature on our [website](https://live.openbmb.org/dynamic), which visualizes our model's parameters and gradients of different days. Click the *Parameter Dynamics (参数动态)* section to play with it! Any feedback is welcomed!
24 |
--------------------------------------------------------------------------------
/logs/2022-06-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 28)
3 |
4 | Time: June, 28 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.62
10 | - End: 2.65
11 |
12 | ## Completed Data
13 | - $\approx$ 348.90GB
14 |
15 | ## Average Grad Norm
16 | - 0.90
17 |
18 | ## Progress
19 | - 37.46%
20 |
21 | ## Comment
22 |
23 | Another peaceful day😁. Although there are still some fluctuations in the loss value, its downward trend remains unchanged. Today I notice an interesting [paper](https://arxiv.org/abs/2112.15594), in which researchers use OpenAI Codex to solve university-level mathematics problems. What's more, the explanation of the solution code can also be automatically generated. As you can see, when a model performs well and is explainable, it looks charming! We will also make this our goal when training CPM-Live.💪
24 |
--------------------------------------------------------------------------------
/logs/2022-06-29.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 29)
3 |
4 | Time: June, 29 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.65
10 | - End: 2.59
11 |
12 | ## Completed Data
13 | - $\approx$ 364.13GB
14 |
15 | ## Average Grad Norm
16 | - 0.91
17 |
18 | ## Progress
19 | - 39.10%
20 |
21 | ## Comment
22 |
23 | It looks like the loss of our model is decreasing SLOOOWLY. Please be patient and always believe that something wonderful is about to happen:)
24 |
--------------------------------------------------------------------------------
/logs/2022-06-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (June, 30)
3 |
4 | Time: June, 30 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.59
10 | - End: 2.65
11 |
12 | ## Completed Data
13 | - $\approx$ 379.34GB
14 |
15 | ## Average Grad Norm
16 | - 0.91
17 |
18 | ## Progress
19 | - 40.73%
20 |
21 | ## Comment
22 |
23 | The training progress has exceeded 40%, meaning that our model has seen about 80 billion tokens! Actually, an average human speaks 860,341,500 words in the entire life[1]. That is to say, the model has heard 23 people talk to it for a lifetime🤣.
24 |
25 | [1] Brandreth, Gyles Daubeney. The joy of lex: How to have fun with 860,341,500 words. William Morrow & Company, 1980.
26 |
--------------------------------------------------------------------------------
/logs/2022-07-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 1)
3 |
4 | Time: July, 1 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.65
10 | - End: 2.57
11 |
12 | ## Completed Data
13 | - $\approx$ 394.55GB
14 |
15 | ## Average Grad Norm
16 | - 0.92
17 |
18 | ## Progress
19 | - 42.36%
20 |
21 | ## Comment
22 |
23 | Another new month has arrived, and we have already accompanied CPM-Live through its first full month together. Gratefully, after a few accidents, the training of our model is now relatively stable. Please stay tuned to CPM-Live, and there will be more fascinating things waiting for you!
24 |
--------------------------------------------------------------------------------
/logs/2022-07-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 2)
3 |
4 | Time: July, 2 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.57
10 | - End: 2.60
11 |
12 | ## Completed Data
13 | - $\approx$ 409.76GB
14 |
15 | ## Average Grad Norm
16 | - 0.91
17 |
18 | ## Progress
19 | - 44.00%
20 |
21 | ## Comment
22 |
23 | It seems that nothing special happened in the past day. Enjoy the weekend!
24 |
--------------------------------------------------------------------------------
/logs/2022-07-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 3)
3 |
4 | Time: July, 3 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.60
10 | - End: 2.63
11 |
12 | ## Completed Data
13 | - $\approx$ 424.96GB
14 |
15 | ## Average Grad Norm
16 | - 0.92
17 |
18 | ## Progress
19 | - 45.63%
20 |
21 | ## Comment
22 |
23 | Even though the training loss has been increasing slightly for two consecutive days, we still think it's not a big deal. We will continue to monitor the loss to determine if there are problems with our model.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 4)
3 |
4 | Time: July, 4 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.63
10 | - End: 2.57
11 |
12 | ## Completed Data
13 | - $\approx$ 440.18GB
14 |
15 | ## Average Grad Norm
16 | - 0.92
17 |
18 | ## Progress
19 | - 47.26%
20 |
21 | ## Comment
22 |
23 | Time is **money**. I mean that literally😉. By tonight, we will spend 200K Yuan on training. Thankfully, our training loss started to decrease today. Stay calm and let's see what happens tomorrow.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 5)
3 |
4 | Time: July, 5 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.57
10 | - End: 2.43
11 |
12 | ## Completed Data
13 | - $\approx$ 455.86GB
14 |
15 | ## Average Grad Norm
16 | - 0.87
17 |
18 | ## Progress
19 | - 48.95%
20 |
21 | ## Comment
22 |
23 | The loss is falling very fast😲. We think this may be because the model is now encountering data that it has already seen before. Therefore, we remain cautiously optimistic for now due to our previous experience.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 6)
3 |
4 | Time: July, 6 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.43
10 | - End: 2.34
11 |
12 | ## Completed Data
13 | - $\approx$ 471.59GB
14 |
15 | ## Average Grad Norm
16 | - 0.84
17 |
18 | ## Progress
19 | - 50.63%
20 |
21 | ## Comment
22 |
23 | Interestingly, the loss is still falling rapidly. We'll keep watching it for a few more days to see what happens next. Besides, we are happy to see that the training progress of CPM-Ant is over 50%! Keep it up!
24 |
--------------------------------------------------------------------------------
/logs/2022-07-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 7)
3 |
4 | Time: July, 7 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.34
10 | - End: 2.37
11 |
12 | ## Completed Data
13 | - $\approx$ 487.32GB
14 |
15 | ## Average Grad Norm
16 | - 0.84
17 |
18 | ## Progress
19 | - 52.33%
20 |
21 | ## Comment
22 |
23 | The training loss is no longer decreasing, and it seems to have an upward trend🧐.
24 | Something familiar seems to be happening again🤔. What do you think will happen tomorrow?
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-07-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 8)
3 |
4 | Time: July, 8 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.37
10 | - End: 2.55
11 |
12 | ## Completed Data
13 | - $\approx$ 502.95GB
14 |
15 | ## Average Grad Norm
16 | - 0.88
17 |
18 | ## Progress
19 | - 54.00%
20 |
21 | ## Comment
22 |
23 | Once again the loss has gone up! That's an interesting but probably reasonable result if you have seen our log for [this day](https://github.com/OpenBMB/CPM-Live/blob/master/logs/2022-06-22.md). That is to say, after learning from the data that has been seen many times, the loss of our model will return to the level it should be at.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 9)
3 |
4 | Time: July, 9 2022 16:00
5 |
6 | Recorder: @jayzzhou-thu
7 |
8 | ## Loss
9 | - Begin: 2.55
10 | - End: 2.60
11 |
12 | ## Completed Data
13 | - $\approx$ 518.54GB
14 |
15 | ## Average Grad Norm
16 | - 0.92
17 |
18 | ## Progress
19 | - 55.68%
20 |
21 | ## Comment
22 |
23 | The change in the training loss is currently in line with our expectations. A new model [NLLB](https://research.facebook.com/publications/no-language-left-behind/) was recently released by Meta. The model is multi-lingual and can translate between 200 languages. It is so cool and we wish CPM-Live could support more languages in the future.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 10)
3 |
4 | Time: July, 10 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.60
10 | - End: 2.56
11 |
12 | ## Completed Data
13 | - $\approx$ 534.10GB
14 |
15 | ## Average Grad Norm
16 | - 0.93
17 |
18 | ## Progress
19 | - 57.35%
20 |
21 | ## Comment
22 |
23 | Everything seems to be working fine so far. For the oscillation of loss these days, my impression is that the memory ability of the big model is quite amazing😄.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-11.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 11)
3 |
4 | Time: July, 11 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.56
10 | - End: 2.53
11 |
12 | ## Completed Data
13 | - $\approx$ 549.68GB
14 |
15 | ## Average Grad Norm
16 | - 0.94
17 |
18 | ## Progress
19 | - 59.02%
20 |
21 | ## Comment
22 |
23 | Another peaceful day. It is exciting to have your conjectures for the phenomena in training verified in the next few days, especially when everything is live. Nevertheless, I always hope that everything goes well in our training😁.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 12)
3 |
4 | Time: July, 12 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.53
10 | - End: 2.54
11 |
12 | ## Completed Data
13 | - $\approx$ 565.26GB
14 |
15 | ## Average Grad Norm
16 | - 0.95
17 |
18 | ## Progress
19 | - 60.69%
20 |
21 | ## Comment
22 |
23 | The training has entered a stable period. Since our model is capable of doing both generation and understanding tasks, today we test it on a Chinese poetry comprehension task.
24 |
25 | Explanation: 沿着松柏小径直奔神灵宫
26 |
27 | Option 👍: 松柏一径趋灵宫
28 |
29 | Option 😄: 转入玄宫松柏间
30 |
31 | Option 🎉: 竹柏深中一径开
32 |
33 | Option 🚀: 杖藜松径叩莲宫
34 |
35 | Which one do you think is correct? Vote for it with the emoji! We will post CPM-Ant's answer tomorrow.
36 |
37 |
--------------------------------------------------------------------------------
/logs/2022-07-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 13)
3 |
4 | Time: July, 13 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.54
10 | - End: 2.53
11 |
12 | ## Completed Data
13 | - $\approx$ 580.84GB
14 |
15 | ## Average Grad Norm
16 | - 0.95
17 |
18 | ## Progress
19 | - 62.37%
20 |
21 | ## Comment
22 |
23 | The answer given by CPM-Ant to yesterday's [quiz](https://github.com/OpenBMB/CPM-Live/discussions/61) is 👍 (the first option), which is correct! Congratulations to CPM-Ant and everyone who got it right! You can see the original poetry [here](https://www.guwenxuexi.com/classical/12603.html).
24 |
--------------------------------------------------------------------------------
/logs/2022-07-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 14)
3 |
4 | Time: July, 14 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.53
10 | - End: 2.57
11 |
12 | ## Completed Data
13 | - $\approx$ 596.44GB
14 |
15 | ## Average Grad Norm
16 | - 0.96
17 |
18 | ## Progress
19 | - 64.04%
20 |
21 | ## Comment
22 |
23 | Our training is still stable. Recently, a large open-access multilingual language model [BLOOM](https://huggingface.co/bigscience/bloom) was released. It has 176B parameters and supports 46 languages and 13 programming languages. As you can see, *open source* is the trend!
24 |
--------------------------------------------------------------------------------
/logs/2022-07-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 15)
3 |
4 | Time: July, 15 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.57
10 | - End: 2.56
11 |
12 | ## Completed Data
13 | - $\approx$ 612.02GB
14 |
15 | ## Average Grad Norm
16 | - 0.96
17 |
18 | ## Progress
19 | - 65.72%
20 |
21 | ## Comment
22 |
23 | Since our model has seen all the data once, we can see that the training loss in this epoch is more stable than the previous epoch. Imagine you're taking an exam, and you'll definitely be more at ease if you've already prepped for it😄.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-07-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 16)
3 |
4 | Time: July, 16 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.56
10 | - End: 2.47
11 |
12 | ## Completed Data
13 | - $\approx$ 627.60GB
14 |
15 | ## Average Grad Norm
16 | - 0.96
17 |
18 | ## Progress
19 | - 67.39%
20 |
21 | ## Comment
22 |
23 | Today is a peaceful day for our model, and a restful day for us. Enjoy it!
24 |
--------------------------------------------------------------------------------
/logs/2022-07-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 17)
3 |
4 | Time: July, 17 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.47
10 | - End: 2.51
11 |
12 | ## Completed Data
13 | - $\approx$ 643.21GB
14 |
15 | ## Average Grad Norm
16 | - 0.96
17 |
18 | ## Progress
19 | - 69.06%
20 |
21 | ## Comment
22 |
23 | The training is still stable, over. **Logging** is the one thing that we're capable of perceiving, that transcends dimensions of time and space (from the movie *Interstellar*).
24 |
--------------------------------------------------------------------------------
/logs/2022-07-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 18)
3 |
4 | Time: July, 18 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.51
10 | - End: 2.52
11 |
12 | ## Completed Data
13 | - $\approx$ 658.80GB
14 |
15 | ## Average Grad Norm
16 | - 0.96
17 |
18 | ## Progress
19 | - 70.74%
20 |
21 | ## Comment
22 |
23 | As a recorder, what stands between me and my happiness is the loss value of CPM-Live. Fortunately, it is very stable these days:)
24 |
--------------------------------------------------------------------------------
/logs/2022-07-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 19)
3 |
4 | Time: July, 19 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.52
10 | - End: 2.42
11 |
12 | ## Completed Data
13 | - $\approx$ 674.41GB
14 |
15 | ## Average Grad Norm
16 | - 0.96
17 |
18 | ## Progress
19 | - 72.41%
20 |
21 | ## Comment
22 |
23 | Okay, it looks like the repertoire "loss plummet" is coming again🤣? Very interesting.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 20)
3 |
4 | Time: July, 20 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.42
10 | - End: 2.41
11 |
12 | ## Completed Data
13 | - $\approx$ 690.09GB
14 |
15 | ## Average Grad Norm
16 | - 0.92
17 |
18 | ## Progress
19 | - 74.10%
20 |
21 | ## Comment
22 |
23 | We are sure that the training loss has entered the next 'cycle'.
24 |
25 | Are you fascinated by big models?
26 |
27 | Do you want to explore the mysteries behind big models?
28 |
29 | Are you interested in the application of big models?
30 |
31 | You've come to the right place! Today we release the **Big Model Course** (in Chinese)! Click [here](https://www.openbmb.org/en/community/course) to start your journey with big models!
32 |
--------------------------------------------------------------------------------
/logs/2022-07-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 21)
3 |
4 | Time: July, 21 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.41
10 | - End: 2.34
11 |
12 | ## Completed Data
13 | - $\approx$ 705.75GB
14 |
15 | ## Average Grad Norm
16 | - 0.89
17 |
18 | ## Progress
19 | - 75.78%
20 |
21 | ## Comment
22 |
23 | "Memory is the mother of all wisdom" – Aeschylus.
24 |
25 | That's what I want to say after seeing today's loss.
26 |
--------------------------------------------------------------------------------
/logs/2022-07-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 22)
3 |
4 | Time: July, 22 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.34
10 | - End: 2.33
11 |
12 | ## Completed Data
13 | - $\approx$ 721.41GB
14 |
15 | ## Average Grad Norm
16 | - 0.89
17 |
18 | ## Progress
19 | - 77.46%
20 |
21 | ## Comment
22 |
23 | Today the model is still in the easy mode. Tedious. "Challenge and uncertainty are the greatest joys of training models" - me. 😆
24 |
--------------------------------------------------------------------------------
/logs/2022-07-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 23)
3 |
4 | Time: July, 23 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.33
10 | - End: 2.49
11 |
12 | ## Completed Data
13 | - $\approx$ 737.05GB
14 |
15 | ## Average Grad Norm
16 | - 0.93
17 |
18 | ## Progress
19 | - 79.14%
20 |
21 | ## Comment
22 |
23 | Hey CPM-Ant, the easy mode is over, and let's face the challenge again.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 24)
3 |
4 | Time: July, 24 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.49
10 | - End: 2.49
11 |
12 | ## Completed Data
13 | - $\approx$ 752.61GB
14 |
15 | ## Average Grad Norm
16 | - 0.96
17 |
18 | ## Progress
19 | - 80.81%
20 |
21 | ## Comment
22 |
23 | It looks like the highest value of the loss, which bounced back from the lowest point, also decreases as the number of epochs increases. Our model is getting more and more familiar with this data.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 25)
3 |
4 | Time: July, 25 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.49
10 | - End: 2.50
11 |
12 | ## Completed Data
13 | - $\approx$ 768.19GB
14 |
15 | ## Average Grad Norm
16 | - 0.98
17 |
18 | ## Progress
19 | - 82.48%
20 |
21 | ## Comment
22 |
23 | The training has been very stable lately, and the loss is also changing regularly, which gives me the illusion that everything is predictable😁. Wonderful feeling!
24 |
--------------------------------------------------------------------------------
/logs/2022-07-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 26)
3 |
4 | Time: July, 26 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.50
10 | - End: 2.45
11 |
12 | ## Completed Data
13 | - $\approx$ 783.78GB
14 |
15 | ## Average Grad Norm
16 | - 0.98
17 |
18 | ## Progress
19 | - 84.16%
20 |
21 | ## Comment
22 |
23 | Every day I always marvel at how stable the training is. Thanks to BMTrain, an amazing toolkit.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 27)
3 |
4 | Time: July, 27 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.45
10 | - End: 2.52
11 |
12 | ## Completed Data
13 | - $\approx$ 799.36GB
14 |
15 | ## Average Grad Norm
16 | - 0.99
17 |
18 | ## Progress
19 | - 85.83%
20 |
21 | ## Comment
22 |
23 | Training a big model is not easy and requires a lot of time and computing resources, so I always wonder how many big models there are in the world.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 28)
3 |
4 | Time: July, 28 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.52
10 | - End: 2.47
11 |
12 | ## Completed Data
13 | - $\approx$ 814.94GB
14 |
15 | ## Average Grad Norm
16 | - 0.99
17 |
18 | ## Progress
19 | - 87.50%
20 |
21 | ## Comment
22 |
23 | Do you want to know everything about big models in the world? You've come to the right place again!
24 |
25 | Today we release **[BMList](https://github.com/OpenBMB/BMList)**, in which we include various key information about big models, such as the number of parameters, source code, relevant paper, released checkpoint and API. Currently, BMList covers big models in multiple domains, i.e. text, vision, audio and code. We'll continue to add more models!
26 |
27 | **Contributions are welcomed!**
28 |
--------------------------------------------------------------------------------
/logs/2022-07-29.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 29)
3 |
4 | Time: July, 29 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.47
10 | - End: 2.49
11 |
12 | ## Completed Data
13 | - $\approx$ 830.54GB
14 |
15 | ## Average Grad Norm
16 | - 0.99
17 |
18 | ## Progress
19 | - 89.18%
20 |
21 | ## Comment
22 |
23 | The training is still very stable and the loss remains at a relatively low value compared to previous epochs.
24 |
--------------------------------------------------------------------------------
/logs/2022-07-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 30)
3 |
4 | Time: July, 30 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.49
10 | - End: 2.41
11 |
12 | ## Completed Data
13 | - $\approx$ 846.12GB
14 |
15 | ## Average Grad Norm
16 | - 0.99
17 |
18 | ## Progress
19 | - 90.85%
20 |
21 | ## Comment
22 |
23 | We are excited to see that the training progress has exceeded 90%! CPM-Ant has started the final sprint. The end is in sight!
24 |
--------------------------------------------------------------------------------
/logs/2022-07-31.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (July, 31)
3 |
4 | Time: July, 31 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.41
10 | - End: 2.45
11 |
12 | ## Completed Data
13 | - $\approx$ 861.72GB
14 |
15 | ## Average Grad Norm
16 | - 1.00
17 |
18 | ## Progress
19 | - 92.53%
20 |
21 | ## Comment
22 |
23 | July is coming to an end, and we are about to usher in a *hot* August. Stay tuned!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-08-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 1)
3 |
4 | Time: August, 1 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.45
10 | - End: 2.42
11 |
12 | ## Completed Data
13 | - $\approx$ 877.33GB
14 |
15 | ## Average Grad Norm
16 | - 0.99
17 |
18 | ## Progress
19 | - 94.20%
20 |
21 | ## Comment
22 |
23 |
24 | Our training has not had a single incident in the past July. There is nothing more enjoyable than seeing a [continuous and smooth loss curve](https://live.openbmb.org/dynamic) :)
25 |
26 |
27 |
--------------------------------------------------------------------------------
/logs/2022-08-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 2)
3 |
4 | Time: August, 2 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.42
10 | - End: 2.45
11 |
12 | ## Completed Data
13 | - $\approx$ 892.92GB
14 |
15 | ## Average Grad Norm
16 | - 1.00
17 |
18 | ## Progress
19 | - 95.88%
20 |
21 | ## Comment
22 |
23 | CPM-Ant is heading for the finish line! Three days left!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-08-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 3)
3 |
4 | Time: August, 3 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.45
10 | - End: 2.40
11 |
12 | ## Completed Data
13 | - $\approx$ 908.55GB
14 |
15 | ## Average Grad Norm
16 | - 0.99
17 |
18 | ## Progress
19 | - 97.55%
20 |
21 | ## Comment
22 |
23 | 👀Two days left!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 4)
3 |
4 | Time: August, 4 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.40
10 | - End: 2.32
11 |
12 | ## Completed Data
13 | - $\approx$ 924.25GB
14 |
15 | ## Average Grad Norm
16 | - 0.96
17 |
18 | ## Progress
19 | - 99.24%
20 |
21 | ## Comment
22 |
23 | 👀Less than 1% remaining!!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 5)
3 |
4 | Time: August, 5 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.32
10 | - End: 2.37
11 |
12 | ## Completed Data
13 | - $\approx$ 939.98GB
14 |
15 | ## Average Grad Norm
16 | - 0.94
17 |
18 | ## Progress
19 | - 100%
20 |
21 | ## Comment
22 |
23 | Successfully, the training of CPM-Ant has been completed! Here are our future plans:
24 |
25 | 1. The training of CPM-Ant comes into a new stage, and the model will evolve into CPM-Ant+. In this stage, we will learn from more data and extend new features beyond CPM-Ant. All these efforts will be released as CPM-Ant+ and also contribute to our next model, CPM-Bee.
26 |
27 | 2. We are preparing a brief conclusion of the CPM-Ant training and a poll for expected features of CPM-Bee, which will be released next week.
28 |
29 | 3. For the final release of CPM-Ant, we will need around a month to evaluate and compress our model. The final release will include a technical report, the final model (also with compressed versions), as well as interesting demos. They are expected to be open in early September 2022!
30 |
31 | So stay tuned and CPM-Live will always be running!
32 |
--------------------------------------------------------------------------------
/logs/2022-08-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 6)
3 |
4 | Time: August, 6 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.37
10 | - End: 2.27
11 |
12 | ## Completed Data
13 | - $\approx$ 944.67GB
14 |
15 | ## Average Grad Norm
16 | - 0.52
17 |
18 | ## Progress
19 | - 101.43%
20 |
21 | ## Comment
22 |
23 | The first day to update the log of CPM-Ant+! We reduced the computing resources from 32 to 8 A100 GPUs. Furthermore, we migrated the training data to a newly implemented distributed dataset, which is more flexible and robust.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 7)
3 |
4 | Time: August, 7 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.27
10 | - End: 2.25
11 |
12 | ## Completed Data
13 | - $\approx$ 949.37GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 101.94%
20 |
21 | ## Comment
22 |
23 | Since we drastically shrank the batch size, the fluctuation of training loss became significantly larger🧐. It's not a big deal!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 8)
3 |
4 | Time: August, 8 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.25
10 | - End: 2.33
11 |
12 | ## Completed Data
13 | - $\approx$ 954.07GB
14 |
15 | ## Average Grad Norm
16 | - 0.46
17 |
18 | ## Progress
19 | - 102.44%
20 |
21 | ## Comment
22 |
23 | CPM-Ant+ is crawling forward slowly, with undiminished determination.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 9)
3 |
4 | Time: August, 9 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.33
10 | - End: 2.34
11 |
12 | ## Completed Data
13 | - $\approx$ 958.79GB
14 |
15 | ## Average Grad Norm
16 | - 0.46
17 |
18 | ## Progress
19 | - 102.95%
20 |
21 | ## Comment
22 |
23 | With 8 GPUs, the progress of CPM-Ant+ advances 0.5% per day. My comment:
24 |
25 | "It's better to make slow progress than no progress at all."
26 |
27 | “不怕慢,就怕站。”
28 |
--------------------------------------------------------------------------------
/logs/2022-08-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 10)
3 |
4 | Time: August, 10 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.34
10 | - End: 2.28
11 |
12 | ## Completed Data
13 | - $\approx$ 963.49GB
14 |
15 | ## Average Grad Norm
16 | - 0.45
17 |
18 | ## Progress
19 | - 103.45%
20 |
21 | ## Comment
22 |
23 | Our [Big Model Course](https://www.openbmb.org/en/community/course) has been fully uploaded now! Come here to get your power (knowledge)!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-11.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 11)
3 |
4 | Time: August, 11 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.28
10 | - End: 2.21
11 |
12 | ## Completed Data
13 | - $\approx$ 968.20GB
14 |
15 | ## Average Grad Norm
16 | - 0.45
17 |
18 | ## Progress
19 | - 103.96%
20 |
21 | ## Comment
22 |
23 | CPM-Ant+ is still running, CPM-Bee is already in the distance! Click [here](https://github.com/OpenBMB/CPM-Live/discussions/110) to vote or raise proposals for new features of CPM-Bee!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 12)
3 |
4 | Time: August, 12 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.21
10 | - End: 2.34
11 |
12 | ## Completed Data
13 | - $\approx$ 972.90GB
14 |
15 | ## Average Grad Norm
16 | - 0.45
17 |
18 | ## Progress
19 | - 104.46%
20 |
21 | ## Comment
22 |
23 | See our [summarization](https://github.com/OpenBMB/CPM-Live/discussions/112) of CPM-Ant training!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 13)
3 |
4 | Time: August, 13 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.34
10 | - End: 2.38
11 |
12 | ## Completed Data
13 | - $\approx$ 977.59GB
14 |
15 | ## Average Grad Norm
16 | - 0.45
17 |
18 | ## Progress
19 | - 104.97%
20 |
21 | ## Comment
22 |
23 | The loss of CPM-Ant+ is rebounding to the normal level. If you don't yet know why the loss fluctuates, see our latest [summary](https://github.com/OpenBMB/CPM-Live/discussions/112)!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 14)
3 |
4 | Time: August, 14 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.38
10 | - End: 2.41
11 |
12 | ## Completed Data
13 | - $\approx$ 982.29GB
14 |
15 | ## Average Grad Norm
16 | - 0.45
17 |
18 | ## Progress
19 | - 105.47%
20 |
21 | ## Comment
22 |
23 | A peaceful weekend. Good luck next week!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 15)
3 |
4 | Time: August, 15 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.41
10 | - End: 2.32
11 |
12 | ## Completed Data
13 | - $\approx$ 986.99GB
14 |
15 | ## Average Grad Norm
16 | - 0.45
17 |
18 | ## Progress
19 | - 105.98%
20 |
21 | ## Comment
22 |
23 | Currently, controllable generation, Multi-lingual, and Structuralization are the top-3 popular new features of CPM-Bee. Click [here](https://github.com/OpenBMB/CPM-Live) to vote for your favorite features!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 16)
3 |
4 | Time: August, 16 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.32
10 | - End: 2.30
11 |
12 | ## Completed Data
13 | - $\approx$ 991.69GB
14 |
15 | ## Average Grad Norm
16 | - 0.44
17 |
18 | ## Progress
19 | - 106.48%
20 |
21 | ## Comment
22 |
23 | “Progress is not achieved by luck or accident, but by working on yourself daily.” - Epictetus.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 17)
3 |
4 | Time: August, 17 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.30
10 | - End: 2.46
11 |
12 | ## Completed Data
13 | - $\approx$ 996.35GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 106.98%
20 |
21 | ## Comment
22 |
23 | Although the loss fluctuated violently, we can see that it has an upward trend again. However, we have gotten used to it😆.
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-08-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 18)
3 |
4 | Time: August, 18 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.46
10 | - End: 2.37
11 |
12 | ## Completed Data
13 | - $\approx$ 1,001.04GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 107.49%
20 |
21 | ## Comment
22 |
23 | The training of CPM-Live reaches a small milestone today—learning from over 1,000GB of data. As we all know, data is the inspiration for big models. Keep going!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-08-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 19)
3 |
4 | Time: August, 19 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.37
10 | - End: 2.42
11 |
12 | ## Completed Data
13 | - $\approx$ 1,005.68GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 107.98%
20 |
21 | ## Comment
22 |
23 | Another week has passed, and for the new features of CPM-Bee, newcomers voted to add domain data, such as dialogue and medical. If you want other new features, come and propose [here](https://github.com/OpenBMB/CPM-Live/discussions/categories/model-proposals-%E6%A8%A1%E5%9E%8B%E6%8F%90%E8%AE%AE)!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 20)
3 |
4 | Time: August, 20 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.42
10 | - End: 2.42
11 |
12 | ## Completed Data
13 | - $\approx$ 1,010.34GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 108.48%
20 |
21 | ## Comment
22 |
23 | "No scripts, no cue cards. It isn't always Shakespeare but it's genuine." It's *CPM-Live*.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 21)
3 |
4 | Time: August, 21 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.42
10 | - End: 2.23
11 |
12 | ## Completed Data
13 | - $\approx$ 1,015.03GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 108.99%
20 |
21 | ## Comment
22 |
23 | Although we have weekends off, CPM-Ant+ never takes a break. What a hardworking little ant.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 22)
3 |
4 | Time: August, 22 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.23
10 | - End: 2.46
11 |
12 | ## Completed Data
13 | - $\approx$ 1,019.69GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 109.49%
20 |
21 | ## Comment
22 |
23 | CPM-Live is always running, and our model is evolving as well💪.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 23)
3 |
4 | Time: August, 23 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.46
10 | - End: 2.45
11 |
12 | ## Completed Data
13 | - $\approx$ 1,024.36GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 109.99%
20 |
21 | ## Comment
22 |
23 | "The race is not always to the swift, but to those who keep on running." 😎
24 |
--------------------------------------------------------------------------------
/logs/2022-08-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 24)
3 |
4 | Time: August, 24 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.45
10 | - End: 2.42
11 |
12 | ## Completed Data
13 | - $\approx$ 1,029.01GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 110.49%
20 |
21 | ## Comment
22 |
23 | The task learned by our model becomes progressively more complex, just as we move from lower to higher grades when going to school. Right now, it's taking a final exam!😄
24 |
--------------------------------------------------------------------------------
/logs/2022-08-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 25)
3 |
4 | Time: August, 25 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.42
10 | - End: 2.44
11 |
12 | ## Completed Data
13 | - $\approx$ 1,033.69GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 110.99%
20 |
21 | ## Comment
22 |
23 | CPM-Live has been running for 88 days, and Mercury has already orbited the sun once during this time! Time flies and CPM-Live is always here.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-08-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 26)
3 |
4 | Time: August, 26 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.44
10 | - End: 2.46
11 |
12 | ## Completed Data
13 | - $\approx$ 1,038.35GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 111.49%
20 |
21 | ## Comment
22 |
23 | How do you feel about OpenBMB? Do you have any suggestions or feedback for our open-sourced toolkits, courses, or community? [Tell us](https://mp.weixin.qq.com/s/NZJ3N_-ASPFQA8Qxz-MnbA)!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 27)
3 |
4 | Time: August, 27 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.46
10 | - End: 2.39
11 |
12 | ## Completed Data
13 | - $\approx$ 1,043.02GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 111.99%
20 |
21 | ## Comment
22 |
23 | After another week of voting, the top three most anticipated new features of CPM-Bee are controllable generation, multi-lingual, and the addition of dialogue data. Voting will be closed on September 1.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 28)
3 |
4 | Time: August, 28 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.39
10 | - End: 2.40
11 |
12 | ## Completed Data
13 | - $\approx$ 1,047.69GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 112.49%
20 |
21 | ## Comment
22 |
23 | What can we see from the loss curve of CPM-Live? I see stability.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-29.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 29)
3 |
4 | Time: August, 29 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.40
10 | - End: 2.38
11 |
12 | ## Completed Data
13 | - $\approx$ 1,052.34GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 112.99%
20 |
21 | ## Comment
22 |
23 | CPM-Live has been running for *three months*, and the train must go on!
24 |
--------------------------------------------------------------------------------
/logs/2022-08-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 30)
3 |
4 | Time: August, 30 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.38
10 | - End: 2.50
11 |
12 | ## Completed Data
13 | - $\approx$ 1,056.99GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 113.49%
20 |
21 | ## Comment
22 |
23 | What is CPM-Ant doing during all these days of training? It's learning to speak, and soon it will demonstrate its capabilities.
24 |
--------------------------------------------------------------------------------
/logs/2022-08-31.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (August, 31)
3 |
4 | Time: August, 31 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.50
10 | - End: 2.25
11 |
12 | ## Completed Data
13 | - $\approx$ 1,061.66GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 114.00%
20 |
21 | ## Comment
22 |
23 | The last day of August and also the last day to decide the direction of *your* CPM-Bee model!
24 |
--------------------------------------------------------------------------------
/logs/2022-09-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 1)
3 |
4 | Time: September, 1 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.25
10 | - End: 2.47
11 |
12 | ## Completed Data
13 | - $\approx$ 1,066.32GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 114.50%
20 |
21 | ## Comment
22 |
23 | The voting for new features of CPM-Bee is closed. We will consider them comprehensively and add the top few features to our new model. Stay tuned!
24 |
--------------------------------------------------------------------------------
/logs/2022-09-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 2)
3 |
4 | Time: September, 2 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.47
10 | - End: 2.39
11 |
12 | ## Completed Data
13 | - $\approx$ 1,070.98GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 115.00%
20 |
21 | ## Comment
22 |
23 | Another week has passed. In this week, CPM-Ant+ is still working hard to move forward. With the suggestions of the community, we have also started to plan the training of the CPM-Bee model.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-09-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 3)
3 |
4 | Time: September, 3 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.39
10 | - End: 2.44
11 |
12 | ## Completed Data
13 | - $\approx$ 1,075.63GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 115.50%
20 |
21 | ## Comment
22 |
23 | A quiet Saturday.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 4)
3 |
4 | Time: September, 4 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.44
10 | - End: 2.53
11 |
12 | ## Completed Data
13 | - $\approx$ 1,080.29GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 116.00%
20 |
21 | ## Comment
22 |
23 | For our model, learning is done day and night, weekdays and weekends.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 5)
3 |
4 | Time: September, 5 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.53
10 | - End: 2.53
11 |
12 | ## Completed Data
13 | - $\approx$ 1,084.98GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 116.50%
20 |
21 | ## Comment
22 |
23 | In the darkness, new babies of the CPM-Live family were born. Who are they? The answer will be revealed in the near future👀.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 6)
3 |
4 | Time: September, 6 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.53
10 | - End: 2.48
11 |
12 | ## Completed Data
13 | - $\approx$ 1,089.63GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 117.00%
20 |
21 | ## Comment
22 |
23 | We are proud to announce that CPM-Live has been running for **100** days!! Thanks for your company these days. However, this is only a small step in the journey of continuous learning. Keep following us!
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-09-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 7)
3 |
4 | Time: September, 7 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.48
10 | - End: 2.45
11 |
12 | ## Completed Data
13 | - $\approx$ 1,094.29GB
14 |
15 | ## Average Grad Norm
16 | - 0.48
17 |
18 | ## Progress
19 | - 117.50%
20 |
21 | ## Comment
22 |
23 | Every model that works hard hopes to stand in front of the stage one day and show itself to everyone, and that day is not far away😉.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 8)
3 |
4 | Time: September, 8 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.45
10 | - End: 2.42
11 |
12 | ## Completed Data
13 | - $\approx$ 1,098.95GB
14 |
15 | ## Average Grad Norm
16 | - 0.47
17 |
18 | ## Progress
19 | - 118.00%
20 |
21 | ## Comment
22 |
23 | A [bug](https://github.com/OpenBMB/CPM-Live/pull/148) was found and fixed in CPM-Ant+. Thanks @xwwwwww.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 10)
3 |
4 | Time: September, 10 2022 16:00
5 |
6 | Recorder: @jayzzhou-thu
7 |
8 | ## Loss
9 | - Begin: 2.42
10 | - End: 2.11
11 |
12 | ## Completed Data
13 | - $\approx$ 1,102.18GB
14 |
15 | ## Average Grad Norm
16 | - 3.48
17 |
18 | ## Progress
19 | - 118.35%
20 |
21 | ## Comment
22 |
23 | The training started at around 9 p.m. yesterday. Keep going!
--------------------------------------------------------------------------------
/logs/2022-09-11.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 11)
3 |
4 | Time: September, 11 2022 16:00
5 |
6 | Recorder: @jayzzhou-thu
7 |
8 | ## Loss
9 | - Begin: 2.11
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 1,105.69GB
14 |
15 | ## Average Grad Norm
16 | - 0.49
17 |
18 | ## Progress
19 | - 118.72%
20 |
21 | ## Comment
22 |
23 | Happy Mid-Autumn Festival! CPM-Ant keeps learning during the holiday. Move forward!
--------------------------------------------------------------------------------
/logs/2022-09-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 12)
3 |
4 | Time: September, 12 2022 16:00
5 |
6 | Recorder: @jayzzhou-thu
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 1,109.20GB
14 |
15 | ## Average Grad Norm
16 | - 0.49
17 |
18 | ## Progress
19 | - 119.10%
20 |
21 | ## Comment
22 |
23 | Another peaceful day. The release date for CPM-Ant is approaching, look forward to it!
--------------------------------------------------------------------------------
/logs/2022-09-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 13)
3 |
4 | Time: September, 13 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 1,112.72GB
14 |
15 | ## Average Grad Norm
16 | - 0.50
17 |
18 | ## Progress
19 | - 119.48%
20 |
21 | ## Comment
22 |
23 | It has cost over 500,000 Yuan to train CPM-Live! Today I remember that proverb again: time is money.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 14)
3 |
4 | Time: September, 14 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 1,116.26GB
14 |
15 | ## Average Grad Norm
16 | - 0.50
17 |
18 | ## Progress
19 | - 119.86%
20 |
21 | ## Comment
22 |
23 | Due to some unexpected operational errors, our training has been temporarily suspended and we will resume it as soon as possible.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 15)
3 |
4 | Time: September, 15 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 1,116.45GB
14 |
15 | ## Average Grad Norm
16 | - 0.50
17 |
18 | ## Progress
19 | - 119.88%
20 |
21 | ## Comment
22 |
23 | Our training has been resumed.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 16)
3 |
4 | Time: September, 16 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 1,119.03GB
14 |
15 | ## Average Grad Norm
16 | - 0.51
17 |
18 | ## Progress
19 | - 120.15%
20 |
21 | ## Comment
22 |
23 | We observe that our computing cluster is not stable these days. In the early morning of today, the node where we run the model failed. We resumed the training again at 10:30.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 17)
3 |
4 | Time: September, 17 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 1,123.38GB
14 |
15 | ## Average Grad Norm
16 | - 0.51
17 |
18 | ## Progress
19 | - 120.62%
20 |
21 | ## Comment
22 |
23 | Today is a quiet day, and the computing cluster is stable. Keep going!
24 |
--------------------------------------------------------------------------------
/logs/2022-09-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 18)
3 |
4 | Time: September, 18 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 1,127.74GB
14 |
15 | ## Average Grad Norm
16 | - 0.51
17 |
18 | ## Progress
19 | - 121.09%
20 |
21 | ## Comment
22 |
23 | We are happy to see that our computing cluster does not rest on weekends🤣.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 19)
3 |
4 | Time: September, 19 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 1,132.09GB
14 |
15 | ## Average Grad Norm
16 | - 0.52
17 |
18 | ## Progress
19 | - 121.56%
20 |
21 | ## Comment
22 |
23 | CPM-Ant+ is still running steadily, and something more special is being prepared. Look forward to it!
24 |
--------------------------------------------------------------------------------
/logs/2022-09-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 20)
3 |
4 | Time: September, 20 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.08
11 |
12 | ## Completed Data
13 | - $\approx$ 1,136.44GB
14 |
15 | ## Average Grad Norm
16 | - 0.52
17 |
18 | ## Progress
19 | - 122.02%
20 |
21 | ## Comment
22 |
23 | In addition to [generating titles](https://live.openbmb.org/en/ant), what else do you want CPM-Live to do for you? Just tell us!
24 |
--------------------------------------------------------------------------------
/logs/2022-09-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 21)
3 |
4 | Time: September, 21 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.08
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 1,140.80GB
14 |
15 | ## Average Grad Norm
16 | - 0.53
17 |
18 | ## Progress
19 | - 122.49%
20 |
21 | ## Comment
22 |
23 | We are polishing our title generation demo! Feel free to experience it and provide feedback to help us improve it.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 22)
3 |
4 | Time: September, 22 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 1,145.16GB
14 |
15 | ## Average Grad Norm
16 | - 0.53
17 |
18 | ## Progress
19 | - 122.96%
20 |
21 | ## Comment
22 |
23 | The number of NLP tasks supported by CPM-Live is constantly increasing, so stay tuned!
24 |
--------------------------------------------------------------------------------
/logs/2022-09-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 23)
3 |
4 | Time: September, 23 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 1,149.52GB
14 |
15 | ## Average Grad Norm
16 | - 0.54
17 |
18 | ## Progress
19 | - 123.43%
20 |
21 | ## Comment
22 |
23 | We are preparing some English training data for CPM-Ant+. Bilingual big model is on the way!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-09-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 24)
3 |
4 | Time: September, 24 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 1,153.87GB
14 |
15 | ## Average Grad Norm
16 | - 0.54
17 |
18 | ## Progress
19 | - 123.90%
20 |
21 | ## Comment
22 |
23 | An amazing week has passed. In the next week, CPM-Ant+ will enter the sprint phase. Keep it up!
24 |
--------------------------------------------------------------------------------
/logs/2022-09-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 25)
3 |
4 | Time: September, 25 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 1,158.23GB
14 |
15 | ## Average Grad Norm
16 | - 0.54
17 |
18 | ## Progress
19 | - 124.36%
20 |
21 | ## Comment
22 |
23 | Another week is coming, and the start of CPM-Bee training is getting closer.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 26)
3 |
4 | Time: September, 26 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 1,162.59GB
14 |
15 | ## Average Grad Norm
16 | - 0.55
17 |
18 | ## Progress
19 | - 124.83%
20 |
21 | ## Comment
22 |
23 | We are pleased to see that CPM-Ant+ already has the ability to answer questions. More capabilities are continually being added!
24 |
--------------------------------------------------------------------------------
/logs/2022-09-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 27)
3 |
4 | Time: September, 27 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 1,166.95GB
14 |
15 | ## Average Grad Norm
16 | - 0.55
17 |
18 | ## Progress
19 | - 125.30%
20 |
21 | ## Comment
22 |
23 | The training of one model is coming to an end, while another model is getting ready to be born.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 28)
3 |
4 | Time: September, 28 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 3.07
11 |
12 | ## Completed Data
13 | - $\approx$ 1,171.16GB
14 |
15 | ## Average Grad Norm
16 | - 0.71
17 |
18 | ## Progress
19 | - 125.75%
20 |
21 | ## Comment
22 |
23 | The training loss increased substantially today, as we have added English training data. Don't worry! Our model is trying to learn from the new data and the loss is dropping.
24 |
--------------------------------------------------------------------------------
/logs/2022-09-29.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 29)
3 |
4 | Time: September, 29 2022 16:00
5 |
6 | Recorder: @jayzzhou-thu
7 |
8 | ## Loss
9 | - Begin: 3.07
10 | - End: 2.39
11 |
12 | ## Completed Data
13 | - $\approx$ 1,179.38GB
14 |
15 | ## Average Grad Norm
16 | - 0.76
17 |
18 | ## Progress
19 | - 126.63%
20 |
21 | ## Comment
22 |
23 | The loss drops normally today and we look forward to completing the English corpus training!
--------------------------------------------------------------------------------
/logs/2022-09-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (September, 30)
3 |
4 | Time: September, 30 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.39
10 | - End: 2.30
11 |
12 | ## Completed Data
13 | - $\approx$ 1,183.51GB
14 |
15 | ## Average Grad Norm
16 | - 0.60
17 |
18 | ## Progress
19 | - 127.08%
20 |
21 | ## Comment
22 |
23 | The loss is still falling today. From some test cases we can see that our model is getting better at English. Keep going!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 1)
3 |
4 | Time: October, 1 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.30
10 | - End: 2.31
11 |
12 | ## Completed Data
13 | - $\approx$ 1,187.63GB
14 |
15 | ## Average Grad Norm
16 | - 0.56
17 |
18 | ## Progress
19 | - 127.52%
20 |
21 | ## Comment
22 |
23 | Hello October! The training loss seems to have entered a stable period🤔.
24 |
--------------------------------------------------------------------------------
/logs/2022-10-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 2)
3 |
4 | Time: October, 2 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.31
10 | - End: 2.25
11 |
12 | ## Completed Data
13 | - $\approx$ 1,191.75GB
14 |
15 | ## Average Grad Norm
16 | - 0.54
17 |
18 | ## Progress
19 | - 127.96%
20 |
21 | ## Comment
22 |
23 | It's great to see that the loss continues to fall. Keep up the good work and become an English expert as well!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 3)
3 |
4 | Time: October, 3 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.25
10 | - End: 2.27
11 |
12 | ## Completed Data
13 | - $\approx$ 1,195.87GB
14 |
15 | ## Average Grad Norm
16 | - 0.53
17 |
18 | ## Progress
19 | - 128.41%
20 |
21 | ## Comment
22 |
23 | A peaceful day. The training is very stable. Keep learning!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 4)
3 |
4 | Time: October, 4 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.27
10 | - End: 2.23
11 |
12 | ## Completed Data
13 | - $\approx$ 1,199.99GB
14 |
15 | ## Average Grad Norm
16 | - 0.52
17 |
18 | ## Progress
19 | - 128.85%
20 |
21 | ## Comment
22 |
23 | Today is another peaceful day, and CPM-Ant+ would like to say: *"Happy New Year's Day!" We are all happy, and we will continue to do so in the years ahead. As always, CPM-Ant+ is a pleasure to work with. Thanks for reading and I hope you enjoy it as much as I do.*
24 |
25 | 😆😆😆Interesting.
26 |
--------------------------------------------------------------------------------
/logs/2022-10-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 5)
3 |
4 | Time: October, 5 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.23
10 | - End: 2.25
11 |
12 | ## Completed Data
13 | - $\approx$ 1,204.12GB
14 |
15 | ## Average Grad Norm
16 | - 0.51
17 |
18 | ## Progress
19 | - 129.29%
20 |
21 | ## Comment
22 |
23 | Today I write a comment in Chinese:
24 |
25 | *今天又是风平浪静的一天。模型训练仍在继续,不断提高自己吧!*
26 |
27 | Then I use CPM-Ant+ to translate it:
28 |
29 | *Today is a quiet day. Model training continues, keep improving yourself!*
30 |
31 | Not bad.😉
32 |
--------------------------------------------------------------------------------
/logs/2022-10-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 6)
3 |
4 | Time: October, 6 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.25
10 | - End: 2.25
11 |
12 | ## Completed Data
13 | - $\approx$ 1,208.24GB
14 |
15 | ## Average Grad Norm
16 | - 0.51
17 |
18 | ## Progress
19 | - 129.73%
20 |
21 | ## Comment
22 |
23 | The training has been going on for 130 days and still continues. We are getting closer to the day when the model evolves!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 7)
3 |
4 | Time: October, 7 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.25
10 | - End: 2.24
11 |
12 | ## Completed Data
13 | - $\approx$ 1,212.36GB
14 |
15 | ## Average Grad Norm
16 | - 0.50
17 |
18 | ## Progress
19 | - 130.18%
20 |
21 | ## Comment
22 |
23 | CPM-Ant+ has been training steadily for many days. Once again, we have witnessed the growth of a model.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-10-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 8)
3 |
4 | Time: October, 8 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.24
10 | - End: 2.25
11 |
12 | ## Completed Data
13 | - $\approx$ 1,216.48GB
14 |
15 | ## Average Grad Norm
16 | - 0.50
17 |
18 | ## Progress
19 | - 130.62%
20 |
21 | ## Comment
22 |
23 | It's time to test the model again!😁
24 |
25 | Prefix: Today CPM-Ant+ would like to say:
26 |
27 | Generated text: *"We are pleased with the performance of our CPM-Ant+, and we look forward to working closely with you in the future."*
28 |
29 | Our model is happy with itself🤣
30 |
--------------------------------------------------------------------------------
/logs/2022-10-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 9)
3 |
4 | Time: October, 9 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.25
10 | - End: 2.22
11 |
12 | ## Completed Data
13 | - $\approx$ 1,220.61GB
14 |
15 | ## Average Grad Norm
16 | - 0.50
17 |
18 | ## Progress
19 | - 131.06%
20 |
21 | ## Comment
22 |
23 | After 6,000 steps of training, what has changed in our model? Let's test it.
24 |
25 | Prefix: Today CPM-Ant+ would like to say:
26 |
27 | Generated text: *"We're sorry to hear that you are having problems with your device. We will be sending you a new one, and we hope it works as well as the first time around."*
28 |
29 | 🤔Today our model looks like a customer service person.
30 |
31 |
--------------------------------------------------------------------------------
/logs/2022-10-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 10)
3 |
4 | Time: October, 10 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.22
10 | - End: 2.22
11 |
12 | ## Completed Data
13 | - $\approx$ 1,224.72GB
14 |
15 | ## Average Grad Norm
16 | - 0.50
17 |
18 | ## Progress
19 | - 131.50%
20 |
21 | ## Comment
22 |
23 | The preparations for CPM-Bee are in the final stage, and we believe that the training will officially begin very soon!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-11.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 11)
3 |
4 | Time: October, 11 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.22
10 | - End: 2.21
11 |
12 | ## Completed Data
13 | - $\approx$ 1,228.85GB
14 |
15 | ## Average Grad Norm
16 | - 0.50
17 |
18 | ## Progress
19 | - 131.95%
20 |
21 | ## Comment
22 |
23 | Today CPM-Ant+ wants to say:
24 |
25 | Prefix: Today is Tuesday
26 |
27 | Generated text: *, and I've been waiting for this game to come out. It was a long wait, but it finally came out today.*
28 |
29 | 😃Our model is expecting something to happen. We will announce our big event this week. Stay tuned!
30 |
--------------------------------------------------------------------------------
/logs/2022-10-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 12)
3 |
4 | Time: October, 12 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.21
10 | - End: 2.20
11 |
12 | ## Completed Data
13 | - $\approx$ 1,232.96GB
14 |
15 | ## Average Grad Norm
16 | - 0.49
17 |
18 | ## Progress
19 | - 132.39%
20 |
21 | ## Comment
22 |
23 | CPM-Ant+ model has been released today! Check [here](https://github.com/OpenBMB/CPM-Live/tree/cpm-ant-plus/cpm-live) for more information.
24 |
25 | The training is still ongoing, and we'll start training CPM-Bee *tomorrow* based on the checkpoint of CPM-Ant+!
26 |
--------------------------------------------------------------------------------
/logs/2022-10-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 13)
3 |
4 | Time: October, 13 2022 16:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Progress
9 | - 0.0%
10 |
11 | ## Comment
12 |
13 | The training of CPM-Bee will launch today! Check our [training plan](https://github.com/OpenBMB/CPM-Live/blob/master/plans/CPM-Bee%E8%AE%AD%E7%BB%83%E8%AE%A1%E5%88%92%E4%B9%A6.md). Good luck!
14 |
--------------------------------------------------------------------------------
/logs/2022-10-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 14)
3 |
4 | Time: October, 14 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 10.41
10 | - End: 2.75
11 |
12 | ## Completed Data
13 | - $\approx$ 1,246.88GB
14 |
15 | ## Average Grad Norm
16 | - 2.74
17 |
18 | ## Progress
19 | - 1.0%
20 |
21 | ## Comment
22 |
23 | The first day of CPM-Bee training! Since we inherit the checkpoint of CPM-Ant+, the training loss drops very fast. We can see that our model performs quite well in the daily test. Keep going!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 15)
3 |
4 | Time: October, 15 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 10.75
10 | - End: 2.44
11 |
12 | ## Completed Data
13 | - $\approx$ 1,260.09GB
14 |
15 | ## Average Grad Norm
16 | - 26.26
17 |
18 | ## Progress
19 | - 2.0%
20 |
21 | ## Comment
22 |
23 | Today's key events:
24 |
25 | - The training loss increased rapidly at 5:00 and eventually became NaN. We reduced the learning rate by half and resumed the training at 10:40.
26 |
27 | - We observed that the loss increased again at 16:40, so we reduced the learning rate by a factor of 0.2.
28 |
29 | We'll keep an eye on the training process.
30 |
--------------------------------------------------------------------------------
/logs/2022-10-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 16)
3 |
4 | Time: October, 16 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.44
10 | - End: 2.71
11 |
12 | ## Completed Data
13 | - $\approx$ 1,274.11GB
14 |
15 | ## Average Grad Norm
16 | - 2.26
17 |
18 | ## Progress
19 | - 3.0%
20 |
21 | ## Comment
22 |
23 | We observed that the training loss was still going up, so we skipped some training data at 23:00 last night.
24 |
--------------------------------------------------------------------------------
/logs/2022-10-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 17)
3 |
4 | Time: October, 17 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.71
10 | - End: 2.59
11 |
12 | ## Completed Data
13 | - $\approx$ 1,276.51GB
14 |
15 | ## Average Grad Norm
16 | - 1.77
17 |
18 | ## Progress
19 | - 3.1%
20 |
21 | ## Comment
22 |
23 | Key events:
24 |
25 | - 2022/10/16 19:30: Since the recent training is not very stable, we choose to delete the title generation and summarization datasets.
26 |
27 | - 2022/10/16 21:55: We encounter an issue with the data loader. The training has temporarily stopped, and we are fixing this problem.
28 |
29 |
--------------------------------------------------------------------------------
/logs/2022-10-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 18)
3 |
4 | Time: October, 18 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.59
10 | - End: 2.65
11 |
12 | ## Completed Data
13 | - $\approx$ 1,287.53GB
14 |
15 | ## Average Grad Norm
16 | - 3.02
17 |
18 | ## Progress
19 | - 3.92%
20 |
21 | ## Comment
22 |
23 | We have fixed the data loader and resumed the training. Besides, the learning rate has been restored to its original scale. We expect the model to run stably now.
24 |
--------------------------------------------------------------------------------
/logs/2022-10-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 19)
3 |
4 | Time: October, 19 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.65
10 | - End: 2.73
11 |
12 | ## Completed Data
13 | - $\approx$ 1,287.53GB
14 |
15 | ## Average Grad Norm
16 | - 0.86
17 |
18 | ## Progress
19 | - 4.92%
20 |
21 | ## Comment
22 |
23 | It is good to see that the training is much more stable today. Keep it up!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 20)
3 |
4 | Time: October, 20 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.73
10 | - End: 2.60
11 |
12 | ## Completed Data
13 | - $\approx$ 1,315.19GB
14 |
15 | ## Average Grad Norm
16 | - 0.64
17 |
18 | ## Progress
19 | - 5.91%
20 |
21 | ## Comment
22 |
23 | Finally, the loss began to drop normally, and the training returned to the right track.
24 |
--------------------------------------------------------------------------------
/logs/2022-10-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 21)
3 |
4 | Time: October, 21 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.60
10 | - End: 2.54
11 |
12 | ## Completed Data
13 | - $\approx$ 1,328.82GB
14 |
15 | ## Average Grad Norm
16 | - 0.61
17 |
18 | ## Progress
19 | - 6.89%
20 |
21 | ## Comment
22 |
23 | We've added back the summarization and title generation tasks. By the way, we tested the QA capability of our model today, which is quite good!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 22)
3 |
4 | Time: October, 22 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.54
10 | - End: 2.51
11 |
12 | ## Completed Data
13 | - $\approx$ 1,342.41GB
14 |
15 | ## Average Grad Norm
16 | - 0.60
17 |
18 | ## Progress
19 | - 7.86%
20 |
21 | ## Comment
22 |
23 | Today we continue to test the question answering ability of our model and find that it is pretty smart.
24 | It looks like we'll have to test it on harder questions later.
25 |
--------------------------------------------------------------------------------
/logs/2022-10-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 23)
3 |
4 | Time: October, 23 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.51
10 | - End: 2.48
11 |
12 | ## Completed Data
13 | - $\approx$ 1,354.55GB
14 |
15 | ## Average Grad Norm
16 | - 0.62
17 |
18 | ## Progress
19 | - 8.74%
20 |
21 | ## Comment
22 |
23 | We observed that the training loss became NaN this morning, and we stabilized the training by modifying the computation of attention scores. It seems to be working so far.
24 |
--------------------------------------------------------------------------------
/logs/2022-10-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 24)
3 |
4 | Time: October, 24 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.48
10 | - End: 2.47
11 |
12 | ## Completed Data
13 | - $\approx$ 1,369.96GB
14 |
15 | ## Average Grad Norm
16 | - 0.60
17 |
18 | ## Progress
19 | - 9.84%
20 |
21 | ## Comment
22 |
23 | We are constantly adding new tasks to our model. Stay tuned!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 25)
3 |
4 | Time: October, 25 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.47
10 | - End: 2.45
11 |
12 | ## Completed Data
13 | - $\approx$ 1,384.44GB
14 |
15 | ## Average Grad Norm
16 | - 0.68
17 |
18 | ## Progress
19 | - 10.88%
20 |
21 | ## Comment
22 |
23 | The training progress is over 10%! It is gratifying to watch the model progress day by day😁.
24 |
--------------------------------------------------------------------------------
/logs/2022-10-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 26)
3 |
4 | Time: October, 26 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.45
10 | - End: 2.43
11 |
12 | ## Completed Data
13 | - $\approx$ 1,399.63GB
14 |
15 | ## Average Grad Norm
16 | - 0.61
17 |
18 | ## Progress
19 | - 11.98%
20 |
21 | ## Comment
22 |
23 | For sentence-level translations, we believe our model is currently relatively competent, although there are occasionally some minor omissions in the translation. Check the daily test!
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-10-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 27)
3 |
4 | Time: October, 27 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.43
10 | - End: 2.42
11 |
12 | ## Completed Data
13 | - $\approx$ 1,414.79GB
14 |
15 | ## Average Grad Norm
16 | - 0.66
17 |
18 | ## Progress
19 | - 13.06%
20 |
21 | ## Comment
22 |
23 | The training is steady these days, and the model gets the question right again today. Good!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-10-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 28)
3 |
4 | Time: October, 28 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.42
10 | - End: 2.39
11 |
12 | ## Completed Data
13 | - $\approx$ 1,429.95GB
14 |
15 | ## Average Grad Norm
16 | - 0.66
17 |
18 | ## Progress
19 | - 14.15%
20 |
21 | ## Comment
22 |
23 | We've added more capabilities to CPM-Bee, see our capabilities tree for details, and you can check out how the model performs on these new tasks in the Daily Test now!
24 |
--------------------------------------------------------------------------------
/logs/2022-10-29.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 29)
3 |
4 | Time: October, 29 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.39
10 | - End: 2.37
11 |
12 | ## Completed Data
13 | - $\approx$ 1,445.11GB
14 |
15 | ## Average Grad Norm
16 | - 0.67
17 |
18 | ## Progress
19 | - 15.24%
20 |
21 | ## Comment
22 |
23 | We show the cross-lingual question-answering capability of CPM-Bee in the Daily Test.
24 |
--------------------------------------------------------------------------------
/logs/2022-10-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 30)
3 |
4 | Time: October, 30 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.37
10 | - End: 2.37
11 |
12 | ## Completed Data
13 | - $\approx$ 1,452.58GB
14 |
15 | ## Average Grad Norm
16 | - 0.70
17 |
18 | ## Progress
19 | - 15.78%
20 |
21 | ## Comment
22 |
23 | Due to an unexpected issue in our data, the training was interrupted for about 12 hours and resumed at 12:00 today.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-10-31.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (October, 31)
3 |
4 | Time: October, 31 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.37
10 | - End: 2.37
11 |
12 | ## Completed Data
13 | - $\approx$ 1,467.69GB
14 |
15 | ## Average Grad Norm
16 | - 0.91
17 |
18 | ## Progress
19 | - 16.87%
20 |
21 | ## Comment
22 |
23 | The training is relatively stable at present, and we are helping the model learn more NLP capabilities. It is gratifying that we can see its progress.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-11-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 1)
3 |
4 | Time: November, 1 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.37
10 | - End: 2.35
11 |
12 | ## Completed Data
13 | - $\approx$ 1,482.79GB
14 |
15 | ## Average Grad Norm
16 | - 0.77
17 |
18 | ## Progress
19 | - 17.95%
20 |
21 | ## Comment
22 |
23 | Hi November! CPM-Bee will continue to pursue knowledge in the new month.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 2)
3 |
4 | Time: November, 2 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.35
10 | - End: 2.31
11 |
12 | ## Completed Data
13 | - $\approx$ 1,497.92GB
14 |
15 | ## Average Grad Norm
16 | - 0.73
17 |
18 | ## Progress
19 | - 19.03%
20 |
21 | ## Comment
22 |
23 | Today, I am once again amazed at the QA ability of our model. I believe it really understands something.
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-11-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 3)
3 |
4 | Time: November, 3 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.31
10 | - End: 2.33
11 |
12 | ## Completed Data
13 | - $\approx$ 1,513.06GB
14 |
15 | ## Average Grad Norm
16 | - 0.74
17 |
18 | ## Progress
19 | - 20.12%
20 |
21 | ## Comment
22 |
23 | The training progress has exceeded 20%, and more tasks are being added to the model.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 4)
3 |
4 | Time: November, 4 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.33
10 | - End: 2.31
11 |
12 | ## Completed Data
13 | - $\approx$ 1,528.24GB
14 |
15 | ## Average Grad Norm
16 | - 0.71
17 |
18 | ## Progress
19 | - 21.22%
20 |
21 | ## Comment
22 |
23 | I think CPM-Bee did a good job of filling in the blanks in the Daily Test today. What do you think?
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-11-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 5)
3 |
4 | Time: November, 5 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.31
10 | - End: 2.26
11 |
12 | ## Completed Data
13 | - $\approx$ 1,543.42GB
14 |
15 | ## Average Grad Norm
16 | - 0.73
17 |
18 | ## Progress
19 | - 22.31%
20 |
21 | ## Comment
22 |
23 | The training is quite stable recently, and the QA ability is continuously improving.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-11-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 6)
3 |
4 | Time: November, 6 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.26
10 | - End: 2.25
11 |
12 | ## Completed Data
13 | - $\approx$ 1,558.59GB
14 |
15 | ## Average Grad Norm
16 | - 0.75
17 |
18 | ## Progress
19 | - 23.40%
20 |
21 | ## Comment
22 |
23 | A peaceful weekend. Today's daily test was not that easy, but the model still got the answer right. Good job!
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-11-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 7)
3 |
4 | Time: November, 7 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.25
10 | - End: 2.24
11 |
12 | ## Completed Data
13 | - $\approx$ 1,573.76GB
14 |
15 | ## Average Grad Norm
16 | - 0.76
17 |
18 | ## Progress
19 | - 24.49%
20 |
21 | ## Comment
22 |
23 | In the Daily Test, we can see that the translation ability of CPM-Bee is already quite good.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-11-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 8)
3 |
4 | Time: November, 8 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.24
10 | - End: 2.26
11 |
12 | ## Completed Data
13 | - $\approx$ 1,588.72GB
14 |
15 | ## Average Grad Norm
16 | - 0.77
17 |
18 | ## Progress
19 | - 25.56%
20 |
21 | ## Comment
22 |
23 | We released a [video](https://b23.tv/WHpj6IT) to briefly demonstrate the capabilities of CPM-Bee. Check it!
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-11-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 9)
3 |
4 | Time: November, 9 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.26
10 | - End: 2.25
11 |
12 | ## Completed Data
13 | - $\approx$ 1,603.77GB
14 |
15 | ## Average Grad Norm
16 | - 0.81
17 |
18 | ## Progress
19 | - 26.64%
20 |
21 | ## Comment
22 |
23 | The training has been quite stable, and we are still working on adding more new features to the model.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-11-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 10)
3 |
4 | Time: November, 10 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.25
10 | - End: 2.23
11 |
12 | ## Completed Data
13 | - $\approx$ 1,618.84GB
14 |
15 | ## Average Grad Norm
16 | - 0.79
17 |
18 | ## Progress
19 | - 27.73%
20 |
21 | ## Comment
22 |
23 | Guess what I'm going to say? Yes, a peaceful day😁.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-11.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 11)
3 |
4 | Time: November, 11 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.23
10 | - End: 2.24
11 |
12 | ## Completed Data
13 | - $\approx$ 1,633.90GB
14 |
15 | ## Average Grad Norm
16 | - 0.80
17 |
18 | ## Progress
19 | - 28.81%
20 |
21 | ## Comment
22 |
23 | It seems that our model is good at solving brain teasers😎.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 12)
3 |
4 | Time: November, 12 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.24
10 | - End: 2.22
11 |
12 | ## Completed Data
13 | - $\approx$ 1,648.96GB
14 |
15 | ## Average Grad Norm
16 | - 0.83
17 |
18 | ## Progress
19 | - 29.89%
20 |
21 | ## Comment
22 |
23 | In Daily Test, faced with a more difficult question, our model got the answer wrong this time. Try it, can you answer it correctly?
24 |
--------------------------------------------------------------------------------
/logs/2022-11-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 13)
3 |
4 | Time: November, 13 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.22
10 | - End: 2.22
11 |
12 | ## Completed Data
13 | - $\approx$ 1,663.99GB
14 |
15 | ## Average Grad Norm
16 | - 0.88
17 |
18 | ## Progress
19 | - 30.97%
20 |
21 | ## Comment
22 |
23 | You can see that we use CPM-Bee to translate sentences in a research paper today. It performs fairly well.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 14)
3 |
4 | Time: November, 14 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.22
10 | - End: 2.22
11 |
12 | ## Completed Data
13 | - $\approx$ 1,679.06GB
14 |
15 | ## Average Grad Norm
16 | - 1.16
17 |
18 | ## Progress
19 | - 32.05%
20 |
21 | ## Comment
22 |
23 | In our test today, the dialogue ability of the model is much better than before.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 15)
3 |
4 | Time: November, 15 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.22
10 | - End: 2.20
11 |
12 | ## Completed Data
13 | - $\approx$ 1,694.12GB
14 |
15 | ## Average Grad Norm
16 | - 0.98
17 |
18 | ## Progress
19 | - 33.13%
20 |
21 | ## Comment
22 |
23 | In today's test, our model missed a sentence in the translation. This seems to be a common problem when translating long documents🤔.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 16)
3 |
4 | Time: November, 16 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.20
10 | - End: 2.19
11 |
12 | ## Completed Data
13 | - $\approx$ 1,709.06GB
14 |
15 | ## Average Grad Norm
16 | - 0.86
17 |
18 | ## Progress
19 | - 34.21%
20 |
21 | ## Comment
22 |
23 | We are working on adding some advanced text generation features to the model.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 17)
3 |
4 | Time: November, 17 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.19
10 | - End: 2.22
11 |
12 | ## Completed Data
13 | - $\approx$ 1,724.07GB
14 |
15 | ## Average Grad Norm
16 | - 0.88
17 |
18 | ## Progress
19 | - 35.29%
20 |
21 | ## Comment
22 |
23 | What's the easiest way you've thought of to use a large language model?
24 |
--------------------------------------------------------------------------------
/logs/2022-11-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 18)
3 |
4 | Time: November, 18 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.22
10 | - End: 2.22
11 |
12 | ## Completed Data
13 | - $\approx$ 1,739.09GB
14 |
15 | ## Average Grad Norm
16 | - 0.86
17 |
18 | ## Progress
19 | - 36.37%
20 |
21 | ## Comment
22 |
23 | What's the mission of OpenBMB? It's to lower the barriers to use big models!
24 |
--------------------------------------------------------------------------------
/logs/2022-11-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 19)
3 |
4 | Time: November, 19 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.22
10 | - End: 2.21
11 |
12 | ## Completed Data
13 | - $\approx$ 1,754.12GB
14 |
15 | ## Average Grad Norm
16 | - 0.88
17 |
18 | ## Progress
19 | - 37.45%
20 |
21 | ## Comment
22 |
23 | These days, the training loss curve is very smooth and looks very nice. But this does not mean that the model has stopped learning; on the contrary, it is still improving.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-11-2.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 24)
3 |
4 | Time: November, 24 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.15
10 | - End: 2.14
11 |
12 | ## Completed Data
13 | - $\approx$ 1826.75GB
14 |
15 | ## Average Grad Norm
16 | - 2.29
17 |
18 | ## Progress
19 | - 42.66%
20 |
21 | ## Comment
22 |
23 | The loss still continues to fall.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 20)
3 |
4 | Time: November, 20 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.21
10 | - End: 2.21
11 |
12 | ## Completed Data
13 | - $\approx$ 1,769.14GB
14 |
15 | ## Average Grad Norm
16 | - 0.90
17 |
18 | ## Progress
19 | - 38.52%
20 |
21 | ## Comment
22 |
23 | Our goal is to train the model to become an excellent decathlon player. At present, it is qualified in terms of the number of tasks.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 21)
3 |
4 | Time: November, 21 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.21
10 | - End: 2.21
11 |
12 | ## Completed Data
13 | - $\approx$ 1,783.96GB
14 |
15 | ## Average Grad Norm
16 | - 1.07
17 |
18 | ## Progress
19 | - 39.59%
20 |
21 | ## Comment
22 |
23 | From today's test, we can see that the current model is not good enough for the translation of professional terms.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 22)
3 |
4 | Time: November, 22 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.21
10 | - End: 2.17
11 |
12 | ## Completed Data
13 | - $\approx$ 1798.23GB
14 |
15 | ## Average Grad Norm
16 | - 1.22
17 |
18 | ## Progress
19 | - 40.62%
20 |
21 | ## Comment
22 |
23 | Today, the training loss became NaN at around 12:00. We solved this problem by scaling weights. We'll keep an eye on the model in the next few days.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 23)
3 |
4 | Time: November, 23 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.17
10 | - End: 2.15
11 |
12 | ## Completed Data
13 | - $\approx$ 1811.87GB
14 |
15 | ## Average Grad Norm
16 | - 2.74
17 |
18 | ## Progress
19 | - 41.60%
20 |
21 | ## Comment
22 |
23 | After we reduced the learning rate and scaled down the weights, the training was quite stable today.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 24)
3 |
4 | Time: November, 24 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.15
10 | - End: 2.14
11 |
12 | ## Completed Data
13 | - $\approx$ 1826.75GB
14 |
15 | ## Average Grad Norm
16 | - 2.29
17 |
18 | ## Progress
19 | - 42.66%
20 |
21 | ## Comment
22 |
23 | The loss still continues to fall.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 25)
3 |
4 | Time: November, 25 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.14
10 | - End: 2.14
11 |
12 | ## Completed Data
13 | - $\approx$ 1841.73GB
14 |
15 | ## Average Grad Norm
16 | - 2.17
17 |
18 | ## Progress
19 | - 43.74%
20 |
21 | ## Comment
22 |
23 | We are proud to see that the CPM-Live project has been running for 180 days. Keep going!
24 |
--------------------------------------------------------------------------------
/logs/2022-11-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 26)
3 |
4 | Time: November, 26 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.14
10 | - End: 2.13
11 |
12 | ## Completed Data
13 | - $\approx$ 1856.73GB
14 |
15 | ## Average Grad Norm
16 | - 2.18
17 |
18 | ## Progress
19 | - 44.82%
20 |
21 | ## Comment
22 |
23 | Since we reduced the weights of the model and the learning rate, the training loss has been decreasing. Very interesting!
24 |
--------------------------------------------------------------------------------
/logs/2022-11-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 27)
3 |
4 | Time: November, 27 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.13
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 1871.71GB
14 |
15 | ## Average Grad Norm
16 | - 2.17
17 |
18 | ## Progress
19 | - 45.89%
20 |
21 | ## Comment
22 |
23 | From today's test, it can be seen that our model is not sensitive to numbers when doing the translation task.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 28)
3 |
4 | Time: November, 28 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.13
11 |
12 | ## Completed Data
13 | - $\approx$ 1886.68GB
14 |
15 | ## Average Grad Norm
16 | - 2.17
17 |
18 | ## Progress
19 | - 46.97%
20 |
21 | ## Comment
22 |
23 | It seems that some simple questions are no match for our model.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-29.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 29)
3 |
4 | Time: November, 29 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.13
10 | - End: 2.13
11 |
12 | ## Completed Data
13 | - $\approx$ 1901.66GB
14 |
15 | ## Average Grad Norm
16 | - 2.19
17 |
18 | ## Progress
19 | - 48.05%
20 |
21 | ## Comment
22 |
23 | The training of the model has recently entered a stable period.
24 |
--------------------------------------------------------------------------------
/logs/2022-11-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (November, 30)
3 |
4 | Time: November, 30 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.13
10 | - End: 2.13
11 |
12 | ## Completed Data
13 | - $\approx$ 1916.64GB
14 |
15 | ## Average Grad Norm
16 | - 2.19
17 |
18 | ## Progress
19 | - 49.12%
20 |
21 | ## Comment
22 |
23 | Goodbye November!
24 |
--------------------------------------------------------------------------------
/logs/2022-12-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 1)
3 |
4 | Time: December, 1 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.13
10 | - End: 2.13
11 |
12 | ## Completed Data
13 | - $\approx$ 1931.62GB
14 |
15 | ## Average Grad Norm
16 | - 2.20
17 |
18 | ## Progress
19 | - 50.20%
20 |
21 | ## Comment
22 |
23 | We are glad to see that the training progress has exceeded 50%!
24 |
--------------------------------------------------------------------------------
/logs/2022-12-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 2)
3 |
4 | Time: December, 2 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.13
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 1946.59GB
14 |
15 | ## Average Grad Norm
16 | - 2.19
17 |
18 | ## Progress
19 | - 51.27%
20 |
21 | ## Comment
22 |
23 | After the training loss kept completely consistent for several days, it finally dropped a little today. Very interesting🤣.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 3)
3 |
4 | Time: December, 3 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.11
11 |
12 | ## Completed Data
13 | - $\approx$ 1961.56GB
14 |
15 | ## Average Grad Norm
16 | - 2.20
17 |
18 | ## Progress
19 | - 52.35%
20 |
21 | ## Comment
22 |
23 | It's funny to see from today's test that our model can translate "Globo Esporte" correctly, but it cannot recognize the player's name properly.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 4)
3 |
4 | Time: December, 4 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.11
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 1976.56GB
14 |
15 | ## Average Grad Norm
16 | - 2.27
17 |
18 | ## Progress
19 | - 53.43%
20 |
21 | ## Comment
22 |
23 | CPM-Bee made a prediction for tonight's World Cup match😝.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 5)
3 |
4 | Time: December, 5 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.13
11 |
12 | ## Completed Data
13 | - $\approx$ 1991.54GB
14 |
15 | ## Average Grad Norm
16 | - 2.25
17 |
18 | ## Progress
19 | - 54.51%
20 |
21 | ## Comment
22 |
23 | Compared with yesterday's direct question, today I gave the model some historical records to predict the result of today's football match, and it seems that its prediction is quite reasonable.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 6)
3 |
4 | Time: December, 6 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.13
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 2006.54GB
14 |
15 | ## Average Grad Norm
16 | - 2.22
17 |
18 | ## Progress
19 | - 55.58%
20 |
21 | ## Comment
22 |
23 | The CPM-Live series models have seen over 2,000GB of data. Keep getting more erudite!
24 |
--------------------------------------------------------------------------------
/logs/2022-12-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 7)
3 |
4 | Time: December, 7 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.11
11 |
12 | ## Completed Data
13 | - $\approx$ 2021.51GB
14 |
15 | ## Average Grad Norm
16 | - 2.21
17 |
18 | ## Progress
19 | - 56.66%
20 |
21 | ## Comment
22 |
23 | The model has been training steadily for quite some time, and I have almost forgotten when the training was last interrupted.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 8)
3 |
4 | Time: December, 8 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.11
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 2036.49GB
14 |
15 | ## Average Grad Norm
16 | - 2.22
17 |
18 | ## Progress
19 | - 58.66%
20 |
21 | ## Comment
22 |
23 | Some smaller versions of CPM-Bee are being produced. Stay tuned!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-12-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 9)
3 |
4 | Time: December, 9 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 2050.15GB
14 |
15 | ## Average Grad Norm
16 | - 2.10
17 |
18 | ## Progress
19 | - 60.66%
20 |
21 | ## Comment
22 |
23 | An eventful week has passed! CPM-Bee's loss has dropped a bit more.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 10)
3 |
4 | Time: December, 10 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.10
11 |
12 | ## Completed Data
13 | - $\approx$ 2054.60GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 62.66%
20 |
21 | ## Comment
22 |
23 | Although today is a weekend, CPM-Bee keeps rushing to the finish line.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-12-11.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 11)
3 |
4 | Time: December, 11 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 2063.50GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 64.66%
20 |
21 | ## Comment
22 |
23 | The weekend is over, so let's welcome the next week full of unknowns.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 12)
3 |
4 | Time: December, 12 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.11
11 |
12 | ## Completed Data
13 | - $\approx$ 2067.94GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 66.66%
20 |
21 | ## Comment
22 |
23 | In the daily test, we give CPM-Bee a prompt and let it talk a bit more.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 13)
3 |
4 | Time: December, 13 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.11
10 | - End: 2.11
11 |
12 | ## Completed Data
13 | - $\approx$ 2072.40GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 68.66%
20 |
21 | ## Comment
22 |
23 | In today's test, we let CPM-Bee discuss the topic of food.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 14)
3 |
4 | Time: December, 14 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.11
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 2076.86GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 70.66%
20 |
21 | ## Comment
22 |
23 | CPM-Bee has been training for over two months! We're excited to see it continue to improve!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-12-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 15)
3 |
4 | Time: December, 15 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.08
11 |
12 | ## Completed Data
13 | - $\approx$ 2081.31GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 72.66%
20 |
21 | ## Comment
22 |
23 | Another milestone: 200-day anniversary of CPM-Live training!🎉🎉🎉
24 |
--------------------------------------------------------------------------------
/logs/2022-12-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 16)
3 |
4 | Time: December, 16 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.08
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 2085.76GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 74.66%
20 |
21 | ## Comment
22 |
23 | Another week went by, and we asked CPM-Bee to share its thoughts. Check the daily test.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 17)
3 |
4 | Time: December, 17 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 2090.20GB
14 |
15 | ## Average Grad Norm
16 | - 1.14
17 |
18 | ## Progress
19 | - 76.66%
20 |
21 | ## Comment
22 |
23 | I think the model answered the question fairly well in today's test.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 18)
3 |
4 | Time: December, 18 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.08
11 |
12 | ## Completed Data
13 | - $\approx$ 2094.65GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 78.66%
20 |
21 | ## Comment
22 |
23 | The weekend is over. Let's keep fighting in the last two weeks of 2022.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 19)
3 |
4 | Time: December, 19 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.08
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 2099.08GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 80.66%
20 |
21 | ## Comment
22 |
23 | The training progress exceeds 80%!
24 |
--------------------------------------------------------------------------------
/logs/2022-12-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 20)
3 |
4 | Time: December, 20 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.01
11 |
12 | ## Completed Data
13 | - $\approx$ 2103.55GB
14 |
15 | ## Average Grad Norm
16 | - 1.14
17 |
18 | ## Progress
19 | - 81.66%
20 |
21 | ## Comment
22 |
23 | Today, the training loss dropped a little more.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 21)
3 |
4 | Time: December, 21 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.01
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 2108.01GB
14 |
15 | ## Average Grad Norm
16 | - 1.19
17 |
18 | ## Progress
19 | - 82.66%
20 |
21 | ## Comment
22 |
23 | In today's test, we use our model for topic classification. Well done!
24 |
--------------------------------------------------------------------------------
/logs/2022-12-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 22)
3 |
4 | Time: December, 22 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.00
11 |
12 | ## Completed Data
13 | - $\approx$ 2112.49GB
14 |
15 | ## Average Grad Norm
16 | - 1.24
17 |
18 | ## Progress
19 | - 83.66%
20 |
21 | ## Comment
22 |
23 | Great news: the training loss reached 2.00!
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2022-12-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 23)
3 |
4 | Time: December, 23 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.00
10 | - End: 1.99
11 |
12 | ## Completed Data
13 | - $\approx$ 2116.98GB
14 |
15 | ## Average Grad Norm
16 | - 1.21
17 |
18 | ## Progress
19 | - 84.66%
20 |
21 | ## Comment
22 |
23 | Today, we let the model tell us the next sentence of an ancient poem. Good job!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-12-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 24)
3 |
4 | Time: December, 24 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 1.99
10 | - End: 2.00
11 |
12 | ## Completed Data
13 | - $\approx$ 2121.43GB
14 |
15 | ## Average Grad Norm
16 | - 1.20
17 |
18 | ## Progress
19 | - 85.66%
20 |
21 | ## Comment
22 |
23 | To my surprise, our model can also complete the lyrics. Interesting!
24 |
--------------------------------------------------------------------------------
/logs/2022-12-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 25)
3 |
4 | Time: December, 25 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.00
10 | - End: 2.00
11 |
12 | ## Completed Data
13 | - $\approx$ 2125.88GB
14 |
15 | ## Average Grad Norm
16 | - 1.20
17 |
18 | ## Progress
19 | - 86.66%
20 |
21 | ## Comment
22 |
23 | Only five days of 2022 left to go, and CPM-Live is with you.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 26)
3 |
4 | Time: December, 26 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.00
10 | - End: 2.03
11 |
12 | ## Completed Data
13 | - $\approx$ 2130.34GB
14 |
15 | ## Average Grad Norm
16 | - 1.19
17 |
18 | ## Progress
19 | - 87.66%
20 |
21 | ## Comment
22 |
23 | Our model can currently do some simple common-sense QA.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 27)
3 |
4 | Time: December, 27 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.03
10 | - End: 2.03
11 |
12 | ## Completed Data
13 | - $\approx$ 2134.79GB
14 |
15 | ## Average Grad Norm
16 | - 1.14
17 |
18 | ## Progress
19 | - 88.66%
20 |
21 | ## Comment
22 |
23 | In today's Q&A, I asked CPM-Bee to give an explanation along with the answer.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 28)
3 |
4 | Time: December, 28 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.03
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 2139.24GB
14 |
15 | ## Average Grad Norm
16 | - 1.14
17 |
18 | ## Progress
19 | - 89.66%
20 |
21 | ## Comment
22 |
23 | Sometimes the model's answer makes us confused, and we will try to improve it in the future.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2022-12-29.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 29)
3 |
4 | Time: December, 29 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.03
11 |
12 | ## Completed Data
13 | - $\approx$ 2143.70GB
14 |
15 | ## Average Grad Norm
16 | - 1.13
17 |
18 | ## Progress
19 | - 90.66%
20 |
21 | ## Comment
22 |
23 | A big event: the total cost of CPM-Live project exceeded 1 MILLION Yuan!
24 |
--------------------------------------------------------------------------------
/logs/2022-12-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 30)
3 |
4 | Time: December, 30 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.03
10 | - End: 2.04
11 |
12 | ## Completed Data
13 | - $\approx$ 2148.16GB
14 |
15 | ## Average Grad Norm
16 | - 1.14
17 |
18 | ## Progress
19 | - 90.76%
20 |
21 | ## Comment
22 |
23 | CPM-Live spent the last working day of 2022 in steady training.
24 |
--------------------------------------------------------------------------------
/logs/2022-12-31.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (December, 31)
3 |
4 | Time: December, 31 2022 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.04
10 | - End: 2.04
11 |
12 | ## Completed Data
13 | - $\approx$ 2152.57GB
14 |
15 | ## Average Grad Norm
16 | - 1.14
17 |
18 | ## Progress
19 | - 90.86%
20 |
21 | ## Comment
22 |
23 | Happy new year!
24 |
--------------------------------------------------------------------------------
/logs/2023-01-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 1)
3 |
4 | Time: January, 1 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.04
10 | - End: 2.04
11 |
12 | ## Completed Data
13 | - $\approx$ 2157.03GB
14 |
15 | ## Average Grad Norm
16 | - 1.15
17 |
18 | ## Progress
19 | - 90.96%
20 |
21 | ## Comment
22 |
23 | The first day of 2023! The CPM-Live project has entered its second year. More surprises on the way!
24 |
--------------------------------------------------------------------------------
/logs/2023-01-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 2)
3 |
4 | Time: January, 2 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.04
10 | - End: 2.02
11 |
12 | ## Completed Data
13 | - $\approx$ 2161.48GB
14 |
15 | ## Average Grad Norm
16 | - 1.14
17 |
18 | ## Progress
19 | - 91.06%
20 |
21 | ## Comment
22 |
23 | It's fun to keep exploring the knowledge in the big model😁.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2023-01-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 3)
3 |
4 | Time: January, 3 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.02
10 | - End: 2.17
11 |
12 | ## Completed Data
13 | - $\approx$ 2165.93GB
14 |
15 | ## Average Grad Norm
16 | - 1.16
17 |
18 | ## Progress
19 | - 91.16%
20 |
21 | ## Comment
22 |
23 | As you can see from today's test, our model can perform some common sense reasoning.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 4)
3 |
4 | Time: January, 4 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.17
10 | - End: 2.10
11 |
12 | ## Completed Data
13 | - $\approx$ 2168.59GB
14 |
15 | ## Average Grad Norm
16 | - 1.18
17 |
18 | ## Progress
19 | - 91.26%
20 |
21 | ## Comment
22 |
23 | Today's question is not so well answered by the model, maybe it needs to search the Internet to get some information.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 5)
3 |
4 | Time: January, 5 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.10
10 | - End: 2.11
11 |
12 | ## Completed Data
13 | - $\approx$ 2173.00GB
14 |
15 | ## Average Grad Norm
16 | - 1.18
17 |
18 | ## Progress
19 | - 91.36%
20 |
21 | ## Comment
22 |
23 | I think the recipe generated by the model today is very reasonable. What do you think🤔?
24 |
--------------------------------------------------------------------------------
/logs/2023-01-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 6)
3 |
4 | Time: January, 6 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.11
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 2177.41GB
14 |
15 | ## Average Grad Norm
16 | - 1.19
17 |
18 | ## Progress
19 | - 91.46%
20 |
21 | ## Comment
22 |
23 | Today I asked CPM-Bee to generate another recipe. It not only met my needs, but also told me the characteristics of the dish, the key points of cooking, and some tips. Good job!
24 |
--------------------------------------------------------------------------------
/logs/2023-01-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 7)
3 |
4 | Time: January, 7 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 2181.81GB
14 |
15 | ## Average Grad Norm
16 | - 1.19
17 |
18 | ## Progress
19 | - 91.56%
20 |
21 | ## Comment
22 |
23 | In today's test, our model wrote an essay based on the given topic.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 8)
3 |
4 | Time: January, 8 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 2186.21GB
14 |
15 | ## Average Grad Norm
16 | - 1.19
17 |
18 | ## Progress
19 | - 91.66%
20 |
21 | ## Comment
22 |
23 | Today, we asked the model to write a science fiction article. Quite funny🤣
24 |
--------------------------------------------------------------------------------
/logs/2023-01-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 9)
3 |
4 | Time: January, 9 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.10
11 |
12 | ## Completed Data
13 | - $\approx$ 2190.62GB
14 |
15 | ## Average Grad Norm
16 | - 1.19
17 |
18 | ## Progress
19 | - 91.76%
20 |
21 | ## Comment
22 |
23 | Today I was thinking about whether it would be better to buy an Android phone or an iPhone for my next phone. I asked our model for help and got the answer.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 10)
3 |
4 | Time: January, 10 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.10
10 | - End: 2.08
11 |
12 | ## Completed Data
13 | - $\approx$ 2195.02GB
14 |
15 | ## Average Grad Norm
16 | - 1.17
17 |
18 | ## Progress
19 | - 91.86%
20 |
21 | ## Comment
22 |
23 | I asked CPM-Bee what gift I should give to my girlfriend, and it recommended 40 items to me😆.
24 |
25 |
26 |
--------------------------------------------------------------------------------
/logs/2023-01-11.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 11)
3 |
4 | Time: January, 11 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.08
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 2199.43GB
14 |
15 | ## Average Grad Norm
16 | - 1.18
17 |
18 | ## Progress
19 | - 91.96%
20 |
21 | ## Comment
22 |
23 | Our model answered my question about the lipstick number carefully, but I asked an expert and learned that the model's answer was not really accurate. Maybe the model needs some external knowledge to answer questions in this area.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2023-01-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 12)
3 |
4 | Time: January, 12 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 2203.85GB
14 |
15 | ## Average Grad Norm
16 | - 1.18
17 |
18 | ## Progress
19 | - 92.06%
20 |
21 | ## Comment
22 |
23 | Have you ever thought about text processing in Excel? CPM-Bee can help you! Try our [MFTable (模力表格)](https://live.openbmb.org/playground)!
24 |
--------------------------------------------------------------------------------
/logs/2023-01-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 13)
3 |
4 | Time: January, 13 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 2208.26GB
14 |
15 | ## Average Grad Norm
16 | - 1.18
17 |
18 | ## Progress
19 | - 92.16%
20 |
21 | ## Comment
22 |
23 | Today, let's translate a poem with our model.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 14)
3 |
4 | Time: January, 14 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 2212.67GB
14 |
15 | ## Average Grad Norm
16 | - 1.19
17 |
18 | ## Progress
19 | - 92.26%
20 |
21 | ## Comment
22 |
23 | For the QA task, our goal is to make the model truly understand the article, and answer some less obvious questions.
24 |
25 |
--------------------------------------------------------------------------------
/logs/2023-01-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 15)
3 |
4 | Time: January, 15 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 2217.08GB
14 |
15 | ## Average Grad Norm
16 | - 1.20
17 |
18 | ## Progress
19 | - 92.36%
20 |
21 | ## Comment
22 |
23 | I sometimes wonder if CPM-Bee's fill-in-the-blank ability has surpassed mine.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 16)
3 |
4 | Time: January, 16 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.08
11 |
12 | ## Completed Data
13 | - $\approx$ 2221.52GB
14 |
15 | ## Average Grad Norm
16 | - 1.20
17 |
18 | ## Progress
19 | - 92.46%
20 |
21 | ## Comment
22 |
23 | In today's test, we let the model teach us how to make a *dark cuisine*. Well, it still looks reasonable.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 17)
3 |
4 | Time: January, 17 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.08
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 2225.93GB
14 |
15 | ## Average Grad Norm
16 | - 1.20
17 |
18 | ## Progress
19 | - 92.56%
20 |
21 | ## Comment
22 |
23 | Recently, we are still enhancing the capabilities of CPM-Bee. Stay tuned!
24 |
--------------------------------------------------------------------------------
/logs/2023-01-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 18)
3 |
4 | Time: January, 18 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 2230.35GB
14 |
15 | ## Average Grad Norm
16 | - 1.22
17 |
18 | ## Progress
19 | - 92.66%
20 |
21 | ## Comment
22 |
23 | Our model succeeds in giving an itinerary for a tour of Chongqing, but it is too brief.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 19)
3 |
4 | Time: January, 19 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.04
11 |
12 | ## Completed Data
13 | - $\approx$ 2234.77GB
14 |
15 | ## Average Grad Norm
16 | - 1.24
17 |
18 | ## Progress
19 | - 92.76%
20 |
21 | ## Comment
22 |
23 | In today's test, CPM-Bee is a very competent shopping guide, except for recommending the lipstick, as this contradicts the context.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 20)
3 |
4 | Time: January, 20 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.04
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 2239.18GB
14 |
15 | ## Average Grad Norm
16 | - 1.25
17 |
18 | ## Progress
19 | - 92.86%
20 |
21 | ## Comment
22 |
23 | We have entered the 100th day of CPM-Live training. Glad to see that everything is running smoothly!
24 |
25 |
--------------------------------------------------------------------------------
/logs/2023-01-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 21)
3 |
4 | Time: January, 21 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.02
11 |
12 | ## Completed Data
13 | - $\approx$ 2243.59GB
14 |
15 | ## Average Grad Norm
16 | - 1.25
17 |
18 | ## Progress
19 | - 92.96%
20 |
21 | ## Comment
22 |
23 | We wish everyone a Happy Chinese New Year!
24 |
--------------------------------------------------------------------------------
/logs/2023-01-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 22)
3 |
4 | Time: January, 22 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.02
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 2248.01GB
14 |
15 | ## Average Grad Norm
16 | - 1.25
17 |
18 | ## Progress
19 | - 93.06%
20 |
21 | ## Comment
22 |
23 | The first day of the Chinese New Year, people are celebrating, the model is still learning.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 23)
3 |
4 | Time: January, 23 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 2252.42GB
14 |
15 | ## Average Grad Norm
16 | - 1.25
17 |
18 | ## Progress
19 | - 93.16%
20 |
21 | ## Comment
22 |
23 | Today I consulted CPM-Bee when organizing a family entertainment activity and it recommended playing mahjong. I adopted it and had a great time.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 24)
3 |
4 | Time: January, 24 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.08
11 |
12 | ## Completed Data
13 | - $\approx$ 2256.85GB
14 |
15 | ## Average Grad Norm
16 | - 1.25
17 |
18 | ## Progress
19 | - 93.26%
20 |
21 | ## Comment
22 |
23 | When I asked CPM-Bee for its opinion on exploding fireworks, it seriously gave me a hilarious answer🤣.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 25)
3 |
4 | Time: January, 25 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.08
10 | - End: 2.04
11 |
12 | ## Completed Data
13 | - $\approx$ 2261.27GB
14 |
15 | ## Average Grad Norm
16 | - 1.25
17 |
18 | ## Progress
19 | - 93.36%
20 |
21 | ## Comment
22 |
23 | If you want to know the content of *"Man Jiang Hong"*, ask CPM-Bee😉.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 26)
3 |
4 | Time: January, 26 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.04
10 | - End: 2.03
11 |
12 | ## Completed Data
13 | - $\approx$ 2265.70GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 93.46%
20 |
21 | ## Comment
22 |
23 | What's the *Wandering Earth* project? CPM-Bee can tell you a version.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 27)
3 |
4 | Time: January, 27 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.03
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 2270.12GB
14 |
15 | ## Average Grad Norm
16 | - 1.27
17 |
18 | ## Progress
19 | - 93.56%
20 |
21 | ## Comment
22 |
23 | This phase of the training has cost half a million Yuan.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 28)
3 |
4 | Time: January, 28 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 2274.54GB
14 |
15 | ## Average Grad Norm
16 | - 1.25
17 |
18 | ## Progress
19 | - 93.66%
20 |
21 | ## Comment
22 |
23 | The CPM-Bee training is in its final stretch. Keep it up!
24 |
--------------------------------------------------------------------------------
/logs/2023-01-29.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 29)
3 |
4 | Time: January, 29 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 2278.95GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 93.76%
20 |
21 | ## Comment
22 |
23 | My nephew asked me what the difference is between a laptop and a desktop computer. I asked CPM-Bee to tell him.
24 |
--------------------------------------------------------------------------------
/logs/2023-01-30.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 30)
3 |
4 | Time: January, 30 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 2283.37GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 93.86%
20 |
21 | ## Comment
22 |
23 | How to translate Classical Chinese to English? Let CPM-Bee try it out!
24 |
25 | Chinese: 围师必阙,穷寇勿迫
26 |
27 | Translation: surround the enemy without closing in on him, and do not press the defeated enemy
28 |
--------------------------------------------------------------------------------
/logs/2023-01-31.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (January, 31)
3 |
4 | Time: January, 31 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 2287.79GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 93.96%
20 |
21 | ## Comment
22 |
23 | Today we let our model appreciate ancient poetry.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 1)
3 |
4 | Time: February, 1 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 2292.21GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 94.06%
20 |
21 | ## Comment
22 |
23 | How about the answer given by CPM-Bee today? I think it is excellent!
24 |
--------------------------------------------------------------------------------
/logs/2023-02-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 2)
3 |
4 | Time: February, 2 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.07
11 |
12 | ## Completed Data
13 | - $\approx$ 2296.55GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 94.16%
20 |
21 | ## Comment
22 |
23 | Want to know how to get to the airport? CPM-Bee can tell you.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 3)
3 |
4 | Time: February, 3 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.07
10 | - End: 2.04
11 |
12 | ## Completed Data
13 | - $\approx$ 2300.82GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 94.26%
20 |
21 | ## Comment
22 |
23 | Let our model show you some investment advice in the big model sector!
24 |
--------------------------------------------------------------------------------
/logs/2023-02-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 4)
3 |
4 | Time: February, 4 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.04
10 | - End: 2.02
11 |
12 | ## Completed Data
13 | - $\approx$ 2305.11GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 94.36%
20 |
21 | ## Comment
22 |
23 | When a big model has common sense, it becomes even more fascinating.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 5)
3 |
4 | Time: February, 5 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.02
10 | - End: 2.06
11 |
12 | ## Completed Data
13 | - $\approx$ 2309.52GB
14 |
15 | ## Average Grad Norm
16 | - 1.26
17 |
18 | ## Progress
19 | - 94.46%
20 |
21 | ## Comment
22 |
23 | Surprisingly, CPM-Bee has the ability to generate several rounds of dialogue for you.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 6)
3 |
4 | Time: February, 6 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.06
10 | - End: 2.05
11 |
12 | ## Completed Data
13 | - $\approx$ 2313.91GB
14 |
15 | ## Average Grad Norm
16 | - 1.27
17 |
18 | ## Progress
19 | - 94.56%
20 |
21 | ## Comment
22 |
23 | CPM-Bee can generate a letter template for you. Check it!
24 |
--------------------------------------------------------------------------------
/logs/2023-02-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 7)
3 |
4 | Time: February, 7 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.05
10 | - End: 2.04
11 |
12 | ## Completed Data
13 | - $\approx$ 2318.29GB
14 |
15 | ## Average Grad Norm
16 | - 1.27
17 |
18 | ## Progress
19 | - 94.66%
20 |
21 | ## Comment
22 |
23 | Our model generates a few more rounds of conversation today, and I think it's quite insightful. What do you think?
24 |
--------------------------------------------------------------------------------
/logs/2023-02-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 8)
3 |
4 | Time: February, 8 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.04
10 | - End: 2.38
11 |
12 | ## Completed Data
13 | - $\approx$ 2323.23GB
14 |
15 | ## Average Grad Norm
16 | - 1.33
17 |
18 | ## Progress
19 | - 94.76%
20 |
21 | ## Comment
22 |
23 | The dialogue generated today is very vivid. Quite good😁.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 9)
3 |
4 | Time: February, 9 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.38
10 | - End: 2.33
11 |
12 | ## Completed Data
13 | - $\approx$ 2334.51GB
14 |
15 | ## Average Grad Norm
16 | - 2.44
17 |
18 | ## Progress
19 | - 94.86%
20 |
21 | ## Comment
22 |
23 | As you can see, the training loss is higher than it was, because we are upgrading the model! Stay tuned!
24 |
--------------------------------------------------------------------------------
/logs/2023-02-10.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 10)
3 |
4 | Time: February, 10 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.33
10 | - End: 2.33
11 |
12 | ## Completed Data
13 | - $\approx$ 2349.66GB
14 |
15 | ## Average Grad Norm
16 | - 2.30
17 |
18 | ## Progress
19 | - 94.96%
20 |
21 | ## Comment
22 |
23 | If you are looking for some material to help you write an article, CPM-Bee can help you.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-11.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 11)
3 |
4 | Time: February, 11 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.33
10 | - End: 2.33
11 |
12 | ## Completed Data
13 | - $\approx$ 2364.83GB
14 |
15 | ## Average Grad Norm
16 | - 2.30
17 |
18 | ## Progress
19 | - 95.06%
20 |
21 | ## Comment
22 |
23 | I asked CPM-Bee to give me some advice today. The text it wrote is very well structured, and I hope it's right😄.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-12.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 12)
3 |
4 | Time: February, 12 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.33
10 | - End: 2.20
11 |
12 | ## Completed Data
13 | - $\approx$ 2376.99GB
14 |
15 | ## Average Grad Norm
16 | - 2.33
17 |
18 | ## Progress
19 | - 95.16%
20 |
21 | ## Comment
22 |
23 | There is a large drop in the training loss as we have added some new data in addition to the plain text😉.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-13.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 13)
3 |
4 | Time: February, 13 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.20
10 | - End: 2.17
11 |
12 | ## Completed Data
13 | - $\approx$ 2392.13GB
14 |
15 | ## Average Grad Norm
16 | - 2.32
17 |
18 | ## Progress
19 | - 95.26%
20 |
21 | ## Comment
22 |
23 | It's fun to watch a model learn a skill from scratch like a child. If it eventually becomes an elite in that field, we will have a great sense of achievement.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-14.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 14)
3 |
4 | Time: February, 14 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.17
10 | - End: 2.17
11 |
12 | ## Completed Data
13 | - $\approx$ 2407.23GB
14 |
15 | ## Average Grad Norm
16 | - 2.32
17 |
18 | ## Progress
19 | - 95.36%
20 |
21 | ## Comment
22 |
23 | Whether or not to have a Valentine's Day is a question worth pondering, and CPM-Bee gives a realistic answer.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-15.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 15)
3 |
4 | Time: February, 15 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.17
10 | - End: 2.18
11 |
12 | ## Completed Data
13 | - $\approx$ 2422.30GB
14 |
15 | ## Average Grad Norm
16 | - 2.33
17 |
18 | ## Progress
19 | - 95.46%
20 |
21 | ## Comment
22 |
23 | If you read CPM-Bee's answer today without the relevant background knowledge, you will probably choose to trust it.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-16.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 16)
3 |
4 | Time: February, 16 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.18
10 | - End: 2.16
11 |
12 | ## Completed Data
13 | - $\approx$ 2437.43GB
14 |
15 | ## Average Grad Norm
16 | - 2.35
17 |
18 | ## Progress
19 | - 95.56%
20 |
21 | ## Comment
22 |
23 | Can you find any flaws in the text generated by the model today without searching for anything?
24 |
25 |
--------------------------------------------------------------------------------
/logs/2023-02-17.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 17)
3 |
4 | Time: February, 17 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.16
10 | - End: 2.15
11 |
12 | ## Completed Data
13 | - $\approx$ 2452.54GB
14 |
15 | ## Average Grad Norm
16 | - 2.38
17 |
18 | ## Progress
19 | - 95.66%
20 |
21 | ## Comment
22 |
23 | The training loss was reduced from 2.4 to 2.1, and the model performance gained some improvement.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-18.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 18)
3 |
4 | Time: February, 18 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.15
10 | - End: 2.15
11 |
12 | ## Completed Data
13 | - $\approx$ 2467.66GB
14 |
15 | ## Average Grad Norm
16 | - 2.41
17 |
18 | ## Progress
19 | - 95.76%
20 |
21 | ## Comment
22 |
23 | You could try to get CPM-Bee to give you some advice; its suggestions seem to be reliable overall.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-19.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 19)
3 |
4 | Time: February, 19 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.15
10 | - End: 2.14
11 |
12 | ## Completed Data
13 | - $\approx$ 2482.74GB
14 |
15 | ## Average Grad Norm
16 | - 2.41
17 |
18 | ## Progress
19 | - 95.86%
20 |
21 | ## Comment
22 |
23 | I asked the model a question about the three-body problem, and its answer was about civilization and the universe🤔. It seems the model is very fond of that novel.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-20.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 20)
3 |
4 | Time: February, 20 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.14
10 | - End: 2.15
11 |
12 | ## Completed Data
13 | - $\approx$ 2497.91GB
14 |
15 | ## Average Grad Norm
16 | - 2.42
17 |
18 | ## Progress
19 | - 95.96%
20 |
21 | ## Comment
22 |
23 | CPM-Bee is quite good at memorizing ancient poems.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-21.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 21)
3 |
4 | Time: February, 21 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.15
10 | - End: 2.14
11 |
12 | ## Completed Data
13 | - $\approx$ 2513.00GB
14 |
15 | ## Average Grad Norm
16 | - 2.44
17 |
18 | ## Progress
19 | - 96.06%
20 |
21 | ## Comment
22 |
23 | Today's answer: "I'll take them all"😆.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-22.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 22)
3 |
4 | Time: February, 22 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.14
10 | - End: 2.15
11 |
12 | ## Completed Data
13 | - $\approx$ 2528.11GB
14 |
15 | ## Average Grad Norm
16 | - 2.45
17 |
18 | ## Progress
19 | - 96.16%
20 |
21 | ## Comment
22 |
23 | What do you think is the correct answer to today's quiz🤔?
24 |
--------------------------------------------------------------------------------
/logs/2023-02-23.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 23)
3 |
4 | Time: February, 23 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.15
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 2543.21GB
14 |
15 | ## Average Grad Norm
16 | - 2.47
17 |
18 | ## Progress
19 | - 96.26%
20 |
21 | ## Comment
22 |
23 | Today our model wrote a qualified essay for primary school students.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-24.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 24)
3 |
4 | Time: February, 24 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 2558.32GB
14 |
15 | ## Average Grad Norm
16 | - 2.55
17 |
18 | ## Progress
19 | - 96.36%
20 |
21 | ## Comment
22 |
23 | After reading today's answer, have you seen the progress of the model?
24 |
25 |
--------------------------------------------------------------------------------
/logs/2023-02-25.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 25)
3 |
4 | Time: February, 25 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 2573.39GB
14 |
15 | ## Average Grad Norm
16 | - 2.54
17 |
18 | ## Progress
19 | - 96.46%
20 |
21 | ## Comment
22 |
23 | In today's test, the model recommends several reasonable ways to travel, but there are a few problems with minor details.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-26.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 26)
3 |
4 | Time: February, 26 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 2588.50GB
14 |
15 | ## Average Grad Norm
16 | - 2.55
17 |
18 | ## Progress
19 | - 96.56%
20 |
21 | ## Comment
22 |
23 | Want CPM-Bee to answer two questions for you at once? No problem!
24 |
--------------------------------------------------------------------------------
/logs/2023-02-27.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 27)
3 |
4 | Time: February, 27 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 2603.63GB
14 |
15 | ## Average Grad Norm
16 | - 2.58
17 |
18 | ## Progress
19 | - 96.66%
20 |
21 | ## Comment
22 |
23 | CPM-Bee is making rapid progress towards a goal of becoming a competent AI writing assistant.
24 |
--------------------------------------------------------------------------------
/logs/2023-02-28.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (February, 28)
3 |
4 | Time: February, 28 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.12
11 |
12 | ## Completed Data
13 | - $\approx$ 2618.78GB
14 |
15 | ## Average Grad Norm
16 | - 2.59
17 |
18 | ## Progress
19 | - 96.76%
20 |
21 | ## Comment
22 |
23 | Goodbye February! The training of CPM-Bee is in its final stretch.
24 |
--------------------------------------------------------------------------------
/logs/2023-03-01.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 1)
3 |
4 | Time: March, 1 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.12
10 | - End: 2.09
11 |
12 | ## Completed Data
13 | - $\approx$ 2633.83GB
14 |
15 | ## Average Grad Norm
16 | - 2.60
17 |
18 | ## Progress
19 | - 96.86%
20 |
21 | ## Comment
22 |
23 | It seems that CPM-Bee has some knowledge of the "Dream of the Red Chamber".
24 |
--------------------------------------------------------------------------------
/logs/2023-03-02.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 2)
3 |
4 | Time: March, 2 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.09
10 | - End: 2.11
11 |
12 | ## Completed Data
13 | - $\approx$ 2648.91GB
14 |
15 | ## Average Grad Norm
16 | - 2.61
17 |
18 | ## Progress
19 | - 96.96%
20 |
21 | ## Comment
22 |
23 | March is a vibrant time of year, with endless possibilities for everything😉.
24 |
--------------------------------------------------------------------------------
/logs/2023-03-03.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 3)
3 |
4 | Time: March, 3 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 2.11
10 | - End: 1.86
11 |
12 | ## Completed Data
13 | - $\approx$ 2664.01GB
14 |
15 | ## Average Grad Norm
16 | - 2.63
17 |
18 | ## Progress
19 | - 97.06%
20 |
21 | ## Comment
22 |
23 | The loss is substantially reduced! Probably because the newly added training data is easier to learn.
24 |
--------------------------------------------------------------------------------
/logs/2023-03-04.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 4)
3 |
4 | Time: March, 4 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 1.86
10 | - End: 1.84
11 |
12 | ## Completed Data
13 | - $\approx$ 2678.92GB
14 |
15 | ## Average Grad Norm
16 | - 2.60
17 |
18 | ## Progress
19 | - 97.16%
20 |
21 | ## Comment
22 |
23 | It always brings a smile to my face when I see the model acquiring another new ability.
24 |
--------------------------------------------------------------------------------
/logs/2023-03-05.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 5)
3 |
4 | Time: March, 5 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 1.84
10 | - End: 1.82
11 |
12 | ## Completed Data
13 | - $\approx$ 2693.74GB
14 |
15 | ## Average Grad Norm
16 | - 2.50
17 |
18 | ## Progress
19 | - 97.26%
20 |
21 | ## Comment
22 |
23 | The first week of March ended on a busy note.
24 |
--------------------------------------------------------------------------------
/logs/2023-03-06.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 6)
3 |
4 | Time: March, 6 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 1.82
10 | - End: 1.79
11 |
12 | ## Completed Data
13 | - $\approx$ 2708.53GB
14 |
15 | ## Average Grad Norm
16 | - 2.50
17 |
18 | ## Progress
19 | - 97.36%
20 |
21 | ## Comment
22 |
23 | Our model can tell you what can help you fall asleep.
24 |
--------------------------------------------------------------------------------
/logs/2023-03-07.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 7)
3 |
4 | Time: March, 7 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 1.79
10 | - End: 1.79
11 |
12 | ## Completed Data
13 | - $\approx$ 2723.41GB
14 |
15 | ## Average Grad Norm
16 | - 2.49
17 |
18 | ## Progress
19 | - 97.46%
20 |
21 | ## Comment
22 |
23 | Give the model some specific requirements and it can help you write your article.
24 |
--------------------------------------------------------------------------------
/logs/2023-03-08.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 8)
3 |
4 | Time: March, 8 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 1.79
10 | - End: 1.79
11 |
12 | ## Completed Data
13 | - $\approx$ 2738.29GB
14 |
15 | ## Average Grad Norm
16 | - 2.51
17 |
18 | ## Progress
19 | - 97.56%
20 |
21 | ## Comment
22 |
23 | Happy International Women's Day to every woman!
24 |
--------------------------------------------------------------------------------
/logs/2023-03-09.md:
--------------------------------------------------------------------------------
1 |
2 | # CPM-Live Training Log (March, 9)
3 |
4 | Time: March, 9 2023 19:00
5 |
6 | Recorder: @zh-zheng
7 |
8 | ## Loss
9 | - Begin: 1.79
10 | - End: 1.78
11 |
12 | ## Completed Data
13 | - $\approx$ 2753.17GB
14 |
15 | ## Average Grad Norm
16 | - 2.46
17 |
18 | ## Progress
19 | - 97.66%
20 |
21 | ## Comment
22 |
23 | It would be very cool if a model could generate more than just plain text.
24 |
--------------------------------------------------------------------------------
/logs/2023-03-10.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 10)
2 |
3 | Time: March, 10 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.78
9 | - End: 1.78
10 |
11 | ## Completed Data
12 | - $\approx$ 2768.05GB
13 |
14 | ## Average Grad Norm
15 | - 2.48
16 |
17 | ## Progress
18 | - 97.76%
19 |
20 | ## Comment
21 |
22 | The dressing advice given by the model today is very well thought out.
23 |
--------------------------------------------------------------------------------
/logs/2023-03-11.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 11)
2 |
3 | Time: March, 11 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.78
9 | - End: 1.79
10 |
11 | ## Completed Data
12 | - $\approx$ 2782.78GB
13 |
14 | ## Average Grad Norm
15 | - 2.56
16 |
17 | ## Progress
18 | - 97.86%
19 |
20 | ## Comment
21 |
22 | It looks like the loss has stabilized again over the last few days.
23 |
--------------------------------------------------------------------------------
/logs/2023-03-12.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 12)
2 |
3 | Time: March, 12 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.79
9 | - End: 1.78
10 |
11 | ## Completed Data
12 | - $\approx$ 2797.49GB
13 |
14 | ## Average Grad Norm
15 | - 2.58
16 |
17 | ## Progress
18 | - 97.96%
19 |
20 | ## Comment
21 |
22 | The weekend is over. Let's see what happens next week!
23 |
--------------------------------------------------------------------------------
/logs/2023-03-13.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 13)
2 |
3 | Time: March, 13 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.78
9 | - End: 1.77
10 |
11 | ## Completed Data
12 | - $\approx$ 2812.35GB
13 |
14 | ## Average Grad Norm
15 | - 2.60
16 |
17 | ## Progress
18 | - 98.06%
19 |
20 | ## Comment
21 |
22 | Our model has a certain understanding of the parameters of TV, and this advertisement is written reasonably.
23 |
24 |
25 |
--------------------------------------------------------------------------------
/logs/2023-03-14.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 14)
2 |
3 | Time: March, 14 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.77
9 | - End: 1.77
10 |
11 | ## Completed Data
12 | - $\approx$ 2827.24GB
13 |
14 | ## Average Grad Norm
15 | - 2.61
16 |
17 | ## Progress
18 | - 98.16%
19 |
20 | ## Comment
21 |
22 | The evolution of models seems never-ending...
23 |
--------------------------------------------------------------------------------
/logs/2023-03-15.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 15)
2 |
3 | Time: March, 15 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.77
9 | - End: 1.76
10 |
11 | ## Completed Data
12 | - $\approx$ 2842.09GB
13 |
14 | ## Average Grad Norm
15 | - 2.64
16 |
17 | ## Progress
18 | - 98.26%
19 |
20 | ## Comment
21 |
22 | Sometimes I wonder how many days a day in the model world equals in the human world.
23 |
--------------------------------------------------------------------------------
/logs/2023-03-16.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 16)
2 |
3 | Time: March, 16 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.76
9 | - End: 1.77
10 |
11 | ## Completed Data
12 | - $\approx$ 2856.94GB
13 |
14 | ## Average Grad Norm
15 | - 2.66
16 |
17 | ## Progress
18 | - 98.36%
19 |
20 | ## Comment
21 |
22 | Who do you think is better, Newton or Einstein? Check the model's answer.
23 |
--------------------------------------------------------------------------------
/logs/2023-03-17.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 17)
2 |
3 | Time: March, 17 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.76
9 | - End: 1.73
10 |
11 | ## Completed Data
12 | - $\approx$ 2871.81GB
13 |
14 | ## Average Grad Norm
15 | - 2.68
16 |
17 | ## Progress
18 | - 98.46%
19 |
20 | ## Comment
21 |
22 | What a lively week!
23 |
--------------------------------------------------------------------------------
/logs/2023-03-18.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 18)
2 |
3 | Time: March, 18 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.73
9 | - End: 1.75
10 |
11 | ## Completed Data
12 | - $\approx$ 2886.72GB
13 |
14 | ## Average Grad Norm
15 | - 2.70
16 |
17 | ## Progress
18 | - 98.56%
19 |
20 | ## Comment
21 |
22 | Today, we test the performance of our model with a "what if" question.
23 |
--------------------------------------------------------------------------------
/logs/2023-03-19.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 19)
2 |
3 | Time: March, 19 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.75
9 | - End: 1.76
10 |
11 | ## Completed Data
12 | - $\approx$ 2901.50GB
13 |
14 | ## Average Grad Norm
15 | - 2.72
16 |
17 | ## Progress
18 | - 98.66%
19 |
20 | ## Comment
21 |
22 | The weekend is over and next week is another one to look forward to.
23 |
--------------------------------------------------------------------------------
/logs/2023-03-20.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 20)
2 |
3 | Time: March, 20 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.76
9 | - End: 1.74
10 |
11 | ## Completed Data
12 | - $\approx$ 2916.33GB
13 |
14 | ## Average Grad Norm
15 | - 2.74
16 |
17 | ## Progress
18 | - 98.76%
19 |
20 | ## Comment
21 |
22 | We are lucky because we are witnessing the impossible becoming a reality.
23 |
--------------------------------------------------------------------------------
/logs/2023-03-21.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 21)
2 |
3 | Time: March, 21 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.74
9 | - End: 1.73
10 |
11 | ## Completed Data
12 | - $\approx$ 2925.37GB
13 |
14 | ## Average Grad Norm
15 | - 2.20
16 |
17 | ## Progress
18 | - 98.86%
19 |
20 | ## Comment
21 |
22 | Maybe the next step is to make the purchase recommendations the model gives us more accurate and convincing.
23 |
24 |
--------------------------------------------------------------------------------
/logs/2023-03-22.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 22)
2 |
3 | Time: March, 22 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.73
9 | - End: 1.74
10 |
11 | ## Completed Data
12 | - $\approx$ 2932.65GB
13 |
14 | ## Average Grad Norm
15 | - 2.06
16 |
17 | ## Progress
18 | - 98.96%
19 |
20 | ## Comment
21 |
22 | In the past, we asked what models could do, but now we ask what models cannot do.
23 |
--------------------------------------------------------------------------------
/logs/2023-03-23.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 23)
2 |
3 | Time: March, 23 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.74
9 | - End: 1.80
10 |
11 | ## Completed Data
12 | - $\approx$ 2939.92GB
13 |
14 | ## Average Grad Norm
15 | - 2.07
16 |
17 | ## Progress
18 | - 99.06%
19 |
20 | ## Comment
21 |
22 | We often find ourselves sighing at the rapid development of technology. Perhaps AGI is coming soon?
23 |
24 |
--------------------------------------------------------------------------------
/logs/2023-03-24.md:
--------------------------------------------------------------------------------
1 | # CPM-Live Training Log (March, 24)
2 |
3 | Time: March, 24 2023 19:00
4 |
5 | Recorder: @zh-zheng
6 |
7 | ## Loss
8 | - Begin: 1.80
9 | - End: 1.77
10 |
11 | ## Completed Data
12 | - $\approx$ 2946.88GB
13 |
14 | ## Average Grad Norm
15 | - 2.09
16 |
17 | ## Progress
18 | - 99.16%
19 |
20 | ## Comment
21 |
22 | In the past, it might have been hard to imagine that we would one day judge a paragraph to have been written by a model because it was so well written.
23 |
--------------------------------------------------------------------------------
/plans/CPM-Ant训练计划书.md:
--------------------------------------------------------------------------------
1 | # CPM-Ant训练计划书
2 |
3 | 综合考虑数据和算力规模,CPM-Live将以10B模型训练为起点,我们将其命名为CPM-Ant。CPM-Ant 10B大模型训练将于2022年5月29日启动,预计整个训练周期为5个月。
4 |
5 | ## 一、模型架构
6 |
7 | CPM-Ant的模型架构与主要设置如下:
8 |
9 |
10 | ![CPM-Ant模型架构](pics/framework.png)
11 |
12 |
13 | - **基于提示模板的多段式框架设计(prompt-based multi-segment framework)**:提示模板(prompt)用以实现模型在理解、生成、摘要等功能之间的快速切换,也易于添加新的功能(学习新的prompt)。
14 |
15 | 文本段(segment)提供基础的文本编码能力,通过段表示(segment embedding)来影响模型编码模式,复杂的文本编码模式可以拆解成若干基础段的组合,例如编码-解码框架可以拆解成一个编码段+一个解码段的组合。对于每一个基础段,段内采用相对位置编码。
16 |
17 | 基于提示模板和文本段的组合、拼接,结构简单且易于实现增加、修改模块,进行持续学习和功能更新。
18 |
19 | - **共享embedding**:CPM-Ant输入embedding及输出embedding会共享参数,这点与BERT、GPT、T5一致,与T5-1.1、mT5不一致。我们的实验表明共享输入输出的embedding参数会极大增强训练稳定程度,而不共享embedding参数易于导致训练过程出现NaN。
20 | - **无bias**:我们的模型中,各类线性变换及layer norm均不设置bias。一方面源于不设置bias的模型训练稳定性会更强,另一方面也是不设置bias的模型在计算速度及显存消耗上要更占优。
21 | - **动态词表**:对于词表,初始阶段我们将提供大小为30000的中文词表,在后续训练过程中会结合新数据情况进行动态变动。
22 |
23 | ## 二、模型训练
24 |
25 | 模型训练过程中重点进行性能监控与问题处理,收集功能倡议与社区讨论意见并进行模型修改与反馈。主要计划开展相关工作如下:
26 | - 实时:展示模型训练指标曲线
27 | - 每日:发布单日模型训练日志
28 | - 每周:集中处理来自社区的反馈、讨论与意见
29 | - 不定期:发布模型训练的中间checkpoint并提供用户下载
30 |
31 | 模型训练完成后,CPM-Ant将对社区模型提议进行整理与集成,并将模型进行相应尺度的压缩,准备总结报告并陆续公布相关模型。此外,对于未能采纳的部分社区提议,我们将考虑引入到下一代模型训练中,并开始进行下一代模型的训练筹备。
32 |
33 | ## 三、数据分析
34 |
35 | CPM-Ant采用1TB原始数据,清洗后得到200GB高质量数据,数据详情如下表所示:
36 |
37 | <table>
38 | <tr>
39 | <td>数据来源</td>
40 | <td>占比</td>
41 | <td>文档平均长度(字)</td>
42 | <td>句子平均长度(字)</td>
43 | <td>平均PPL(mGPT)*</td>
44 | </tr>
45 | <tr>
46 | <td>书籍</td>
47 | <td>33.02%</td>
48 | <td>248495.71</td>
49 | <td>32.93</td>
50 | <td>273.777</td>
51 | </tr>
52 | <tr>
53 | <td>网页</td>
54 | <td>21.52%</td>
55 | <td>665.83</td>
56 | <td>28.304</td>
57 | <td>141.53</td>
58 | </tr>
59 | <tr>
60 | <td>小说</td>
61 | <td>20.76%</td>
62 | <td>62317.79</td>
63 | <td>30.839</td>
64 | <td>69.98</td>
65 | </tr>
66 | <tr>
67 | <td>杂志</td>
68 | <td>11.95%</td>
69 | <td>2534.16</td>
70 | <td>39.06</td>
71 | <td>83.22</td>
72 | </tr>
73 | <tr>
74 | <td>学术</td>
75 | <td>4.77%</td>
76 | <td>173.8</td>
77 | <td>58.044</td>
78 | <td>39.04</td>
79 | </tr>
80 | <tr>
81 | <td>百科</td>
82 | <td>2.25%</td>
83 | <td>1081.33</td>
84 | <td>32.466</td>
85 | <td>2072.53</td>
86 | </tr>
87 | <tr>
88 | <td>新闻</td>
89 | <td>1.79%</td>
90 | <td>717.87</td>
91 | <td>43.717</td>
92 | <td>56.85</td>
93 | </tr>
94 | <tr>
95 | <td>其它</td>
96 | <td>3.95%</td>
97 | <td>852.36</td>
98 | <td>37.68</td>
99 | <td>395.26</td>
100 | </tr>
101 | <tr><td colspan="5">* 使用mGPT计算得到句子平均PPL</td></tr>
102 | </table>
103 |
--------------------------------------------------------------------------------
/plans/CPM-Bee训练计划书.md:
--------------------------------------------------------------------------------
1 | # CPM-Bee训练计划书
2 |
3 | ## 一、模型架构
4 |
5 | CPM-Bee的结构整体与CPM-Ant保持一致,可参考[模型细节](https://www.openbmb.org/community/blogs/blogpage?id=98afef2ce45f4fe9a4bc15a66d7ccb92)。
6 |
7 | 相较于CPM-Ant,CPM-Bee增加了如下特性:
8 |
9 | - **增加结构化数据处理(Structured Data Processing)**:CPM-Bee将支持结构化数据(例如Json文件)的处理,以便更好地支持下游应用;
10 | - **增加预训练硬提示(Pre-trained Hard Prompts)**:为了让CPM-Bee更好地掌握不同任务所对应的结构化数据形式,我们通过硬提示来组织结构化数据,并在预训练中充分学习这些硬提示;
11 | - **增加词表扩展功能(Vocabulary Expansion)**:CPM-Bee是多语言模型,因此我们加入了词表扩展功能,以支持更多语言的持续学习,我们也引入了对不同语言同时进行处理的分词器;
12 | - **增加数量自适应[UNK]和[MASK]**:传统的预训练模型采用统一的[UNK]和[MASK]标签(或者类似T5预训练固定数量的[MASK]标签),难以对[UNK]及[MASK]进行精细处理。这里我们将引入数量自适应的[UNK]和[MASK]机制,即模型能自动识别输入数据中的[UNK]和[MASK],编码为[MASK#1]、[MASK#2]…以及[UNK#1]、[UNK#2]…,并对每个[UNK]和[MASK]单独进行特征处理。该机制十分有利于处理一些具有复杂上下文的网络文本,例如带有emoji的文本。
13 |
14 | 出于性能影响以及使用便利性的角度,CPM-Bee在CPM-Ant的基础上也引入了一些简化:
15 |
16 | - **删除段向量(Segment Embeddings)**:基于CPM-Ant的大量实验表明,在CPM-Ant的多段机制及相对位置编码(Multi-segment Mechanism & Relative Position Bias)中,结合段信息进行相对位置编码已经可以有效区分输入序列的分段特性,无需再采用类似BERT中的段向量来指导模型。此外,段向量往往面临泛化性的问题,即大量的预训练数据只存在1~2个分段,训练出的模型难以处理拥有2个分段以上的输入数据。因此,我们在CPM-Bee中去除了段向量的设计。
17 |
18 |
19 | ## 二、模型特性
20 |
21 | CPM-Bee具有如下模型特性:
22 |
23 | ### (一)多语言融合
24 |
25 | 在CPM-Ant中,我们着重构建以中文为核心的大模型。在CPM-Bee中,我们将逐渐加入英文以及其他语种的数据(包括各语言独立数据及跨语言平行数据),最终形成以中文为核心,多语种兼顾的大规模预训练语言模型。
26 |
27 | ### (二)复杂结构处理
28 |
29 | 已有的预训练语言模型主要立足于利用非结构化文本进行训练,因而对于半结构化及结构化数据的处理能力较弱。在CPM-Bee中,我们会加入各类半结构化及结构化数据的处理功能,以更好地支持网页、代码等结构化复杂文本的处理能力。下面展示了一条结构化训练数据:
30 |
31 | ```json
32 | {
33 |     "document": "今天天气是真的<mask_0>,我们去了<mask_1>,玩得非常<mask_2>。",
34 |     "<ans>": {
35 |         "<mask_0>": "好",
36 |         "<mask_1>": "颐和园",
37 |         "<mask_2>": "开心"
38 |     }
39 | }
40 | ```
41 |
42 | ### (三)任务模式增强
43 |
44 | 在CPM-Bee中,我们会在预训练过程中引入各类常见任务模式的数据增强,包括生成、问答、摘要、翻译等,支持CPM-Bee在各类文本处理任务上开箱即用,提升结合少量样本的参数高效微调的性能。
45 |
46 |
47 | ## 三、训练细节
48 |
49 | ### (一)训练数据
50 | CPM-Ant采用1TB原始数据,清洗后得到200GB高质量中文数据,这部分数据会被继续在CPM-Bee中使用。此外,CPM-Bee引入了400GB的多语言数据来进行预训练。基于上述600GB数据,我们设计了多样化的数据增广算法,使CPM-Bee具备多种语言能力,这些数据增广算法我们会在后续的技术报告中详细展开。
51 |
52 | ### (二)训练工具
53 | 在CPM-Ant中,我们已经验证了OpenBMB的大模型全流程计算框架的可靠性,因此CPM-Bee也将基于下列工具进行训练、微调、压缩与推理:
54 | - 训练工具 [[BMTrain](https://github.com/OpenBMB/BMTrain)]
55 | - 微调工具 [[OpenDelta](https://github.com/thunlp/OpenDelta)]
56 | - 压缩工具 [[BMCook](https://github.com/OpenBMB/BMCook)]
57 | - 推理工具 [[BMInf](https://github.com/OpenBMB/BMInf)]
58 |
59 |
60 | ### (三)训练计划
61 |
62 | CPM-Bee 10B大模型训练将于2022年10月13日启动,并将每月发布一版模型。
63 |
64 | 模型训练过程中重点进行性能监控与问题处理,收集功能倡议与社区讨论意见并进行模型修改与反馈。主要计划开展相关工作如下:
65 | - 实时:展示模型训练指标曲线
66 | - 每日:发布单日模型训练日志
67 | - 每周:集中处理来自社区的反馈、讨论与意见
68 | - 定期:发布模型训练的中间checkpoint并提供用户下载
69 |
70 | 模型训练完成后,CPM-Bee将对社区模型提议进行整理与集成,并将模型进行相应尺度的压缩,准备总结报告并陆续公布相关模型。此外,对于未能采纳的部分社区提议,我们将考虑引入到下一代模型训练中,并开始进行下一代模型的训练筹备。
71 |
--------------------------------------------------------------------------------
/plans/pics/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenBMB/CPM-Live/8ad7aa69143fbb41753013aac98744bf87abe0b1/plans/pics/framework.png
--------------------------------------------------------------------------------