├── .gitignore ├── LICENSE ├── README.md ├── chapter1 ├── mobilenetv2 │ ├── README.md │ ├── docker_install.md │ ├── eval.py │ ├── export_mindir.py │ ├── full_op.md │ ├── full_op_v1.1.1.md │ ├── simple_train_op.md │ ├── src │ │ ├── args.py │ │ ├── config.py │ │ ├── dataset.py │ │ ├── lr_generator.py │ │ ├── mobilenetV2.py │ │ ├── models.py │ │ └── utils.py │ └── train.py └── readme.md ├── chapter2 ├── README.md └── bert │ ├── README.md │ ├── convert_example.py │ ├── mindspore_hub_conf.py │ ├── pretrain_eval.py │ ├── run_classifier.py │ ├── run_ner.py │ ├── run_pretrain.py │ ├── run_squad.py │ ├── scripts │ ├── ascend_distributed_launcher │ │ ├── README.md │ │ ├── __init__.py │ │ ├── get_distribute_pretrain_cmd.py │ │ └── hyper_parameter_config.ini │ ├── run_classifier.sh │ ├── run_distributed_pretrain_ascend.sh │ ├── run_distributed_pretrain_for_gpu.sh │ ├── run_ner.sh │ ├── run_squad.sh │ ├── run_standalone_pretrain_ascend.sh │ └── run_standalone_pretrain_for_gpu.sh │ ├── src │ ├── CRF.py │ ├── __init__.py │ ├── assessment_method.py │ ├── bert_for_finetune.py │ ├── bert_for_pre_training.py │ ├── bert_model.py │ ├── clue_classification_dataset_process.py │ ├── cluener_evaluation.py │ ├── config.py │ ├── dataset.py │ ├── finetune_eval_config.py │ ├── finetune_eval_model.py │ ├── sample_process.py │ ├── score.py │ ├── tokenization.py │ └── utils.py │ ├── tokenization.py │ └── vocab.txt ├── chapter3 ├── README.md ├── docs │ ├── data_upload_obs.jpg │ ├── resnet50_predictconfig.jpg │ └── resnet50_trainconfig.jpg ├── mushroom-dataset │ └── .gitkeep ├── resnet_ascend │ ├── README.md │ ├── ckpt_files │ │ └── .gitkeep │ ├── resnet50_eval.py │ ├── resnet50_predict.py │ ├── resnet50_train.py │ └── src │ │ ├── CrossEntropySmooth.py │ │ ├── config.py │ │ ├── dataset.py │ │ └── resnet.py └── resnet_gpu │ ├── README.md │ ├── ckpt_files │ └── .gitkeep │ ├── eval.py │ ├── predict.py │ ├── src │ ├── CrossEntropySmooth.py │ ├── config.py │ ├── dataset.py │ ├── lr_generator.py │ └── resnet.py │ ├── train.py │ └── tum.jpg ├── chapter4 ├── README.md ├── basketball-dataset │ └── .gitkeep ├── docs │ ├── 00086.jpg │ ├── data_upload_obs.jpg │ ├── output.jpg │ ├── yolov3_evalconfig.jpg │ └── yolov3_predictconfig.jpg ├── yolov3_ascend │ ├── README.md │ ├── ckpt_files │ │ └── .gitkeep │ ├── eval.py │ ├── predict.py │ ├── src │ │ ├── __init__.py │ │ ├── config.py │ │ ├── darknet.py │ │ ├── distributed_sampler.py │ │ ├── initializer.py │ │ ├── logger.py │ │ ├── loss.py │ │ ├── lr_scheduler.py │ │ ├── transforms.py │ │ ├── util.py │ │ ├── yolo.py │ │ └── yolo_dataset.py │ └── train.py ├── yolov3_gpu │ ├── README.md │ ├── ckpt_files │ │ └── .gitkeep │ ├── eval.py │ ├── predict.py │ ├── src │ │ ├── __init__.py │ │ ├── config.py │ │ ├── darknet.py │ │ ├── distributed_sampler.py │ │ ├── initializer.py │ │ ├── logger.py │ │ ├── loss.py │ │ ├── lr_scheduler.py │ │ ├── transforms.py │ │ ├── util.py │ │ ├── yolo.py │ │ └── yolo_dataset.py │ └── train.py └── yolov4_ascend │ ├── eval.py │ ├── predict.py │ ├── src │ ├── __init__.py │ ├── config.py │ ├── cspdarknet53.py │ ├── distributed_sampler.py │ ├── initializer.py │ ├── logger.py │ ├── loss.py │ ├── lr_scheduler.py │ ├── transforms.py │ ├── util.py │ ├── yolo.py │ └── yolo_dataset.py │ └── train.py └── chapter5 ├── readme.md ├── wide_deep_ascend ├── eval.py ├── precess_train_eval.py ├── src │ ├── __init__.py │ ├── callbacks.py │ ├── config.py │ ├── datasets.py │ ├── metrics.py │ ├── preprocess_data.py │ ├── util.py │ └── wide_and_deep.py └── train_and_eval.py ├── wide_deep_ascend_v1.1.1 ├── README_CN.md ├── __init__.py ├── eval.py ├── process_train_eval.py ├── requirements.txt ├── src │ ├── __init__.py │ ├── callbacks.py │ ├── config.py │ ├── datasets.py │ ├── metrics.py │ ├── preprocess_data.py │ ├── util.py │ └── wide_and_deep.py └── train_and_eval.py └── wide_deep_gpu ├── README.md ├── eval.py ├── operation.md ├── src ├── __init__.py ├── callbacks.py ├── config.py ├── count_line.py ├── datasets.py ├── metrics.py ├── preprocess_data.py └── wide_and_deep.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | **/*.ckpt 132 | **/*.meta 133 | 134 | .idea 135 | .DS_Store 136 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mindspore-21-days-tutorials 2 | MindSpore 21 days tutorials. 3 | 4 | # 作业提交指南 5 | 为加快github下载速度,将作业提交指南移出21天教程仓,存放于华为云OBS平台,有需要的童鞋自行获取 6 | 7 | ### Windows/Mac用户 8 | 9 | 可直接点击[下载链接](https://21days-course.obs.cn-north-4.myhuaweicloud.com/homework_submit_guidance.docx)获取作业提交指南 10 | 11 | #### Linux用户 12 | 13 | 可使用wget命令获取作业提交指南 14 | ``` 15 | wget https://21days-course.obs.cn-north-4.myhuaweicloud.com/homework_submit_guidance.docx 16 | ``` 17 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/docker_install.md: -------------------------------------------------------------------------------- 1 | 注:此文档内容参考MindSpore官方代码仓[README.md](https://gitee.com/mindspore/mindspore#docker%E9%95%9C%E5%83%8F)文档 2 | 3 | ### 对于GPU后端,请确保提前安装好nvidia-container-toolkit,Ubuntu用户可参考如下指南: 4 | ``` 5 | DISTRIBUTION=$(. /etc/os-release; echo $ID$VERSION_ID) 6 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add - 7 | curl -s -L https://nvidia.github.io/nvidia-docker/$DISTRIBUTION/nvidia-docker.list | tee /etc/apt/sources.list.d/nvidia-docker.list 8 | 9 | sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit nvidia-docker2 10 | sudo systemctl restart docker 11 | ``` 12 | 然后执行如下命令,修改docker配置: 13 | ``` 14 | # 编辑daemon.json文件: 15 | vim /etc/docker/daemon.json 16 | 17 | # 在daemon.json里添加如下内容: 18 | { 19 | "runtimes": { 20 | "nvidia": { 21 | "path": "nvidia-container-runtime", 22 | "runtimeArgs": [] 23 | } 24 | } 25 | } 26 | 27 | # 保存并关闭daemon.json文件,然后重启docker: 28 | sudo systemctl daemon-reload 29 | sudo systemctl restart docker 30 | ``` 31 | 32 | ### 使用以下命令拉取/启动MindSpore 1.0.0 版本GPU镜像,若需要使用其他版本(如:1.1.1),则拉取/启动对应镜像版本即可。 33 | ``` 34 | # 拉取镜像 35 | docker pull mindspore/mindspore-gpu:1.0.0 36 | 37 | # 启动镜像 38 | docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:1.0.0 /bin/bash 39 | ``` 40 | 41 | ### 在容器终端输入`exit`命令即可退出容器 42 | 更多docker相关操作材料可自行查阅资料 43 | 44 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | eval. 17 | """ 18 | from mindspore import nn 19 | from mindspore.train.model import Model 20 | from mindspore.common import dtype as mstype 21 | 22 | from src.dataset import create_dataset 23 | from src.config import set_config 24 | from src.args import parse_args 25 | from src.models import define_net, load_ckpt 26 | from src.utils import switch_precision, set_context 27 | 28 | if __name__ == '__main__': 29 | args_opt = parse_args() 30 | config = set_config(args_opt) 31 | set_context(config) 32 | backbone_net, head_net, net = define_net(config, args_opt.is_training) 33 | 34 | load_ckpt(net, args_opt.pretrain_ckpt) 35 | 36 | switch_precision(net, mstype.float16, config) 37 | 38 | dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, config=config) 39 | step_size = dataset.get_dataset_size() 40 | if step_size == 0: 41 | raise ValueError("The step_size of dataset is zero. Check if the images count of eval dataset is more \ 42 | than batch_size in config.py") 43 | 44 | net.set_train(False) 45 | 46 | loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') 47 | model = Model(net, loss_fn=loss, metrics={'acc'}) 48 | 49 | res = model.eval(dataset) 50 | print(f"result:{res}\npretrain_ckpt={args_opt.pretrain_ckpt}") 51 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/export_mindir.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | export .mindir format file for MindSpore Lite reasoning. 17 | """ 18 | from mindspore.train.serialization import export, load_checkpoint, load_param_into_net 19 | from mindspore import Tensor 20 | from src.mobilenetV2 import MobileNetV2Backbone, MobileNetV2Head, mobilenet_v2 21 | import numpy as np 22 | import argparse 23 | 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser(description='export .mindir model file in the training side.') 26 | parser.add_argument('--platform', type=str, default='GPU', choices=['Ascend', 'GPU', 'CPU'], 27 | help='run platform, only support CPU, GPU and Ascend') 28 | parser.add_argument('--ckpt_path', type=str, required=True, default='./mobilenetV2-10_1562.ckpt', 29 | help='Pretrained checkpoint path') 30 | parser.add_argument('--mindir_name', type=str, default='mobilenetv2.mindir', 31 | help='.mindir model file name') 32 | args = parser.parse_args() 33 | backbone_net = MobileNetV2Backbone() 34 | head_net = MobileNetV2Head(input_channel=backbone_net.out_channels, 35 | num_classes=10, 36 | activation="Softmax") 37 | mobilenet = mobilenet_v2(backbone_net, head_net) 38 | # return a parameter dict for model 39 | param_dict = load_checkpoint(args.ckpt_path) 40 | # load the parameter into net 41 | load_param_into_net(mobilenet, param_dict) 42 | input = np.random.uniform(0.0, 1.0, size=[32, 3, 224, 224]).astype(np.float32) 43 | export(mobilenet, Tensor(input), file_name=args.mindir_name, file_format='MINDIR') 44 | 45 | 46 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/full_op.md: -------------------------------------------------------------------------------- 1 | 注:需事先安装好docker gpu环境,可参考[docker_install.md](https://github.com/mindspore-ai/mindspore-21-days-tutorials/blob/main/chapter1/mobilenetv2/docker_install.md)文件 2 | 3 | 若需要使用MindSpore v1.1.1版本,可参考[full_op_v1.1.1.md](https://github.com/mindspore-ai/mindspore-21-days-tutorials/blob/main/chapter1/mobilenetv2/full_op_v1.1.1.md)文件 4 | 5 | # 训练准备阶段 6 | ### 下载mindspore 1.0.0版本代码 7 | ``` 8 | # 在root用户主目录执行如下命令 9 | git clone https://gitee.com/mindspore/mindspore.git -b r1.0 10 | ``` 11 | 12 | ### 训练及模型导出脚本准备 13 | ##### 拷贝官方model_zoo提供的mobilenetv2训练脚本,做自定义修改 14 | ``` 15 | mkdir -p /root/workspace/mobile 16 | cp -r /root/mindspore/model_zoo/official/cv/mobilenetv2 /root/workspace/mobile/mobilenetv2 17 | ``` 18 | 若想快速体验运行脚本,并不想做自定义修改,可不执行上述cp命令,直接使用如下命令下载第一讲课程提供的[mobilenetv2脚本](https://github.com/mindspore-ai/mindspore-21-days-tutorials/tree/main/chapter1/mobilenetv2),将mobilenetv2目录拷贝至/root/workspace/mobile目录下即可。 19 | 20 | ``` 21 | git clone https://github.com/mindspore-ai/mindspore-21-days-tutorials.git 22 | cp -r /root/mindspore-21-days-tutorials/chapter1/mobilenetv2 /root/workspace/mobile 23 | ``` 24 | 25 | ##### 准备cifar-10数据集(binary二进制格式) 26 | 使用tar命令将下载好的数据集解压,生成5个训练集.bin文件和1个测试集.bin文件。 27 | ``` 28 | # 创建用于存放训练集的目录,将训练集5个.bin文件拷贝到该目录下 29 | mkdir -p /root/workspace/mobile/data/train 30 | 31 | # 创建用于存放测试集的目录,将测试集1个.bin文件拷贝到该目录下 32 | mkdir -p /root/workspace/mobile/data/eval 33 | ``` 34 | 35 | ##### 创建模型导出脚本 36 | 可直接使用第一讲课程提供的[mobilenetv2脚本](https://github.com/mindspore-ai/mindspore-21-days-tutorials/tree/main/chapter1/mobilenetv2),其命名为export_mindir.py 37 | 38 | 39 | ### 端侧converter_lite模型转换工具准备】 40 | ##### 编译端侧converter_lite模型转换工具 41 | 进入mindspore代码仓,开始编译,converter_lite目前仅支持x86_64架构,因此-I参数值为x86_64,j8表示系统是8核CPU。 42 | ``` 43 | cd /root/mindspore 44 | bash build.sh -I x86_64 -j8 //(约等待15分钟左右) 45 | ``` 46 | 编译成功后,会在output目录下生成`mindspore-lite-1.0.0-converter-ubuntu.tar.gz`和`mindspore-lite-1.0.0-runtime-x86-cpu.tar.gz`两个压缩文件。 47 | 48 | ##### 解压并配置converter_lite工具 49 | ``` 50 | cd /root/mindspore/output 51 | mkdir converter && mkdir runtime-x86 52 | tar -zxvf mindspore-lite-1.0.0-converter-ubuntu.tar.gz -C ./converter --strip-components 1 53 | tar -zxvf mindspore-lite-1.0.0-runtime-x86-cpu.tar.gz -C ./runtime-x86 --strip-components 1 54 | ``` 55 | 56 | 若使用 57 | 58 | 59 | # 训练启动阶段 60 | ### 启动GPU容器 61 | 使用GPU mindspore-1.0.0版本镜像,将端侧编译工具所在目录及训练脚本所在目录挂载到容器环境中 62 | ``` 63 | docker run -it -v /root/mindspore/output:/mslite_lib/ -v /root/workspace/mobile:/mobile --runtime=nvidia --privileged=true mindspore/mindspore-gpu:1.0.0 /bin/bash 64 | ``` 65 | 66 | ### 配置converter_lite所需库环境 67 | ``` 68 | cp /mslite_lib/converter/converter/converter_lite /usr/local/bin 69 | export LD_LIBRARY_PATH=/mslite_lib/converter/third_party/protobuf/lib:/mslite_lib/converter/third_party/flatbuffers/lib:/mslite_lib/runtime-x86/lib:${LD_LIBRARY_PATH} 70 | ``` 71 | 72 | ### 开始训练 73 | ``` 74 | cd /mobile/mobilenetv2 75 | python train.py --is_training=True --epoch_size=10 76 | ``` 77 | 78 | ### 验证结果 79 | ``` 80 | python eval.py --is_training=False --pretrain_ckpt=ckpt_0/mobilenetv2-10_1562.ckpt 81 | ``` 82 | 83 | ### 导出.mindir模型文件 84 | ``` 85 | python export_mindir.py --ckpt_path=ckpt_0/mobilenetv2-10_1562.ckpt 86 | ``` 87 | 88 | ### 转换生成端侧模型 89 | ``` 90 | converter_lite --fmk=MINDIR --modelFile=./mobilenetv2.mindir --outputFile=mobilenetv2 91 | ``` 92 | 93 | # 操作步骤视频 94 | [视频访问地址](https://mslite-app.obs.cn-north-4.myhuaweicloud.com:443/%E6%93%8D%E4%BD%9C%E8%A7%86%E9%A2%91-%E7%BB%88%E7%89%882.mp4?AccessKeyId=PQ7DQUATQUMX3VMMPIPM&Expires=1606355515&Signature=A5ZpMN1CqGm8btd57Egvf9LjSuQ%3D) 95 | 96 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/full_op_v1.1.1.md: -------------------------------------------------------------------------------- 1 | 注:需事先安装好docker gpu环境,可参考[docker_install.md](https://github.com/mindspore-ai/mindspore-21-days-tutorials/blob/main/chapter1/mobilenetv2/docker_install.md)文件。 2 | 3 | # 训练准备阶段 4 | ### 启动GPU容器 5 | 使用GPU mindspore-1.1.1版本镜像,启动容器 6 | ``` 7 | docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:1.1.1 /bin/bash 8 | ``` 9 | 10 | ### 下载mindspore 1.1.1版本代码 11 | ``` 12 | # 在容器root用户主目录执行如下命令 13 | git clone https://gitee.com/mindspore/mindspore.git -b r1.1 14 | ``` 15 | 16 | ### 训练及模型导出脚本准备 17 | ##### 拷贝官方model_zoo提供的mobilenetv2训练脚本,做自定义修改 18 | ``` 19 | mkdir -p /root/workspace/mobile 20 | cp -r /root/mindspore/model_zoo/official/cv/mobilenetv2 /root/workspace/mobile/mobilenetv2 21 | ``` 22 | 若想快速体验运行脚本,并不想做自定义修改,可不执行上述cp命令,直接使用如下命令下载第一讲课程提供的[mobilenetv2脚本](https://github.com/mindspore-ai/mindspore-21-days-tutorials/tree/main/chapter1/mobilenetv2),将mobilenetv2目录拷贝至/root/workspace/mobile目录下即可。 23 | 24 | ``` 25 | git clone https://github.com/mindspore-ai/mindspore-21-days-tutorials.git 26 | cp -r /root/mindspore-21-days-tutorials/chapter1/mobilenetv2 /root/workspace/mobile 27 | ``` 28 | 29 | ##### 准备cifar-10数据集(binary二进制格式) 30 | 使用tar命令将下载好的数据集解压,生成5个训练集.bin文件和1个测试集.bin文件。 31 | ``` 32 | # 创建用于存放训练集的目录,将训练集5个.bin文件拷贝到该目录下 33 | mkdir -p /root/workspace/mobile/data/train 34 | 35 | # 创建用于存放测试集的目录,将测试集1个.bin文件拷贝到该目录下 36 | mkdir -p /root/workspace/mobile/data/eval 37 | ``` 38 | 39 | ### 端侧converter_lite模型转换工具准备】 40 | ##### 编译端侧converter_lite模型转换工具 41 | 进入mindspore代码仓,开始编译,converter_lite目前仅支持x86_64架构,因此-I参数值为x86_64,j8表示系统是8核CPU。 42 | ``` 43 | cd /root/mindspore 44 | bash build.sh -I x86_64 -j8 //(约等待15分钟左右) 45 | ``` 46 | 编译成功后,会在output目录下生成`mindspore-lite-1.1.1-converter-linux-64.tar.gz`和`mindspore-lite-1.1.1-inference-linux-x64.tar.gz` 47 | 48 | ##### 解压并配置converter_lite工具 49 | ``` 50 | cd /root/mindspore/output 51 | mkdir -p /usr/local/converter 52 | tar -zxvf mindspore-lite-1.1.1-converter-linux-64.tar.gz -C /usr/local/converter --strip-components 1 53 | ``` 54 | 55 | ##### 配置converter_lite所需库环境 56 | ``` 57 | export LD_LIBRARY_PATH=/usr/local/converter/lib:/usr/local/converter/third_party/glog/lib:${LD_LIBRARY_PATH} 58 | ``` 59 | 60 | # 训练启动阶段 61 | ### 开始训练 62 | ``` 63 | cd /root/workspace/mobile/mobilenetv2 64 | python train.py --is_training=True --epoch_size=10 65 | ``` 66 | 67 | ### 验证结果 68 | ``` 69 | python eval.py --is_training=False --pretrain_ckpt=ckpt_0/mobilenetv2-10_1562.ckpt 70 | ``` 71 | 72 | ### 导出.mindir模型文件 73 | ``` 74 | python export_mindir.py --ckpt_path=ckpt_0/mobilenetv2-10_1562.ckpt 75 | ``` 76 | 77 | ### 转换生成端侧模型 78 | ``` 79 | converter_lite --fmk=MINDIR --modelFile=./mobilenetv2.mindir --outputFile=mobilenetv2 80 | ``` 81 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/simple_train_op.md: -------------------------------------------------------------------------------- 1 | 注:需事先安装好docker gpu环境,可参考[docker_install.md](https://github.com/mindspore-ai/mindspore-21-days-tutorials/blob/main/chapter1/mobilenetv2/docker_install.md)文件 2 | 3 | # 训练准备阶段 4 | ### 下载mobilenetv2体验脚本 5 | ``` 6 | # 在root用户主目录下执行如下命令 7 | git clone https://github.com/mindspore-ai/mindspore-21-days-tutorials.git 8 | mkdir -p /root/workspace/mobile 9 | cp -r /root/mindspore-21-days-tutorials/chapter1/mobilenetv2 /root/workspace/mobile 10 | ``` 11 | 12 | ### 准备cifar-10数据集(binary二进制格式) 13 | 将下载好的数据集解压,生成5个训练集.bin文件和1个测试集.bin文件。 14 | ``` 15 | # 下载并解压cifar-10数据集, 生成5个训练集.bin文件和1个测试集.bin文件 16 | wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz 17 | tar -zxvf cifar-10-binary.tar.gz 18 | 19 | # 创建用于存放训练集的目录,将训练集5个.bin文件拷贝到该目录下 20 | mkdir -p /root/workspace/mobile/data/train 21 | cp /root/cifar-10-batches-bin/data_*.bin /root/workspace/mobile/data/train 22 | 23 | # 创建用于存放测试集的目录,将测试集1个.bin文件拷贝到该目录下 24 | mkdir -p /root/workspace/mobile/data/eval 25 | cp /root/cifar-10-batches-bin/test_batch.bin /root/workspace/mobile/data/eval 26 | ``` 27 | 28 | ### 训练启动阶段 29 | ##### 启动GPU容器 30 | 使用GPU mindspore-1.0.0版本镜像,将训练脚本及数据集所在目录挂载到容器环境中 31 | ``` 32 | docker run -it -v /root/workspace/mobile:/mobile --runtime=nvidia --privileged=true mindspore/mindspore-gpu:1.0.0 /bin/bash 33 | ``` 34 | 35 | ##### 开始训练 36 | ``` 37 | cd /mobile/mobilenetv2 38 | python train.py --is_training=True --epoch_size=10 39 | ``` 40 | 41 | ##### 验证结果 42 | ``` 43 | python eval.py --is_training=False --pretrain_ckpt=ckpt_0/mobilenetv2-10_1562.ckpt 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/src/args.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import argparse 17 | import ast 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description='Image classification') 22 | parser.add_argument('--is_training', type=bool, default=True, 23 | help='training is True, eval is False.') 24 | parser.add_argument('--platform', type=str, default="GPU", choices=("CPU", "GPU", "Ascend"), 25 | help='run platform, only support CPU, GPU and Ascend') 26 | parser.add_argument('--dataset_path', type=str, default='../data/', help='Dataset path') 27 | parser.add_argument('--epoch_size', type=int, default=1, help='Train epoch size') 28 | parser.add_argument('--pretrain_ckpt', type=str, default="", 29 | help='Pretrained checkpoint path for fine tune or incremental learning') 30 | parser.add_argument('--freeze_layer', type=str, default="", choices=["", "none", "backbone"], 31 | help="freeze the weights of network from start to which layers") 32 | parser.add_argument('--run_distribute', type=ast.literal_eval, default=False, help='Run distribute') 33 | args = parser.parse_args() 34 | return args 35 | 36 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | network config setting, will be used in train.py and eval.py 17 | """ 18 | import os 19 | from easydict import EasyDict as ed 20 | 21 | 22 | def set_config(args): 23 | config_cpu = ed({ 24 | "num_classes": 26, 25 | "image_height": 224, 26 | "image_width": 224, 27 | "batch_size": 150, 28 | "warmup_epochs": 0, 29 | "lr_init": .0, 30 | "lr_end": 0.03, 31 | "lr_max": 0.03, 32 | "momentum": 0.9, 33 | "weight_decay": 4e-5, 34 | "label_smooth": 0.1, 35 | "loss_scale": 1024, 36 | "save_checkpoint": True, 37 | "save_checkpoint_epochs": 1, 38 | "keep_checkpoint_max": 20, 39 | "save_checkpoint_path": "./", 40 | "platform": args.platform, 41 | "run_distribute": False, 42 | "activation": "Softmax" 43 | }) 44 | config_gpu = ed({ 45 | "num_classes": 10, 46 | "image_height": 224, 47 | "image_width": 224, 48 | "batch_size": 32, 49 | "warmup_epochs": 1, 50 | "lr_init": .0, 51 | "lr_end": .0, 52 | "lr_max": 0.1, 53 | "momentum": 0.9, 54 | "weight_decay": 4e-5, 55 | "label_smooth": 0.1, 56 | "loss_scale": 1024, 57 | "save_checkpoint": True, 58 | "save_checkpoint_epochs": 1, 59 | "keep_checkpoint_max": 1, 60 | "save_checkpoint_path": "./", 61 | "platform": args.platform, 62 | "ccl": "nccl", 63 | "run_distribute": args.run_distribute, 64 | "activation": "Softmax" 65 | }) 66 | config_ascend = ed({ 67 | "num_classes": 1000, 68 | "image_height": 224, 69 | "image_width": 224, 70 | "batch_size": 256, 71 | "warmup_epochs": 4, 72 | "lr_init": 0.00, 73 | "lr_end": 0.00, 74 | "lr_max": 0.4, 75 | "momentum": 0.9, 76 | "weight_decay": 4e-5, 77 | "label_smooth": 0.1, 78 | "loss_scale": 1024, 79 | "save_checkpoint": True, 80 | "save_checkpoint_epochs": 1, 81 | "keep_checkpoint_max": 200, 82 | "save_checkpoint_path": "./", 83 | "platform": args.platform, 84 | "ccl": "hccl", 85 | "device_id": int(os.getenv('DEVICE_ID', '0')), 86 | "rank_id": int(os.getenv('RANK_ID', '0')), 87 | "rank_size": int(os.getenv('RANK_SIZE', '1')), 88 | "run_distribute": int(os.getenv('RANK_SIZE', '1')) > 1., 89 | "activation": "Softmax" 90 | }) 91 | config = ed({"CPU": config_cpu, 92 | "GPU": config_gpu, 93 | "Ascend": config_ascend}) 94 | 95 | if args.platform not in config.keys(): 96 | raise ValueError("Unsupport platform.") 97 | 98 | return config[args.platform] 99 | 100 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/src/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Create train or eval dataset.""" 16 | import os 17 | import mindspore.common.dtype as mstype 18 | import mindspore.dataset.engine as de 19 | import mindspore.dataset.vision.c_transforms as C 20 | import mindspore.dataset.transforms.c_transforms as C2 21 | 22 | 23 | def create_dataset(dataset_path, do_train, config, repeat_num=1): 24 | """ 25 | Create a train or eval dataset. 26 | 27 | Args: 28 | dataset_path (string): The path of dataset. 29 | do_train (bool): Whether dataset is used for train or eval. 30 | config: configuration 31 | repeat_num (int): The repeat times of dataset. Default: 1. 32 | Returns: 33 | Dataset. 34 | """ 35 | if do_train: 36 | dataset_path = os.path.join(dataset_path, 'train') 37 | do_shuffle = True 38 | else: 39 | dataset_path = os.path.join(dataset_path, 'eval') 40 | do_shuffle = False 41 | 42 | device_id = 0 43 | device_num = 1 44 | if config.platform == "GPU": 45 | if config.run_distribute: 46 | from mindspore.communication.management import get_rank, get_group_size 47 | device_id = get_rank() 48 | device_num = get_group_size() 49 | elif config.platform == "Ascend": 50 | device_id = int(os.getenv('DEVICE_ID')) 51 | device_num = int(os.getenv('RANK_SIZE')) 52 | 53 | if device_num == 1 or not do_train: 54 | ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=do_shuffle) 55 | else: 56 | ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=do_shuffle, 57 | num_shards=device_num, shard_id=device_id) 58 | 59 | resize_height = config.image_height 60 | resize_width = config.image_width 61 | buffer_size = 100 62 | rescale = 1.0 / 255.0 63 | shift = 0.0 64 | 65 | # define map operations 66 | random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4)) 67 | random_horizontal_flip_op = C.RandomHorizontalFlip(device_id / (device_id + 1)) 68 | 69 | resize_op = C.Resize((resize_height, resize_width)) 70 | rescale_op = C.Rescale(rescale, shift) 71 | normalize_op = C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) 72 | 73 | change_swap_op = C.HWC2CHW() 74 | 75 | trans = [] 76 | if do_train: 77 | trans += [random_crop_op, random_horizontal_flip_op] 78 | 79 | trans += [resize_op, rescale_op, normalize_op, change_swap_op] 80 | 81 | type_cast_op = C2.TypeCast(mstype.int32) 82 | 83 | ds = ds.map(input_columns="label", operations=type_cast_op) 84 | ds = ds.map(input_columns="image", operations=trans) 85 | 86 | # apply shuffle operations 87 | ds = ds.shuffle(buffer_size=buffer_size) 88 | 89 | # apply batch operations 90 | ds = ds.batch(config.batch_size, drop_remainder=True) 91 | 92 | # apply dataset repeat operation 93 | ds = ds.repeat(repeat_num) 94 | 95 | return ds 96 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/src/lr_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """learning rate generator""" 16 | import math 17 | import numpy as np 18 | 19 | 20 | def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch): 21 | """ 22 | generate learning rate array 23 | 24 | Args: 25 | global_step(int): total steps of the training 26 | lr_init(float): init learning rate 27 | lr_end(float): end learning rate 28 | lr_max(float): max learning rate 29 | warmup_epochs(int): number of warmup epochs 30 | total_epochs(int): total epoch of training 31 | steps_per_epoch(int): steps of one epoch 32 | 33 | Returns: 34 | np.array, learning rate array 35 | """ 36 | lr_each_step = [] 37 | total_steps = steps_per_epoch * total_epochs 38 | warmup_steps = steps_per_epoch * warmup_epochs 39 | for i in range(total_steps): 40 | if i < warmup_steps: 41 | lr = lr_init + (lr_max - lr_init) * i / warmup_steps 42 | else: 43 | lr = lr_end + \ 44 | (lr_max - lr_end) * \ 45 | (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2. 46 | if lr < 0.0: 47 | lr = 0.0 48 | lr_each_step.append(lr) 49 | 50 | current_step = global_step 51 | lr_each_step = np.array(lr_each_step).astype(np.float32) 52 | learning_rate = lr_each_step[current_step:] 53 | 54 | return learning_rate 55 | -------------------------------------------------------------------------------- /chapter1/mobilenetv2/src/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | from mindspore import context 17 | from mindspore import nn 18 | from mindspore.common import dtype as mstype 19 | from mindspore.train.model import ParallelMode 20 | from mindspore.train.callback import ModelCheckpoint, CheckpointConfig 21 | from mindspore.communication.management import get_rank, init, get_group_size 22 | 23 | from src.models import Monitor 24 | 25 | 26 | def switch_precision(net, data_type, config): 27 | if config.platform == "Ascend": 28 | net.to_float(data_type) 29 | for _, cell in net.cells_and_names(): 30 | if isinstance(cell, nn.Dense): 31 | cell.to_float(mstype.float32) 32 | 33 | 34 | def context_device_init(config): 35 | if config.platform == "CPU": 36 | context.set_context(mode=context.GRAPH_MODE, device_target=config.platform, save_graphs=False) 37 | 38 | elif config.platform == "GPU": 39 | context.set_context(mode=context.GRAPH_MODE, device_target=config.platform, save_graphs=False) 40 | if config.run_distribute: 41 | init("nccl") 42 | context.set_auto_parallel_context(device_num=get_group_size(), 43 | parallel_mode=ParallelMode.DATA_PARALLEL, 44 | gradients_mean=True) 45 | 46 | elif config.platform == "Ascend": 47 | context.set_context(mode=context.GRAPH_MODE, device_target=config.platform, device_id=config.device_id, 48 | save_graphs=False) 49 | if config.run_distribute: 50 | context.set_auto_parallel_context(device_num=config.rank_size, 51 | parallel_mode=ParallelMode.DATA_PARALLEL, 52 | gradients_mean=True) 53 | init() 54 | else: 55 | raise ValueError("Only support CPU, GPU and Ascend.") 56 | 57 | 58 | def set_context(config): 59 | if config.platform == "CPU": 60 | context.set_context(mode=context.GRAPH_MODE, device_target=config.platform, 61 | save_graphs=False) 62 | elif config.platform == "Ascend": 63 | context.set_context(mode=context.GRAPH_MODE, device_target=config.platform, 64 | device_id=config.device_id, save_graphs=False) 65 | elif config.platform == "GPU": 66 | context.set_context(mode=context.GRAPH_MODE, 67 | device_target=config.platform, save_graphs=False) 68 | 69 | 70 | def config_ckpoint(config, lr, step_size): 71 | cb = None 72 | if config.platform in ("CPU", "GPU") or config.rank_id == 0: 73 | cb = [Monitor(lr_init=lr.asnumpy())] 74 | 75 | if config.save_checkpoint: 76 | config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size, 77 | keep_checkpoint_max=config.keep_checkpoint_max) 78 | 79 | rank = 0 80 | if config.run_distribute: 81 | rank = get_rank() 82 | 83 | ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(rank) + "/" 84 | ckpt_cb = ModelCheckpoint(prefix="mobilenetv2", directory=ckpt_save_dir, config=config_ck) 85 | cb += [ckpt_cb] 86 | return cb 87 | -------------------------------------------------------------------------------- /chapter1/readme.md: -------------------------------------------------------------------------------- 1 | # 端侧图像分类应用安装包 2 | [下载链接](https://mslite-app.obs.cn-north-4.myhuaweicloud.com:443/app-ms.apk?AccessKeyId=PQ7DQUATQUMX3VMMPIPM&Expires=1606143317&Signature=Of6H2O9/tVpZx6KyIepv8/la//g%3D) 3 | 4 | # 应用介绍及逻辑代码思维导图 5 | [导图1_访问地址](https://mslite-app.obs.cn-north-4.myhuaweicloud.com:443/app_introduction.png?AccessKeyId=PQ7DQUATQUMX3VMMPIPM&Expires=1606143283&Signature=Y9ZR%2B/zvMPbeUyLUSYeyBtTH7/I%3D) 6 | 7 | [导图2_访问地址](https://mslite-app.obs.cn-north-4.myhuaweicloud.com:443/app_code.png?AccessKeyId=PQ7DQUATQUMX3VMMPIPM&Expires=1606143243&Signature=T00iYTzDVPyl4HuS1o/pzS6Rlj0%3D) 8 | 9 | [导图3_访问地址](https://mslite-app.obs.cn-north-4.myhuaweicloud.com:443/app_code_v1.png?AccessKeyId=PQ7DQUATQUMX3VMMPIPM&Expires=1606143174&Signature=QHT/%2B%2BiCyWHwBP%2B%2BHPwhO%2BXYxeE%3D) 10 | 11 | # 安卓手机USB调试模式设置参考视频 12 | [视频地址](https://mslite-app.obs.cn-north-4.myhuaweicloud.com:443/phone_usb.mp4?AccessKeyId=PQ7DQUATQUMX3VMMPIPM&Expires=1606355419&Signature=FkBj9/M4rxz7qQpZxt2CbOxj4Ik%3D) 13 | 14 | # 端侧作业说明文档 15 | 为加快github下载速度,将作业说明文档移出21天教程仓,存放于华为云OBS平台,有需要的同事自行获取 16 | 17 | Windows/Mac用户: 18 | 19 | 可直接点击[下载链接](https://mslite-app.obs.cn-north-4.myhuaweicloud.com/MindSpore_Lite_homework_v2.docx)获取作业说明文档 20 | 21 | Linux用户: 22 | 23 | 可使用wget命令获取作业说明文档 24 | ``` 25 | wget https://mslite-app.obs.cn-north-4.myhuaweicloud.com/MindSpore_Lite_homework_v2.docx 26 | ``` 27 | 28 | # 端侧应用安装指导文档 29 | 为加快github下载速度,将安装指导文档移出21天教程仓,存放于华为云OBS平台,有需要的同事自行获取 30 | 31 | Windows/Mac用户: 32 | 33 | 可直接点击[下载链接](https://mslite-app.obs.cn-north-4.myhuaweicloud.com/MSLite_App_Install_guidance.docx)获取作业说明文档 34 | 35 | Linux用户: 36 | 37 | 可使用wget命令获取作业说明文档 38 | ``` 39 | wget https://mslite-app.obs.cn-north-4.myhuaweicloud.com/MSLite_App_Install_guidance.docx 40 | ``` 41 | 42 | # 使用容器执行训练等操作 43 | 注:仅提供相关脚本供开发者快速使用,对模型未进行过深度调优。 44 | 45 | 如果需要体验模型训练和转换整个过程,可访chapter1目录下[full_op.md](https://github.com/mindspore-ai/mindspore-21-days-tutorials/blob/main/chapter1/mobilenetv2/full_op.md)文件 46 | 47 | 如果只是想体验模型训练的过程,可访问chapter1目录下[simple_train_op.md](https://github.com/mindspore-ai/mindspore-21-days-tutorials/blob/main/chapter1/mobilenetv2/simple_train_op.md) 48 | 49 | # 视频教程 50 | [基于MindSpore Lite开发图像识别与检测的安卓APP实战](https://www.bilibili.com/video/BV1dA411L7Lg) 51 | -------------------------------------------------------------------------------- /chapter2/README.md: -------------------------------------------------------------------------------- 1 | #### 因容量原因,将指导文档移出repo,课前准备文档、体验作业文档和进阶作业文档可从以下链接中获取 2 | 3 | # 课前准备文档 4 | [MindSpore Bert课前准备文档](https://e-share.obs-website.cn-north-1.myhuaweicloud.com?token=SvjC1tq+9EC0fILT0WI5w1SIJKMVsoAn6d3hiNI5bHpGQMMKuJiZhGfVhyPTgqUIFIVIYKtAmENUpN6ssH654op9P9KNXqX021meACXkf2EZ+O58B+FMoSmBJoaoi+97k52R89dhiOtO4HUMuIK6JYdZImjBOaZ1gxeB+gXCgHBKRNpzfcccU4d41LlA3BPBHqj522A/6usmFwbVKnSPbzaAGtCEd3ii5fnplIb9ERpkeJslRvc2LvLbc3m/Mssq6WjER3h0y0YZo6ZdXLnWsWx80qMcPA9ZV7b+guGx1bZrGhxpfPOCvewuTBgPl8WLlGhKx2EWL6G/iOe/ANpGPqvQKyQ7YYzNY215QzFbBQOKnckNqjgpA7zEN3/pNF4jSeVkEa64/i/X7JIrt9/i5AJn3/5/pCti37azPA9oWp466Q7TFVCb++NefcFcCsFe10ycrzjnyzS1Ki6UYzU5yxy2dsmYVdliE8vlvsSZH66KwIJ1z6UR/N7+5n4FPOzSuQuS1VWbK4QButAmBPpGz2OjdCHiNqV/9i1XBt8/OsfxLmQGf8zt7OHnY6auLYJEBfe6Hk0xI6n95sRSx/X6JzLt+fsSspm5Aht3cI9W+7tUYlEXywJHLkNs/a+7JPyZqN2ZhZbI08kuilO4ehHtWA==) (提取码: 123456) 5 | 6 | # Bert体验作业文档 7 | [Bert体验作业](https://e-share.obs-website.cn-north-1.myhuaweicloud.com?token=YtZdwzSsMhmsWVIOM1bqDVXFzpdT/ss8chNzNd8w4KTbE8i55j0ROtDTGFZLBGdefsHCCTeGqH2k6Pris4DrmaW7QBT7z/2dpVn+YViM4/qeKWpWaNyqZwm7cZXXXE8ratJvYYnRjqVO5vBQS394ytq27P/ojt5sWa2KQugxh2Rx6zn8MPJrhKGi336C6pbPRcW96GMzyGEwlXjpUYcTEvGCZsCofnwk7bKx+N3jEWmn+MPgMbY7Dz1gzOdN9NqrQwX1EaLdtAoA4TWf5ewJ2uT8Y9MWqDo2u6UHoNmGicoRSYzYzhMWLB6IzqfsAHKjvtKIo1Qr2u3Dbb6Dp5HPVxW9jcaIr+Htom0asXHyvwSR5UPBWCQIMhb/p7zVV2frmL5KriIZ7Doey9g60efvFW8+GOnulr8LqlJOMB7+Surp65jAqwqSqvc8z9zd9eswXeeiT1S/X4+oSwbDg7wVr4JyNxV5/1Ny+hkIDfZxha2kl/pu/lSpfySh+wpEJ7y5AEy6nKwTOZzvtaub5QGfk9GnTLqgvqCs/i8OGXJyb8QE2xAtZQBn2KK/rrum5jR1N4amMwC+mzSTCJl61ctxJA==) (提取码: 123456) 8 | 9 | # Bert进阶作业文档 10 | [Bert进阶作业](https://e-share.obs-website.cn-north-1.myhuaweicloud.com?token=YtZdwzSsMhmsWVIOM1bqDVXFzpdT/ss8chNzNd8w4KTbE8i55j0ROtDTGFZLBGdefsHCCTeGqH2k6Pris4DrmaW7QBT7z/2dpVn+YViM4/qeKWpWaNyqZwm7cZXXXE8ratJvYYnRjqVO5vBQS394ytq27P/ojt5sWa2KQugxh2Rx6zn8MPJrhKGi336C6pbPRcW96GMzyGEwlXjpUYcTEsDwrVMnN2lAeflCCiaN63kZ3+03SenbKq39eW3MNhB3K7sWPkfUIZWdc3juGzG2pnsXXa44Scl3fXU/7he1Yp94pJCHs79CsrtB3dZqxD7Maayy4SCQrk7W19QD7C4Jk6FPwRnhZ++JQAT1lxPOxf0v3cTw6gPbN3kgZW+XrfPqRmkNIm9jztv46Unt6VFsjXWgNRoFJRU+Qymc4sd7sL0ldJtOqYOvTD3A/O+MSOp5UDSVcpvZ2pamb34z9pgifUtQg/4ah2AJZQf/tjPMA9bn3euw+Ffto44pJ4mKLmtgmMDnngP9+zpi+MSjBPhFp/N9Y4TuT+55IhipgdBy0dg0J8+iT0D5pEjhxJwPzcx5W3J4IEPgxexG0W3+RfoQU4FKG2jc5Klmx6qUQ1it0ho=) (提取码: 123456) 11 | 12 | # 文件下载链接 13 | [tnews数据集下载链接](https://e-share.obs-website.cn-north-1.myhuaweicloud.com?token=SvjC1tq+9EC0fILT0WI5w1SIJKMVsoAn6d3hiNI5bHpGQMMKuJiZhGfVhyPTgqUIFIVIYKtAmENUpN6ssH654op9P9KNXqX021meACXkf2EZ+O58B+FMoSmBJoaoi+97k52R89dhiOtO4HUMuIK6JYdZImjBOaZ1gxeB+gXCgHBKRNpzfcccU4d41LlA3BPBLqhJ24K0Q76EZdwPUFdf+oRgVqr9d4n01yNmZ9PlIYQlLFSxfQGeIojXp5jif1EEISBUvsvSm880An8VEAqYCMdiLcyfVE9rv0YFodid0EPzGmngOMa5p2Q/P6q3jJsjaPsElkX4SMpQ80v4DkJ4J+un4gISVfwf5VWIxObY7CbnP2YIJ+K7F+m9OrRN8oDm4aUooUfi0u8xlKIij4NqFyWMjUuPCsAIHavtpB+WQqMFOZooFxE45VAIZSXQiWIL3UKYVSm/Eyvi+cyg8PON0I501QJkhONQH7inkdoUMinwQoWOx/lyBzKiw8f94rtRtCWWNwI9DZsz/tDSF+yeMo3hI7p7NILWiFUbvFK2lUc=) (提取码: 123456) 14 | 15 | [源代码下载链接](https://bert-21days.obs.myhuaweicloud.com:443/bert.zip?AccessKeyId=M7KX8KLMT0ZL1P8QWXZ5&Expires=1635990060&Signature=iNGp2m8i93raBE6wS4Fmwg33u2k%3D) 16 | 17 | [tnews ckpt下载链接](https://bert-21days.obs.myhuaweicloud.com:443/bert/tnews-3_3335.ckpt?AccessKeyId=M7KX8KLMT0ZL1P8QWXZ5&Expires=1635990129&Signature=uj8akw%2B/gPZGVS539Yb4ymttzmU%3D) 18 | 19 | [bert_base ckpt下载链接](https://bert-21days.obs.myhuaweicloud.com:443/bert/bert_base.ckpt?AccessKeyId=M7KX8KLMT0ZL1P8QWXZ5&Expires=1635991421&Signature=cruWVmPy68Fz%2Bfr90Do5sS1WN0E%3D) 20 | 21 | # 视频地址 22 | [基于Bert进行中文新闻分类](https://www.bilibili.com/video/BV1YK4y177t2) 23 | -------------------------------------------------------------------------------- /chapter2/bert/convert_example.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tokenization 3 | 4 | class Sample: 5 | """Sample file""" 6 | def __init__(self, text, label): 7 | super(Sample, self).__init__() 8 | self.text_a = text 9 | self.text_b = None 10 | self.label = label 11 | self.guid = 0 12 | 13 | 14 | class InputFeatures(object): 15 | """A single set of features of data.""" 16 | 17 | def __init__(self, 18 | input_ids, 19 | input_mask, 20 | segment_ids, 21 | label_id, 22 | is_real_example=True): 23 | self.input_ids = input_ids 24 | self.input_mask = input_mask 25 | self.segment_ids = segment_ids 26 | self.label_id = label_id 27 | self.is_real_example = is_real_example 28 | 29 | 30 | def convert_single_example(ex_index, example, max_seq_length, 31 | tokenizer): 32 | """Converts a single `InputExample` into a single `InputFeatures`.""" 33 | 34 | print(example.text_a) 35 | tokens_a = tokenizer.tokenize(example.text_a) 36 | print(tokens_a) 37 | tokens_b = None 38 | if example.text_b: 39 | tokens_b = tokenizer.tokenize(example.text_b) 40 | 41 | if tokens_b: 42 | # Modifies `tokens_a` and `tokens_b` in place so that the total 43 | # length is less than the specified length. 44 | # Account for [CLS], [SEP], [SEP] with "- 3" 45 | _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3) 46 | else: 47 | # Account for [CLS] and [SEP] with "- 2" 48 | if len(tokens_a) > max_seq_length - 2: 49 | tokens_a = tokens_a[0:(max_seq_length - 2)] 50 | 51 | # The convention in BERT is: 52 | # (a) For sequence pairs: 53 | # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP] 54 | # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 55 | # (b) For single sequences: 56 | # tokens: [CLS] the dog is hairy . [SEP] 57 | # type_ids: 0 0 0 0 0 0 0 58 | # 59 | # Where "type_ids" are used to indicate whether this is the first 60 | # sequence or the second sequence. The embedding vectors for `type=0` and 61 | # `type=1` were learned during pre-training and are added to the wordpiece 62 | # embedding vector (and position vector). This is not *strictly* necessary 63 | # since the [SEP] token unambiguously separates the sequences, but it makes 64 | # it easier for the model to learn the concept of sequences. 65 | # 66 | # For classification tasks, the first vector (corresponding to [CLS]) is 67 | # used as the "sentence vector". Note that this only makes sense because 68 | # the entire model is fine-tuned. 69 | tokens = [] 70 | segment_ids = [] 71 | tokens.append("[CLS]") 72 | segment_ids.append(0) 73 | for token in tokens_a: 74 | tokens.append(token) 75 | segment_ids.append(0) 76 | tokens.append("[SEP]") 77 | segment_ids.append(0) 78 | 79 | if tokens_b: 80 | for token in tokens_b: 81 | tokens.append(token) 82 | segment_ids.append(1) 83 | tokens.append("[SEP]") 84 | segment_ids.append(1) 85 | 86 | input_ids = tokenizer.convert_tokens_to_ids(tokens) 87 | 88 | # The mask has 1 for real tokens and 0 for padding tokens. Only real 89 | # tokens are attended to. 90 | input_mask = [1] * len(input_ids) 91 | 92 | # Zero-pad up to the sequence length. 93 | while len(input_ids) < max_seq_length: 94 | input_ids.append(0) 95 | input_mask.append(0) 96 | segment_ids.append(0) 97 | 98 | assert len(input_ids) == max_seq_length 99 | assert len(input_mask) == max_seq_length 100 | assert len(segment_ids) == max_seq_length 101 | 102 | if ex_index < 5: 103 | print("*** Example ***") 104 | print("guid: %s" % (example.guid)) 105 | print("tokens: %s" % " ".join( 106 | [tokenization.printable_text(x) for x in tokens])) 107 | print("input_ids: %s" % " ".join([str(x) for x in input_ids])) 108 | print("input_mask: %s" % " ".join([str(x) for x in input_mask])) 109 | print("segment_ids: %s" % " ".join([str(x) for x in segment_ids])) 110 | 111 | feature = InputFeatures( 112 | input_ids=input_ids, 113 | input_mask=input_mask, 114 | segment_ids=segment_ids, 115 | label_id=0, 116 | is_real_example=True) 117 | return feature 118 | 119 | 120 | def convert_text(text, vocab_file, seq_length): 121 | tokenizer = tokenization.FullTokenizer( 122 | vocab_file=vocab_file, do_lower_case=True) 123 | output = convert_single_example(1, Sample(text, "100"), seq_length, tokenizer) 124 | return output 125 | 126 | 127 | def main(): 128 | inputs = sys.argv[1] 129 | print(inputs) 130 | output = convert_text(inputs, "chinese_L-12_H-768_A-12/vocab.txt", 128) 131 | print(output) 132 | 133 | 134 | if __name__ == "__main__": 135 | main() 136 | -------------------------------------------------------------------------------- /chapter2/bert/mindspore_hub_conf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | ''' 16 | Bert hub interface for bert base and bert nezha 17 | ''' 18 | from src.bert_model import BertModel 19 | from src.bert_model import BertConfig 20 | import mindspore.common.dtype as mstype 21 | bert_net_cfg_base = BertConfig( 22 | seq_length=128, 23 | vocab_size=21128, 24 | hidden_size=768, 25 | num_hidden_layers=12, 26 | num_attention_heads=12, 27 | intermediate_size=3072, 28 | hidden_act="gelu", 29 | hidden_dropout_prob=0.1, 30 | attention_probs_dropout_prob=0.1, 31 | max_position_embeddings=512, 32 | type_vocab_size=2, 33 | initializer_range=0.02, 34 | use_relative_positions=False, 35 | dtype=mstype.float32, 36 | compute_type=mstype.float16 37 | ) 38 | bert_net_cfg_nezha = BertConfig( 39 | seq_length=128, 40 | vocab_size=21128, 41 | hidden_size=1024, 42 | num_hidden_layers=24, 43 | num_attention_heads=16, 44 | intermediate_size=4096, 45 | hidden_act="gelu", 46 | hidden_dropout_prob=0.1, 47 | attention_probs_dropout_prob=0.1, 48 | max_position_embeddings=512, 49 | type_vocab_size=2, 50 | initializer_range=0.02, 51 | use_relative_positions=True, 52 | dtype=mstype.float32, 53 | compute_type=mstype.float16 54 | ) 55 | def create_network(name, *args, **kwargs): 56 | ''' 57 | Create bert network for base and nezha. 58 | ''' 59 | if name == 'bert_base': 60 | if "seq_length" in kwargs: 61 | bert_net_cfg_base.seq_length = kwargs["seq_length"] 62 | is_training = kwargs.get("is_training", False) 63 | return BertModel(bert_net_cfg_base, is_training, *args) 64 | if name == 'bert_nezha': 65 | if "seq_length" in kwargs: 66 | bert_net_cfg_nezha.seq_length = kwargs["seq_length"] 67 | is_training = kwargs.get("is_training", False) 68 | return BertModel(bert_net_cfg_nezha, is_training, *args) 69 | raise NotImplementedError(f"{name} is not implemented in the repo") 70 | -------------------------------------------------------------------------------- /chapter2/bert/scripts/ascend_distributed_launcher/README.md: -------------------------------------------------------------------------------- 1 | # Run distribute pretrain 2 | 3 | ## description 4 | The number of D chips can be automatically allocated based on the device_num set in hccl config file, You don not need to specify that. 5 | 6 | 7 | ## how to use 8 | For example, if we want to generate the launch command of the distributed training of Bert model on D chip, we can run the following command in `/bert/` dir: 9 | ``` 10 | python ./scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py --run_script_dir ./run_pretrain.py --hyper_parameter_config_dir ./scripts/ascend_distributed_launcher/hyper_parameter_config.ini --data_dir /path/dataset/ --hccl_config_dir model_zoo/utils/hccl_tools/hccl_2p_56_x.x.x.x.json 11 | ``` 12 | 13 | output: 14 | 15 | ``` 16 | hccl_config_dir: model_zoo/utils/hccl_tools/hccl_2p_56_x.x.x.x.json 17 | the number of logical core: 192 18 | avg_core_per_rank: 96 19 | rank_size: 2 20 | 21 | start training for rank 0, device 5: 22 | rank_id: 0 23 | device_id: 5 24 | core nums: 0-95 25 | epoch_size: 8 26 | data_dir: /data/small_512/ 27 | schema_dir: 28 | log file dir: ./LOG5/log.txt 29 | 30 | start training for rank 1, device 6: 31 | rank_id: 1 32 | device_id: 6 33 | core nums: 96-191 34 | epoch_size: 8 35 | data_dir: /data/small_512/ 36 | schema_dir: 37 | log file dir: ./LOG6/log.txt 38 | ``` 39 | 40 | ## Note 41 | 42 | 1. Note that `hccl_2p_56_x.x.x.x.json` can use [hccl_tools.py](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) to generate. 43 | 44 | 2. For hyper parameter, please note that you should customize the scripts `hyper_parameter_config.ini`. Please note that these two hyper parameters are not allowed to be configured here: 45 | - device_id 46 | - device_num 47 | - data_dir 48 | 49 | 3. For Other Model, please note that you should customize the option `run_script` and Corresponding `hyper_parameter_config.ini`. 50 | -------------------------------------------------------------------------------- /chapter2/bert/scripts/ascend_distributed_launcher/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter2/bert/scripts/ascend_distributed_launcher/__init__.py -------------------------------------------------------------------------------- /chapter2/bert/scripts/ascend_distributed_launcher/hyper_parameter_config.ini: -------------------------------------------------------------------------------- 1 | [config] 2 | distribute=true 3 | epoch_size=40 4 | enable_save_ckpt=true 5 | enable_lossscale=true 6 | do_shuffle=true 7 | enable_data_sink=true 8 | data_sink_steps=100 9 | accumulation_steps=1 10 | save_checkpoint_path=./ 11 | save_checkpoint_steps=10000 12 | save_checkpoint_num=1 -------------------------------------------------------------------------------- /chapter2/bert/scripts/run_classifier.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | echo "==============================================================================================================" 18 | echo "Please run the scipt as: " 19 | echo "bash scripts/run_classifier.sh" 20 | echo "for example: bash scripts/run_classifier.sh" 21 | echo "assessment_method include: [MCC, Spearman_correlation ,Accuracy]" 22 | echo "==============================================================================================================" 23 | 24 | mkdir -p ms_log 25 | CUR_DIR=`pwd` 26 | PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) 27 | export GLOG_log_dir=${CUR_DIR}/ms_log 28 | export GLOG_logtostderr=0 29 | python ${PROJECT_DIR}/../run_classifier.py \ 30 | --device_target="Ascend" \ 31 | --do_train="true" \ 32 | --do_eval="false" \ 33 | --assessment_method="Accuracy" \ 34 | --device_id=0 \ 35 | --epoch_num=1 \ 36 | --num_class=2 \ 37 | --train_data_shuffle="true" \ 38 | --eval_data_shuffle="false" \ 39 | --save_finetune_checkpoint_path="" \ 40 | --load_pretrain_checkpoint_path="" \ 41 | --load_finetune_checkpoint_path="" \ 42 | --train_data_file_path="" \ 43 | --eval_data_file_path="" \ 44 | --schema_file_path="" > classfifier_log.txt 2>&1 & 45 | -------------------------------------------------------------------------------- /chapter2/bert/scripts/run_distributed_pretrain_ascend.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | echo "==============================================================================================================" 18 | echo "Please run the scipt as: " 19 | echo "bash run_distributed_pretrain_ascend.sh DATA_DIR RANK_TABLE_FILE" 20 | echo "for example: bash run_distributed_pretrain_ascend.sh /path/dataset /path/hccl.json" 21 | echo "It is better to use absolute path." 22 | echo "For hyper parameter, please note that you should customize the scripts: 23 | '{CUR_DIR}/scripts/ascend_distributed_launcher/hyper_parameter_config.ini' " 24 | echo "==============================================================================================================" 25 | CUR_DIR=`pwd` 26 | 27 | python ${CUR_DIR}/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py \ 28 | --run_script_dir=${CUR_DIR}/run_pretrain.py \ 29 | --hyper_parameter_config_dir=${CUR_DIR}/scripts/ascend_distributed_launcher/hyper_parameter_config.ini \ 30 | --data_dir=$1 \ 31 | --hccl_config_dir=$2 \ 32 | --cmd_file=distributed_cmd.sh 33 | 34 | bash distributed_cmd.sh 35 | -------------------------------------------------------------------------------- /chapter2/bert/scripts/run_distributed_pretrain_for_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | echo "==============================================================================================================" 18 | echo "Please run the scipt as: " 19 | echo "bash run_distributed_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR" 20 | echo "for example: bash run_distributed_pretrain.sh 8 40 /path/zh-wiki/ /path/Schema.json" 21 | echo "It is better to use absolute path." 22 | echo "==============================================================================================================" 23 | 24 | RANK_SIZE=$1 25 | EPOCH_SIZE=$2 26 | DATA_DIR=$3 27 | SCHEMA_DIR=$4 28 | 29 | mpirun --allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \ 30 | python run_pretrain.py \ 31 | --device_target="GPU" \ 32 | --distribute="true" \ 33 | --epoch_size=$EPOCH_SIZE \ 34 | --enable_save_ckpt="true" \ 35 | --enable_lossscale="false" \ 36 | --do_shuffle="true" \ 37 | --enable_data_sink="true" \ 38 | --data_sink_steps=1 \ 39 | --load_checkpoint_path="" \ 40 | --save_checkpoint_steps=10000 \ 41 | --save_checkpoint_num=1 \ 42 | --data_dir=$DATA_DIR \ 43 | --schema_dir=$SCHEMA_DIR > log.txt 2>&1 & 44 | 45 | -------------------------------------------------------------------------------- /chapter2/bert/scripts/run_ner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | echo "==============================================================================================================" 18 | echo "Please run the scipt as: " 19 | echo "bash scripts/run_ner.sh" 20 | echo "for example: bash scripts/run_ner.sh" 21 | echo "assessment_method include: [F1, clue_benchmark]" 22 | echo "==============================================================================================================" 23 | 24 | mkdir -p ms_log 25 | CUR_DIR=`pwd` 26 | PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) 27 | export GLOG_log_dir=${CUR_DIR}/ms_log 28 | export GLOG_logtostderr=0 29 | python ${PROJECT_DIR}/../run_ner.py \ 30 | --device_target="Ascend" \ 31 | --do_train="true" \ 32 | --do_eval="false" \ 33 | --assessment_method="F1" \ 34 | --use_crf="false" \ 35 | --device_id=0 \ 36 | --epoch_num=1 \ 37 | --num_class=2 \ 38 | --train_data_shuffle="true" \ 39 | --eval_data_shuffle="false" \ 40 | --vocab_file_path="" \ 41 | --label_file_path="" \ 42 | --save_finetune_checkpoint_path="" \ 43 | --load_pretrain_checkpoint_path="" \ 44 | --load_finetune_checkpoint_path="" \ 45 | --train_data_file_path="" \ 46 | --eval_data_file_path="" \ 47 | --schema_file_path="" > ner_log.txt 2>&1 & 48 | -------------------------------------------------------------------------------- /chapter2/bert/scripts/run_squad.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | echo "==============================================================================================================" 18 | echo "Please run the scipt as: " 19 | echo "bash scripts/run_squad.sh" 20 | echo "for example: bash scripts/run_squad.sh" 21 | echo "assessment_method include: [Accuracy]" 22 | echo "==============================================================================================================" 23 | 24 | mkdir -p ms_log 25 | CUR_DIR=`pwd` 26 | PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) 27 | export GLOG_log_dir=${CUR_DIR}/ms_log 28 | export GLOG_logtostderr=0 29 | python ${PROJECT_DIR}/../run_squad.py \ 30 | --device_target="Ascend" \ 31 | --do_train="true" \ 32 | --do_eval="false" \ 33 | --device_id=0 \ 34 | --epoch_num=1 \ 35 | --num_class=2 \ 36 | --train_data_shuffle="true" \ 37 | --eval_data_shuffle="false" \ 38 | --vocab_file_path="" \ 39 | --eval_json_path="" \ 40 | --save_finetune_checkpoint_path="" \ 41 | --load_pretrain_checkpoint_path="" \ 42 | --load_finetune_checkpoint_path="" \ 43 | --train_data_file_path="" \ 44 | --eval_data_file_path="" \ 45 | --schema_file_path="" > squad_log.txt 2>&1 & 46 | -------------------------------------------------------------------------------- /chapter2/bert/scripts/run_standalone_pretrain_ascend.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | echo "==============================================================================================================" 18 | echo "Please run the scipt as: " 19 | echo "bash run_standalone_pretrain_ascend.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR" 20 | echo "for example: bash run_standalone_pretrain_ascend.sh 0 40 /path/zh-wiki/ /path/Schema.json" 21 | echo "==============================================================================================================" 22 | 23 | DEVICE_ID=$1 24 | EPOCH_SIZE=$2 25 | DATA_DIR=$3 26 | SCHEMA_DIR=$4 27 | 28 | mkdir -p ms_log 29 | PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) 30 | CUR_DIR=`pwd` 31 | export GLOG_log_dir=${CUR_DIR}/ms_log 32 | export GLOG_logtostderr=0 33 | python ${PROJECT_DIR}/../run_pretrain.py \ 34 | --distribute="false" \ 35 | --epoch_size=$EPOCH_SIZE \ 36 | --device_id=$DEVICE_ID \ 37 | --enable_save_ckpt="true" \ 38 | --enable_lossscale="true" \ 39 | --do_shuffle="true" \ 40 | --enable_data_sink="true" \ 41 | --data_sink_steps=1 \ 42 | --accumulation_steps=1 \ 43 | --load_checkpoint_path="" \ 44 | --save_checkpoint_steps=10000 \ 45 | --save_checkpoint_num=1 \ 46 | --data_dir=$DATA_DIR \ 47 | --schema_dir=$SCHEMA_DIR > pretraining_log.txt 2>&1 & 48 | -------------------------------------------------------------------------------- /chapter2/bert/scripts/run_standalone_pretrain_for_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | echo "==============================================================================================================" 18 | echo "Please run the scipt as: " 19 | echo "bash run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR" 20 | echo "for example: bash run_standalone_pretrain.sh 0 40 /path/zh-wiki/ /path/Schema.json" 21 | echo "==============================================================================================================" 22 | 23 | DEVICE_ID=$1 24 | EPOCH_SIZE=$2 25 | DATA_DIR=$3 26 | SCHEMA_DIR=$4 27 | 28 | export CUDA_VISIBLE_DEVICES=$DEVICE_ID 29 | 30 | mkdir -p ms_log 31 | CUR_DIR=`pwd` 32 | export GLOG_log_dir=${CUR_DIR}/ms_log 33 | export GLOG_logtostderr=0 34 | python run_pretrain.py \ 35 | --device_target="GPU" \ 36 | --distribute="false" \ 37 | --epoch_size=$EPOCH_SIZE \ 38 | --enable_save_ckpt="true" \ 39 | --enable_lossscale="false" \ 40 | --do_shuffle="true" \ 41 | --enable_data_sink="true" \ 42 | --data_sink_steps=1 \ 43 | --load_checkpoint_path="" \ 44 | --save_checkpoint_path="" \ 45 | --save_checkpoint_steps=10000 \ 46 | --save_checkpoint_num=1 \ 47 | --data_dir=$DATA_DIR \ 48 | --schema_dir=$SCHEMA_DIR > log.txt 2>&1 & 49 | -------------------------------------------------------------------------------- /chapter2/bert/src/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Bert Init.""" 16 | from .bert_for_pre_training import BertNetworkWithLoss, BertPreTraining, \ 17 | BertPretrainingLoss, GetMaskedLMOutput, GetNextSentenceOutput, \ 18 | BertTrainOneStepCell, BertTrainOneStepWithLossScaleCell, \ 19 | BertTrainAccumulateStepsWithLossScaleCell 20 | from .bert_model import BertAttention, BertConfig, BertEncoderCell, BertModel, \ 21 | BertOutput, BertSelfAttention, BertTransformer, EmbeddingLookup, \ 22 | EmbeddingPostprocessor, RelaPosEmbeddingsGenerator, RelaPosMatrixGenerator, \ 23 | SaturateCast, CreateAttentionMaskFromInputMask 24 | 25 | __all__ = [ 26 | "BertNetworkWithLoss", "BertPreTraining", "BertPretrainingLoss", 27 | "GetMaskedLMOutput", "GetNextSentenceOutput", "BertTrainOneStepCell", 28 | "BertTrainOneStepWithLossScaleCell", "BertTrainAccumulateStepsWithLossScaleCell", 29 | "BertAttention", "BertConfig", "BertEncoderCell", "BertModel", "BertOutput", 30 | "BertSelfAttention", "BertTransformer", "EmbeddingLookup", 31 | "EmbeddingPostprocessor", "RelaPosEmbeddingsGenerator", 32 | "RelaPosMatrixGenerator", "SaturateCast", "CreateAttentionMaskFromInputMask" 33 | ] 34 | -------------------------------------------------------------------------------- /chapter2/bert/src/assessment_method.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | ''' 17 | Bert evaluation assessment method script. 18 | ''' 19 | import math 20 | import numpy as np 21 | from .CRF import postprocess 22 | 23 | class Accuracy(): 24 | ''' 25 | calculate accuracy 26 | ''' 27 | def __init__(self): 28 | self.acc_num = 0 29 | self.total_num = 0 30 | def update(self, logits, labels): 31 | labels = labels.asnumpy() 32 | labels = np.reshape(labels, -1) 33 | logits = logits.asnumpy() 34 | logit_id = np.argmax(logits, axis=-1) 35 | self.acc_num += np.sum(labels == logit_id) 36 | self.total_num += len(labels) 37 | 38 | class F1(): 39 | ''' 40 | calculate F1 score 41 | ''' 42 | def __init__(self, use_crf=False, num_labels=2): 43 | self.TP = 0 44 | self.FP = 0 45 | self.FN = 0 46 | self.use_crf = use_crf 47 | self.num_labels = num_labels 48 | 49 | def update(self, logits, labels): 50 | ''' 51 | update F1 score 52 | ''' 53 | labels = labels.asnumpy() 54 | labels = np.reshape(labels, -1) 55 | if self.use_crf: 56 | backpointers, best_tag_id = logits 57 | best_path = postprocess(backpointers, best_tag_id) 58 | logit_id = [] 59 | for ele in best_path: 60 | logit_id.extend(ele) 61 | else: 62 | logits = logits.asnumpy() 63 | logit_id = np.argmax(logits, axis=-1) 64 | logit_id = np.reshape(logit_id, -1) 65 | pos_eva = np.isin(logit_id, [i for i in range(1, self.num_labels)]) 66 | pos_label = np.isin(labels, [i for i in range(1, self.num_labels)]) 67 | self.TP += np.sum(pos_eva&pos_label) 68 | self.FP += np.sum(pos_eva&(~pos_label)) 69 | self.FN += np.sum((~pos_eva)&pos_label) 70 | 71 | class MCC(): 72 | ''' 73 | Calculate Matthews Correlation Coefficient 74 | ''' 75 | def __init__(self): 76 | self.TP = 0 77 | self.FP = 0 78 | self.FN = 0 79 | self.TN = 0 80 | def update(self, logits, labels): 81 | ''' 82 | MCC update 83 | ''' 84 | labels = labels.asnumpy() 85 | labels = np.reshape(labels, -1) 86 | labels = labels.astype(np.bool) 87 | logits = logits.asnumpy() 88 | logit_id = np.argmax(logits, axis=-1) 89 | logit_id = np.reshape(logit_id, -1) 90 | logit_id = logit_id.astype(np.bool) 91 | ornot = logit_id ^ labels 92 | 93 | self.TP += (~ornot & labels).sum() 94 | self.FP += (ornot & ~labels).sum() 95 | self.FN += (ornot & labels).sum() 96 | self.TN += (~ornot & ~labels).sum() 97 | 98 | def cal(self): 99 | mcc = (self.TP*self.TN - self.FP*self.FN)/math.sqrt((self.TP+self.FP)*(self.TP+self.FN) * 100 | (self.TN+self.FP)*(self.TN+self.FN)) 101 | return mcc 102 | 103 | class Spearman_Correlation(): 104 | ''' 105 | Calculate Spearman Correlation Coefficient 106 | ''' 107 | def __init__(self): 108 | self.label = [] 109 | self.logit = [] 110 | 111 | def update(self, logits, labels): 112 | labels = labels.asnumpy() 113 | labels = np.reshape(labels, -1) 114 | logits = logits.asnumpy() 115 | logits = np.reshape(logits, -1) 116 | self.label.append(labels) 117 | self.logit.append(logits) 118 | 119 | def cal(self): 120 | ''' 121 | Calculate Spearman Correlation 122 | ''' 123 | label = np.concatenate(self.label) 124 | logit = np.concatenate(self.logit) 125 | sort_label = label.argsort()[::-1] 126 | sort_logit = logit.argsort()[::-1] 127 | n = len(label) 128 | d_acc = 0 129 | for i in range(n): 130 | d = np.where(sort_label == i)[0] - np.where(sort_logit == i)[0] 131 | d_acc += d**2 132 | ps = 1 - 6*d_acc/n/(n**2-1) 133 | return ps 134 | -------------------------------------------------------------------------------- /chapter2/bert/src/cluener_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | '''bert clue evaluation''' 17 | 18 | import json 19 | import numpy as np 20 | import mindspore.common.dtype as mstype 21 | from mindspore.common.tensor import Tensor 22 | from src import tokenization 23 | from src.sample_process import label_generation, process_one_example_p 24 | from src.CRF import postprocess 25 | from src.finetune_eval_config import bert_net_cfg 26 | from src.score import get_result 27 | 28 | def process(model=None, text="", tokenizer_=None, use_crf="", tag_to_index=None, vocab=""): 29 | """ 30 | process text. 31 | """ 32 | data = [text] 33 | features = [] 34 | res = [] 35 | ids = [] 36 | for i in data: 37 | feature = process_one_example_p(tokenizer_, vocab, i, max_seq_len=bert_net_cfg.seq_length) 38 | features.append(feature) 39 | input_ids, input_mask, token_type_id = feature 40 | input_ids = Tensor(np.array(input_ids), mstype.int32) 41 | input_mask = Tensor(np.array(input_mask), mstype.int32) 42 | token_type_id = Tensor(np.array(token_type_id), mstype.int32) 43 | if use_crf.lower() == "true": 44 | backpointers, best_tag_id = model.predict(input_ids, input_mask, token_type_id, Tensor(1)) 45 | best_path = postprocess(backpointers, best_tag_id) 46 | logits = [] 47 | for ele in best_path: 48 | logits.extend(ele) 49 | ids = logits 50 | else: 51 | logits = model.predict(input_ids, input_mask, token_type_id, Tensor(1)) 52 | ids = logits.asnumpy() 53 | ids = np.argmax(ids, axis=-1) 54 | ids = list(ids) 55 | res = label_generation(text=text, probs=ids, tag_to_index=tag_to_index) 56 | return res 57 | 58 | def submit(model=None, path="", vocab_file="", use_crf="", label_file="", tag_to_index=None): 59 | """ 60 | submit task 61 | """ 62 | tokenizer_ = tokenization.FullTokenizer(vocab_file=vocab_file) 63 | data = [] 64 | for line in open(path): 65 | if not line.strip(): 66 | continue 67 | oneline = json.loads(line.strip()) 68 | res = process(model=model, text=oneline["text"], tokenizer_=tokenizer_, 69 | use_crf=use_crf, tag_to_index=tag_to_index, vocab=vocab_file) 70 | data.append(json.dumps({"label": res}, ensure_ascii=False)) 71 | open("ner_predict.json", "w").write("\n".join(data)) 72 | labels = [] 73 | with open(label_file) as f: 74 | for label in f: 75 | labels.append(label.strip()) 76 | get_result(labels, "ner_predict.json", path) 77 | -------------------------------------------------------------------------------- /chapter2/bert/src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | network config setting, will be used in dataset.py, run_pretrain.py 17 | """ 18 | from easydict import EasyDict as edict 19 | import mindspore.common.dtype as mstype 20 | from .bert_model import BertConfig 21 | cfg = edict({ 22 | 'batch_size': 32, 23 | 'bert_network': 'base', 24 | 'loss_scale_value': 65536, 25 | 'scale_factor': 2, 26 | 'scale_window': 1000, 27 | 'optimizer': 'Lamb', 28 | 'enable_global_norm': False, 29 | 'AdamWeightDecay': edict({ 30 | 'learning_rate': 3e-5, 31 | 'end_learning_rate': 0.0, 32 | 'power': 5.0, 33 | 'weight_decay': 1e-5, 34 | 'decay_filter': lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower(), 35 | 'eps': 1e-6, 36 | 'warmup_steps': 10000, 37 | }), 38 | 'Lamb': edict({ 39 | 'learning_rate': 3e-5, 40 | 'end_learning_rate': 0.0, 41 | 'power': 10.0, 42 | 'warmup_steps': 10000, 43 | 'weight_decay': 0.01, 44 | 'decay_filter': lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower(), 45 | 'eps': 1e-6, 46 | }), 47 | 'Momentum': edict({ 48 | 'learning_rate': 2e-5, 49 | 'momentum': 0.9, 50 | }), 51 | }) 52 | 53 | ''' 54 | Including two kinds of network: \ 55 | base: Google BERT-base(the base version of BERT model). 56 | large: BERT-NEZHA(a Chinese pretrained language model developed by Huawei, which introduced a improvement of \ 57 | Functional Relative Posetional Encoding as an effective positional encoding scheme). 58 | ''' 59 | if cfg.bert_network == 'base': 60 | cfg.batch_size = 64 61 | bert_net_cfg = BertConfig( 62 | seq_length=128, 63 | vocab_size=21128, 64 | hidden_size=768, 65 | num_hidden_layers=12, 66 | num_attention_heads=12, 67 | intermediate_size=3072, 68 | hidden_act="gelu", 69 | hidden_dropout_prob=0.1, 70 | attention_probs_dropout_prob=0.1, 71 | max_position_embeddings=512, 72 | type_vocab_size=2, 73 | initializer_range=0.02, 74 | use_relative_positions=False, 75 | dtype=mstype.float32, 76 | compute_type=mstype.float16 77 | ) 78 | if cfg.bert_network == 'nezha': 79 | cfg.batch_size = 96 80 | bert_net_cfg = BertConfig( 81 | seq_length=128, 82 | vocab_size=21128, 83 | hidden_size=1024, 84 | num_hidden_layers=24, 85 | num_attention_heads=16, 86 | intermediate_size=4096, 87 | hidden_act="gelu", 88 | hidden_dropout_prob=0.1, 89 | attention_probs_dropout_prob=0.1, 90 | max_position_embeddings=512, 91 | type_vocab_size=2, 92 | initializer_range=0.02, 93 | use_relative_positions=True, 94 | dtype=mstype.float32, 95 | compute_type=mstype.float16 96 | ) 97 | if cfg.bert_network == 'large': 98 | cfg.batch_size = 24 99 | bert_net_cfg = BertConfig( 100 | seq_length=512, 101 | vocab_size=30522, 102 | hidden_size=1024, 103 | num_hidden_layers=24, 104 | num_attention_heads=16, 105 | intermediate_size=4096, 106 | hidden_act="gelu", 107 | hidden_dropout_prob=0.1, 108 | attention_probs_dropout_prob=0.1, 109 | max_position_embeddings=512, 110 | type_vocab_size=2, 111 | initializer_range=0.02, 112 | use_relative_positions=False, 113 | dtype=mstype.float32, 114 | compute_type=mstype.float16 115 | ) 116 | -------------------------------------------------------------------------------- /chapter2/bert/src/finetune_eval_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ 17 | config settings, will be used in finetune.py 18 | """ 19 | 20 | from easydict import EasyDict as edict 21 | import mindspore.common.dtype as mstype 22 | from .bert_model import BertConfig 23 | 24 | optimizer_cfg = edict({ 25 | 'batch_size': 16, 26 | 'optimizer': 'Lamb', 27 | 'AdamWeightDecay': edict({ 28 | 'learning_rate': 2e-5, 29 | 'end_learning_rate': 1e-7, 30 | 'power': 1.0, 31 | 'weight_decay': 1e-5, 32 | 'decay_filter': lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower(), 33 | 'eps': 1e-6, 34 | }), 35 | 'Lamb': edict({ 36 | 'learning_rate': 2e-5, 37 | 'end_learning_rate': 1e-7, 38 | 'power': 1.0, 39 | 'weight_decay': 0.01, 40 | 'decay_filter': lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower(), 41 | }), 42 | 'Momentum': edict({ 43 | 'learning_rate': 2e-5, 44 | 'momentum': 0.9, 45 | }), 46 | }) 47 | 48 | cloud_cfg = edict({ 49 | 'vocab_file': '/home/work/user-job-dir/bert/vocab.txt', 50 | 'finetune_ckpt': '/home/work/user-job-dir/bert/tnews-3_3335.ckpt' 51 | }) 52 | 53 | bert_net_cfg = BertConfig( 54 | seq_length=128, 55 | vocab_size=21128, 56 | hidden_size=768, 57 | num_hidden_layers=12, 58 | num_attention_heads=12, 59 | intermediate_size=3072, 60 | hidden_act="gelu", 61 | hidden_dropout_prob=0.1, 62 | attention_probs_dropout_prob=0.1, 63 | max_position_embeddings=512, 64 | type_vocab_size=2, 65 | initializer_range=0.02, 66 | use_relative_positions=False, 67 | dtype=mstype.float32, 68 | compute_type=mstype.float16, 69 | ) 70 | -------------------------------------------------------------------------------- /chapter2/bert/src/sample_process.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """process txt""" 17 | 18 | import re 19 | from src.tokenization import convert_tokens_to_ids 20 | 21 | def process_one_example_p(tokenizer, vocab, text, max_seq_len=128): 22 | """process one testline""" 23 | textlist = list(text) 24 | tokens = [] 25 | for _, word in enumerate(textlist): 26 | token = tokenizer.tokenize(word) 27 | tokens.extend(token) 28 | if len(tokens) >= max_seq_len - 1: 29 | tokens = tokens[0:(max_seq_len - 2)] 30 | ntokens = [] 31 | segment_ids = [] 32 | label_ids = [] 33 | ntokens.append("[CLS]") 34 | segment_ids.append(0) 35 | for _, token in enumerate(tokens): 36 | ntokens.append(token) 37 | segment_ids.append(0) 38 | ntokens.append("[SEP]") 39 | segment_ids.append(0) 40 | input_ids = convert_tokens_to_ids(vocab, ntokens) 41 | input_mask = [1] * len(input_ids) 42 | while len(input_ids) < max_seq_len: 43 | input_ids.append(0) 44 | input_mask.append(0) 45 | segment_ids.append(0) 46 | label_ids.append(0) 47 | ntokens.append("**NULL**") 48 | assert len(input_ids) == max_seq_len 49 | assert len(input_mask) == max_seq_len 50 | assert len(segment_ids) == max_seq_len 51 | 52 | feature = (input_ids, input_mask, segment_ids) 53 | return feature 54 | 55 | def label_generation(text="", probs=None, tag_to_index=None): 56 | """generate label""" 57 | data = [text] 58 | probs = [probs] 59 | result = [] 60 | label2id = tag_to_index 61 | id2label = [k for k, v in label2id.items()] 62 | 63 | for index, prob in enumerate(probs): 64 | for v in prob[1:len(data[index]) + 1]: 65 | result.append(id2label[int(v)]) 66 | 67 | labels = {} 68 | start = None 69 | index = 0 70 | for _, t in zip("".join(data), result): 71 | if re.search("^[BS]", t): 72 | if start is not None: 73 | label = result[index - 1][2:] 74 | if labels.get(label): 75 | te_ = text[start:index] 76 | labels[label][te_] = [[start, index - 1]] 77 | else: 78 | te_ = text[start:index] 79 | labels[label] = {te_: [[start, index - 1]]} 80 | start = index 81 | if re.search("^O", t): 82 | if start is not None: 83 | label = result[index - 1][2:] 84 | if labels.get(label): 85 | te_ = text[start:index] 86 | labels[label][te_] = [[start, index - 1]] 87 | else: 88 | te_ = text[start:index] 89 | labels[label] = {te_: [[start, index - 1]]} 90 | start = None 91 | index += 1 92 | if start is not None: 93 | label = result[start][2:] 94 | if labels.get(label): 95 | te_ = text[start:index] 96 | labels[label][te_] = [[start, index - 1]] 97 | else: 98 | te_ = text[start:index] 99 | labels[label] = {te_: [[start, index - 1]]} 100 | return labels 101 | -------------------------------------------------------------------------------- /chapter2/bert/src/score.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ 17 | Calculate average F1 score among labels. 18 | """ 19 | 20 | import json 21 | 22 | def get_f1_score_for_each_label(pre_lines, gold_lines, label): 23 | """ 24 | Get F1 score for each label. 25 | Args: 26 | pre_lines: listed label info from pre_file. 27 | gold_lines: listed label info from gold_file. 28 | label: 29 | 30 | Returns: 31 | F1 score for this label. 32 | """ 33 | TP = 0 34 | FP = 0 35 | FN = 0 36 | index = 0 37 | while index < len(pre_lines): 38 | pre_line = pre_lines[index].get(label, {}) 39 | gold_line = gold_lines[index].get(label, {}) 40 | for sample in pre_line: 41 | if sample in gold_line: 42 | TP += 1 43 | else: 44 | FP += 1 45 | for sample in gold_line: 46 | if sample not in pre_line: 47 | FN += 1 48 | index += 1 49 | f1 = 2 * TP / (2 * TP + FP + FN) 50 | return f1 51 | 52 | 53 | def get_f1_score(labels, pre_file, gold_file): 54 | """ 55 | Get F1 scores for each label. 56 | Args: 57 | labels: list of labels. 58 | pre_file: prediction file. 59 | gold_file: ground truth file. 60 | 61 | Returns: 62 | average F1 score on all labels. 63 | """ 64 | pre_lines = [json.loads(line.strip())['label'] for line in open(pre_file) if line.strip()] 65 | gold_lines = [json.loads(line.strip())['label'] for line in open(gold_file) if line.strip()] 66 | if len(pre_lines) != len(gold_lines): 67 | raise ValueError("pre file and gold file have different line count.") 68 | f1_sum = 0 69 | for label in labels: 70 | f1 = get_f1_score_for_each_label(pre_lines, gold_lines, label) 71 | print('label: %s, F1: %.6f' % (label, f1)) 72 | f1_sum += f1 73 | 74 | return f1_sum/len(labels) 75 | 76 | 77 | def get_result(labels, pre_file, gold_file): 78 | avg = get_f1_score(labels, pre_file=pre_file, gold_file=gold_file) 79 | print("avg F1: %.6f" % avg) 80 | -------------------------------------------------------------------------------- /chapter3/README.md: -------------------------------------------------------------------------------- 1 | # ms-resnet50-mushroom 2 | 3 | 该教程旨在指导大家使用`Ascend`/`GPU`资源完成MindSpore ResNet-50毒蘑菇识别的教程。 4 | 5 | > **注意:** 因容量原因,将作业指导文档移出repo,体验作业和进阶作业文档可从如下链接中获取: 6 | 7 | * 体验作业文档 8 | 9 | [ResNet-50体验作业](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/demo/ResNet-50%E4%BD%93%E9%AA%8C%E4%BD%9C%E4%B8%9A.pdf) 10 | 11 | * 进阶作业文档 12 | 13 | [ResNet-50进阶作业](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/demo/ResNet-50%E8%BF%9B%E9%98%B6%E4%BD%9C%E4%B8%9A.pdf) 14 | 15 | * 视频教程 16 | 17 | [基于ResNet50实现毒蘑菇识别实战](https://www.bilibili.com/video/BV1ny4y1r7HA) 18 | 19 | ## 免责声明 20 | 21 | MindSpore ModelZoo only provides scripts that downloads and preprocesses public datasets. We do not own these datasets and are not responsible for their quality or maintenance. Please make sure you have permission to use the dataset under the dataset’s license. The models trained on these dataset are for non-commercial research and educational purpose only. 22 | 23 | To dataset owners: we will remove or update all public content upon request if you don’t want your dataset included on MindSpore ModelZoo, or wish to update it in any way. Please contact us through a [Gitee](https://gitee.com/mindspore/mindspore/issues)/[GitHub](https://github.com/mindspore-ai/mindspore/issues) issue. Your understanding and contribution to this community is greatly appreciated. 24 | 25 | ## 许可证 26 | 27 | [Apache License 2.0](../LICENSE) 28 | -------------------------------------------------------------------------------- /chapter3/docs/data_upload_obs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter3/docs/data_upload_obs.jpg -------------------------------------------------------------------------------- /chapter3/docs/resnet50_predictconfig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter3/docs/resnet50_predictconfig.jpg -------------------------------------------------------------------------------- /chapter3/docs/resnet50_trainconfig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter3/docs/resnet50_trainconfig.jpg -------------------------------------------------------------------------------- /chapter3/mushroom-dataset/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter3/mushroom-dataset/.gitkeep -------------------------------------------------------------------------------- /chapter3/resnet_ascend/README.md: -------------------------------------------------------------------------------- 1 | # MindSpore ResNet-50毒蘑菇识别教程指导(Ascend环境) 2 | 3 | 该教程旨在指导大家通过[ModelArts云服务](https://www.huaweicloud.com/product/modelarts.html)完成MindSpore ResNet-50毒蘑菇识别的教程。 4 | 5 | > **注意:** 该教程的代码是基于`v0.5`版本的MindSpore [ModelZoo](https://gitee.com/mindspore/mindspore/tree/r0.5/model_zoo/resnet)开发完成的。 6 | 7 | ## 上手指导 8 | 9 | ### 数据准备 10 | 11 | * 下载蘑菇数据集 12 | 13 | ``` 14 | cd mushroom-dataset/ && wget https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/mushrooms/mushrooms.zip 15 | unzip mushrooms.zip && rm mushrooms.zip 16 | ``` 17 | 18 | 或者您可以直接点击 [https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/mushrooms/mushrooms.zip](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/mushrooms/mushrooms.zip) 从浏览器中下载该数据集,手动解压。 19 | 20 | * 下载ResNet-50预训练模型(推理任务使用) 21 | 22 | ``` 23 | cd ../resnet_ascend/ckpt_files && wget https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/ckpt_files/resnet-90_209.ckpt 24 | ``` 25 | 26 | 或者您可以直接点击 [https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/ckpt_files/resnet-90_209.ckpt](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/ckpt_files/resnet-90_209.ckpt) 从浏览器中下载预训练模型。 27 | 28 | * 将数据集、预训练模型以及源代码上传到OBS服务 29 | 30 | 请将前面下载的蘑菇数据集、预训练模型和源代码上传到[华为OBS云服务](https://www.huaweicloud.com/product/obs.html),上传格式如下: 31 | 32 | OBS Data Upload 33 | 34 | ### 模型训练 35 | 36 | 首先,用户需要手动配置训练任务的参数,格式如下: 37 | 38 | ResNet-50 Train Config 39 | 40 | 然后根据如下视频启动ModelArts训练任务: 41 | 42 | [观看视频](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/demo/resnet50_train_demo.mp4) 43 | 44 | ### 模型推理 45 | 46 | 首先,用户需要手动配置推理任务的参数,格式如下: 47 | 48 | ResNet-50 Predict Config 49 | 50 | 然后根据如下视频启动ModelArts推理任务: 51 | 52 | [观看视频](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/demo/resnet50_predict_demo.mp4) 53 | 54 | ## 许可证 55 | 56 | [Apache License 2.0](../../LICENSE) 57 | -------------------------------------------------------------------------------- /chapter3/resnet_ascend/ckpt_files/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter3/resnet_ascend/ckpt_files/.gitkeep -------------------------------------------------------------------------------- /chapter3/resnet_ascend/resnet50_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ResNet50 model eval with MindSpore""" 16 | import argparse 17 | import random 18 | import numpy as np 19 | import moxing as mox 20 | 21 | import mindspore.context as context 22 | from mindspore.train.model import Model 23 | from mindspore.train.serialization import load_checkpoint, load_param_into_net 24 | 25 | from src.config import cfg 26 | from src.resnet import resnet50 27 | from src.dataset import create_dataset 28 | from src.CrossEntropySmooth import CrossEntropySmooth 29 | 30 | random.seed(1) 31 | np.random.seed(1) 32 | 33 | 34 | def resnet50_eval(args_opt): 35 | class_num = cfg.class_num 36 | local_data_path = '/cache/data' 37 | ckpt_file_slice = args_opt.checkpoint_path.split('/') 38 | ckpt_file = ckpt_file_slice[len(ckpt_file_slice)-1] 39 | local_ckpt_path = '/cache/'+ckpt_file 40 | 41 | # set graph mode and parallel mode 42 | context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, save_graphs=False) 43 | 44 | # data download 45 | print('Download data.') 46 | mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path) 47 | mox.file.copy_parallel(src_url=args_opt.checkpoint_path, dst_url=local_ckpt_path) 48 | 49 | # create dataset 50 | dataset = create_dataset(dataset_path=local_data_path, do_train=False, 51 | batch_size=cfg.batch_size) 52 | 53 | # load checkpoint into net 54 | net = resnet50(class_num=class_num) 55 | param_dict = load_checkpoint(local_ckpt_path) 56 | load_param_into_net(net, param_dict) 57 | net.set_train(False) 58 | 59 | # define loss and model 60 | if not cfg.use_label_smooth: 61 | cfg.label_smooth_factor = 0.0 62 | loss = CrossEntropySmooth(sparse=True, reduction='mean', 63 | smooth_factor=cfg.label_smooth_factor, 64 | num_classes=cfg.class_num) 65 | model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) 66 | 67 | # eval model 68 | res = model.eval(dataset) 69 | print("result:", res, "ckpt=", args_opt.checkpoint_path) 70 | 71 | 72 | if __name__ == '__main__': 73 | parser = argparse.ArgumentParser(description='ResNet50 eval.') 74 | parser.add_argument('--data_url', required=True, default=None, help='Location of data.') 75 | parser.add_argument('--train_url', required=True, default=None, help='Location of training outputs.') 76 | parser.add_argument('--checkpoint_path', required=True, type=str, default=None, help='Checkpoint file path') 77 | parser.add_argument('--device_target', type=str, default='Ascend', help='Device target. Default: Ascend.') 78 | args_opt, unknown = parser.parse_known_args() 79 | 80 | resnet50_eval(args_opt) 81 | print('ResNet50 evaluation success!') 82 | -------------------------------------------------------------------------------- /chapter3/resnet_ascend/resnet50_predict.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ResNet50 model predict with MindSpore""" 16 | import os 17 | import argparse 18 | import random 19 | import cv2 20 | import numpy as np 21 | import moxing as mox 22 | 23 | import mindspore 24 | from mindspore import context, Tensor 25 | from mindspore.train.serialization import load_checkpoint, load_param_into_net 26 | 27 | from src.config import cfg 28 | from src.resnet import resnet50 29 | 30 | random.seed(1) 31 | np.random.seed(1) 32 | 33 | label_list = ["Agaricus双孢蘑菇,伞菌目,蘑菇科,蘑菇属,广泛分布于北半球温带,无毒", 34 | "Amanita毒蝇伞,伞菌目,鹅膏菌科,鹅膏菌属,主要分布于我国黑龙江、吉林、四川、西藏、云南等地,有毒", 35 | "Boletus丽柄牛肝菌,伞菌目,牛肝菌科,牛肝菌属,分布于云南、陕西、甘肃、西藏等地,有毒", 36 | "Cortinarius掷丝膜菌,伞菌目,丝膜菌科,丝膜菌属,分布于湖南等地(夏秋季在山毛等阔叶林地上生长)", 37 | "Entoloma霍氏粉褶菌,伞菌目,粉褶菌科,粉褶菌属,主要分布于新西兰北岛和南岛西部,有毒", 38 | "Hygrocybe浅黄褐湿伞,伞菌目,蜡伞科,湿伞属,分布于香港(见于松仔园),有毒", 39 | "Lactarius松乳菇,红菇目,红菇科,乳菇属,广泛分布于亚热带松林地,无毒", 40 | "Russula褪色红菇,伞菌目,红菇科,红菇属,分布于河北、吉林、四川、江苏、西藏等地,无毒", 41 | "Suillus乳牛肝菌,牛肝菌目,乳牛肝菌科,乳牛肝菌属,分布于吉林、辽宁、山西、安徽、江西、浙江、湖南、四川、贵州等地,无毒", 42 | ] 43 | 44 | 45 | def _crop_center(img, cropx, cropy): 46 | y, x, _ = img.shape 47 | startx = x // 2 - (cropx // 2) 48 | starty = y // 2 - (cropy // 2) 49 | return img[starty:starty + cropy, startx:startx + cropx, :] 50 | 51 | 52 | def _normalize(img, mean, std): 53 | # This method is borrowed from: 54 | # https://github.com/open-mmlab/mmcv/blob/master/mmcv/image/photometric.py 55 | assert img.dtype != np.uint8 56 | mean = np.float64(mean.reshape(1, -1)) 57 | stdinv = 1 / np.float64(std.reshape(1, -1)) 58 | cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) 59 | cv2.subtract(img, mean, img) 60 | cv2.multiply(img, stdinv, img) 61 | return img 62 | 63 | 64 | def data_preprocess(img_path): 65 | img = cv2.imread(img_path, 1) 66 | img = cv2.resize(img, (256, 256)) 67 | img = _crop_center(img, 224, 224) 68 | mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] 69 | std = [0.229 * 255, 0.224 * 255, 0.225 * 255] 70 | img = _normalize(img.astype(np.float32), np.asarray(mean), np.asarray(std)) 71 | img = img.transpose(2, 0, 1) 72 | 73 | return img 74 | 75 | 76 | def resnet50_predict(args_opt): 77 | class_num = cfg.class_num 78 | local_data_path = '/cache/data' 79 | ckpt_file_slice = args_opt.checkpoint_path.split('/') 80 | ckpt_file = ckpt_file_slice[len(ckpt_file_slice)-1] 81 | local_ckpt_path = '/cache/'+ckpt_file 82 | 83 | # set graph mode and parallel mode 84 | context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, save_graphs=False) 85 | 86 | # data download 87 | print('Download data.') 88 | mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path) 89 | mox.file.copy_parallel(src_url=args_opt.checkpoint_path, dst_url=local_ckpt_path) 90 | 91 | # load checkpoint into net 92 | net = resnet50(class_num=class_num) 93 | param_dict = load_checkpoint(local_ckpt_path) 94 | load_param_into_net(net, param_dict) 95 | net.set_train(False) 96 | 97 | # preprocess the image 98 | images = os.listdir(local_data_path) 99 | for image in images: 100 | img = data_preprocess(os.path.join(local_data_path, image)) 101 | # predict model 102 | res = net(Tensor(img.reshape((1, 3, 224, 224)), mindspore.float32)).asnumpy() 103 | 104 | predict_label = label_list[res[0].argmax()] 105 | print("预测的蘑菇标签为:\n\t"+predict_label+"\n") 106 | 107 | 108 | if __name__ == '__main__': 109 | parser = argparse.ArgumentParser(description='ResNet50 predict.') 110 | parser.add_argument('--data_url', required=True, default=None, help='Location of data.') 111 | parser.add_argument('--train_url', required=True, default=None, help='Location of training outputs.') 112 | parser.add_argument('--checkpoint_path', required=True, type=str, default=None, help='Checkpoint file path') 113 | parser.add_argument('--device_target', type=str, default='Ascend', help='Device target. Default: Ascend.') 114 | args_opt, unknown = parser.parse_known_args() 115 | 116 | resnet50_predict(args_opt) 117 | print('ResNet50 prediction success!') 118 | -------------------------------------------------------------------------------- /chapter3/resnet_ascend/src/CrossEntropySmooth.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """define loss function for network""" 16 | import mindspore.nn as nn 17 | from mindspore import Tensor 18 | from mindspore.common import dtype as mstype 19 | from mindspore.nn.loss.loss import _Loss 20 | from mindspore.ops import functional as F 21 | from mindspore.ops import operations as P 22 | 23 | 24 | class CrossEntropySmooth(_Loss): 25 | """CrossEntropy""" 26 | 27 | def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000): 28 | super(CrossEntropySmooth, self).__init__() 29 | self.onehot = P.OneHot() 30 | self.sparse = sparse 31 | self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) 32 | self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) 33 | self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction) 34 | 35 | def construct(self, logit, label): 36 | if self.sparse: 37 | label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) 38 | loss = self.ce(logit, label) 39 | return loss 40 | -------------------------------------------------------------------------------- /chapter3/resnet_ascend/src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | network config setting, will be used in resnet50_train.py and resnet50_predict.py 17 | """ 18 | from easydict import EasyDict as ed 19 | 20 | # config for resnet50, imagenet2012 21 | cfg = ed({ 22 | "class_num": 9, 23 | "batch_size": 32, 24 | "loss_scale": 1024, 25 | "momentum": 0.9, 26 | "weight_decay": 1e-4, 27 | "epoch_size": 90, 28 | "pretrain_epoch_size": 0, 29 | "save_checkpoint": True, 30 | "save_checkpoint_epochs": 5, 31 | "keep_checkpoint_max": 10, 32 | "save_checkpoint_path": "./", 33 | "warmup_epochs": 0, 34 | "lr_decay_mode": "linear", 35 | "use_label_smooth": True, 36 | "label_smooth_factor": 0.1, 37 | "lr_init": 0, 38 | "lr_max": 0.8, 39 | "lr_end": 0.0 40 | }) 41 | -------------------------------------------------------------------------------- /chapter3/resnet_ascend/src/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Create train or eval dataset.""" 16 | import os 17 | import mindspore.common.dtype as mstype 18 | import mindspore.dataset.engine as de 19 | import mindspore.dataset.vision.c_transforms as C 20 | import mindspore.dataset.transforms.c_transforms as C2 21 | 22 | 23 | def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): 24 | """ 25 | Create a train or eval dataset. 26 | 27 | Args: 28 | dataset_path (str): The path of dataset. 29 | do_train (bool): Whether dataset is used for train or eval. 30 | repeat_num (int): The repeat times of dataset. Default: 1. 31 | batch_size (int): The batch size of dataset. Default: 32. 32 | 33 | Returns: 34 | Dataset. 35 | """ 36 | device_num, rank_id = _get_rank_info() 37 | 38 | if device_num == 1: 39 | ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True) 40 | else: 41 | ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True, 42 | num_shards=device_num, shard_id=rank_id) 43 | 44 | image_size = 224 45 | mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] 46 | std = [0.229 * 255, 0.224 * 255, 0.225 * 255] 47 | 48 | # define map operations 49 | if do_train: 50 | trans = [ 51 | C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)), 52 | C.RandomHorizontalFlip(prob=0.5), 53 | C.Normalize(mean=mean, std=std), 54 | C.HWC2CHW() 55 | ] 56 | else: 57 | trans = [ 58 | C.Decode(), 59 | C.Resize(256), 60 | C.CenterCrop(image_size), 61 | C.Normalize(mean=mean, std=std), 62 | C.HWC2CHW() 63 | ] 64 | 65 | type_cast_op = C2.TypeCast(mstype.int32) 66 | 67 | ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) 68 | ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) 69 | 70 | # apply batch operations 71 | ds = ds.batch(batch_size, drop_remainder=True) 72 | 73 | # apply dataset repeat operation 74 | ds = ds.repeat(repeat_num) 75 | 76 | return ds 77 | 78 | 79 | def _get_rank_info(): 80 | """ 81 | get rank size and rank id 82 | """ 83 | rank_size = int(os.environ.get("RANK_SIZE", 1)) 84 | rank_id = 0 85 | 86 | return rank_size, rank_id 87 | -------------------------------------------------------------------------------- /chapter3/resnet_gpu/README.md: -------------------------------------------------------------------------------- 1 | # MindSpore ResNet-50毒蘑菇识别教程指导(GPU环境) 2 | 3 | 该教程旨在指导大家使用GPU资源完成MindSpore ResNet-50毒蘑菇识别的教程。 4 | 5 | > **注意:** 该教程的代码是基于`v1.0`版本的MindSpore [ModelZoo](https://github.com/mindspore-ai/mindspore/tree/r1.0/model_zoo/official/cv/resnet)开发完成的。 6 | 7 | ## 上手指导 8 | 9 | ### 安装系统库 10 | 11 | * 系统库 12 | 13 | ``` 14 | sudo apt install -y unzip 15 | ``` 16 | 17 | * Python库 18 | 19 | ``` 20 | pip install opencv-python 21 | ``` 22 | 23 | * MindSpore (**v1.0**) 24 | 25 | MindSpore的安装教程请移步至 [MindSpore安装页面](https://www.mindspore.cn/install). 26 | 27 | ### 下载蘑菇数据集 28 | 29 | ``` 30 | cd mushroom-dataset/ && wget https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/mushrooms/mushrooms.zip 31 | unzip mushrooms.zip && rm mushrooms.zip 32 | cd ../resnet_gpu/ 33 | ``` 34 | 35 | 或者您可以直接点击 [https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/mushrooms/mushrooms.zip](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/mushrooms/mushrooms.zip) 从浏览器中下载该数据集,手动解压。 36 | 37 | ### 模型训练 38 | 39 | ``` 40 | python train.py --dataset_path ../mushroom-dataset/train 41 | ``` 42 | ``` 43 | epoch: 90 step: 201, loss is 1.2514226 44 | epoch: 90 step: 202, loss is 1.033073 45 | epoch: 90 step: 203, loss is 1.3107909 46 | epoch: 90 step: 204, loss is 1.0333312 47 | epoch: 90 step: 205, loss is 1.255686 48 | epoch: 90 step: 206, loss is 1.1876906 49 | epoch: 90 step: 207, loss is 1.123241 50 | epoch: 90 step: 208, loss is 1.2708801 51 | epoch: 90 step: 209, loss is 1.1666311 52 | Epoch time: 11931.645, per step time: 57.089 53 | ``` 54 | 55 | ### 下载ResNet-50预训练模型(推理任务使用) 56 | 57 | ``` 58 | cd ./ckpt_files && wget https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/ckpt_files/resnet-90_209.ckpt 59 | ``` 60 | 61 | 或者您可以直接点击 [https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/ckpt_files/resnet-90_209.ckpt](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/resnet-50/ckpt_files/resnet-90_209.ckpt) 从浏览器中下载预训练模型。 62 | 63 | ### 模型精度验证 64 | 65 | ``` 66 | python eval.py --checkpoint_path ./ckpt_files/resnet-90_209.ckpt --dataset_path ../mushroom-dataset/eval 67 | ``` 68 | ``` 69 | result: {'top_5_accuracy': 0.9895833333333334, 'top_1_accuracy': 0.78125} ckpt= resnet-90_209.ckpt 70 | ``` 71 | 72 | ### 模型推理 73 | 74 | ``` 75 | python predict.py --checkpoint_path ./ckpt_files/resnet-90_209.ckpt --image_path ./tum.jpg 76 | ``` 77 | ``` 78 | 预测的蘑菇标签为: 79 | Agaricus双孢蘑菇,伞菌目,蘑菇科,蘑菇属,广泛分布于北半球温带,无毒 80 | ``` 81 | 82 | ## 许可证 83 | 84 | [Apache License 2.0](../../LICENSE) 85 | -------------------------------------------------------------------------------- /chapter3/resnet_gpu/ckpt_files/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter3/resnet_gpu/ckpt_files/.gitkeep -------------------------------------------------------------------------------- /chapter3/resnet_gpu/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """train resnet.""" 16 | import os 17 | import argparse 18 | from mindspore import context 19 | from mindspore.common import set_seed 20 | from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits 21 | from mindspore.train.model import Model 22 | from mindspore.train.serialization import load_checkpoint, load_param_into_net 23 | from src.CrossEntropySmooth import CrossEntropySmooth 24 | 25 | parser = argparse.ArgumentParser(description='Image classification') 26 | parser.add_argument('--net', type=str, default='resnet50', 27 | help='Resnet Model, either resnet50 or resnet101. Default: resnet50') 28 | parser.add_argument('--dataset', type=str, default='imagenet2012', 29 | help='Dataset, either cifar10 or imagenet2012. Default: imagenet2012') 30 | parser.add_argument('--checkpoint_path', required=True, type=str, default=None, help='Checkpoint file path') 31 | parser.add_argument('--dataset_path', required=True, type=str, default=None, help='Dataset path') 32 | parser.add_argument('--device_target', type=str, default='GPU', help='Device target. Default: GPU') 33 | args_opt = parser.parse_args() 34 | 35 | set_seed(1) 36 | 37 | if args_opt.net == "resnet50": 38 | from src.resnet import resnet50 as resnet 39 | if args_opt.dataset == "cifar10": 40 | from src.config import config1 as config 41 | from src.dataset import create_dataset1 as create_dataset 42 | else: 43 | from src.config import config2 as config 44 | from src.dataset import create_dataset2 as create_dataset 45 | elif args_opt.net == "resnet101": 46 | from src.resnet import resnet101 as resnet 47 | from src.config import config3 as config 48 | from src.dataset import create_dataset3 as create_dataset 49 | else: 50 | from src.resnet import se_resnet50 as resnet 51 | from src.config import config4 as config 52 | from src.dataset import create_dataset4 as create_dataset 53 | 54 | if __name__ == '__main__': 55 | target = args_opt.device_target 56 | 57 | # init context 58 | context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False) 59 | if target != "GPU": 60 | device_id = int(os.getenv('DEVICE_ID')) 61 | context.set_context(device_id=device_id) 62 | 63 | # create dataset 64 | dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size, 65 | target=target) 66 | step_size = dataset.get_dataset_size() 67 | 68 | # define net 69 | net = resnet(class_num=config.class_num) 70 | 71 | # load checkpoint 72 | param_dict = load_checkpoint(args_opt.checkpoint_path) 73 | load_param_into_net(net, param_dict) 74 | net.set_train(False) 75 | 76 | # define loss, model 77 | if args_opt.dataset == "imagenet2012": 78 | if not config.use_label_smooth: 79 | config.label_smooth_factor = 0.0 80 | loss = CrossEntropySmooth(sparse=True, reduction='mean', 81 | smooth_factor=config.label_smooth_factor, num_classes=config.class_num) 82 | else: 83 | loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') 84 | 85 | # define model 86 | model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) 87 | 88 | # eval model 89 | res = model.eval(dataset) 90 | print("result:", res, "ckpt=", args_opt.checkpoint_path) 91 | -------------------------------------------------------------------------------- /chapter3/resnet_gpu/predict.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """predict resnet.""" 16 | from src.resnet import resnet50 as resnet 17 | from src.config import config2 as config 18 | 19 | import cv2 20 | import argparse 21 | import numpy as np 22 | import mindspore 23 | from mindspore import context, Tensor 24 | from mindspore.common import set_seed 25 | from mindspore.train.serialization import load_checkpoint, load_param_into_net 26 | 27 | parser = argparse.ArgumentParser(description='Image classification') 28 | 29 | parser.add_argument('--checkpoint_path', required=True, type=str, default=None, help='Checkpoint file path') 30 | parser.add_argument('--image_path', required=True, type=str, default=None, help='Image path') 31 | parser.add_argument('--device_target', type=str, default='GPU', help='Device target. Default: GPU') 32 | args_opt = parser.parse_args() 33 | 34 | set_seed(1) 35 | 36 | label_list = ["Agaricus双孢蘑菇,伞菌目,蘑菇科,蘑菇属,广泛分布于北半球温带,无毒", 37 | "Amanita毒蝇伞,伞菌目,鹅膏菌科,鹅膏菌属,主要分布于我国黑龙江、吉林、四川、西藏、云南等地,有毒", 38 | "Boletus丽柄牛肝菌,伞菌目,牛肝菌科,牛肝菌属,分布于云南、陕西、甘肃、西藏等地,有毒", 39 | "Cortinarius掷丝膜菌,伞菌目,丝膜菌科,丝膜菌属,分布于湖南等地(夏秋季在山毛等阔叶林地上生长)", 40 | "Entoloma霍氏粉褶菌,伞菌目,粉褶菌科,粉褶菌属,主要分布于新西兰北岛和南岛西部,有毒", 41 | "Hygrocybe浅黄褐湿伞,伞菌目,蜡伞科,湿伞属,分布于香港(见于松仔园),有毒", 42 | "Lactarius松乳菇,红菇目,红菇科,乳菇属,广泛分布于亚热带松林地,无毒", 43 | "Russula褪色红菇,伞菌目,红菇科,红菇属,分布于河北、吉林、四川、江苏、西藏等地,无毒", 44 | "Suillus乳牛肝菌,牛肝菌目,乳牛肝菌科,乳牛肝菌属,分布于吉林、辽宁、山西、安徽、江西、浙江、湖南、四川、贵州等地,无毒", 45 | ] 46 | 47 | 48 | def _crop_center(img, cropx, cropy): 49 | y, x, _ = img.shape 50 | startx = x // 2 - (cropx // 2) 51 | starty = y // 2 - (cropy // 2) 52 | return img[starty:starty + cropy, startx:startx + cropx, :] 53 | 54 | 55 | def _normalize(img, mean, std): 56 | # This method is borrowed from: 57 | # https://github.com/open-mmlab/mmcv/blob/master/mmcv/image/photometric.py 58 | assert img.dtype != np.uint8 59 | mean = np.float64(mean.reshape(1, -1)) 60 | stdinv = 1 / np.float64(std.reshape(1, -1)) 61 | cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) 62 | cv2.subtract(img, mean, img) 63 | cv2.multiply(img, stdinv, img) 64 | return img 65 | 66 | 67 | def data_preprocess(img_path): 68 | img = cv2.imread(img_path, 1) 69 | img = cv2.resize(img, (256, 256)) 70 | img = _crop_center(img, 224, 224) 71 | mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] 72 | std = [0.229 * 255, 0.224 * 255, 0.225 * 255] 73 | img = _normalize(img.astype(np.float32), np.asarray(mean), np.asarray(std)) 74 | img = img.transpose(2, 0, 1) 75 | 76 | return img 77 | 78 | 79 | if __name__ == '__main__': 80 | target = args_opt.device_target 81 | 82 | # init context 83 | context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False) 84 | 85 | # define net 86 | net = resnet(class_num=config.class_num) 87 | 88 | # load checkpoint 89 | param_dict = load_checkpoint(args_opt.checkpoint_path) 90 | load_param_into_net(net, param_dict) 91 | net.set_train(False) 92 | 93 | # preprocess the image 94 | img = data_preprocess(args_opt.image_path) 95 | # predict model 96 | res = net(Tensor(img.reshape((1, 3, 224, 224)), mindspore.float32)).asnumpy() 97 | 98 | predict_label = label_list[res[0].argmax()] 99 | print("预测的蘑菇标签为:\n\t"+predict_label+"\n") 100 | -------------------------------------------------------------------------------- /chapter3/resnet_gpu/src/CrossEntropySmooth.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """define loss function for network""" 16 | import mindspore.nn as nn 17 | from mindspore import Tensor 18 | from mindspore.common import dtype as mstype 19 | from mindspore.nn.loss.loss import _Loss 20 | from mindspore.ops import functional as F 21 | from mindspore.ops import operations as P 22 | 23 | 24 | class CrossEntropySmooth(_Loss): 25 | """CrossEntropy""" 26 | def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000): 27 | super(CrossEntropySmooth, self).__init__() 28 | self.onehot = P.OneHot() 29 | self.sparse = sparse 30 | self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) 31 | self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) 32 | self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction) 33 | 34 | def construct(self, logit, label): 35 | if self.sparse: 36 | label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) 37 | loss = self.ce(logit, label) 38 | return loss 39 | -------------------------------------------------------------------------------- /chapter3/resnet_gpu/src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | network config setting, will be used in train.py and eval.py 17 | """ 18 | from easydict import EasyDict as ed 19 | 20 | # config for resent50, cifar10 21 | config1 = ed({ 22 | "class_num": 10, 23 | "batch_size": 32, 24 | "loss_scale": 1024, 25 | "momentum": 0.9, 26 | "weight_decay": 1e-4, 27 | "epoch_size": 90, 28 | "pretrain_epoch_size": 0, 29 | "save_checkpoint": True, 30 | "save_checkpoint_epochs": 5, 31 | "keep_checkpoint_max": 10, 32 | "save_checkpoint_path": "./", 33 | "warmup_epochs": 5, 34 | "lr_decay_mode": "poly", 35 | "lr_init": 0.01, 36 | "lr_end": 0.00001, 37 | "lr_max": 0.1 38 | }) 39 | 40 | # config for resnet50, imagenet2012 41 | config2 = ed({ 42 | "class_num": 9, 43 | "batch_size": 32, 44 | "loss_scale": 1024, 45 | "momentum": 0.9, 46 | "weight_decay": 1e-4, 47 | "epoch_size": 90, 48 | "pretrain_epoch_size": 0, 49 | "save_checkpoint": True, 50 | "save_checkpoint_epochs": 5, 51 | "keep_checkpoint_max": 10, 52 | "save_checkpoint_path": "./", 53 | "warmup_epochs": 0, 54 | "lr_decay_mode": "linear", 55 | "use_label_smooth": True, 56 | "label_smooth_factor": 0.1, 57 | "lr_init": 0, 58 | "lr_max": 0.8, 59 | "lr_end": 0.0 60 | }) 61 | 62 | # config for resent101, imagenet2012 63 | config3 = ed({ 64 | "class_num": 1001, 65 | "batch_size": 32, 66 | "loss_scale": 1024, 67 | "momentum": 0.9, 68 | "weight_decay": 1e-4, 69 | "epoch_size": 120, 70 | "pretrain_epoch_size": 0, 71 | "save_checkpoint": True, 72 | "save_checkpoint_epochs": 5, 73 | "keep_checkpoint_max": 10, 74 | "save_checkpoint_path": "./", 75 | "warmup_epochs": 0, 76 | "lr_decay_mode": "cosine", 77 | "use_label_smooth": True, 78 | "label_smooth_factor": 0.1, 79 | "lr": 0.1 80 | }) 81 | 82 | # config for se-resnet50, imagenet2012 83 | config4 = ed({ 84 | "class_num": 1001, 85 | "batch_size": 32, 86 | "loss_scale": 1024, 87 | "momentum": 0.9, 88 | "weight_decay": 1e-4, 89 | "epoch_size": 28, 90 | "train_epoch_size": 24, 91 | "pretrain_epoch_size": 0, 92 | "save_checkpoint": True, 93 | "save_checkpoint_epochs": 4, 94 | "keep_checkpoint_max": 10, 95 | "save_checkpoint_path": "./", 96 | "warmup_epochs": 3, 97 | "lr_decay_mode": "cosine", 98 | "use_label_smooth": True, 99 | "label_smooth_factor": 0.1, 100 | "lr_init": 0.0, 101 | "lr_max": 0.3, 102 | "lr_end": 0.0001 103 | }) 104 | -------------------------------------------------------------------------------- /chapter3/resnet_gpu/tum.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter3/resnet_gpu/tum.jpg -------------------------------------------------------------------------------- /chapter4/README.md: -------------------------------------------------------------------------------- 1 | # ms-yolov3-basketball 2 | 3 | 该教程旨在指导大家使用`Ascend`/`GPU`资源完成MindSpore YOLOv3-DarkNet53篮球检测的教程。 4 | 5 | > **注意:** 因容量原因,将作业指导文档移出repo,体验作业和进阶作业文档可从如下链接中获取: 6 | 7 | * 体验作业文档 8 | 9 | [YOLOv3体验作业](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/demo/YOLOv3%E4%BD%93%E9%AA%8C%E4%BD%9C%E4%B8%9A.pdf) 10 | 11 | * 进阶作业文档 12 | 13 | [YOLOv3进阶作业](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/demo/YOLOv3%E8%BF%9B%E9%98%B6%E4%BD%9C%E4%B8%9A.pdf) 14 | 15 | * YOLOv4-CSPDarkNet53预训练模型 16 | 17 | [YOLOv4预训练模型下载地址](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/ckpt_files/yolov4-320_20800.ckpt) 18 | 19 | * 视频教程 20 | 21 | [基于YOLOV3-DarkNet50的篮球检测模型](https://www.bilibili.com/video/BV1PT4y1c7gp) 22 | 23 | ## 免责声明 24 | 25 | MindSpore ModelZoo only provides scripts that downloads and preprocesses public datasets. We do not own these datasets and are not responsible for their quality or maintenance. Please make sure you have permission to use the dataset under the dataset’s license. The models trained on these dataset are for non-commercial research and educational purpose only. 26 | 27 | To dataset owners: we will remove or update all public content upon request if you don’t want your dataset included on MindSpore ModelZoo, or wish to update it in any way. Please contact us through a [Gitee](https://gitee.com/mindspore/mindspore/issues)/[GitHub](https://github.com/mindspore-ai/mindspore/issues) issue. Your understanding and contribution to this community is greatly appreciated. 28 | 29 | ## 许可证 30 | 31 | [Apache License 2.0](LICENSE) 32 | -------------------------------------------------------------------------------- /chapter4/basketball-dataset/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/basketball-dataset/.gitkeep -------------------------------------------------------------------------------- /chapter4/docs/00086.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/docs/00086.jpg -------------------------------------------------------------------------------- /chapter4/docs/data_upload_obs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/docs/data_upload_obs.jpg -------------------------------------------------------------------------------- /chapter4/docs/output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/docs/output.jpg -------------------------------------------------------------------------------- /chapter4/docs/yolov3_evalconfig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/docs/yolov3_evalconfig.jpg -------------------------------------------------------------------------------- /chapter4/docs/yolov3_predictconfig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/docs/yolov3_predictconfig.jpg -------------------------------------------------------------------------------- /chapter4/yolov3_ascend/README.md: -------------------------------------------------------------------------------- 1 | # MindSpore YOLOv3-DarkNet53篮球检测教程指导(Ascend环境) 2 | 3 | 该教程旨在指导大家通过[ModelArts云服务](https://www.huaweicloud.com/product/modelarts.html)完成MindSpore YOLOv3-DarkNet53篮球检测的教程。 4 | 5 | > **注意:** 该教程的代码是基于`v0.5`版本的MindSpore [ModelZoo](https://gitee.com/mindspore/mindspore/tree/r0.5/model_zoo/yolov3_darknet53)开发完成的。 6 | 7 | > **注意:** 考虑到预训练过程会占用大量时间,本次课程我们不会提供完整的数据集用于模型训练,但我们会提供YOLOv3预训练模型以及测试数据集,方便大家用于模型验证和推理工作。 8 | 9 | ## 上手指导 10 | 11 | ### 数据准备 12 | 13 | * 下载测试数据集(验证任务使用) 14 | 15 | ``` 16 | cd basketball-dataset/ && wget https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3-darknet53/basketball-dataset/basketball-dataset.zip 17 | unzip basketball-dataset.zip && rm basketball-dataset.zip 18 | ``` 19 | 20 | 或者您可以直接点击 [https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/basketball-dataset/basketball-dataset.zip](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/basketball-dataset/basketball-dataset.zip) 从浏览器中下载该数据集,手动解压。 21 | 22 | * 下载YOLOv3-DarkNet53预训练模型(验证/推理任务使用) 23 | 24 | ``` 25 | cd ../resnet_ascend/ckpt_files && wget https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/ckpt_files/yolov3-320_168000.ckpt 26 | ``` 27 | 28 | 或者您可以直接点击 [https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/ckpt_files/yolov3-320_168000.ckpt](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/ckpt_files/yolov3-320_168000.ckpt) 从浏览器中下载预训练模型。 29 | 30 | * 将数据集、预训练模型以及源代码上传到OBS服务 31 | 32 | 请将前面下载的篮球数据集、预训练模型和源代码上传到[华为OBS云服务](https://www.huaweicloud.com/product/obs.html),上传格式如下: 33 | 34 | OBS Data Upload 35 | 36 | ### 模型验证 37 | 38 | 首先,用户需要手动配置验证任务的参数,格式如下: 39 | 40 | YOLOv3 Evaluation Config 41 | 42 | 然后根据如下视频启动ModelArts训练任务: 43 | 44 | [观看视频](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/demo/yolov3_eval_demo.mp4) 45 | 46 | ### 模型推理 47 | 48 | 首先,用户需要手动配置推理任务的参数,格式如下: 49 | 50 | YOLOv3 Predict Config 51 | 52 | 然后根据如下视频启动ModelArts推理任务: 53 | 54 | [观看视频](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/demo/yolov3_predict_demo.mp4) 55 | 56 | 输入图片: 57 | 58 | Input Image 59 | 60 | 输出结果: 61 | 62 | Output Image 63 | 64 | ## 许可证 65 | 66 | [Apache License 2.0](../../LICENSE) 67 | -------------------------------------------------------------------------------- /chapter4/yolov3_ascend/ckpt_files/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/yolov3_ascend/ckpt_files/.gitkeep -------------------------------------------------------------------------------- /chapter4/yolov3_ascend/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/yolov3_ascend/src/__init__.py -------------------------------------------------------------------------------- /chapter4/yolov3_ascend/src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Config parameters for Darknet based yolov3_darknet53 models.""" 16 | 17 | 18 | class ConfigYOLOV3DarkNet53: 19 | """ 20 | Config parameters for the yolov3_darknet53. 21 | 22 | Examples: 23 | ConfigYOLOV3DarkNet53() 24 | """ 25 | # train_param 26 | # data augmentation related 27 | hue = 0.1 28 | saturation = 1.5 29 | value = 1.5 30 | jitter = 0.3 31 | 32 | resize_rate = 1 33 | multi_scale = [[320, 320], 34 | [352, 352], 35 | [384, 384], 36 | [416, 416], 37 | [448, 448], 38 | [480, 480], 39 | [512, 512], 40 | [544, 544], 41 | [576, 576], 42 | [608, 608] 43 | ] 44 | 45 | num_classes = 5 46 | max_box = 50 47 | 48 | backbone_input_shape = [32, 64, 128, 256, 512] 49 | backbone_shape = [64, 128, 256, 512, 1024] 50 | backbone_layers = [1, 2, 8, 8, 4] 51 | 52 | # confidence under ignore_threshold means no object when training 53 | ignore_threshold = 0.7 54 | 55 | # h->w 56 | anchor_scales = [(10, 13), 57 | (16, 30), 58 | (33, 23), 59 | (30, 61), 60 | (62, 45), 61 | (59, 119), 62 | (116, 90), 63 | (156, 198), 64 | (373, 326)] 65 | out_channel = (num_classes + 5) * 3 66 | 67 | # test_param 68 | test_img_shape = [416, 416] 69 | -------------------------------------------------------------------------------- /chapter4/yolov3_ascend/src/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Yolo dataset distributed sampler.""" 16 | from __future__ import division 17 | import math 18 | import numpy as np 19 | 20 | 21 | class DistributedSampler: 22 | """Distributed sampler.""" 23 | def __init__(self, dataset_size, num_replicas=None, rank=None, shuffle=True): 24 | if num_replicas is None: 25 | print("***********Setting world_size to 1 since it is not passed in ******************") 26 | num_replicas = 1 27 | if rank is None: 28 | print("***********Setting rank to 0 since it is not passed in ******************") 29 | rank = 0 30 | self.dataset_size = dataset_size 31 | self.num_replicas = num_replicas 32 | self.rank = rank 33 | self.epoch = 0 34 | self.num_samples = int(math.ceil(dataset_size * 1.0 / self.num_replicas)) 35 | self.total_size = self.num_samples * self.num_replicas 36 | self.shuffle = shuffle 37 | 38 | def __iter__(self): 39 | # deterministically shuffle based on epoch 40 | if self.shuffle: 41 | indices = np.random.RandomState(seed=self.epoch).permutation(self.dataset_size) 42 | # np.array type. number from 0 to len(dataset_size)-1, used as index of dataset 43 | indices = indices.tolist() 44 | self.epoch += 1 45 | # change to list type 46 | else: 47 | indices = list(range(self.dataset_size)) 48 | 49 | # add extra samples to make it evenly divisible 50 | indices += indices[:(self.total_size - len(indices))] 51 | assert len(indices) == self.total_size 52 | 53 | # subsample 54 | indices = indices[self.rank:self.total_size:self.num_replicas] 55 | assert len(indices) == self.num_samples 56 | 57 | return iter(indices) 58 | 59 | def __len__(self): 60 | return self.num_samples 61 | -------------------------------------------------------------------------------- /chapter4/yolov3_ascend/src/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Custom Logger.""" 16 | import os 17 | import sys 18 | import logging 19 | from datetime import datetime 20 | 21 | 22 | class LOGGER(logging.Logger): 23 | """ 24 | Logger. 25 | 26 | Args: 27 | logger_name: String. Logger name. 28 | rank: Integer. Rank id. 29 | """ 30 | def __init__(self, logger_name, rank=0): 31 | super(LOGGER, self).__init__(logger_name) 32 | self.rank = rank 33 | if rank % 8 == 0: 34 | console = logging.StreamHandler(sys.stdout) 35 | console.setLevel(logging.INFO) 36 | formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s') 37 | console.setFormatter(formatter) 38 | self.addHandler(console) 39 | 40 | def setup_logging_file(self, log_dir, rank=0): 41 | """Setup logging file.""" 42 | self.rank = rank 43 | if not os.path.exists(log_dir): 44 | os.makedirs(log_dir, exist_ok=True) 45 | log_name = datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S') + '_rank_{}.log'.format(rank) 46 | self.log_fn = os.path.join(log_dir, log_name) 47 | fh = logging.FileHandler(self.log_fn) 48 | fh.setLevel(logging.INFO) 49 | formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s') 50 | fh.setFormatter(formatter) 51 | self.addHandler(fh) 52 | 53 | def info(self, msg, *args, **kwargs): 54 | if self.isEnabledFor(logging.INFO): 55 | self._log(logging.INFO, msg, args, **kwargs) 56 | 57 | def save_args(self, args): 58 | self.info('Args:') 59 | args_dict = vars(args) 60 | for key in args_dict.keys(): 61 | self.info('--> %s: %s', key, args_dict[key]) 62 | self.info('') 63 | 64 | def important_info(self, msg, *args, **kwargs): 65 | if self.isEnabledFor(logging.INFO) and self.rank == 0: 66 | line_width = 2 67 | important_msg = '\n' 68 | important_msg += ('*'*70 + '\n')*line_width 69 | important_msg += ('*'*line_width + '\n')*2 70 | important_msg += '*'*line_width + ' '*8 + msg + '\n' 71 | important_msg += ('*'*line_width + '\n')*2 72 | important_msg += ('*'*70 + '\n')*line_width 73 | self.info(important_msg, *args, **kwargs) 74 | 75 | 76 | def get_logger(path, rank): 77 | """Get Logger.""" 78 | logger = LOGGER('yolov3_darknet53', rank) 79 | logger.setup_logging_file(path, rank) 80 | return logger 81 | -------------------------------------------------------------------------------- /chapter4/yolov3_ascend/src/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """YOLOV3 loss.""" 16 | import mindspore.nn as nn 17 | from mindspore.ops import operations as P 18 | 19 | 20 | class XYLoss(nn.Cell): 21 | """Loss for x and y.""" 22 | 23 | def __init__(self): 24 | super(XYLoss, self).__init__() 25 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 26 | self.reduce_sum = P.ReduceSum() 27 | 28 | def construct(self, object_mask, box_loss_scale, predict_xy, true_xy): 29 | xy_loss = object_mask * box_loss_scale * self.cross_entropy(predict_xy, true_xy) 30 | xy_loss = self.reduce_sum(xy_loss, ()) 31 | return xy_loss 32 | 33 | 34 | class WHLoss(nn.Cell): 35 | """Loss for w and h.""" 36 | 37 | def __init__(self): 38 | super(WHLoss, self).__init__() 39 | self.square = P.Square() 40 | self.reduce_sum = P.ReduceSum() 41 | 42 | def construct(self, object_mask, box_loss_scale, predict_wh, true_wh): 43 | wh_loss = object_mask * box_loss_scale * 0.5 * P.Square()(true_wh - predict_wh) 44 | wh_loss = self.reduce_sum(wh_loss, ()) 45 | return wh_loss 46 | 47 | 48 | class ConfidenceLoss(nn.Cell): 49 | """Loss for confidence.""" 50 | 51 | def __init__(self): 52 | super(ConfidenceLoss, self).__init__() 53 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 54 | self.reduce_sum = P.ReduceSum() 55 | 56 | def construct(self, object_mask, predict_confidence, ignore_mask): 57 | confidence_loss = self.cross_entropy(predict_confidence, object_mask) 58 | confidence_loss = object_mask * confidence_loss + (1 - object_mask) * confidence_loss * ignore_mask 59 | confidence_loss = self.reduce_sum(confidence_loss, ()) 60 | return confidence_loss 61 | 62 | 63 | class ClassLoss(nn.Cell): 64 | """Loss for classification.""" 65 | 66 | def __init__(self): 67 | super(ClassLoss, self).__init__() 68 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 69 | self.reduce_sum = P.ReduceSum() 70 | 71 | def construct(self, object_mask, predict_class, class_probs): 72 | class_loss = object_mask * self.cross_entropy(predict_class, class_probs) 73 | class_loss = self.reduce_sum(class_loss, ()) 74 | return class_loss 75 | -------------------------------------------------------------------------------- /chapter4/yolov3_ascend/src/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Util class or function.""" 16 | from mindspore.train.serialization import load_checkpoint, load_param_into_net 17 | 18 | 19 | class AverageMeter: 20 | """Computes and stores the average and current value""" 21 | 22 | def __init__(self, name, fmt=':f', tb_writer=None): 23 | self.name = name 24 | self.fmt = fmt 25 | self.reset() 26 | self.tb_writer = tb_writer 27 | self.cur_step = 1 28 | self.val = 0 29 | self.avg = 0 30 | self.sum = 0 31 | self.count = 0 32 | 33 | def reset(self): 34 | self.val = 0 35 | self.avg = 0 36 | self.sum = 0 37 | self.count = 0 38 | 39 | def update(self, val, n=1): 40 | self.val = val 41 | self.sum += val * n 42 | self.count += n 43 | self.avg = self.sum / self.count 44 | if self.tb_writer is not None: 45 | self.tb_writer.add_scalar(self.name, self.val, self.cur_step) 46 | self.cur_step += 1 47 | 48 | def __str__(self): 49 | fmtstr = '{name}:{avg' + self.fmt + '}' 50 | return fmtstr.format(**self.__dict__) 51 | 52 | 53 | def load_backbone(net, ckpt_path, args): 54 | """Load darknet53 backbone checkpoint.""" 55 | param_dict = load_checkpoint(ckpt_path) 56 | net.init_parameters_data() 57 | load_param_into_net(net, param_dict) 58 | 59 | args.logger.info('===== load {} successfully ====='.format(ckpt_path)) 60 | 61 | return net 62 | 63 | 64 | def default_wd_filter(x): 65 | """default weight decay filter.""" 66 | parameter_name = x.name 67 | if parameter_name.endswith('.bias'): 68 | # all bias not using weight decay 69 | return False 70 | if parameter_name.endswith('.gamma'): 71 | # bn weight bias not using weight decay, be carefully for now x not include BN 72 | return False 73 | if parameter_name.endswith('.beta'): 74 | # bn weight bias not using weight decay, be carefully for now x not include BN 75 | return False 76 | 77 | return True 78 | 79 | 80 | def get_param_groups(network): 81 | """Param groups for optimizer.""" 82 | decay_params = [] 83 | no_decay_params = [] 84 | for x in network.trainable_params(): 85 | parameter_name = x.name 86 | if parameter_name.endswith('.bias'): 87 | # all bias not using weight decay 88 | no_decay_params.append(x) 89 | elif parameter_name.endswith('.gamma'): 90 | # bn weight bias not using weight decay, be carefully for now x not include BN 91 | no_decay_params.append(x) 92 | elif parameter_name.endswith('.beta'): 93 | # bn weight bias not using weight decay, be carefully for now x not include BN 94 | no_decay_params.append(x) 95 | else: 96 | decay_params.append(x) 97 | 98 | return [{'params': no_decay_params, 'weight_decay': 0.0}, {'params': decay_params}] 99 | 100 | 101 | class ShapeRecord: 102 | """Log image shape.""" 103 | 104 | def __init__(self): 105 | self.shape_record = { 106 | 320: 0, 107 | 352: 0, 108 | 384: 0, 109 | 416: 0, 110 | 448: 0, 111 | 480: 0, 112 | 512: 0, 113 | 544: 0, 114 | 576: 0, 115 | 608: 0, 116 | 'total': 0 117 | } 118 | 119 | def set(self, shape): 120 | if len(shape) > 1: 121 | shape = shape[0] 122 | shape = int(shape) 123 | self.shape_record[shape] += 1 124 | self.shape_record['total'] += 1 125 | 126 | def show(self, logger): 127 | for key in self.shape_record: 128 | rate = self.shape_record[key] / float(self.shape_record['total']) 129 | logger.info('shape {}: {:.2f}%'.format(key, rate*100)) 130 | -------------------------------------------------------------------------------- /chapter4/yolov3_gpu/README.md: -------------------------------------------------------------------------------- 1 | # MindSpore YOLOv3-DarkNet53篮球检测教程指导(GPU环境) 2 | 3 | 该教程旨在指导大家使用GPU资源完成MindSpore YOLOv3-DarkNet53篮球检测的教程。 4 | 5 | > **注意:** 该教程的代码是基于`v1.0`版本的MindSpore [ModelZoo](https://github.com/mindspore-ai/mindspore/tree/r1.0/model_zoo/official/cv/yolov3_darknet53)开发完成的。 6 | 7 | > **注意:** 考虑到预训练过程会占用大量时间,本次课程我们不会提供完整的数据集用于模型训练,但我们会提供YOLOv3预训练模型以及测试数据集,方便大家用于模型验证和推理工作。 8 | 9 | ## 上手指导 10 | 11 | ### 安装系统库 12 | 13 | * 系统库 14 | 15 | ``` 16 | sudo apt install -y unzip 17 | ``` 18 | 19 | * Python库 20 | 21 | ``` 22 | pip install opencv-python pycocotools 23 | ``` 24 | 25 | * MindSpore (**v1.0**) 26 | 27 | MindSpore的安装教程请移步至 [MindSpore安装页面](https://www.mindspore.cn/install). 28 | 29 | ### 数据准备 30 | 31 | * 下载测试数据集(验证任务使用) 32 | 33 | ``` 34 | cd basketball-dataset/ && wget https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3-darknet53/basketball-dataset/basketball-dataset.zip 35 | unzip basketball-dataset.zip && rm basketball-dataset.zip 36 | ``` 37 | 38 | 或者您可以直接点击 [https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/basketball-dataset/basketball-dataset.zip](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/basketball-dataset/basketball-dataset.zip) 从浏览器中下载该数据集,手动解压。 39 | 40 | * 下载YOLOv3-DarkNet53预训练模型(验证/推理任务使用) 41 | 42 | ``` 43 | cd ../resnet_gpu/ckpt_files && wget https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/ckpt_files/yolov3-320_168000.ckpt 44 | ``` 45 | 46 | 或者您可以直接点击 [https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/ckpt_files/yolov3-320_168000.ckpt](https://ascend-tutorials.obs.cn-north-4.myhuaweicloud.com/yolov3_darknet53/ckpt_files/yolov3-320_168000.ckpt) 从浏览器中下载预训练模型。 47 | 48 | ### 模型验证 49 | 50 | ``` 51 | python eval.py --data_dir ../basketball-dataset/ --pretrained ./ckpt_files/yolov3-320_168000.ckpt 52 | ``` 53 | ``` 54 | =============coco eval result========= 55 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.568 56 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.829 57 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.716 58 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 59 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.550 60 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.568 61 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.339 62 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.645 63 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.646 64 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 65 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.600 66 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.646 67 | ``` 68 | 69 | ### 模型推理 70 | 71 | ``` 72 | python predict.py --image_path ./00086.jpg --pretrained ./ckpt_files/yolov3-320_168000.ckpt 73 | ``` 74 | 75 | 输入图片: 76 | 77 | Input Image 78 | 79 | 输出结果: 80 | 81 | Output Image 82 | 83 | ## 许可证 84 | 85 | [Apache License 2.0](../../LICENSE) 86 | -------------------------------------------------------------------------------- /chapter4/yolov3_gpu/ckpt_files/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/yolov3_gpu/ckpt_files/.gitkeep -------------------------------------------------------------------------------- /chapter4/yolov3_gpu/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter4/yolov3_gpu/src/__init__.py -------------------------------------------------------------------------------- /chapter4/yolov3_gpu/src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Config parameters for Darknet based yolov3_darknet53 models.""" 16 | 17 | 18 | class ConfigYOLOV3DarkNet53: 19 | """ 20 | Config parameters for the yolov3_darknet53. 21 | 22 | Examples: 23 | ConfigYOLOV3DarkNet53() 24 | """ 25 | # train_param 26 | # data augmentation related 27 | hue = 0.1 28 | saturation = 1.5 29 | value = 1.5 30 | jitter = 0.3 31 | 32 | resize_rate = 1 33 | multi_scale = [[320, 320], 34 | [352, 352], 35 | [384, 384], 36 | [416, 416], 37 | [448, 448], 38 | [480, 480], 39 | [512, 512], 40 | [544, 544], 41 | [576, 576], 42 | [608, 608] 43 | ] 44 | 45 | num_classes = 5 46 | max_box = 50 47 | 48 | backbone_input_shape = [32, 64, 128, 256, 512] 49 | backbone_shape = [64, 128, 256, 512, 1024] 50 | backbone_layers = [1, 2, 8, 8, 4] 51 | 52 | # confidence under ignore_threshold means no object when training 53 | ignore_threshold = 0.7 54 | 55 | # h->w 56 | anchor_scales = [(10, 13), 57 | (16, 30), 58 | (33, 23), 59 | (30, 61), 60 | (62, 45), 61 | (59, 119), 62 | (116, 90), 63 | (156, 198), 64 | (373, 326)] 65 | out_channel = (num_classes + 5) * 3 66 | 67 | # test_param 68 | test_img_shape = [416, 416] 69 | -------------------------------------------------------------------------------- /chapter4/yolov3_gpu/src/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Yolo dataset distributed sampler.""" 16 | from __future__ import division 17 | import math 18 | import numpy as np 19 | 20 | 21 | class DistributedSampler: 22 | """Distributed sampler.""" 23 | def __init__(self, dataset_size, num_replicas=None, rank=None, shuffle=True): 24 | if num_replicas is None: 25 | print("***********Setting world_size to 1 since it is not passed in ******************") 26 | num_replicas = 1 27 | if rank is None: 28 | print("***********Setting rank to 0 since it is not passed in ******************") 29 | rank = 0 30 | self.dataset_size = dataset_size 31 | self.num_replicas = num_replicas 32 | self.rank = rank 33 | self.epoch = 0 34 | self.num_samples = int(math.ceil(dataset_size * 1.0 / self.num_replicas)) 35 | self.total_size = self.num_samples * self.num_replicas 36 | self.shuffle = shuffle 37 | 38 | def __iter__(self): 39 | # deterministically shuffle based on epoch 40 | if self.shuffle: 41 | indices = np.random.RandomState(seed=self.epoch).permutation(self.dataset_size) 42 | # np.array type. number from 0 to len(dataset_size)-1, used as index of dataset 43 | indices = indices.tolist() 44 | self.epoch += 1 45 | # change to list type 46 | else: 47 | indices = list(range(self.dataset_size)) 48 | 49 | # add extra samples to make it evenly divisible 50 | indices += indices[:(self.total_size - len(indices))] 51 | assert len(indices) == self.total_size 52 | 53 | # subsample 54 | indices = indices[self.rank:self.total_size:self.num_replicas] 55 | assert len(indices) == self.num_samples 56 | 57 | return iter(indices) 58 | 59 | def __len__(self): 60 | return self.num_samples 61 | -------------------------------------------------------------------------------- /chapter4/yolov3_gpu/src/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Custom Logger.""" 16 | import os 17 | import sys 18 | import logging 19 | from datetime import datetime 20 | 21 | 22 | class LOGGER(logging.Logger): 23 | """ 24 | Logger. 25 | 26 | Args: 27 | logger_name: String. Logger name. 28 | rank: Integer. Rank id. 29 | """ 30 | def __init__(self, logger_name, rank=0): 31 | super(LOGGER, self).__init__(logger_name) 32 | self.rank = rank 33 | if rank % 8 == 0: 34 | console = logging.StreamHandler(sys.stdout) 35 | console.setLevel(logging.INFO) 36 | formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s') 37 | console.setFormatter(formatter) 38 | self.addHandler(console) 39 | 40 | def setup_logging_file(self, log_dir, rank=0): 41 | """Setup logging file.""" 42 | self.rank = rank 43 | if not os.path.exists(log_dir): 44 | os.makedirs(log_dir, exist_ok=True) 45 | log_name = datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S') + '_rank_{}.log'.format(rank) 46 | self.log_fn = os.path.join(log_dir, log_name) 47 | fh = logging.FileHandler(self.log_fn) 48 | fh.setLevel(logging.INFO) 49 | formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s') 50 | fh.setFormatter(formatter) 51 | self.addHandler(fh) 52 | 53 | def info(self, msg, *args, **kwargs): 54 | if self.isEnabledFor(logging.INFO): 55 | self._log(logging.INFO, msg, args, **kwargs) 56 | 57 | def save_args(self, args): 58 | self.info('Args:') 59 | args_dict = vars(args) 60 | for key in args_dict.keys(): 61 | self.info('--> %s: %s', key, args_dict[key]) 62 | self.info('') 63 | 64 | def important_info(self, msg, *args, **kwargs): 65 | if self.isEnabledFor(logging.INFO) and self.rank == 0: 66 | line_width = 2 67 | important_msg = '\n' 68 | important_msg += ('*'*70 + '\n')*line_width 69 | important_msg += ('*'*line_width + '\n')*2 70 | important_msg += '*'*line_width + ' '*8 + msg + '\n' 71 | important_msg += ('*'*line_width + '\n')*2 72 | important_msg += ('*'*70 + '\n')*line_width 73 | self.info(important_msg, *args, **kwargs) 74 | 75 | 76 | def get_logger(path, rank): 77 | """Get Logger.""" 78 | logger = LOGGER('yolov3_darknet53', rank) 79 | logger.setup_logging_file(path, rank) 80 | return logger 81 | -------------------------------------------------------------------------------- /chapter4/yolov3_gpu/src/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """YOLOV3 loss.""" 16 | from mindspore.ops import operations as P 17 | import mindspore.nn as nn 18 | 19 | 20 | class XYLoss(nn.Cell): 21 | """Loss for x and y.""" 22 | def __init__(self): 23 | super(XYLoss, self).__init__() 24 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 25 | self.reduce_sum = P.ReduceSum() 26 | 27 | def construct(self, object_mask, box_loss_scale, predict_xy, true_xy): 28 | xy_loss = object_mask * box_loss_scale * self.cross_entropy(predict_xy, true_xy) 29 | xy_loss = self.reduce_sum(xy_loss, ()) 30 | return xy_loss 31 | 32 | 33 | class WHLoss(nn.Cell): 34 | """Loss for w and h.""" 35 | def __init__(self): 36 | super(WHLoss, self).__init__() 37 | self.square = P.Square() 38 | self.reduce_sum = P.ReduceSum() 39 | 40 | def construct(self, object_mask, box_loss_scale, predict_wh, true_wh): 41 | wh_loss = object_mask * box_loss_scale * 0.5 * P.Square()(true_wh - predict_wh) 42 | wh_loss = self.reduce_sum(wh_loss, ()) 43 | return wh_loss 44 | 45 | 46 | class ConfidenceLoss(nn.Cell): 47 | """Loss for confidence.""" 48 | def __init__(self): 49 | super(ConfidenceLoss, self).__init__() 50 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 51 | self.reduce_sum = P.ReduceSum() 52 | 53 | def construct(self, object_mask, predict_confidence, ignore_mask): 54 | confidence_loss = self.cross_entropy(predict_confidence, object_mask) 55 | confidence_loss = object_mask * confidence_loss + (1 - object_mask) * confidence_loss * ignore_mask 56 | confidence_loss = self.reduce_sum(confidence_loss, ()) 57 | return confidence_loss 58 | 59 | 60 | class ClassLoss(nn.Cell): 61 | """Loss for classification.""" 62 | def __init__(self): 63 | super(ClassLoss, self).__init__() 64 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 65 | self.reduce_sum = P.ReduceSum() 66 | 67 | def construct(self, object_mask, predict_class, class_probs): 68 | class_loss = object_mask * self.cross_entropy(predict_class, class_probs) 69 | class_loss = self.reduce_sum(class_loss, ()) 70 | return class_loss 71 | -------------------------------------------------------------------------------- /chapter4/yolov3_gpu/src/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Util class or function.""" 16 | import mindspore.common.dtype as mstype 17 | from mindspore.train.serialization import load_checkpoint, load_param_into_net 18 | 19 | from .yolo import YoloLossBlock 20 | 21 | 22 | class AverageMeter: 23 | """Computes and stores the average and current value""" 24 | 25 | def __init__(self, name, fmt=':f', tb_writer=None): 26 | self.name = name 27 | self.fmt = fmt 28 | self.reset() 29 | self.tb_writer = tb_writer 30 | self.cur_step = 1 31 | self.val = 0 32 | self.avg = 0 33 | self.sum = 0 34 | self.count = 0 35 | 36 | def reset(self): 37 | self.val = 0 38 | self.avg = 0 39 | self.sum = 0 40 | self.count = 0 41 | 42 | def update(self, val, n=1): 43 | self.val = val 44 | self.sum += val * n 45 | self.count += n 46 | self.avg = self.sum / self.count 47 | if self.tb_writer is not None: 48 | self.tb_writer.add_scalar(self.name, self.val, self.cur_step) 49 | self.cur_step += 1 50 | 51 | def __str__(self): 52 | fmtstr = '{name}:{avg' + self.fmt + '}' 53 | return fmtstr.format(**self.__dict__) 54 | 55 | 56 | def load_backbone(net, ckpt_path, args): 57 | """Load darknet53 backbone checkpoint.""" 58 | param_dict = load_checkpoint(ckpt_path) 59 | net.init_parameters_data() 60 | load_param_into_net(net, param_dict) 61 | 62 | args.logger.info('===== load {} successfully ====='.format(ckpt_path)) 63 | 64 | return net 65 | 66 | 67 | def default_wd_filter(x): 68 | """default weight decay filter.""" 69 | parameter_name = x.name 70 | if parameter_name.endswith('.bias'): 71 | # all bias not using weight decay 72 | return False 73 | if parameter_name.endswith('.gamma'): 74 | # bn weight bias not using weight decay, be carefully for now x not include BN 75 | return False 76 | if parameter_name.endswith('.beta'): 77 | # bn weight bias not using weight decay, be carefully for now x not include BN 78 | return False 79 | 80 | return True 81 | 82 | 83 | def get_param_groups(network): 84 | """Param groups for optimizer.""" 85 | decay_params = [] 86 | no_decay_params = [] 87 | for x in network.trainable_params(): 88 | parameter_name = x.name 89 | if parameter_name.endswith('.bias'): 90 | # all bias not using weight decay 91 | no_decay_params.append(x) 92 | elif parameter_name.endswith('.gamma'): 93 | # bn weight bias not using weight decay, be carefully for now x not include BN 94 | no_decay_params.append(x) 95 | elif parameter_name.endswith('.beta'): 96 | # bn weight bias not using weight decay, be carefully for now x not include BN 97 | no_decay_params.append(x) 98 | else: 99 | decay_params.append(x) 100 | 101 | return [{'params': no_decay_params, 'weight_decay': 0.0}, {'params': decay_params}] 102 | 103 | 104 | class ShapeRecord: 105 | """Log image shape.""" 106 | 107 | def __init__(self): 108 | self.shape_record = { 109 | 320: 0, 110 | 352: 0, 111 | 384: 0, 112 | 416: 0, 113 | 448: 0, 114 | 480: 0, 115 | 512: 0, 116 | 544: 0, 117 | 576: 0, 118 | 608: 0, 119 | 'total': 0 120 | } 121 | 122 | def set(self, shape): 123 | if len(shape) > 1: 124 | shape = shape[0] 125 | shape = int(shape) 126 | self.shape_record[shape] += 1 127 | self.shape_record['total'] += 1 128 | 129 | def show(self, logger): 130 | for key in self.shape_record: 131 | rate = self.shape_record[key] / float(self.shape_record['total']) 132 | logger.info('shape {}: {:.2f}%'.format(key, rate*100)) 133 | 134 | 135 | def keep_loss_fp32(network): 136 | """Keep loss of network with float32""" 137 | for _, cell in network.cells_and_names(): 138 | if isinstance(cell, (YoloLossBlock,)): 139 | cell.to_float(mstype.float32) 140 | -------------------------------------------------------------------------------- /chapter4/yolov4_ascend/src/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | -------------------------------------------------------------------------------- /chapter4/yolov4_ascend/src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Config parameters for Darknet based yolov4_cspdarknet53 models.""" 16 | 17 | 18 | class ConfigYOLOV4CspDarkNet53: 19 | """ 20 | Config parameters for the yolov4_cspdarknet53. 21 | 22 | Examples: 23 | ConfigYOLOV4CspDarkNet53() 24 | """ 25 | # train_param 26 | # data augmentation related 27 | hue = 0.1 28 | saturation = 1.5 29 | value = 1.5 30 | jitter = 0.3 31 | 32 | resize_rate = 10 33 | multi_scale = [[416, 416], 34 | [448, 448], 35 | [480, 480], 36 | [512, 512], 37 | [544, 544], 38 | [576, 576], 39 | [608, 608], 40 | [640, 640], 41 | [672, 672], 42 | [704, 704], 43 | [736, 736] 44 | ] 45 | 46 | num_classes = 5 47 | max_box = 90 48 | 49 | backbone_input_shape = [32, 64, 128, 256, 512] 50 | backbone_shape = [64, 128, 256, 512, 1024] 51 | backbone_layers = [1, 2, 8, 8, 4] 52 | 53 | # confidence under ignore_threshold means no object when training 54 | ignore_threshold = 0.7 55 | 56 | # h->w 57 | anchor_scales = [(12, 16), 58 | (19, 36), 59 | (40, 28), 60 | (36, 75), 61 | (76, 55), 62 | (72, 146), 63 | (142, 110), 64 | (192, 243), 65 | (459, 401)] 66 | out_channel = (num_classes + 5) * 3 67 | 68 | # test_param 69 | test_img_shape = [416, 416] 70 | -------------------------------------------------------------------------------- /chapter4/yolov4_ascend/src/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Yolo dataset distributed sampler.""" 16 | from __future__ import division 17 | import math 18 | import numpy as np 19 | 20 | 21 | class DistributedSampler: 22 | """Distributed sampler.""" 23 | def __init__(self, dataset_size, num_replicas=None, rank=None, shuffle=True): 24 | if num_replicas is None: 25 | print("***********Setting world_size to 1 since it is not passed in ******************") 26 | num_replicas = 1 27 | if rank is None: 28 | print("***********Setting rank to 0 since it is not passed in ******************") 29 | rank = 0 30 | self.dataset_size = dataset_size 31 | self.num_replicas = num_replicas 32 | self.rank = rank 33 | self.epoch = 0 34 | self.num_samples = int(math.ceil(dataset_size * 1.0 / self.num_replicas)) 35 | self.total_size = self.num_samples * self.num_replicas 36 | self.shuffle = shuffle 37 | 38 | def __iter__(self): 39 | # deterministically shuffle based on epoch 40 | if self.shuffle: 41 | indices = np.random.RandomState(seed=self.epoch).permutation(self.dataset_size) 42 | # np.array type. number from 0 to len(dataset_size)-1, used as index of dataset 43 | indices = indices.tolist() 44 | self.epoch += 1 45 | # change to list type 46 | else: 47 | indices = list(range(self.dataset_size)) 48 | 49 | # add extra samples to make it evenly divisible 50 | indices += indices[:(self.total_size - len(indices))] 51 | assert len(indices) == self.total_size 52 | 53 | # subsample 54 | indices = indices[self.rank:self.total_size:self.num_replicas] 55 | assert len(indices) == self.num_samples 56 | 57 | return iter(indices) 58 | 59 | def __len__(self): 60 | return self.num_samples 61 | -------------------------------------------------------------------------------- /chapter4/yolov4_ascend/src/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Custom Logger.""" 16 | import os 17 | import sys 18 | import logging 19 | from datetime import datetime 20 | 21 | 22 | class LOGGER(logging.Logger): 23 | """ 24 | Logger. 25 | 26 | Args: 27 | logger_name: String. Logger name. 28 | rank: Integer. Rank id. 29 | """ 30 | def __init__(self, logger_name, rank=0): 31 | super(LOGGER, self).__init__(logger_name) 32 | self.rank = rank 33 | if rank % 8 == 0: 34 | console = logging.StreamHandler(sys.stdout) 35 | console.setLevel(logging.INFO) 36 | formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s') 37 | console.setFormatter(formatter) 38 | self.addHandler(console) 39 | 40 | def setup_logging_file(self, log_dir, rank=0): 41 | """Setup logging file.""" 42 | self.rank = rank 43 | if not os.path.exists(log_dir): 44 | os.makedirs(log_dir, exist_ok=True) 45 | log_name = datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S') + '_rank_{}.log'.format(rank) 46 | self.log_fn = os.path.join(log_dir, log_name) 47 | fh = logging.FileHandler(self.log_fn) 48 | fh.setLevel(logging.INFO) 49 | formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s') 50 | fh.setFormatter(formatter) 51 | self.addHandler(fh) 52 | 53 | def info(self, msg, *args, **kwargs): 54 | if self.isEnabledFor(logging.INFO): 55 | self._log(logging.INFO, msg, args, **kwargs) 56 | 57 | def save_args(self, args): 58 | self.info('Args:') 59 | args_dict = vars(args) 60 | for key in args_dict.keys(): 61 | self.info('--> %s: %s', key, args_dict[key]) 62 | self.info('') 63 | 64 | def important_info(self, msg, *args, **kwargs): 65 | if self.isEnabledFor(logging.INFO) and self.rank == 0: 66 | line_width = 2 67 | important_msg = '\n' 68 | important_msg += ('*'*70 + '\n')*line_width 69 | important_msg += ('*'*line_width + '\n')*2 70 | important_msg += '*'*line_width + ' '*8 + msg + '\n' 71 | important_msg += ('*'*line_width + '\n')*2 72 | important_msg += ('*'*70 + '\n')*line_width 73 | self.info(important_msg, *args, **kwargs) 74 | 75 | 76 | def get_logger(path, rank): 77 | """Get Logger.""" 78 | logger = LOGGER('yolov4_cspdarknet53', rank) 79 | logger.setup_logging_file(path, rank) 80 | return logger 81 | -------------------------------------------------------------------------------- /chapter4/yolov4_ascend/src/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """YOLOV4 loss.""" 16 | from mindspore.ops import operations as P 17 | import mindspore.nn as nn 18 | 19 | 20 | class XYLoss(nn.Cell): 21 | """Loss for x and y.""" 22 | def __init__(self): 23 | super(XYLoss, self).__init__() 24 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 25 | self.reduce_sum = P.ReduceSum() 26 | 27 | def construct(self, object_mask, box_loss_scale, predict_xy, true_xy): 28 | xy_loss = object_mask * box_loss_scale * self.cross_entropy(predict_xy, true_xy) 29 | xy_loss = self.reduce_sum(xy_loss, ()) 30 | return xy_loss 31 | 32 | 33 | class WHLoss(nn.Cell): 34 | """Loss for w and h.""" 35 | def __init__(self): 36 | super(WHLoss, self).__init__() 37 | self.square = P.Square() 38 | self.reduce_sum = P.ReduceSum() 39 | 40 | def construct(self, object_mask, box_loss_scale, predict_wh, true_wh): 41 | wh_loss = object_mask * box_loss_scale * 0.5 * P.Square()(true_wh - predict_wh) 42 | wh_loss = self.reduce_sum(wh_loss, ()) 43 | return wh_loss 44 | 45 | 46 | class ConfidenceLoss(nn.Cell): 47 | """Loss for confidence.""" 48 | def __init__(self): 49 | super(ConfidenceLoss, self).__init__() 50 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 51 | self.reduce_sum = P.ReduceSum() 52 | 53 | def construct(self, object_mask, predict_confidence, ignore_mask): 54 | confidence_loss = self.cross_entropy(predict_confidence, object_mask) 55 | confidence_loss = object_mask * confidence_loss + (1 - object_mask) * confidence_loss * ignore_mask 56 | confidence_loss = self.reduce_sum(confidence_loss, ()) 57 | return confidence_loss 58 | 59 | 60 | class ClassLoss(nn.Cell): 61 | """Loss for classification.""" 62 | def __init__(self): 63 | super(ClassLoss, self).__init__() 64 | self.cross_entropy = P.SigmoidCrossEntropyWithLogits() 65 | self.reduce_sum = P.ReduceSum() 66 | 67 | def construct(self, object_mask, predict_class, class_probs): 68 | class_loss = object_mask * self.cross_entropy(predict_class, class_probs) 69 | class_loss = self.reduce_sum(class_loss, ()) 70 | return class_loss 71 | -------------------------------------------------------------------------------- /chapter4/yolov4_ascend/src/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Util class or function.""" 16 | import mindspore.nn as nn 17 | import mindspore.common.dtype as mstype 18 | from mindspore.train.serialization import load_checkpoint, load_param_into_net 19 | 20 | from .yolo import YoloLossBlock 21 | 22 | 23 | class AverageMeter: 24 | """Computes and stores the average and current value""" 25 | 26 | def __init__(self, name, fmt=':f', tb_writer=None): 27 | self.name = name 28 | self.fmt = fmt 29 | self.reset() 30 | self.tb_writer = tb_writer 31 | self.cur_step = 1 32 | self.val = 0 33 | self.avg = 0 34 | self.sum = 0 35 | self.count = 0 36 | 37 | def reset(self): 38 | self.val = 0 39 | self.avg = 0 40 | self.sum = 0 41 | self.count = 0 42 | 43 | def update(self, val, n=1): 44 | self.val = val 45 | self.sum += val * n 46 | self.count += n 47 | self.avg = self.sum / self.count 48 | if self.tb_writer is not None: 49 | self.tb_writer.add_scalar(self.name, self.val, self.cur_step) 50 | self.cur_step += 1 51 | 52 | def __str__(self): 53 | fmtstr = '{name}:{avg' + self.fmt + '}' 54 | return fmtstr.format(**self.__dict__) 55 | 56 | 57 | def load_backbone(net, ckpt_path, args): 58 | """Load cspdarknet53 backbone checkpoint.""" 59 | param_dict = load_checkpoint(ckpt_path) 60 | net.init_parameters_data() 61 | load_param_into_net(net, param_dict) 62 | 63 | args.logger.info('===== load {} successfully ====='.format(ckpt_path)) 64 | 65 | return net 66 | 67 | 68 | def default_wd_filter(x): 69 | """default weight decay filter.""" 70 | parameter_name = x.name 71 | if parameter_name.endswith('.bias'): 72 | # all bias not using weight decay 73 | return False 74 | if parameter_name.endswith('.gamma'): 75 | # bn weight bias not using weight decay, be carefully for now x not include BN 76 | return False 77 | if parameter_name.endswith('.beta'): 78 | # bn weight bias not using weight decay, be carefully for now x not include BN 79 | return False 80 | 81 | return True 82 | 83 | 84 | def get_param_groups(network): 85 | """Param groups for optimizer.""" 86 | decay_params = [] 87 | no_decay_params = [] 88 | for x in network.trainable_params(): 89 | parameter_name = x.name 90 | if parameter_name.endswith('.bias'): 91 | # all bias not using weight decay 92 | no_decay_params.append(x) 93 | elif parameter_name.endswith('.gamma'): 94 | # bn weight bias not using weight decay, be carefully for now x not include BN 95 | no_decay_params.append(x) 96 | elif parameter_name.endswith('.beta'): 97 | # bn weight bias not using weight decay, be carefully for now x not include BN 98 | no_decay_params.append(x) 99 | else: 100 | decay_params.append(x) 101 | 102 | return [{'params': no_decay_params, 'weight_decay': 0.0}, {'params': decay_params}] 103 | 104 | 105 | class ShapeRecord: 106 | """Log image shape.""" 107 | 108 | def __init__(self): 109 | self.shape_record = { 110 | 416: 0, 111 | 448: 0, 112 | 480: 0, 113 | 512: 0, 114 | 544: 0, 115 | 576: 0, 116 | 608: 0, 117 | 640: 0, 118 | 672: 0, 119 | 704: 0, 120 | 736: 0, 121 | 'total': 0 122 | } 123 | 124 | def set(self, shape): 125 | if len(shape) > 1: 126 | shape = shape[0] 127 | shape = int(shape) 128 | self.shape_record[shape] += 1 129 | self.shape_record['total'] += 1 130 | 131 | def show(self, logger): 132 | for key in self.shape_record: 133 | rate = self.shape_record[key] / float(self.shape_record['total']) 134 | logger.info('shape {}: {:.2f}%'.format(key, rate*100)) 135 | 136 | 137 | def keep_loss_fp32(network): 138 | """Keep loss of network with float32""" 139 | for _, cell in network.cells_and_names(): 140 | if isinstance(cell, (YoloLossBlock,)): 141 | cell.to_float(mstype.float32) 142 | -------------------------------------------------------------------------------- /chapter5/readme.md: -------------------------------------------------------------------------------- 1 | # Wide & Deep 2 | 3 | # 代码目录说明 4 | wide_deep_ascend: 适用于华为云ModelArts平台上运行的代码 5 | 6 | wide_deep_gpu: 适用于在GPU Docker环境中运行的代码 7 | 8 | # 作业操作文档 9 | ### 华为云ModelArts Ascend环境操作文档 10 | 11 | WORD版:[下载链接](https://wide-deep-21.obs.cn-north-4.myhuaweicloud.com/Wide_Deep%E4%BD%9C%E4%B8%9A%E6%93%8D%E4%BD%9C%E6%96%87%E6%A1%A3_ascend%E7%8E%AF%E5%A2%83.docx) 12 | 13 | PDF版:[下载链接](https://wide-deep-21.obs.cn-north-4.myhuaweicloud.com/Wide_Deep%E4%BD%9C%E4%B8%9A%E6%93%8D%E4%BD%9C%E6%96%87%E6%A1%A3_ascend%E7%8E%AF%E5%A2%83.pdf) 14 | 15 | ### GPU环境操作文档 16 | 可查看[operation.md](https://github.com/mindspore-ai/mindspore-21-days-tutorials/blob/main/chapter5/wide_deep_gpu/operation.md)文件 17 | 18 | 注:此处提供的GPU环境操作文档是在ubuntu系统中起Docker运行的 19 | 20 | # 操作视频 21 | [GPU环境操作视频](https://wide-deep-21.obs.cn-north-4.myhuaweicloud.com/%E6%93%8D%E4%BD%9C%E8%A7%86%E9%A2%91.mp4) 22 | 23 | # 作业说明文档 24 | [下载链接](https://wide-deep-21.obs.cn-north-4.myhuaweicloud.com/MindSpore21%E5%A4%A9%E5%AE%9E%E6%88%98%E8%90%A5%E7%AC%AC%E4%BA%94%E8%AE%B2%E4%BD%9C%E4%B8%9A%E8%AF%B4%E6%98%8E%E6%96%87%E6%A1%A3.docx) 25 | 26 | # 视频教程 27 | [基于MindSpore Wide & Deep实现CTR预估实战](https://www.bilibili.com/video/BV1CV411y7oj) 28 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ test_training """ 17 | 18 | import os 19 | import tarfile 20 | from mindspore import Model, context 21 | from mindspore.train.serialization import load_checkpoint, load_param_into_net,\ 22 | build_searched_strategy, merge_sliced_parameter 23 | 24 | from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel 25 | from src.callbacks import LossCallBack, EvalCallBack 26 | from src.datasets import create_dataset, DataType 27 | from src.metrics import AUCMetric 28 | from src.config import WideDeepConfig 29 | from src.util import find_ckpt 30 | import moxing as mox 31 | 32 | 33 | def untar(fname, dirs): 34 | try: 35 | t = tarfile.open(fname) 36 | t.extractall(path = dirs) 37 | return True 38 | except Exception as e: 39 | print(e) 40 | return False 41 | 42 | 43 | def get_WideDeep_net(config): 44 | """ 45 | Get network of wide&deep model. 46 | """ 47 | WideDeep_net = WideDeepModel(config) 48 | 49 | loss_net = NetWithLossClass(WideDeep_net, config) 50 | train_net = TrainStepWrap(loss_net) 51 | eval_net = PredictWithSigmoid(WideDeep_net) 52 | 53 | return train_net, eval_net 54 | 55 | 56 | class ModelBuilder(): 57 | """ 58 | Wide and deep model builder 59 | """ 60 | def __init__(self): 61 | pass 62 | 63 | def get_hook(self): 64 | pass 65 | 66 | def get_train_hook(self): 67 | hooks = [] 68 | callback = LossCallBack() 69 | hooks.append(callback) 70 | 71 | if int(os.getenv('DEVICE_ID')) == 0: 72 | pass 73 | return hooks 74 | 75 | def get_net(self, config): 76 | return get_WideDeep_net(config) 77 | 78 | 79 | def test_eval(config): 80 | """ 81 | test evaluate 82 | """ 83 | data_path = config.data_path 84 | ckpt_path = config.ckpt_path 85 | batch_size = config.batch_size 86 | if config.dataset_type == "tfrecord": 87 | dataset_type = DataType.TFRECORD 88 | elif config.dataset_type == "mindrecord": 89 | dataset_type = DataType.MINDRECORD 90 | else: 91 | dataset_type = DataType.H5 92 | 93 | 94 | # data upload 95 | print('Upload data from obs to modelarts server.') 96 | mox.file.copy_parallel(src_url=config.data_url, dst_url=data_path) 97 | mox.file.copy_parallel(src_url=config.ckpt_url, dst_url=ckpt_path) 98 | 99 | tar_file = data_path + "train_demo.tar.gz" 100 | untar(tar_file, data_path) 101 | data_path = data_path + config.dataset_type 102 | 103 | ds_eval = create_dataset(data_path, train_mode=False, epochs=1, 104 | batch_size=batch_size, data_type=dataset_type) 105 | print("ds_eval.size: {}".format(ds_eval.get_dataset_size())) 106 | 107 | net_builder = ModelBuilder() 108 | train_net, eval_net = net_builder.get_net(config) 109 | 110 | param_dict = load_checkpoint(find_ckpt(ckpt_path)) 111 | load_param_into_net(eval_net, param_dict) 112 | 113 | auc_metric = AUCMetric() 114 | model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) 115 | 116 | eval_callback = EvalCallBack(model, ds_eval, auc_metric, config) 117 | 118 | model.eval(ds_eval, callbacks=eval_callback) 119 | 120 | 121 | if __name__ == "__main__": 122 | widedeep_config = WideDeepConfig() 123 | widedeep_config.argparse_init() 124 | 125 | context.set_context(mode=context.GRAPH_MODE, device_target=widedeep_config.device_target) 126 | test_eval(widedeep_config) 127 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend/precess_train_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ Process train and eval""" 15 | from src.config import WideDeepConfig 16 | from mindspore import context 17 | from src.preprocess_data import StatsDict, mkdir_path, statsdata, random_split_trans2mindrecord 18 | from train_and_eval import train_eval 19 | import moxing as mox 20 | 21 | 22 | if __name__ == "__main__": 23 | config = WideDeepConfig() 24 | config.argparse_init() 25 | config.data_argparse_init() 26 | 27 | data_file = config.data_file 28 | data_path = config.data_path 29 | 30 | # data upload 31 | print('Upload data from obs to modelarts server.') 32 | mox.file.copy_parallel(src_url=config.data_url, dst_url=data_path) 33 | 34 | target_field_size = config.dense_dim + config.slot_dim 35 | stats = StatsDict(field_size=target_field_size, dense_dim=config.dense_dim, slot_dim=config.slot_dim, 36 | skip_id_convert=config.skip_id_convert) 37 | data_file_path = data_path + data_file 38 | stats_output_path = data_path + "stats_dict/" 39 | mkdir_path(stats_output_path) 40 | statsdata(data_file_path, stats_output_path, stats, dense_dim=config.dense_dim, slot_dim=config.slot_dim) 41 | 42 | stats.load_dict(dict_path=stats_output_path, prefix="") 43 | stats.get_cat2id(threshold=config.threshold) 44 | 45 | in_file_path = data_path + data_file 46 | output_path = data_path + config.dataset_type 47 | mkdir_path(output_path) 48 | random_split_trans2mindrecord(in_file_path, output_path, stats, part_rows=2000000, 49 | train_line_count=config.train_line_count, line_per_sample=1000, 50 | test_size=0.1, seed=2020, dense_dim=config.dense_dim, slot_dim=config.slot_dim) 51 | 52 | context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) 53 | train_eval(config) 54 | print('Done all the jobs.') -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter5/wide_deep_ascend/src/__init__.py -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend/src/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ 17 | Area under cure metric 18 | """ 19 | 20 | from sklearn.metrics import roc_auc_score 21 | from mindspore import context 22 | from mindspore.nn.metrics import Metric 23 | from mindspore.communication.management import get_rank, get_group_size 24 | 25 | class AUCMetric(Metric): 26 | """ 27 | Area under cure metric 28 | """ 29 | 30 | def __init__(self): 31 | super(AUCMetric, self).__init__() 32 | self.clear() 33 | self.full_batch = context.get_auto_parallel_context("full_batch") 34 | 35 | def clear(self): 36 | """Clear the internal evaluation result.""" 37 | self.true_labels = [] 38 | self.pred_probs = [] 39 | 40 | def update(self, *inputs): # inputs 41 | """Update list of predicts and labels.""" 42 | all_predict = inputs[1].asnumpy().flatten().tolist() # predict 43 | all_label = inputs[2].asnumpy().flatten().tolist() # label 44 | self.pred_probs.extend(all_predict) 45 | if self.full_batch: 46 | rank_id = get_rank() 47 | group_size = get_group_size() 48 | gap = len(all_label) // group_size 49 | self.true_labels.extend(all_label[rank_id*gap: (rank_id+1)*gap]) 50 | else: 51 | self.true_labels.extend(all_label) 52 | 53 | def eval(self): 54 | if len(self.true_labels) != len(self.pred_probs): 55 | raise RuntimeError( 56 | 'true_labels.size is not equal to pred_probs.size()') 57 | 58 | auc = roc_auc_score(self.true_labels, self.pred_probs) 59 | print("====" * 20 + " auc_metric end") 60 | print("====" * 20 + " auc: {}".format(auc)) 61 | return auc 62 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend/src/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def count_line(filepath): 5 | count = 0 6 | f = open(filepath, "r") 7 | for line in f.readlines(): 8 | count = count + 1 9 | return count 10 | 11 | def find_ckpt(ckpt_path): 12 | files = os.listdir(ckpt_path) 13 | for fi in files: 14 | fi_d = os.path.join(ckpt_path, fi) 15 | if fi.endswith(".ckpt"): 16 | return fi_d 17 | 18 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend/train_and_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ train and eval """ 17 | 18 | import os 19 | from mindspore import Model, context 20 | from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor 21 | from mindspore.train.serialization import load_checkpoint, load_param_into_net,\ 22 | build_searched_strategy, merge_sliced_parameter 23 | from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel 24 | from src.callbacks import LossCallBack, EvalCallBack 25 | from src.datasets import create_dataset, DataType 26 | from src.metrics import AUCMetric 27 | from src.util import find_ckpt 28 | import moxing as mox 29 | 30 | 31 | def get_WideDeep_net(config): 32 | """ 33 | Get network of wide&deep model. 34 | """ 35 | WideDeep_net = WideDeepModel(config) 36 | 37 | loss_net = NetWithLossClass(WideDeep_net, config) 38 | train_net = TrainStepWrap(loss_net) 39 | eval_net = PredictWithSigmoid(WideDeep_net) 40 | 41 | return train_net, eval_net 42 | 43 | 44 | class ModelBuilder(): 45 | """ 46 | Wide and deep model builder 47 | """ 48 | def __init__(self): 49 | pass 50 | 51 | def get_hook(self): 52 | pass 53 | 54 | def get_train_hook(self): 55 | hooks = [] 56 | callback = LossCallBack() 57 | hooks.append(callback) 58 | 59 | if int(os.getenv('DEVICE_ID')) == 0: 60 | pass 61 | return hooks 62 | 63 | def get_net(self, config): 64 | return get_WideDeep_net(config) 65 | 66 | 67 | def train_eval(config): 68 | """ 69 | test evaluate 70 | """ 71 | data_path = config.data_path + config.dataset_type 72 | ckpt_path = config.ckpt_path 73 | epochs = config.epochs 74 | batch_size = config.batch_size 75 | if config.dataset_type == "tfrecord": 76 | dataset_type = DataType.TFRECORD 77 | elif config.dataset_type == "mindrecord": 78 | dataset_type = DataType.MINDRECORD 79 | else: 80 | dataset_type = DataType.H5 81 | 82 | ds_train = create_dataset(data_path, train_mode=True, epochs=1, 83 | batch_size=batch_size, data_type=dataset_type) 84 | print("ds_train.size: {}".format(ds_train.get_dataset_size())) 85 | ds_eval = create_dataset(data_path, train_mode=False, epochs=1, 86 | batch_size=batch_size, data_type=dataset_type) 87 | print("ds_eval.size: {}".format(ds_eval.get_dataset_size())) 88 | 89 | net_builder = ModelBuilder() 90 | train_net, eval_net = net_builder.get_net(config) 91 | train_net.set_train() 92 | 93 | train_model = Model(train_net) 94 | train_callback = LossCallBack(config=config) 95 | ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), 96 | keep_checkpoint_max=1) 97 | ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=config.ckpt_path, config=ckptconfig) 98 | train_model.train(epochs, ds_train, callbacks=[TimeMonitor(ds_train.get_dataset_size()), train_callback, ckpoint_cb]) 99 | 100 | # data download 101 | print('Download data from modelarts server to obs.') 102 | mox.file.copy_parallel(src_url=config.ckpt_path, dst_url=config.train_url) 103 | 104 | param_dict = load_checkpoint(find_ckpt(ckpt_path)) 105 | load_param_into_net(eval_net, param_dict) 106 | 107 | auc_metric = AUCMetric() 108 | eval_model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) 109 | eval_callback = EvalCallBack(eval_model, ds_eval, auc_metric, config) 110 | 111 | eval_model.eval(ds_eval, callbacks=eval_callback) 112 | 113 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend_v1.1.1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter5/wide_deep_ascend_v1.1.1/__init__.py -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend_v1.1.1/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ test evaluation """ 17 | 18 | import os 19 | 20 | from mindspore import Model, context 21 | from mindspore.train.serialization import load_checkpoint, load_param_into_net 22 | 23 | from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel 24 | from src.callbacks import LossCallBack, EvalCallBack 25 | from src.datasets import create_dataset, DataType 26 | from src.metrics import AUCMetric 27 | from src.config import WideDeepConfig 28 | from src.util import untar, find_ckpt 29 | import moxing as mox 30 | 31 | 32 | def get_WideDeep_net(config): 33 | """ 34 | Get network of wide&deep model. 35 | """ 36 | WideDeep_net = WideDeepModel(config) 37 | 38 | loss_net = NetWithLossClass(WideDeep_net, config) 39 | train_net = TrainStepWrap(loss_net) 40 | eval_net = PredictWithSigmoid(WideDeep_net) 41 | 42 | return train_net, eval_net 43 | 44 | 45 | class ModelBuilder(): 46 | """ 47 | Wide and deep model builder 48 | """ 49 | def __init__(self): 50 | pass 51 | 52 | def get_hook(self): 53 | pass 54 | 55 | def get_train_hook(self): 56 | hooks = [] 57 | callback = LossCallBack() 58 | hooks.append(callback) 59 | 60 | if int(os.getenv('DEVICE_ID')) == 0: 61 | pass 62 | return hooks 63 | 64 | def get_net(self, config): 65 | return get_WideDeep_net(config) 66 | 67 | 68 | def test_eval(config): 69 | """ 70 | test evaluate 71 | """ 72 | data_path = config.data_path 73 | ckpt_path = config.ckpt_path 74 | batch_size = config.batch_size 75 | if config.dataset_type == "tfrecord": 76 | dataset_type = DataType.TFRECORD 77 | elif config.dataset_type == "mindrecord": 78 | dataset_type = DataType.MINDRECORD 79 | else: 80 | dataset_type = DataType.H5 81 | 82 | # data upload 83 | print('Upload data from obs to modelarts server.') 84 | mox.file.copy_parallel(src_url=config.data_url, dst_url=data_path) 85 | mox.file.copy_parallel(src_url=config.ckpt_url, dst_url=ckpt_path) 86 | 87 | tar_file = data_path + "train_demo.tar.gz" 88 | untar(tar_file, data_path) 89 | data_path = data_path + config.dataset_type 90 | 91 | ds_eval = create_dataset(data_path, train_mode=False, epochs=1, 92 | batch_size=batch_size, data_type=dataset_type) 93 | print("ds_eval.size: {}".format(ds_eval.get_dataset_size())) 94 | 95 | net_builder = ModelBuilder() 96 | train_net, eval_net = net_builder.get_net(config) 97 | param_dict = load_checkpoint(find_ckpt(ckpt_path)) 98 | load_param_into_net(eval_net, param_dict) 99 | 100 | auc_metric = AUCMetric() 101 | model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) 102 | 103 | eval_callback = EvalCallBack(model, ds_eval, auc_metric, config) 104 | 105 | model.eval(ds_eval, callbacks=eval_callback) 106 | 107 | 108 | if __name__ == "__main__": 109 | widedeep_config = WideDeepConfig() 110 | widedeep_config.argparse_init() 111 | 112 | context.set_context(mode=context.GRAPH_MODE, device_target=widedeep_config.device_target) 113 | test_eval(widedeep_config) 114 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend_v1.1.1/process_train_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ Process train and eval""" 16 | import time 17 | from src.config import WideDeepConfig 18 | from mindspore import context 19 | from src.preprocess_data import StatsDict, mkdir_path, statsdata, random_split_trans2mindrecord 20 | from train_and_eval import train_eval 21 | import moxing as mox 22 | 23 | 24 | if __name__ == "__main__": 25 | config = WideDeepConfig() 26 | config.argparse_init() 27 | config.data_argparse_init() 28 | 29 | data_file = config.data_file 30 | data_path = config.data_path 31 | 32 | # data upload 33 | print('Upload data from obs to modelarts server.') 34 | mox.file.copy_parallel(src_url=config.data_url, dst_url=data_path) 35 | 36 | target_field_size = config.dense_dim + config.slot_dim 37 | stats = StatsDict(field_size=target_field_size, dense_dim=config.dense_dim, slot_dim=config.slot_dim, 38 | skip_id_convert=config.skip_id_convert) 39 | data_file_path = data_path + data_file 40 | stats_output_path = data_path + "stats_dict/" 41 | mkdir_path(stats_output_path) 42 | statsdata(data_file_path, stats_output_path, stats, dense_dim=config.dense_dim, slot_dim=config.slot_dim) 43 | 44 | stats.load_dict(dict_path=stats_output_path, prefix="") 45 | stats.get_cat2id(threshold=config.threshold) 46 | 47 | in_file_path = data_path + data_file 48 | output_path = data_path + config.dataset_type 49 | mkdir_path(output_path) 50 | random_split_trans2mindrecord(in_file_path, output_path, stats, part_rows=2000000, 51 | train_line_count=config.train_line_count, line_per_sample=1000, 52 | test_size=0.1, seed=2020, dense_dim=config.dense_dim, slot_dim=config.slot_dim) 53 | 54 | context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) 55 | start_time = time.time() 56 | print('Start train and eval time: ', start_time) 57 | train_eval(config) 58 | end_time = time.time() 59 | cost_train_eval_time = end_time - start_time 60 | print('Train and eval total cost time: ', cost_train_eval_time/60, ' minutes.') 61 | print('Done all the jobs: data preprocess, train and eval!!!') 62 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend_v1.1.1/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sklearn 4 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend_v1.1.1/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter5/wide_deep_ascend_v1.1.1/src/__init__.py -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend_v1.1.1/src/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ 17 | Area under cure metric 18 | """ 19 | 20 | from sklearn.metrics import roc_auc_score 21 | from mindspore.nn.metrics import Metric 22 | 23 | class AUCMetric(Metric): 24 | """ 25 | Area under cure metric 26 | """ 27 | 28 | def __init__(self): 29 | super(AUCMetric, self).__init__() 30 | self.clear() 31 | 32 | def clear(self): 33 | """Clear the internal evaluation result.""" 34 | self.true_labels = [] 35 | self.pred_probs = [] 36 | 37 | def update(self, *inputs): # inputs 38 | """Update list of predicts and labels.""" 39 | all_predict = inputs[1].asnumpy().flatten().tolist() # predict 40 | all_label = inputs[2].asnumpy().flatten().tolist() # label 41 | self.pred_probs.extend(all_predict) 42 | self.true_labels.extend(all_label) 43 | 44 | def eval(self): 45 | if len(self.true_labels) != len(self.pred_probs): 46 | raise RuntimeError( 47 | 'true_labels.size is not equal to pred_probs.size()') 48 | 49 | auc = roc_auc_score(self.true_labels, self.pred_probs) 50 | print("====" * 20 + " auc_metric end") 51 | print("====" * 20 + " auc: {}".format(auc)) 52 | return auc 53 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend_v1.1.1/src/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | import os 16 | import tarfile 17 | 18 | 19 | def count_line(filepath): 20 | count = 0 21 | f = open(filepath, "r") 22 | for line in f.readlines(): 23 | count = count + 1 24 | return count 25 | 26 | 27 | def find_ckpt(ckpt_path): 28 | files = os.listdir(ckpt_path) 29 | for fi in files: 30 | fi_d = os.path.join(ckpt_path, fi) 31 | if fi.endswith(".ckpt"): 32 | return fi_d 33 | 34 | 35 | def untar(fname, dirs): 36 | try: 37 | t = tarfile.open(fname) 38 | t.extractall(path = dirs) 39 | return True 40 | except Exception as e: 41 | print(e) 42 | return False 43 | -------------------------------------------------------------------------------- /chapter5/wide_deep_ascend_v1.1.1/train_and_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ test_training """ 16 | import os 17 | 18 | from mindspore import Model, context 19 | from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor 20 | 21 | from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel 22 | from src.callbacks import LossCallBack, EvalCallBack 23 | from src.datasets import create_dataset, DataType 24 | from src.metrics import AUCMetric 25 | import moxing as mox 26 | 27 | 28 | def get_WideDeep_net(config): 29 | """ 30 | Get network of wide&deep model. 31 | """ 32 | WideDeep_net = WideDeepModel(config) 33 | 34 | loss_net = NetWithLossClass(WideDeep_net, config) 35 | train_net = TrainStepWrap(loss_net, sparse=config.sparse) 36 | eval_net = PredictWithSigmoid(WideDeep_net) 37 | 38 | return train_net, eval_net 39 | 40 | 41 | class ModelBuilder(): 42 | """ 43 | ModelBuilder 44 | """ 45 | def __init__(self): 46 | pass 47 | 48 | def get_hook(self): 49 | pass 50 | 51 | def get_train_hook(self): 52 | hooks = [] 53 | callback = LossCallBack() 54 | hooks.append(callback) 55 | 56 | if int(os.getenv('DEVICE_ID')) == 0: 57 | pass 58 | return hooks 59 | 60 | def get_net(self, config): 61 | return get_WideDeep_net(config) 62 | 63 | 64 | def train_eval(config): 65 | """ 66 | test_train_eval 67 | """ 68 | data_path = config.data_path + config.dataset_type 69 | batch_size = config.batch_size 70 | epochs = config.epochs 71 | sparse = config.sparse 72 | if config.dataset_type == "tfrecord": 73 | dataset_type = DataType.TFRECORD 74 | elif config.dataset_type == "mindrecord": 75 | dataset_type = DataType.MINDRECORD 76 | else: 77 | dataset_type = DataType.H5 78 | 79 | # create train and eval dataset 80 | ds_train = create_dataset(data_path, train_mode=True, epochs=1, 81 | batch_size=batch_size, data_type=dataset_type) 82 | print("ds_train.size: {}".format(ds_train.get_dataset_size())) 83 | ds_eval = create_dataset(data_path, train_mode=False, epochs=1, 84 | batch_size=batch_size, data_type=dataset_type) 85 | print("ds_eval.size: {}".format(ds_eval.get_dataset_size())) 86 | 87 | net_builder = ModelBuilder() 88 | 89 | train_net, eval_net = net_builder.get_net(config) 90 | train_net.set_train() 91 | auc_metric = AUCMetric() 92 | 93 | model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) 94 | 95 | eval_callback = EvalCallBack(model, ds_eval, auc_metric, config) 96 | 97 | callback = LossCallBack(config=config) 98 | ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), keep_checkpoint_max=5) 99 | ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=config.ckpt_path, config=ckptconfig) 100 | 101 | out = model.eval(ds_eval, dataset_sink_mode=(not sparse)) 102 | print("=====" * 5 + "model.eval() initialized: {}".format(out)) 103 | model.train(epochs, ds_train, 104 | callbacks=[TimeMonitor(ds_train.get_dataset_size()), eval_callback, callback, ckpoint_cb], 105 | dataset_sink_mode=(not sparse)) 106 | 107 | # data download 108 | print('Download data from modelarts server to obs.') 109 | mox.file.copy_parallel(src_url=config.ckpt_path, dst_url=config.train_url) 110 | -------------------------------------------------------------------------------- /chapter5/wide_deep_gpu/eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ test_training """ 17 | 18 | import os 19 | 20 | from mindspore import Model, context 21 | from mindspore.train.serialization import load_checkpoint, load_param_into_net,\ 22 | build_searched_strategy, merge_sliced_parameter 23 | 24 | from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel 25 | from src.callbacks import LossCallBack, EvalCallBack 26 | from src.datasets import create_dataset, DataType 27 | from src.metrics import AUCMetric 28 | from src.config import WideDeepConfig 29 | 30 | 31 | def get_WideDeep_net(config): 32 | """ 33 | Get network of wide&deep model. 34 | """ 35 | WideDeep_net = WideDeepModel(config) 36 | 37 | loss_net = NetWithLossClass(WideDeep_net, config) 38 | train_net = TrainStepWrap(loss_net) 39 | eval_net = PredictWithSigmoid(WideDeep_net) 40 | 41 | return train_net, eval_net 42 | 43 | 44 | class ModelBuilder(): 45 | """ 46 | Wide and deep model builder 47 | """ 48 | def __init__(self): 49 | pass 50 | 51 | def get_hook(self): 52 | pass 53 | 54 | def get_train_hook(self): 55 | hooks = [] 56 | callback = LossCallBack() 57 | hooks.append(callback) 58 | 59 | if int(os.getenv('DEVICE_ID')) == 0: 60 | pass 61 | return hooks 62 | 63 | def get_net(self, config): 64 | return get_WideDeep_net(config) 65 | 66 | 67 | def test_eval(config): 68 | """ 69 | test evaluate 70 | """ 71 | data_path = config.data_path 72 | batch_size = config.batch_size 73 | if config.dataset_type == "tfrecord": 74 | dataset_type = DataType.TFRECORD 75 | elif config.dataset_type == "mindrecord": 76 | dataset_type = DataType.MINDRECORD 77 | else: 78 | dataset_type = DataType.H5 79 | ds_eval = create_dataset(data_path, train_mode=False, epochs=1, 80 | batch_size=batch_size, data_type=dataset_type) 81 | print("ds_eval.size: {}".format(ds_eval.get_dataset_size())) 82 | 83 | net_builder = ModelBuilder() 84 | train_net, eval_net = net_builder.get_net(config) 85 | ckpt_path = config.ckpt_path 86 | if ";" in ckpt_path: 87 | ckpt_paths = ckpt_path.split(';') 88 | param_list_dict = {} 89 | strategy = build_searched_strategy(config.stra_ckpt) 90 | for slice_path in ckpt_paths: 91 | param_slice_dict = load_checkpoint(slice_path) 92 | for key, value in param_slice_dict.items(): 93 | if 'optimizer' in key: 94 | continue 95 | if key not in param_list_dict: 96 | param_list_dict[key] = [] 97 | param_list_dict[key].append(value) 98 | param_dict = {} 99 | for key, value in param_list_dict.items(): 100 | if key in strategy: 101 | merged_parameter = merge_sliced_parameter(value, strategy) 102 | else: 103 | merged_parameter = merge_sliced_parameter(value) 104 | param_dict[key] = merged_parameter 105 | else: 106 | param_dict = load_checkpoint(ckpt_path) 107 | load_param_into_net(eval_net, param_dict) 108 | 109 | auc_metric = AUCMetric() 110 | model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) 111 | 112 | eval_callback = EvalCallBack(model, ds_eval, auc_metric, config) 113 | 114 | model.eval(ds_eval, callbacks=eval_callback) 115 | 116 | 117 | if __name__ == "__main__": 118 | widedeep_config = WideDeepConfig() 119 | widedeep_config.argparse_init() 120 | 121 | context.set_context(mode=context.GRAPH_MODE, device_target=widedeep_config.device_target) 122 | test_eval(widedeep_config) 123 | -------------------------------------------------------------------------------- /chapter5/wide_deep_gpu/operation.md: -------------------------------------------------------------------------------- 1 | 注:需事先安装好docker gpu环境,可参考[docker_install.md](https://github.com/mindspore-ai/mindspore-21-days-tutorials/blob/main/chapter1/mobilenetv2/docker_install.md)文件 2 | 3 | # 训练准备阶段 4 | ### 下载wide & deep体验脚本 5 | ``` 6 | # 在root用户主目录下执行如下命令 7 | git clone https://github.com/mindspore-ai/mindspore-21-days-tutorials.git 8 | mkdir -p /root/workspace/wide_deep 9 | cp -r /root/mindspore-21-days-tutorials/chapter5/wide_deep_gpu /root/workspace/wide_deep 10 | cd /root/workspace/wide_deep 11 | ``` 12 | 13 | ### 准备阶段 14 | ##### 体验作业准备 15 | 下载事先准备好的mindrecord和.ckpt文件 16 | ``` 17 | # 从华为云obs上下载经由10%Criteo数据集训练生成的mindrecord数据集文件 18 | wget https://wide-deep-21.obs.cn-north-4.myhuaweicloud.com/train_demo.tar.gz 19 | tar -zxvf train_demo.tar.gz 20 | mkdir -p data/ten_percent 21 | mv mindrecord data/ten_percent 22 | # 从华为云obs上下载经由10%Criteo数据集训练生成的.ckpt文件 23 | wget https://wide-deep-21.obs.cn-north-4.myhuaweicloud.com/wide_deep.ckpt 24 | ``` 25 | 26 | ##### 进阶作业准备 27 | 准备Criteo数据集(非全量),从华为云obs上下载Criteo数据集mini_demo.txt(全量数据的1%) 28 | ``` 29 | mkdir -p data/one_percent 30 | wget https://wide-deep-21.obs.cn-north-4.myhuaweicloud.com/mini_demo.txt 31 | mv mini_demo.txt ./data/one_percent 32 | ``` 33 | 34 | 35 | ### 训练启动阶段 36 | ##### 启动GPU容器 37 | 使用GPU mindspore-1.0.0版本镜像,将训练脚本及数据集所在目录挂载到容器环境中 38 | ``` 39 | docker run -it -v /root/workspace/wide_deep:/wide_deep --runtime=nvidia --privileged=true mindspore/mindspore-gpu:1.0.0 /bin/bash 40 | ``` 41 | 42 | ##### 安装环境依赖项 43 | ``` 44 | pip install pandas 45 | pip install sklearn 46 | ``` 47 | 若执行过程中出现如下警告,可执行`pip install --upgrade pip`命令升级工具. 48 | ``` 49 | WARNING: You are using pip version 19.2.3, however version 20.2.4 is available. 50 | You should consider upgrading via the 'pip install --upgrade pip' command. 51 | ``` 52 | 53 | ##### 体验作业要求 54 | 验证结果 55 | ``` 56 | cd /wide_deep 57 | python eval.py --data_path=data/ten_percent/mindrecord --ckpt_path=wide_deep.ckpt 58 | ``` 59 | 60 | ##### 进阶作业要求 61 | 处理数据 62 | ``` 63 | python src/preprocess_data.py --data_path=/wide_deep/data/one_percent/ --data_file=mini_demo.txt 64 | ``` 65 | 66 | 开始训练 67 | ``` 68 | python train.py --data_path=data/one_percent/mindrecord 69 | ``` 70 | 71 | 验证结果 72 | ``` 73 | python eval.py --data_path=data/one_percent/mindrecord --ckpt_path=widedeep_train-1_42.ckpt 74 | ``` 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /chapter5/wide_deep_gpu/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-ai/mindspore-21-days-tutorials/a8ab76281cd839c6e1fd917b1a385f290cea963b/chapter5/wide_deep_gpu/src/__init__.py -------------------------------------------------------------------------------- /chapter5/wide_deep_gpu/src/count_line.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def count_line(filepath): 5 | count = 0 6 | f = open(filepath, "r") 7 | for line in f.readlines(): 8 | count = count + 1 9 | return count 10 | 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser(description="Count line nums of dataset") 14 | parser.add_argument("--data_path", type=str, default="/wide_deep/data/one_percent/", 15 | help='The path of the data file') 16 | parser.add_argument("--file_name", type=str, default="mini_demo.txt", 17 | help='The name of the data file') 18 | args = parser.parse_args() 19 | data_file = args.data_path + args.file_name 20 | line_num = count_line(data_file) 21 | print("{} line num: {}".format(args.file_name, line_num)) 22 | 23 | -------------------------------------------------------------------------------- /chapter5/wide_deep_gpu/src/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | """ 17 | Area under cure metric 18 | """ 19 | 20 | from sklearn.metrics import roc_auc_score 21 | from mindspore import context 22 | from mindspore.nn.metrics import Metric 23 | from mindspore.communication.management import get_rank, get_group_size 24 | 25 | class AUCMetric(Metric): 26 | """ 27 | Area under cure metric 28 | """ 29 | 30 | def __init__(self): 31 | super(AUCMetric, self).__init__() 32 | self.clear() 33 | self.full_batch = context.get_auto_parallel_context("full_batch") 34 | 35 | def clear(self): 36 | """Clear the internal evaluation result.""" 37 | self.true_labels = [] 38 | self.pred_probs = [] 39 | 40 | def update(self, *inputs): # inputs 41 | """Update list of predicts and labels.""" 42 | all_predict = inputs[1].asnumpy().flatten().tolist() # predict 43 | all_label = inputs[2].asnumpy().flatten().tolist() # label 44 | self.pred_probs.extend(all_predict) 45 | if self.full_batch: 46 | rank_id = get_rank() 47 | group_size = get_group_size() 48 | gap = len(all_label) // group_size 49 | self.true_labels.extend(all_label[rank_id*gap: (rank_id+1)*gap]) 50 | else: 51 | self.true_labels.extend(all_label) 52 | 53 | def eval(self): 54 | if len(self.true_labels) != len(self.pred_probs): 55 | raise RuntimeError( 56 | 'true_labels.size is not equal to pred_probs.size()') 57 | 58 | auc = roc_auc_score(self.true_labels, self.pred_probs) 59 | print("====" * 20 + " auc_metric end") 60 | print("====" * 20 + " auc: {}".format(auc)) 61 | return auc 62 | -------------------------------------------------------------------------------- /chapter5/wide_deep_gpu/train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ test_training """ 15 | import os 16 | from mindspore import Model, context 17 | from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor 18 | from src.wide_and_deep import PredictWithSigmoid, TrainStepWrap, NetWithLossClass, WideDeepModel 19 | from src.callbacks import LossCallBack 20 | from src.datasets import create_dataset, DataType 21 | from src.config import WideDeepConfig 22 | 23 | 24 | def get_WideDeep_net(configure): 25 | """ 26 | Get network of wide&deep model. 27 | """ 28 | WideDeep_net = WideDeepModel(configure) 29 | 30 | loss_net = NetWithLossClass(WideDeep_net, configure) 31 | train_net = TrainStepWrap(loss_net) 32 | eval_net = PredictWithSigmoid(WideDeep_net) 33 | 34 | return train_net, eval_net 35 | 36 | 37 | class ModelBuilder(): 38 | """ 39 | Build the model. 40 | """ 41 | def __init__(self): 42 | pass 43 | 44 | def get_hook(self): 45 | pass 46 | 47 | def get_train_hook(self): 48 | hooks = [] 49 | callback = LossCallBack() 50 | hooks.append(callback) 51 | if int(os.getenv('DEVICE_ID')) == 0: 52 | pass 53 | return hooks 54 | 55 | def get_net(self, configure): 56 | return get_WideDeep_net(configure) 57 | 58 | 59 | def test_train(configure): 60 | """ 61 | test_train 62 | """ 63 | data_path = configure.data_path 64 | batch_size = configure.batch_size 65 | epochs = configure.epochs 66 | if configure.dataset_type == "tfrecord": 67 | dataset_type = DataType.TFRECORD 68 | elif configure.dataset_type == "mindrecord": 69 | dataset_type = DataType.MINDRECORD 70 | else: 71 | dataset_type = DataType.H5 72 | ds_train = create_dataset(data_path, train_mode=True, epochs=1, 73 | batch_size=batch_size, data_type=dataset_type) 74 | print("ds_train.size: {}".format(ds_train.get_dataset_size())) 75 | 76 | net_builder = ModelBuilder() 77 | train_net, _ = net_builder.get_net(configure) 78 | train_net.set_train() 79 | 80 | model = Model(train_net) 81 | callback = LossCallBack(config=configure) 82 | ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), 83 | keep_checkpoint_max=5) 84 | ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=configure.ckpt_path, config=ckptconfig) 85 | model.train(epochs, ds_train, callbacks=[TimeMonitor(ds_train.get_dataset_size()), callback, ckpoint_cb]) 86 | 87 | 88 | if __name__ == "__main__": 89 | config = WideDeepConfig() 90 | config.argparse_init() 91 | 92 | context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) 93 | test_train(config) 94 | --------------------------------------------------------------------------------