├── .DS_Store ├── README.md ├── bert_train 2 ├── .DS_Store ├── bert-test.sh ├── bert-train.sh ├── course_images │ ├── .DS_Store │ ├── .ipynb_checkpoints │ │ └── bert_base_model-checkpoint.png │ └── bert_base_model.png ├── output │ ├── .DS_Store │ ├── config.json │ ├── special_tokens_map.json │ ├── tokenizer_config.json │ ├── training_args.bin │ └── vocab.txt ├── pretrain_bert_base_cased │ ├── .ipynb_checkpoints │ │ └── config-checkpoint.json │ ├── config.json │ ├── tokenizer.json │ └── vocab.txt ├── pytorch_bert_train.ipynb ├── readme-bert.txt ├── requirements.txt ├── run_squad.py ├── runs │ ├── .DS_Store │ ├── Dec22_02-28-02_notebook-devenviron-1220-132148-d3emjl-notebook-0 │ │ └── events.out.tfevents.1703183282.notebook-devenviron-1220-132148-d3emjl-notebook-0.9388.0 │ ├── Dec22_02-49-43_notebook-devenviron-1220-132148-d3emjl-notebook-0 │ │ └── events.out.tfevents.1703184583.notebook-devenviron-1220-132148-d3emjl-notebook-0.62269.0 │ └── Dec22_02-56-28_notebook-devenviron-1220-132148-d3emjl-notebook-0 │ │ └── events.out.tfevents.1703184988.notebook-devenviron-1220-132148-d3emjl-notebook-0.68848.0 └── squad │ ├── dev-v1.1.json │ └── train-v1.1.json ├── code_chap_2_student ├── exp_2_1_mnist_mlp │ ├── __init__.py │ ├── __pycache__ │ │ └── standard_layer.cpython-36.pyc │ ├── arraytest.npy │ ├── main_exp_2_1.py │ ├── mlp-256-128-50epoch.npy │ ├── readme.txt │ └── stu_upload │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── layers_1.cpython-36.pyc │ │ └── mnist_mlp_cpu.cpython-36.pyc │ │ ├── layers_1.py │ │ └── mnist_mlp_cpu.py └── exp_2_2_mnist_mlp_dlp │ ├── __pycache__ │ ├── test_cpu.cpython-36.pyc │ └── test_cpu.cpython-37.pyc │ ├── main_exp_2_2.py │ ├── readme.txt │ ├── stu_upload │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── layers_1.cpython-36.pyc │ │ ├── layers_1.cpython-37.pyc │ │ ├── mnist_mlp_cpu.cpython-36.pyc │ │ ├── mnist_mlp_cpu.cpython-37.pyc │ │ ├── 
mnist_mlp_demo.cpython-36.pyc │ │ └── mnist_mlp_demo.cpython-37.pyc │ ├── layers_1.py │ ├── mnist_mlp_cpu.py │ ├── mnist_mlp_demo.py │ └── weight.npy │ └── test_cpu.py ├── code_chap_3_student ├── .DS_Store ├── .vscode │ └── settings.json ├── cat1.jpg ├── exp_3_1_vgg │ ├── .DS_Store │ ├── .layerresult.log.swp │ ├── .main_exp_3_1.py.swo │ ├── .main_exp_3_1.py.swp │ ├── __init__.py │ ├── __pycache__ │ │ └── standard_layer.cpython-36.pyc │ ├── main_exp_3_1.py │ ├── pool5_dump.npy │ ├── readme.txt │ └── stu_upload │ │ ├── .DS_Store │ │ ├── .vgg_cpu.py.swp │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── layers_1.cpython-36.pyc │ │ ├── layers_1.cpython-37.pyc │ │ ├── layers_2.cpython-36.pyc │ │ ├── layers_2.cpython-37.pyc │ │ ├── vgg_cpu.cpython-36.pyc │ │ └── vgg_cpu.cpython-37.pyc │ │ ├── layers_1.py │ │ ├── layers_1.pyc │ │ ├── layers_2.py │ │ ├── layers_2.pyc │ │ ├── vgg_cpu.py │ │ └── vgg_cpu.pyc ├── exp_3_2_vgg_dlp │ ├── .DS_Store │ ├── .vgg19new0606.log.swp │ ├── file_list │ ├── main_exp_3_2.py │ ├── readme.txt │ ├── stu_upload │ │ ├── .DS_Store │ │ ├── .vgg19_demo.py.swp │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── vgg19_demo.cpython-36.pyc │ │ │ └── vgg19_demo.cpython-37.pyc │ │ ├── vgg19_demo.py │ │ └── vgg19_demo.pyc │ └── synset_words.txt ├── exp_3_3_style_transfer │ ├── .DS_Store │ ├── __pycache__ │ │ ├── standard_layer_2.cpython-36.pyc │ │ └── standard_layer_3.cpython-36.pyc │ ├── main_exp_3_3.py │ ├── output │ │ ├── .DS_Store │ │ ├── output_10.jpg │ │ ├── output_22.jpg │ │ ├── output_34.jpg │ │ └── output_46.jpg │ ├── readme.txt │ └── stu_upload │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── exp_3_3_style_transfer.cpython-36.pyc │ │ ├── layers_1.cpython-36.pyc │ │ ├── layers_2.cpython-36.pyc │ │ └── 
layers_3.cpython-36.pyc │ │ ├── exp_3_3_style_transfer.py │ │ ├── exp_3_3_style_transfer.pyc │ │ ├── layers_1.py │ │ ├── layers_1.pyc │ │ ├── layers_2.py │ │ ├── layers_2.pyc │ │ ├── layers_3.py │ │ └── layers_3.pyc ├── readme.txt ├── style.jpg └── weinisi.jpg ├── code_chap_4_student ├── .DS_Store ├── .vscode │ └── settings.json ├── exp_4_1_vgg19_student │ ├── .DS_Store │ ├── data │ │ ├── .DS_Store │ │ ├── cat1.jpg │ │ └── strawberries.jpg │ ├── labels │ │ └── imagenet_classes.txt │ ├── models │ │ └── .DS_Store │ ├── readme.txt │ ├── run_cpu.sh │ ├── run_mlu.sh │ └── stu_upload │ │ ├── .DS_Store │ │ ├── evaluate_cnnl_mfus.py │ │ ├── evaluate_cpu.py │ │ └── generate_pth.py ├── exp_4_2_fast_style_transfer_infer_student │ ├── .DS_Store │ ├── data │ │ └── .DS_Store │ ├── models │ │ └── .DS_Store │ ├── out │ │ ├── .DS_Store │ │ ├── cpu │ │ │ ├── image0.jpg │ │ │ ├── image1.jpg │ │ │ ├── image10.jpg │ │ │ ├── image11.jpg │ │ │ ├── image12.jpg │ │ │ ├── image13.jpg │ │ │ ├── image14.jpg │ │ │ ├── image15.jpg │ │ │ ├── image16.jpg │ │ │ ├── image2.jpg │ │ │ ├── image3.jpg │ │ │ ├── image4.jpg │ │ │ ├── image5.jpg │ │ │ ├── image6.jpg │ │ │ ├── image7.jpg │ │ │ ├── image8.jpg │ │ │ └── image9.jpg │ │ └── mlu_cnnl_mfus │ │ │ ├── image0.jpg │ │ │ └── image1.jpg │ ├── readme.txt │ ├── run_cpu.sh │ ├── run_mlu.sh │ └── stu_upload │ │ ├── .DS_Store │ │ ├── evaluate_cnnl_mfus.py │ │ └── evaluate_cpu.py ├── exp_4_3_fast_style_transfer_infer_student │ ├── .DS_Store │ ├── data │ │ ├── .DS_Store │ │ └── udnie.jpg │ ├── models │ │ └── .DS_Store │ ├── out │ │ ├── .DS_Store │ │ └── train │ │ │ ├── image0_0.jpg │ │ │ ├── image0_10.jpg │ │ │ ├── image0_20.jpg │ │ │ └── image0_30.jpg │ ├── readme.txt │ ├── run_train_cpu.sh │ ├── run_train_mlu.sh │ └── stu_upload │ │ ├── .DS_Store │ │ ├── train-mlu.py │ │ └── train.py └── exp_4_4_custom_pytorch_op_student │ ├── .DS_Store │ ├── data │ └── .DS_Store │ ├── models │ └── .DS_Store │ ├── out │ └── .DS_Store │ ├── readme.txt │ ├── run_cpu.sh │ └── 
stu_upload │ ├── evaluate_cpu.py │ ├── op_hsigmoid │ ├── hsigmoid.cpp │ └── setup.py │ └── test_hsigmoid.py └── exp_5_1_custom_pytorch_mlu_op ├── .DS_Store ├── README.md ├── build ├── .DS_Store ├── lib.linux-x86_64-3.7 │ ├── libmlu_custom_ext.cpython-37m-x86_64-linux-gnu.so │ └── mlu_custom_ext │ │ ├── __init__.py │ │ └── mlu_functions │ │ ├── __init__.py │ │ └── mlu_functions.py └── temp.linux-x86_64-3.7 │ └── opt │ └── code_chap_5_student │ └── exp_5_1_custom_pytorch_mlu_op │ └── mlu_custom_ext │ └── mlu │ └── src │ ├── bang_sigmoid.o │ └── bang_sigmoid_sample.o ├── dist └── mlu_custom_ext-0.1-py3.7-linux-x86_64.egg ├── mlu_custom_ext.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt └── top_level.txt ├── mlu_custom_ext ├── .DS_Store ├── __init__.py ├── mlu │ ├── .DS_Store │ ├── include │ │ ├── bang_sigmoid_sample.h │ │ ├── customed_ops.h │ │ └── kernel.h │ └── src │ │ ├── bang_sigmoid.cpp │ │ └── bang_sigmoid_sample.mlu └── mlu_functions │ ├── __init__.py │ └── mlu_functions.py ├── setup.py └── tests └── test_sigmoid.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intelligent-Computing-Systems-labs 2 | BUPT智能计算系统lab 3 | 4 | 以下为智能计算系统考试说明,考的挺难的,非常的痛苦😭 5 | 6 | # 《智能计算系统考试总结,每行是一个题目》 7 | **注意!这些只是印象里的相关内容,不是试卷的内容(而且我也没记完整),而且不同届的题目一定有所出入,本说明仅作复习时的参考。 8 | 本科虽然为开卷考,但是考试题量大,知识点多,如果不熟悉整体的知识框架而仅仅是打印大量ppt资料,是无法通过考试或者拿到高分的,请同学们好好复习。 9 | 再次强调!本科目开展时间较短,教学内容很可能快速变化,请根据你自己的学习内容复习!** 10 | 11 | **填空判断各10道,考察的知识点有:** 12 | CNN的形状计算(四个维度), 13 | 关于不同的指令集的内容好像有两道, 14 | 卷积层的两个重要特征(局部连接和权重共享) 15 | Faster-rcnn相对的改进之处 16 | 原地操作导致的问题 17 | Cpu中的主要指令方式(两个) 18 | 向量Mac和标量mac的区别,在cnn中的应用 19 | load-store结构 20 | 
DLP相关知识一定重点看(最好单独整理一下,考了3,4道这个相关的) 21 | IQ队列相关知识好像有1,2道 22 | 23 | **概念题10道,考察的知识点有:** 24 | CNN和RNN的梯度消失的本质区别, 25 | 26 | LSTM的三个门, 27 | 28 | 怎么保证计算stage的先后顺序:好像是前n步输入了第n+1步才能输入,后m步输出了第m-1才能输出(具体参考ppt), 29 | 30 | 如何降低访存需求,看第六个还是第七个ppt上有 31 | 32 | AlexNet(给你论文里的网络结构图片)为什么是两个网络(其实是一个,但是要放在两张显卡里)为什么两个网络有数据 33 | 交互 34 | 35 | Np里的运算方法,np.arange(n)重点看看,要你画矩阵的,初始化都是用这个,然后各种处理方式,比如reshape和resize的区别(resize会用第一个元素补全数据不足的地方),反正各种初始化,处理方法都看看,tensor中的也看看(跟np差不多) 36 | 37 | 为什么分块循环可以提升速度(输入神经元的访问次数减少,放入了缓存,提升cache命中率) 38 | 39 | 三个缓存对应三种数据,为什么要这么设计 40 | 41 | **计算题:** 42 | 1.一道3通道2卷积核的卷积计算,非常复杂,尽快写前面的来算(计算量实在太大了,我记得是每个通道是5*5还有padding,根本算不完,不知道后续有没有变动) 43 | 44 | 2.自定义函数的前向计算和反向传播,知道怎么反向传播即可 45 | 46 | 3.计算访存效率,BW的公式反推出,n的公式计算即可,注意单位 47 | 48 | 49 | 如果有补充可以联系我,也可以不联系我直接搞个新的 50 | 还可以参考https://github.com/LeiWang1999/AICS-Course 51 | 52 | 里面的题目有一定参考价值(比如alexnet的并行),不过这个好像不是BUPT特供 53 | -------------------------------------------------------------------------------- /bert_train 2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/.DS_Store -------------------------------------------------------------------------------- /bert_train 2/bert-test.sh: -------------------------------------------------------------------------------- 1 | #python ./transformers/examples/question-answering_mlu/run_squad.py \ 2 | python ./run_squad.py \ 3 | --model_type bert \ 4 | --model_name_or_path ./output \ 5 | --do_eval \ 6 | --fp16 \ 7 | --do_lower_case \ 8 | --predict_file ./squad/dev-v1.1.json \ 9 | --max_seq_length 384 \ 10 | --doc_stride 128 \ 11 | --overwrite_output_dir \ 12 | --output_dir ./output 13 | -------------------------------------------------------------------------------- /bert_train 2/bert-train.sh: -------------------------------------------------------------------------------- 1 | #python 
./transformers/examples/question-answering_mlu/run_squad.py \ 2 | python ./run_squad.py \ 3 | --model_type bert \ 4 | --model_name_or_path ./pretrain_bert_base_cased \ 5 | --do_train \ 6 | --fp16 \ 7 | --do_lower_case \ 8 | --train_file ./squad/train-v1.1.json \ 9 | --predict_file ./squad/dev-v1.1.json \ 10 | --per_gpu_train_batch_size 12 \ 11 | --learning_rate 3e-5 \ 12 | --num_train_epochs 1.0 \ 13 | --max_seq_length 384 \ 14 | --doc_stride 128 \ 15 | --logging_steps 2000 \ 16 | --save_steps 2000 \ 17 | --overwrite_output_dir \ 18 | --output_dir ./output 19 | -------------------------------------------------------------------------------- /bert_train 2/course_images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/course_images/.DS_Store -------------------------------------------------------------------------------- /bert_train 2/course_images/.ipynb_checkpoints/bert_base_model-checkpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/course_images/.ipynb_checkpoints/bert_base_model-checkpoint.png -------------------------------------------------------------------------------- /bert_train 2/course_images/bert_base_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/course_images/bert_base_model.png -------------------------------------------------------------------------------- /bert_train 2/output/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/output/.DS_Store -------------------------------------------------------------------------------- /bert_train 2/output/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "./pretrain_bert_base_cased", 3 | "architectures": [ 4 | "BertForQuestionAnswering" 5 | ], 6 | "attention_probs_dropout_prob": 0.1, 7 | "gradient_checkpointing": false, 8 | "hidden_act": "gelu", 9 | "hidden_dropout_prob": 0.1, 10 | "hidden_size": 768, 11 | "initializer_range": 0.02, 12 | "intermediate_size": 3072, 13 | "layer_norm_eps": 1e-12, 14 | "max_position_embeddings": 512, 15 | "model_type": "bert", 16 | "num_attention_heads": 12, 17 | "num_hidden_layers": 12, 18 | "pad_token_id": 0, 19 | "position_embedding_type": "absolute", 20 | "transformers_version": "4.6.0.dev0", 21 | "type_vocab_size": 2, 22 | "use_cache": true, 23 | "vocab_size": 30522 24 | } 25 | -------------------------------------------------------------------------------- /bert_train 2/output/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /bert_train 2/output/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "tokenizer_file": "./pretrain_bert_base_cased/tokenizer.json", "name_or_path": "./pretrain_bert_base_cased"} 
-------------------------------------------------------------------------------- /bert_train 2/output/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/output/training_args.bin -------------------------------------------------------------------------------- /bert_train 2/pretrain_bert_base_cased/.ipynb_checkpoints/config-checkpoint.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "layer_norm_eps": 1e-12, 13 | "max_position_embeddings": 512, 14 | "model_type": "bert", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "pad_token_id": 0, 18 | "position_embedding_type": "absolute", 19 | "transformers_version": "4.6.0.dev0", 20 | "type_vocab_size": 2, 21 | "use_cache": true, 22 | "vocab_size": 30522 23 | } 24 | -------------------------------------------------------------------------------- /bert_train 2/pretrain_bert_base_cased/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "layer_norm_eps": 1e-12, 13 | "max_position_embeddings": 512, 14 | "model_type": "bert", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "pad_token_id": 0, 18 | "position_embedding_type": "absolute", 19 | "transformers_version": 
"4.6.0.dev0", 20 | "type_vocab_size": 2, 21 | "use_cache": true, 22 | "vocab_size": 30522 23 | } 24 | -------------------------------------------------------------------------------- /bert_train 2/readme-bert.txt: -------------------------------------------------------------------------------- 1 | 补全bert_train/run_squad.py文件 2 | 3 | 使用预训练模型进行微调:bash bert_train.sh 4 | 精度验证:bash bert_test.sh 5 | -------------------------------------------------------------------------------- /bert_train 2/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboardX 2 | -------------------------------------------------------------------------------- /bert_train 2/runs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/runs/.DS_Store -------------------------------------------------------------------------------- /bert_train 2/runs/Dec22_02-28-02_notebook-devenviron-1220-132148-d3emjl-notebook-0/events.out.tfevents.1703183282.notebook-devenviron-1220-132148-d3emjl-notebook-0.9388.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/runs/Dec22_02-28-02_notebook-devenviron-1220-132148-d3emjl-notebook-0/events.out.tfevents.1703183282.notebook-devenviron-1220-132148-d3emjl-notebook-0.9388.0 -------------------------------------------------------------------------------- /bert_train 2/runs/Dec22_02-49-43_notebook-devenviron-1220-132148-d3emjl-notebook-0/events.out.tfevents.1703184583.notebook-devenviron-1220-132148-d3emjl-notebook-0.62269.0: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/runs/Dec22_02-49-43_notebook-devenviron-1220-132148-d3emjl-notebook-0/events.out.tfevents.1703184583.notebook-devenviron-1220-132148-d3emjl-notebook-0.62269.0 -------------------------------------------------------------------------------- /bert_train 2/runs/Dec22_02-56-28_notebook-devenviron-1220-132148-d3emjl-notebook-0/events.out.tfevents.1703184988.notebook-devenviron-1220-132148-d3emjl-notebook-0.68848.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/bert_train 2/runs/Dec22_02-56-28_notebook-devenviron-1220-132148-d3emjl-notebook-0/events.out.tfevents.1703184988.notebook-devenviron-1220-132148-d3emjl-notebook-0.68848.0 -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_1_mnist_mlp/__init__.py -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/__pycache__/standard_layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_1_mnist_mlp/__pycache__/standard_layer.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/arraytest.npy: -------------------------------------------------------------------------------- 
def evaluate(mlp):
    """Classify the whole test set with ``mlp``, then print and return accuracy.

    ``mlp.test_data`` holds one sample per row; the last column is the label
    and the remaining columns are passed to ``mlp.forward``.  Samples are
    processed in chunks of ``mlp.batch_size``; a trailing partial chunk is
    evaluated as a smaller batch.

    Args:
        mlp: object exposing ``test_data``, ``batch_size`` and
            ``forward(batch)`` returning per-class scores of shape
            ``(rows, classes)``.

    Returns:
        Fraction of test rows whose arg-max prediction equals the label.
    """
    num_samples = mlp.test_data.shape[0]
    pred_results = np.zeros([num_samples])
    # Stepping by batch_size covers the final partial batch as well.  The
    # previous tail branch computed its offset with "/" (float division on
    # Python 3) and crashed when that float was used as a slice index.
    for start in range(0, num_samples, mlp.batch_size):
        stop = start + mlp.batch_size
        batch_images = mlp.test_data[start:stop, :-1]
        prob = mlp.forward(batch_images)
        pred_results[start:stop] = np.argmax(prob, axis=1)
    accuracy = np.mean(pred_results == mlp.test_data[:, -1])
    print('Accuracy in test set: %f' % accuracy)
    return accuracy
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_1_mnist_mlp/mlp-256-128-50epoch.npy -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/readme.txt: -------------------------------------------------------------------------------- 1 | 补全 stu_upload 中的 layers_1.py、mnist_mlp_cpu.py 文件,执行 main_exp_2_1.py 运行实验 2 | -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/stu_upload/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_1_mnist_mlp/stu_upload/__init__.py -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/stu_upload/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_1_mnist_mlp/stu_upload/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/stu_upload/__pycache__/layers_1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_1_mnist_mlp/stu_upload/__pycache__/layers_1.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/stu_upload/__pycache__/mnist_mlp_cpu.cpython-36.pyc: 
class FullyConnectedLayer(object):
    """Fully connected layer computing ``output = input . weight + bias``.

    ``weight`` has shape ``(num_input, num_output)`` and ``bias`` has shape
    ``(1, num_output)``; both are allocated by :meth:`init_param`.
    """

    def __init__(self, num_input, num_output):
        """Record the layer dimensions; parameters are created by ``init_param``."""
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))

    def init_param(self, std=0.01):
        """Initialise weights from N(0, std) and biases to zero."""
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.num_input, self.num_output))
        self.bias = np.zeros([1, self.num_output])
        show_matrix(self.weight, 'fc weight ')
        show_matrix(self.bias, 'fc bias ')

    def forward(self, input):
        """Return ``input . weight + bias`` and cache ``input`` for ``backward``."""
        self.input = input
        self.output = np.dot(self.input, self.weight) + self.bias
        return self.output

    def backward(self, top_diff):
        """Store parameter gradients and return the gradient w.r.t. the input.

        Args:
            top_diff: gradient of the loss w.r.t. this layer's output.

        Returns:
            Gradient of the loss w.r.t. the input cached by ``forward``.
        """
        self.d_weight = np.dot(self.input.T, top_diff)
        # keepdims keeps d_bias at shape (1, num_output), matching self.bias.
        self.d_bias = np.sum(top_diff, axis=0, keepdims=True)
        bottom_diff = np.dot(top_diff, self.weight.T)
        return bottom_diff

    def get_gradient(self):
        """Return ``(d_weight, d_bias)`` from the latest ``backward`` call."""
        return self.d_weight, self.d_bias

    def update_param(self, lr):
        """Apply one gradient-descent step with learning rate ``lr``."""
        self.weight = self.weight - lr * self.d_weight
        # The bias step must be scaled by lr too; it was previously applied
        # with an implicit learning rate of 1.
        self.bias = self.bias - lr * self.d_bias

    def load_param(self, weight, bias):
        """Replace the parameters; shapes must match the current ones.

        ``init_param`` has to run first because the shape checks read the
        existing ``weight`` and ``bias``.
        """
        assert self.weight.shape == weight.shape
        assert self.bias.shape == bias.shape
        self.weight = weight
        self.bias = bias
        show_matrix(self.weight, 'fc weight ')
        show_matrix(self.bias, 'fc bias ')

    def save_param(self):
        """Return the current ``(weight, bias)`` pair."""
        show_matrix(self.weight, 'fc weight ')
        show_matrix(self.bias, 'fc bias ')
        return self.weight, self.bias
forward(self, input): # 前向传播的计算 82 | # TODO:softmax 损失层的前向传播,计算输出结果 83 | input_max = np.max(input, axis=1, keepdims=True) 84 | input_exp = np.exp(input-input_max) 85 | exp_sum = np.sum(input_exp, axis=1, keepdims=True) 86 | self.prob = input_exp / exp_sum 87 | return self.prob 88 | 89 | def get_loss(self,label): # 计算损失 90 | self.batch_size=self.prob.shape[0] 91 | self.label_onehot=np.zeros_like(self.prob) 92 | self.label_onehot[np.arange(self.batch_size),label]=1.0 93 | loss=-np.sum(np.log(self.prob)*self.label_onehot)/self.batch_size 94 | return loss 95 | def backward(self): # 反向传播的计算 96 | # TODO:softmax 损失层的反向传播,计算本层损失 97 | bottom_diff=(self.prob - self.label_onehot) / self.batch_size 98 | return bottom_diff 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_1_mnist_mlp/stu_upload/mnist_mlp_cpu.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import struct 4 | import os 5 | import time 6 | 7 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 8 | from layers_1 import FullyConnectedLayer, ReLULayer, SoftmaxLossLayer 9 | 10 | MNIST_DIR = "../mnist_data" 11 | TRAIN_DATA = "train-images-idx3-ubyte" 12 | TRAIN_LABEL = "train-labels-idx1-ubyte" 13 | TEST_DATA = "t10k-images-idx3-ubyte" 14 | TEST_LABEL = "t10k-labels-idx1-ubyte" 15 | 16 | 17 | def show_matrix(mat, name): 18 | #print(name + str(mat.shape) + ' mean %f, std %f' % (mat.mean(), mat.std())) 19 | pass 20 | 21 | 22 | class MNIST_MLP(object): 23 | def __init__(self, batch_size=100, input_size=784,hidden1=512,hidden2=256,out_classes=10,lr=0.006, max_epoch=1,print_iter=100): 24 | self.batch_size = batch_size 25 | self.input_size = input_size 26 | self.hidden1 = hidden1 27 | self.hidden2 = hidden2 28 | self.out_classes = out_classes 29 | self.lr = lr 30 | self.max_epoch = max_epoch 31 | self.print_iter = print_iter 32 | 33 | 34 | def 
load_mnist(self,file_dir, is_images = 'True'): 35 | bin_file = open(file_dir, 'rb') 36 | bin_data = bin_file.read() 37 | bin_file.close() 38 | 39 | 40 | if is_images: 41 | fmt_header = '>iiii' 42 | magic,num_images,num_rows,num_cols=struct.unpack_from(fmt_header,bin_data,0) 43 | else: 44 | fmt_header = '>ii' 45 | magic,num_images=struct.unpack_from(fmt_header,bin_data,0) 46 | num_rows, num_cols= 1,1 47 | data_size = num_images * num_rows * num_cols 48 | mat_data = struct.unpack_from('>' + str(data_size) + 'B', bin_data, struct.calcsize(fmt_header)) 49 | 50 | mat_data = np.reshape(mat_data,[num_images,num_rows*num_cols]) 51 | print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape))) 52 | return mat_data 53 | 54 | def load_data(self): 55 | # TODO: 调用函数 load_mnist 读取和预处理 MNIST 中训练数据和测试数据的图像和标记 56 | print('Loading MNIST data from files...') 57 | train_images = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_DATA), True) 58 | train_labels = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_LABEL), False) 59 | test_images = self.load_mnist(os.path.join(MNIST_DIR, TEST_DATA), True) 60 | test_labels = self.load_mnist(os.path.join(MNIST_DIR, TEST_LABEL), False) 61 | 62 | self.train_data=np.append(train_images,train_labels, axis=1) 63 | self.test_data=np.append(test_images, test_labels, axis=1) 64 | 65 | 66 | 67 | def shuffle_data(self): 68 | print('Randomly shuffle MNIST data...') 69 | np.random.shuffle(self.train_data) 70 | 71 | def build_model(self): # 建立网络结构 72 | # TODO:建立三层神经网络结构 73 | print('Building multi-layer perception model...') 74 | self.fc1=FullyConnectedLayer(self.input_size, self.hidden1) 75 | self.relu1=ReLULayer() 76 | self.fc2=FullyConnectedLayer(self.hidden1, self.hidden2) 77 | self.relu2=ReLULayer() 78 | self.fc3=FullyConnectedLayer(self.hidden2, self.out_classes) 79 | self.softmax=SoftmaxLossLayer() 80 | self.update_layer_list=[self.fc1,self.fc2,self.fc3] 81 | 82 | def init_model(self): 83 | print('Initializing 
parameters of each layer in MLP...') 84 | for layer in self.update_layer_list: 85 | layer.init_param() 86 | def load_model(self, param_dir): 87 | print('Loading parameters from file ' + param_dir) 88 | params=np.load(param_dir,allow_pickle=True).item() 89 | #####weight参数 90 | self.fc1.load_param(params['w1'],params['b1']) 91 | self.fc2.load_param(params['w2'],params['b2']) 92 | self.fc3.load_param(params['w3'],params['b3']) 93 | 94 | 95 | def save_model(self, param_dir): 96 | print('Saving parameters to file ' + param_dir) 97 | params = {} 98 | params['w1'], params['b1'] = self.fc1.save_param() 99 | params['w2'], params['b2'] = self.fc2.save_param() 100 | params['w3'], params['b3'] = self.fc3.save_param() 101 | print( params) 102 | np.save(param_dir, params) 103 | 104 | 105 | def forward(self, input): # 神经网络的前向传播 106 | # TODO:神经网络的前向传播 107 | h1=self.fc1.forward(input) 108 | h1=self.relu1.forward(h1) 109 | h2=self.fc2.forward(h1) 110 | h2=self.relu2.forward(h2) 111 | h3=self.fc3.forward(h2) 112 | prob=self.softmax.forward(h3) 113 | return prob 114 | 115 | 116 | 117 | def backward(self): # 神经网络的反向传播 118 | # TODO:神经网络的反向传播 119 | dloss = self.softmax.backward() 120 | dh3 = self.fc3.backward(dloss) 121 | dh2 = self.relu2.backward(dh3) 122 | dh2 = self.fc2.backward(dh2) 123 | dh1 = self.relu1.backward(dh2) 124 | dh1 = self.fc1.backward(dh1) 125 | 126 | def update(self,lr): 127 | for layer in self.update_layer_list: 128 | layer.update_param(lr) 129 | 130 | 131 | 132 | 133 | def train(self): 134 | max_batch=self.train_data.shape[0] // self.batch_size ###python3 135 | 136 | print('Start training...') 137 | for idx_epoch in range(self.max_epoch): 138 | self.shuffle_data() 139 | for idx_batch in range(max_batch): 140 | batch_images = self.train_data[idx_batch*self.batch_size:(idx_batch+1)*self.batch_size,:-1] ##batchsize ,最后1列 141 | batch_labels = self.train_data[idx_batch*self.batch_size:(idx_batch+1)*self.batch_size,-1] 142 | prob = self.forward(batch_images) 143 | loss = 
def evaluate(self):
    """Classify every row of ``self.test_data`` and report the accuracy.

    ``self.test_data`` holds one sample per row with the label in the last
    column. Samples are fed to ``self.forward`` in chunks of
    ``self.batch_size``; a final shorter chunk is evaluated as well, so every
    sample receives a prediction.

    Returns:
        The fraction of samples whose predicted class equals the label.
    """
    num_samples = self.test_data.shape[0]
    pred_results = np.zeros([num_samples])
    for begin in range(0, num_samples, self.batch_size):
        stop = begin + self.batch_size  # slicing clamps this on the last chunk
        # All columns except the last one are features (the label is last).
        batch_images = self.test_data[begin:stop, :-1]
        prob = self.forward(batch_images)
        pred_results[begin:stop] = np.argmax(prob, axis=1)
    accuracy = np.mean(pred_results == self.test_data[:, -1])
    print('Accuracy in test set:%f' % accuracy)
    return accuracy
def evaluate(mlp):
    """Run the accelerator network over the test set and report accuracy.

    ``mlp.test_data`` holds one sample per row with the label in the last
    column. Each batch is flattened to a Python list, pushed into
    ``mlp.net``, executed with ``mlp.forward()`` and read back as a flat
    probability buffer that is reshaped to ``(batch_size, out_classes)``.

    A trailing partial batch is padded with zero rows up to ``batch_size``
    and only the predictions of the real rows are kept.
    NOTE(review): this assumes the network only accepts exactly
    ``batch_size`` samples per run, as the fixed reshape of the output
    suggests - confirm against the pycnnl build.

    Returns:
        The fraction of samples whose predicted class equals the label.
    """
    num_samples = mlp.test_data.shape[0]
    batch_size = mlp.batch_size
    num_full = num_samples // batch_size
    pred_results = np.zeros([num_samples])

    for idx in range(num_full):
        begin, stop = idx * batch_size, (idx + 1) * batch_size
        batch_images = mlp.test_data[begin:stop, :-1]
        mlp.net.setInputData(batch_images.flatten().tolist())

        start = time.time()
        mlp.forward()
        end = time.time()
        print('inferencing time: %f'%(end - start))

        prob = np.array(mlp.net.getOutputData()).reshape((batch_size, mlp.out_classes))
        # The probabilities of the most recent batch are dumped for inspection.
        np.savetxt("result1.txt", prob)
        pred_results[begin:stop] = np.argmax(prob, axis=1)

    tail_start = num_full * batch_size
    remainder = num_samples - tail_start
    if remainder > 0:
        tail = mlp.test_data[tail_start:, :-1]
        padding = np.zeros((batch_size - remainder, tail.shape[1]), dtype=tail.dtype)
        batch_images = np.concatenate([tail, padding], axis=0)
        mlp.net.setInputData(batch_images.flatten().tolist())
        mlp.forward()
        prob = np.array(mlp.net.getOutputData()).reshape((batch_size, mlp.out_classes))
        # Drop the predictions that belong to the padding rows.
        pred_results[tail_start:] = np.argmax(prob, axis=1)[:remainder]

    accuracy = np.mean(pred_results == mlp.test_data[:, -1])
    print('Accuracy in test set: %f' % accuracy)
    return accuracy
53 | mlp = MNIST_MLP() 54 | 55 | mlp.build_model(batch_size=batch_size, hidden1=h1, hidden2=h2, out_classes=c) 56 | 57 | model_path = 'stu_upload/weight.npy' 58 | test_data = '../mnist_data/t10k-images-idx3-ubyte' 59 | test_label = '../mnist_data/t10k-labels-idx1-ubyte' 60 | mlp.load_data(test_data, test_label) 61 | mlp.load_model(model_path) 62 | 63 | for i in range(10): 64 | evaluate(mlp) 65 | 66 | if __name__ == '__main__': 67 | print('-------- TEST CPU --------') 68 | test_cpu.run_test() 69 | print('-------- TEST DLP --------') 70 | run_mnist() 71 | -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/readme.txt: -------------------------------------------------------------------------------- 1 | 补全 stu_upload 中的 mnist_mlp_demo.py 文件,并将实验2-1中实现的 layers_1.py、mnist_mlp_cpu.py 以及训练得到的参数复制到 stu_upload 目录下,执行 main_exp_2_2.py 运行实验。 2 | 3 | 注意: 4 | 上传的实验2-1中训练生成的模型参数,如 mlp-32-16-10epoch.npy,需要修改名称为 weight.npy,否则无法识别。 5 | 上传的 mnist mlp 网络的 cpu 实现,即实验2-1中完成的 mnist_mlp_cpu.py 文件,需要做出以下修改: 6 | 7 | 修改 build_mnist_mlp() 函数中的内容: 8 | 1. 修改 batch_size. 9 | 将 mlp = MNIST_MLP(hidden1=h1, hidden2=h2, max_epoch=e) 10 | 修改为 mlp = MNIST_MLP(batch_size=10000, hidden1=h1, hidden2=h2, max_epoch=e) 11 | 12 | 2. 
注释掉训练的函数 13 | mlp.train() 14 | 和 15 | mlp.save_model('mlp-%d-%d-%depoch.npy' % (h1, h2, e)) 16 | 两句,并将 17 | mlp.load_model('mlp-%d-%d-%depoch.npy' % (h1, h2, e)) 18 | 取消注释,同时修改函数参数为 param_dir 19 | mlp.load_model(param_dir) 20 | -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__init__.py -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/layers_1.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/layers_1.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/layers_1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/layers_1.cpython-37.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/mnist_mlp_cpu.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/mnist_mlp_cpu.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/mnist_mlp_cpu.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/mnist_mlp_cpu.cpython-37.pyc -------------------------------------------------------------------------------- /code_chap_2_student/exp_2_2_mnist_mlp_dlp/stu_upload/__pycache__/mnist_mlp_demo.cpython-36.pyc: -------------------------------------------------------------------------------- 
class FullyConnectedLayer(object):
    """Fully connected layer computing ``output = input . weight + bias``.

    ``weight`` has shape ``(num_input, num_output)`` and ``bias`` has shape
    ``(1, num_output)``; both are created by ``init_param`` or installed by
    ``load_param``.
    """

    def __init__(self, num_input, num_output):
        """Record the layer dimensions; parameters are created separately."""
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))

    def init_param(self, std=0.01):
        """Draw the weights from N(0, std^2) and set the bias to zero."""
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.num_input, self.num_output))
        self.bias = np.zeros([1, self.num_output])
        show_matrix(self.weight, 'fc weight ')
        show_matrix(self.bias, 'fc bias ')

    def forward(self, input):
        """Return ``input . weight + bias`` and cache ``input`` for backward."""
        self.input = input
        self.output = np.dot(self.input, self.weight) + self.bias
        return self.output

    def backward(self, top_diff):
        """Compute parameter gradients and return the gradient w.r.t. the input.

        ``top_diff`` is the gradient of the loss with respect to this layer's
        output. The weight and bias gradients are stored on the instance for
        ``update_param``.
        """
        self.d_weight = np.dot(self.input.T, top_diff)
        # The bias is shared by every sample, so its gradient sums over the batch.
        self.d_bias = np.sum(top_diff, axis=0, keepdims=True)
        bottom_diff = np.dot(top_diff, self.weight.T)
        return bottom_diff

    def get_gradient(self):
        """Return the ``(d_weight, d_bias)`` pair computed by ``backward``."""
        return self.d_weight, self.d_bias

    def update_param(self, lr):
        """Apply one gradient-descent step with learning rate ``lr``."""
        self.weight = self.weight - lr * self.d_weight
        # The bias step is scaled by the learning rate exactly like the weights.
        self.bias = self.bias - lr * self.d_bias

    def load_param(self, weight, bias):
        """Install externally provided parameters after checking their shapes.

        Requires the layer to already hold parameters (for example from
        ``init_param``), because the shapes are compared with the current ones.
        """
        assert self.weight.shape == weight.shape
        assert self.bias.shape == bias.shape
        self.weight = weight
        self.bias = bias
        show_matrix(self.weight, 'fc weight ')
        show_matrix(self.bias, 'fc bias ')

    def save_param(self):
        """Return the current ``(weight, bias)`` pair."""
        show_matrix(self.weight, 'fc weight ')
        show_matrix(self.bias, 'fc bias ')
        return self.weight, self.bias
def load_mnist(self, file_dir, is_images=True):
    """Read an MNIST IDX file into a 2-D integer array.

    Args:
        file_dir: Path of the IDX file to read.
        is_images: When truthy the header is parsed as four big-endian
            32-bit integers (magic, count, rows, cols); otherwise as two
            (magic, count) and every item is a single byte, which is the
            label-file layout.

    Returns:
        An integer array of shape ``(count, rows * cols)``; for label files
        this is ``(count, 1)``.
    """
    with open(file_dir, 'rb') as bin_file:
        bin_data = bin_file.read()

    if is_images:
        fmt_header = '>iiii'
        _magic, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    else:
        fmt_header = '>ii'
        _magic, num_images = struct.unpack_from(fmt_header, bin_data, 0)
        num_rows, num_cols = 1, 1
    data_size = num_images * num_rows * num_cols
    # Decode the payload as raw unsigned bytes in one step instead of
    # unpacking it into a tuple of Python integers.
    raw = np.frombuffer(bin_data, dtype=np.uint8, count=data_size,
                        offset=struct.calcsize(fmt_header))
    # Convert to the default integer type so later arithmetic does not wrap
    # around at 255; this also yields a writable copy.
    mat_data = raw.astype(int).reshape([num_images, num_rows * num_cols])
    print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape)))
    return mat_data
def forward(self, input):
    """Run one forward pass and return the softmax probabilities.

    The input flows through fc1 -> relu1 -> fc2 -> relu2 -> fc3 -> softmax.
    No other work is done here: the pass used to end with a busy loop whose
    result was thrown away, which only inflated the measured inference time.
    """
    h1 = self.fc1.forward(input)
    h1 = self.relu1.forward(h1)
    h2 = self.fc2.forward(h1)
    h2 = self.relu2.forward(h2)
    h3 = self.fc3.forward(h2)
    prob = self.softmax.forward(h3)
    return prob
class VGG19(object):
    """VGG-19 image classifier assembled on a ``pycnnl.CnnlNet``.

    Typical use: ``build_model`` -> ``load_model`` -> ``evaluate``.
    """

    # (output channels, number of conv+ReLU pairs) for each of the five stages.
    _CONV_STAGES = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))
    # (layer name, input features, output features) of the classifier head.
    _FC_LAYERS = (('fc6', 25088, 4096), ('fc7', 4096, 4096), ('fc8', 4096, 1000))

    def __init__(self):
        """Create the empty network container."""
        self.net = pycnnl.CnnlNet()
        self.input_quant_params = []
        self.filter_quant_params = []

    @staticmethod
    def _int_vector(*dims):
        """Return a ``pycnnl.IntVector`` holding ``dims`` in order."""
        vec = pycnnl.IntVector(len(dims))
        for pos, dim in enumerate(dims):
            vec[pos] = dim
        return vec

    def build_model(self, param_path='../../imagenet-vgg-verydeep-19.mat'):
        """Create every VGG-19 layer in order and remember ``param_path``.

        The network takes a 1x3x224x224 input. Each of the five stages is a
        run of conv+ReLU pairs followed by a pooling layer that halves the
        spatial size; three fully connected layers and a softmax follow.
        """
        self.param_path = param_path

        # Every shape vector stays referenced until this method returns.
        shapes = []

        def shape(*dims):
            vec = self._int_vector(*dims)
            shapes.append(vec)
            return vec

        self.net.setInputShape(1, 3, 224, 224)

        channels, size = 3, 224
        for stage, (out_channels, num_convs) in enumerate(self._CONV_STAGES, start=1):
            for conv in range(1, num_convs + 1):
                suffix = '%d_%d' % (stage, conv)
                # Trailing arguments 3, 1, 1, 1 are passed through unchanged
                # (presumably kernel size, stride, dilation and padding -
                # TODO confirm against the pycnnl API).
                self.net.createConvLayer('conv' + suffix, shape(1, channels, size, size),
                                         out_channels, 3, 1, 1, 1)
                self.net.createReLuLayer('relu' + suffix)
                channels = out_channels
            # Arguments 2, 2 are presumably kernel size and stride - TODO confirm.
            self.net.createPoolingLayer('pool%d' % stage, shape(1, channels, size, size), 2, 2)
            size //= 2

        last_fc = self._FC_LAYERS[-1][0]
        for name, fan_in, fan_out in self._FC_LAYERS:
            self.net.createMlpLayer(name,
                                    shape(1, 1, 1, fan_in),
                                    shape(1, 1, fan_in, fan_out),
                                    shape(1, 1, 1, fan_out))
            # fc6 and fc7 are followed by relu6 and relu7; fc8 feeds the softmax.
            if name != last_fc:
                self.net.createReLuLayer('relu' + name[2:])

        self.net.createSoftmaxLayer('softmax', shape(1, 1, 1000), 1)

    def load_model(self):
        """Load the MatConvNet parameters from ``self.param_path`` into the net.

        Layer ``idx`` of the network is paired with entry ``idx`` of the
        ``layers`` array in the .mat file, so the build order must match the
        order stored in the file.
        """
        print('Loading parameters from file ' + self.param_path)
        params = scipy.io.loadmat(self.param_path)
        self.image_mean = params['normalization'][0][0][0]
        self.image_mean = np.mean(self.image_mean, axis=(0, 1))

        for idx in range(self.net.size()):
            layer_name = self.net.getLayerName(idx)
            if 'conv' in layer_name:
                weight, bias = params['layers'][0][idx][0][0][0][0]
                # matconvnet: weights dim [height, width, in_channel, out_channel]
                # ours: weights dim [out_channel, height, width, in_channel]
                weight = weight.transpose([3, 0, 1, 2]).flatten().astype(np.float64)
                bias = bias.reshape(-1).astype(np.float64)
                self.net.loadParams(idx, weight, bias)
            if 'fc' in layer_name:
                # Loading params may take quite a while. Please be patient.
                weight, bias = params['layers'][0][idx][0][0][0][0]
                # Collapse to [in_features, out_features], then store transposed.
                # np.float64 replaces the removed np.float alias (same dtype).
                weight = np.transpose(np.reshape(weight, (-1, weight.shape[-1])), [1, 0]).flatten().astype(np.float64)
                bias = bias.reshape(-1).astype(np.float64)
                self.net.loadParams(idx, weight, bias)

    def load_image(self, image_dir):
        """Read, resize and mean-centre an image, then set it as network input.

        NOTE(review): ``scipy.misc.imread`` and ``scipy.misc.imresize`` were
        removed from recent SciPy releases, so this needs an old SciPy or a
        port to another image library.
        """
        self.image = image_dir
        image_mean = np.array([123.68, 116.779, 103.939])
        print('Loading and preprocessing image from ' + image_dir)
        input_image = scipy.misc.imread(image_dir)
        input_image = scipy.misc.imresize(input_image, [224, 224, 3])
        input_image = np.array(input_image).astype(np.float32)
        input_image -= image_mean
        input_image = np.reshape(input_image, [1] + list(input_image.shape))
        # Here the data is [N, height, width, channel]; it is reordered to
        # [N, channel, height, width] before being flattened.
        input_image = input_image.transpose([0, 3, 1, 2]).astype(np.float64)
        input_data = input_image.flatten()
        self.net.setInputData(input_data)

    def forward(self):
        """Execute the network on the current input."""
        return self.net.forward()

    def get_top5(self, label):
        """Run inference on the loaded image and print the five best classes.

        Args:
            label: Ground-truth class index of the loaded image.

        Returns:
            ``(top1, top5)``: whether ``label`` is the best prediction and
            whether it is among the five best predictions.
        """
        start = time.time()
        self.forward()
        end = time.time()

        result = self.net.getOutputData()

        # Class names, one per line, indexed by class id.
        with open('../synset_words.txt', 'r') as f:
            labels = f.readlines()

        print('------ Top 5 of ' + self.image + ' ------')
        scores = np.asarray(list(result), dtype=np.float64)
        # Ranking indices (rather than looking scores up again) keeps tied
        # scores mapped to distinct classes; the stable sort prefers the
        # lower index among ties.
        top_indices = np.argsort(-scores, kind='stable')[:5].tolist()
        for idx in top_indices:
            print('%f - ' % scores[idx] + labels[idx].strip())
        top1 = bool(top_indices) and top_indices[0] == label
        top5 = label in top_indices

        print('inference time: %f'%(end - start))
        return top1, top5

    def evaluate(self, file_list):
        """Classify every image listed in ``file_list`` and print accuracy.

        Each non-blank line of ``file_list`` contains an image path followed
        by its integer label, separated by whitespace.
        """
        top1_num = 0
        top5_num = 0

        start = time.time()
        with open(file_list, 'r') as f:
            entries = [line.split() for line in f if line.strip()]
        total_num = len(entries)
        for fields in entries:
            image = fields[0]
            label = int(fields[1])
            # Use this instance, not a module-level variable, so the method
            # also works when the class is imported.
            self.load_image(image)
            top1, top5 = self.get_top5(label)
            if top1:
                top1_num += 1
            if top5:
                top5_num += 1
        end = time.time()

        # An empty list reports 0.0 instead of dividing by zero.
        accuracy1 = float(top1_num) / float(total_num) if total_num else 0.0
        accuracy5 = float(top5_num) / float(total_num) if total_num else 0.0
        print('Global accuracy : ')
        print('accuracy1: %f (%d/%d) '%(accuracy1, top1_num, total_num))
        print('accuracy5: %f (%d/%d) '%(accuracy5, top5_num, total_num))
        print('Total execution time: %f'%(end - start))
def evaluate(mlp):
    """Run the MLP over its whole test set and report the accuracy.

    ``mlp.test_data`` is a 2-D array whose last column is the label and whose
    remaining columns are the input features.  Samples are fed in batches of
    ``mlp.batch_size``; a final, smaller batch covers any remainder.

    Args:
        mlp: Object exposing ``test_data``, ``batch_size`` and
            ``forward(batch)`` (returning per-class scores per row).

    Returns:
        Fraction of test samples whose arg-max prediction equals the label.
    """
    num_samples = mlp.test_data.shape[0]
    batch_size = mlp.batch_size
    pred_results = np.zeros([num_samples])

    num_full_batches = num_samples // batch_size
    for idx in range(num_full_batches):
        lo = idx * batch_size
        hi = lo + batch_size
        batch_images = mlp.test_data[lo:hi, :-1]
        start = time.time()
        prob = mlp.forward(batch_images)
        end = time.time()
        print("inferencing time: %f" % (end - start))
        pred_results[lo:hi] = np.argmax(prob, axis=1)

    # Remainder that does not fill a whole batch.  The previous code computed
    # the offset with "/", which yields a float on Python 3 and cannot be
    # used as a slice index; use the integer start of the tail instead.
    tail_start = num_full_batches * batch_size
    if tail_start < num_samples:
        batch_images = mlp.test_data[tail_start:, :-1]
        prob = mlp.forward(batch_images)
        pred_results[tail_start:] = np.argmax(prob, axis=1)

    accuracy = np.mean(pred_results == mlp.test_data[:, -1])
    print('Accuracy in test set: %f' % accuracy)
    return accuracy


def run_test():
    """Build the MLP from the saved weights and evaluate it on the test set."""
    mlp = build_mnist_mlp('stu_upload/weight.npy')
    evaluate(mlp)


if __name__ == '__main__':
    run_test()
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/.DS_Store -------------------------------------------------------------------------------- /code_chap_3_student/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.defaultFormatter": null 4 | } 5 | } -------------------------------------------------------------------------------- /code_chap_3_student/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/cat1.jpg -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/.DS_Store -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/.layerresult.log.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/.layerresult.log.swp -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/.main_exp_3_1.py.swo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/.main_exp_3_1.py.swo 
def computeMse(data1, data2):
    """Return the mean squared error between two equally sized sequences.

    Args:
        data1: Sequence or array of numbers.
        data2: Sequence or array of numbers with the same number of elements.

    Returns:
        The mean of the element-wise squared differences, as a float.

    Raises:
        ValueError: If the inputs are empty or differ in size.
    """
    a = np.asarray(data1, dtype=np.float64).ravel()
    b = np.asarray(data2, dtype=np.float64).ravel()
    if a.size != b.size:
        raise ValueError('size mismatch: %d vs %d' % (a.size, b.size))
    if a.size == 0:
        raise ValueError('cannot compute the MSE of empty inputs')
    return float(np.mean((a - b) ** 2))


def forward(vgg):
    """Run every layer of ``vgg`` in order and capture the pool5 output.

    Args:
        vgg: Model exposing ``input_image``, ``param_layer_name`` (ordered
            layer names) and ``layers`` (name -> layer with ``forward``).

    Returns:
        ``(current, pool5)``: the output of the last layer and the output of
        the layer whose name contains ``'pool5'`` (an empty array if no such
        layer exists).
    """
    print('Inferencing...')
    start_time = time.time()
    current = vgg.input_image
    pool5 = np.array([])
    for layer_name in vgg.param_layer_name:
        print('Inferencing layer: ' + layer_name)
        current = vgg.layers[layer_name].forward(current)
        if 'pool5' in layer_name:
            pool5 = current
    print('Inference time: %f' % (time.time() - start_time))
    return current, pool5


def check_pool5(stu_pool5):
    """Compare the pool5 activations with the reference dump.

    Loads ``pool5_dump.npy`` from the working directory, prints the MSE and
    terminates the process when it is not below 0.003.

    Args:
        stu_pool5: Array of pool5 activations produced by the model.
    """
    data = np.load('pool5_dump.npy')
    pool5_mse = computeMse(stu_pool5.flatten(), data.flatten())
    print('test pool5 mse: %f' % pool5_mse)

    if pool5_mse < 0.003:
        print('CHECK POOL5 PASS.')
    else:
        print('CHECK POOL5 FAILED.')
        # Signal the failure to the calling shell instead of exiting with 0.
        sys.exit(1)


def evaluate(vgg):
    """Run inference, print the top-1 class and return the pool5 output."""
    prob, pool5 = forward(vgg)
    print('--------------检测结果------------------------')

    top1 = np.argmax(prob[0])
    print('Classification result: id = %d, prob = %f' % (top1, prob[0, top1]))
    return pool5


if __name__ == '__main__':
    print('-------------------------------')
    vgg = VGG19(param_path='../imagenet-vgg-verydeep-19.mat')
    vgg.build_model()
    vgg.init_model()
    vgg.load_model()
    vgg.load_image('../cat1.jpg')
    pool5 = evaluate(vgg)
    print('-------------------------------')
    check_pool5(pool5)
-------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/.DS_Store -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/.vgg_cpu.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/.vgg_cpu.py.swp -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/__init__.py -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/__init__.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/layers_1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/layers_1.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/layers_1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/layers_1.cpython-37.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/layers_2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_1_vgg/stu_upload/__pycache__/layers_2.cpython-36.pyc 
def show_matrix(mat, name):
    """Debug hook for printing matrix statistics; currently a no-op."""
    pass


def show_time(time, name):
    """Debug hook for printing a timing value; currently a no-op."""
    pass


class FullyConnectedLayer(object):
    """Fully connected layer computing ``output = input @ weight + bias``.

    ``weight`` has shape ``[num_input, num_output]`` and ``bias`` has shape
    ``[1, num_output]``.
    """

    def __init__(self, num_input, num_output):
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))

    def init_param(self, std=0.01):
        """Initialise the weights from N(0, std) and the bias with zeros."""
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.num_input, self.num_output))
        self.bias = np.zeros([1, self.num_output])
        show_matrix(self.weight, 'fc weight ')
        show_matrix(self.bias, 'fc bias ')

    def forward(self, input):
        """Return ``input @ weight + bias``; the input is cached for backward."""
        self.input = input
        self.output = np.matmul(self.input, self.weight) + self.bias
        return self.output

    def backward(self, top_diff):
        """Compute parameter gradients and return the gradient w.r.t. the input.

        Args:
            top_diff: Gradient of the loss w.r.t. this layer's output,
                shape ``[batch, num_output]``.

        Returns:
            Gradient of the loss w.r.t. this layer's input.
        """
        self.d_weight = np.matmul(self.input.T, top_diff)
        # Sum over the batch exactly like d_weight does.  Dividing by the
        # batch size here scaled the bias gradient differently from the
        # weight gradient (the loss layer in this file already averages).
        self.d_bias = np.sum(top_diff, axis=0, keepdims=True)
        bottom_diff = np.dot(top_diff, self.weight.T)
        return bottom_diff

    def get_gradient(self):
        """Return ``(d_weight, d_bias)`` from the last backward pass."""
        return self.d_weight, self.d_bias

    def update_param(self, lr):
        """Apply one gradient-descent step with learning rate ``lr``."""
        # The learning rate was previously ignored (an implicit lr of 1).
        self.weight = self.weight - lr * self.d_weight
        self.bias = self.bias - lr * self.d_bias

    def load_param(self, weight, bias):
        """Replace the parameters; shapes must match the current ones."""
        assert self.weight.shape == weight.shape
        assert self.bias.shape == bias.shape
        self.weight = weight
        self.bias = bias

    def save_param(self):
        """Return the current ``(weight, bias)``."""
        return self.weight, self.bias


class ReLULayer(object):
    """Element-wise rectified linear unit."""

    def __init__(self):
        print('\tReLU layer.')

    def forward(self, input):
        """Return ``max(input, 0)`` element-wise; the input is cached."""
        self.input = input
        output = np.maximum(self.input, 0)
        return output

    def backward(self, top_diff):
        """Pass the gradient through only where the cached input was positive."""
        bottom_diff = top_diff * (self.input > 0)
        return bottom_diff
def show_matrix(mat, name):
    """Debug hook for printing matrix statistics; currently a no-op."""
    pass


def show_time(time, name):
    """Debug hook for printing a timing value; currently a no-op."""
    pass


class ConvolutionalLayer(object):
    """2-D convolution over ``[N, C, H, W]`` inputs with square kernels.

    ``weight`` has shape ``[channel_in, kernel, kernel, channel_out]`` and
    ``bias`` has shape ``[channel_out]``.
    """

    def __init__(self, kernel_size, channel_in, channel_out, padding, stride):
        self.kernel_size = kernel_size
        self.channel_in = channel_in
        self.channel_out = channel_out
        self.padding = padding
        self.stride = stride
        print('\tConvolutional layer with kernel size %d, input channel %d, output channel %d.' % (self.kernel_size, self.channel_in, self.channel_out))

    def init_param(self, std=0.01):
        """Initialise the weights from N(0, std) and the bias with zeros."""
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.channel_in, self.kernel_size, self.kernel_size, self.channel_out))
        self.bias = np.zeros([self.channel_out])

    def forward(self, input):
        """Convolve ``input`` with the layer weights and add the bias.

        Args:
            input: Array of shape ``[N, channel_in, H, W]``.

        Returns:
            Array of shape ``[N, channel_out, H_out, W_out]``.
        """
        self.input = input  # [N, C, H, W]
        pad = self.padding
        batch, channels, in_h, in_w = self.input.shape
        height = in_h + pad * 2
        width = in_w + pad * 2
        # Zero padding.  The end index is spelled out because a "pad:-pad"
        # slice is empty when pad == 0.
        self.input_pad = np.zeros([batch, channels, height, width])
        self.input_pad[:, :, pad:pad + in_h, pad:pad + in_w] = self.input

        height_out = (height - self.kernel_size) // self.stride + 1
        width_out = (width - self.kernel_size) // self.stride + 1
        self.output = np.zeros([batch, self.channel_out, height_out, width_out])
        for idxn in range(batch):
            for idxh in range(height_out):
                h_start = idxh * self.stride
                h_end = h_start + self.kernel_size
                for idxw in range(width_out):
                    w_start = idxw * self.stride
                    w_end = w_start + self.kernel_size
                    window = self.input_pad[idxn, :, h_start:h_end, w_start:w_end]
                    # Contract [C, K, K] with the first three axes of the
                    # [C, K, K, C_out] weights: all output channels at once.
                    self.output[idxn, :, idxh, idxw] = np.tensordot(window, self.weight, axes=3) + self.bias
        return self.output

    def load_param(self, weight, bias):
        """Replace the parameters; shapes must match the current ones."""
        assert self.weight.shape == weight.shape
        assert self.bias.shape == bias.shape
        self.weight = weight
        self.bias = bias


class MaxPoolingLayer(object):
    """Max pooling over ``[N, C, H, W]`` inputs with a square window."""

    def __init__(self, kernel_size, stride):
        self.kernel_size = kernel_size
        self.stride = stride
        print('\tMax pooling layer with kernel size %d, stride %d.' % (self.kernel_size, self.stride))

    def forward(self, input):
        """Return the maximum of every pooling window.

        Args:
            input: Array of shape ``[N, C, H, W]``.

        Returns:
            Array of shape ``[N, C, H_out, W_out]``.
        """
        self.input = input  # [N, C, H, W]
        self.max_index = np.zeros(self.input.shape)
        height_out = (self.input.shape[2] - self.kernel_size) // self.stride + 1
        width_out = (self.input.shape[3] - self.kernel_size) // self.stride + 1
        self.output = np.zeros([self.input.shape[0], self.input.shape[1], height_out, width_out])
        for idxh in range(height_out):
            h_start = idxh * self.stride
            h_end = h_start + self.kernel_size
            for idxw in range(width_out):
                w_start = idxw * self.stride
                w_end = w_start + self.kernel_size
                # Reduce the window for every sample and channel at once.
                local_region = self.input[:, :, h_start:h_end, w_start:w_end]
                self.output[:, :, idxh, idxw] = local_region.max(axis=(2, 3))
        return self.output
def show_matrix(mat, name):
    """Debug hook for printing matrix statistics; currently a no-op."""
    pass


class VGG19(object):
    """VGG-19 classifier assembled from the hand-written numpy layers."""

    def __init__(self, param_path='../../imagenet-vgg-verydeep-19.mat'):
        """Store the parameter file path and the ordered list of layer names."""
        self.param_path = param_path
        self.param_layer_name = (
            'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
            'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
            'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
            'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
            'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
            'flatten', 'fc6', 'relu6', 'fc7', 'relu7', 'fc8', 'softmax'
        )

    def build_model(self):
        """Instantiate every layer of VGG-19 in network order."""
        print('Building vgg-19 model...')

        self.layers = {}
        # (block number, input channels, output channels, number of convs).
        # Every conv is 3x3 with padding 1 and stride 1; every block ends with
        # a 2x2 max pooling of stride 2.
        conv_blocks = (
            (1, 3, 64, 2),
            (2, 64, 128, 2),
            (3, 128, 256, 4),
            (4, 256, 512, 4),
            (5, 512, 512, 4),
        )
        for block, channel_in, channel_out, num_convs in conv_blocks:
            for i in range(1, num_convs + 1):
                # Only the first conv of a block changes the channel count.
                conv_in = channel_in if i == 1 else channel_out
                self.layers['conv%d_%d' % (block, i)] = ConvolutionalLayer(3, conv_in, channel_out, 1, 1)
                self.layers['relu%d_%d' % (block, i)] = ReLULayer()
            self.layers['pool%d' % block] = MaxPoolingLayer(2, 2)

        self.layers['flatten'] = FlattenLayer([512, 7, 7], 512 * 7 * 7)

        self.layers['fc6'] = FullyConnectedLayer(25088, 4096)
        self.layers['relu6'] = ReLULayer()
        self.layers['fc7'] = FullyConnectedLayer(4096, 4096)
        self.layers['relu7'] = ReLULayer()
        self.layers['fc8'] = FullyConnectedLayer(4096, 1000)

        self.layers['softmax'] = SoftmaxLossLayer()

        # Layers that own parameters (convolutions and fully connected).
        self.update_layer_list = [
            layer_name for layer_name in self.layers
            if 'conv' in layer_name or 'fc' in layer_name
        ]

    def init_model(self):
        """Allocate and randomly initialise the parameters of every layer."""
        print('Initializing parameters of each layer in vgg-19...')
        for layer_name in self.update_layer_list:
            self.layers[layer_name].init_param()

    def load_model(self):
        """Load the pretrained parameters from ``self.param_path``."""
        print('Loading parameters from file ' + self.param_path)
        params = scipy.io.loadmat(self.param_path)
        self.image_mean = params['normalization'][0][0][0]
        self.image_mean = np.mean(self.image_mean, axis=(0, 1))
        print('Get image mean: ' + str(self.image_mean))

        for idx, layer_name in enumerate(self.param_layer_name):
            if 'conv' in layer_name:
                weight, bias = params['layers'][0][idx][0][0][0][0]
                # matconvnet: weights dim [height, width, in_channel, out_channel]
                # ours: weights dim [in_channel, height, width, out_channel]
                weight = weight.transpose(2, 0, 1, 3)
                bias = np.reshape(bias, bias.shape[-1])
                self.layers[layer_name].load_param(weight, bias)
            elif 'fc' in layer_name:
                # Every fc layer comes after 'flatten' in param_layer_name and
                # reads the entry one position earlier in the file.
                # NOTE(review): presumably because the .mat file has no
                # flatten entry -- confirm against the parameter file.
                weight, bias = params['layers'][0][idx - 1][0][0][0][0]
                weight = np.reshape(weight, [-1, weight.shape[-1]])
                self.layers[layer_name].load_param(weight, bias)

    def load_image(self, image_dir):
        """Read an image, subtract the mean and store it as ``[1, C, H, W]``.

        Requires ``load_model`` to have been called first, because it uses
        ``self.image_mean``.
        """
        print('Loading and preprocessing image from ' + image_dir)
        # NOTE(review): scipy.misc.imread/imresize were removed from recent
        # SciPy releases; an old SciPy (with Pillow) is needed -- confirm the
        # target environment.
        self.input_image = scipy.misc.imread(image_dir)
        self.input_image = scipy.misc.imresize(self.input_image, [224, 224, 3])
        self.input_image = np.array(self.input_image).astype(np.float32)
        self.input_image -= self.image_mean
        self.input_image = np.reshape(self.input_image, [1] + list(self.input_image.shape))
        # [N, height, width, channel] -> [N, channel, height, width]
        self.input_image = self.input_image.transpose(0, 3, 1, 2)

    def forward(self):
        """Feed ``self.input_image`` through all layers; return the last output."""
        print('Inferencing...')
        start_time = time.time()
        current = self.input_image
        for layer_name in self.param_layer_name:
            print('Inferencing layer: ' + layer_name)
            current = self.layers[layer_name].forward(current)
        print('Inference time: %f' % (time.time() - start_time))
        return current

    def evaluate(self):
        """Run inference, print the top-1 class and return the probabilities."""
        prob = self.forward()
        top1 = np.argmax(prob[0])
        print('Classification result: id = %d, prob = %f' % (top1, prob[0, top1]))
        # Returned so that callers such as ``prob = vgg.evaluate()`` get the
        # network output instead of None.
        return prob


if __name__ == '__main__':
    vgg = VGG19()
    vgg.build_model()
    vgg.init_model()
    vgg.load_model()
    vgg.load_image('../../cat1.jpg')
    prob = vgg.evaluate()
def evaluate(vgg):
    """Time one forward pass and print the top-1 classification result.

    Args:
        vgg: Model exposing ``forward()`` and ``net.getOutputData()``; the
            latter returns one score per class.

    Returns:
        ``(top1, prob)``: index of the best-scoring class and its score.
    """
    start = time.time()
    vgg.forward()
    end = time.time()
    print('inference time: %f' % (end - start))
    result = vgg.net.getOutputData()
    # Index of the first maximum.  Works for any indexable sequence, not only
    # for lists (which are the only ones offering ``.index``).
    top1 = max(range(len(result)), key=lambda i: result[i])
    prob = result[top1]
    print('Classification result: id = %d, prob = %f' % (top1, prob))
    return top1, prob


if __name__ == '__main__':
    vgg = VGG19()

    vgg.build_model(param_path='../imagenet-vgg-verydeep-19.mat')
    vgg.load_model()
    vgg.load_image('../cat1.jpg')
    # Repeat the inference to observe run-to-run timing.
    for i in range(10):
        evaluate(vgg)
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__init__.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__pycache__/vgg19_demo.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__pycache__/vgg19_demo.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__pycache__/vgg19_demo.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/__pycache__/vgg19_demo.cpython-37.pyc 
-------------------------------------------------------------------------------- /code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/vgg19_demo.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_2_vgg_dlp/stu_upload/vgg19_demo.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/.DS_Store -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/__pycache__/standard_layer_2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/__pycache__/standard_layer_2.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/__pycache__/standard_layer_3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/__pycache__/standard_layer_3.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/main_exp_3_3.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from 
def computeMse(data1, data2):
    """Return the mean squared error between two equally-shaped inputs.

    Args:
        data1: Sequence or array of numbers.
        data2: Sequence or array of numbers with the same shape as ``data1``.

    Returns:
        The mean of the element-wise squared differences as a Python float.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    first = np.asarray(data1, dtype=np.float64)
    second = np.asarray(data2, dtype=np.float64)
    if first.shape != second.shape:
        raise ValueError('computeMse: shape mismatch %s vs %s'
                         % (first.shape, second.shape))
    if first.size == 0:
        raise ValueError('computeMse: inputs must not be empty')
    # One vectorised pass instead of two Python-level loops; inputs may be
    # large flattened tensors.
    return float(np.mean(np.square(first - second)))
def _time_call(func, arg):
    """Call ``func(arg)`` and return ``(result, elapsed_seconds)``."""
    stamp = time.time()
    result = func(arg)
    return result, time.time() - stamp


def _benchmark_conv(layer, label, data, dloss, weight, bias):
    """Load parameters into ``layer``, then time one forward and one backward pass.

    Returns ``(forward_result, forward_seconds, backward_result, backward_seconds)``.
    """
    layer.init_param()
    layer.load_param(weight, bias)
    forward_result, forward_time = _time_call(layer.forward, data)
    print('conv forward %s time: %f ms' % (label, forward_time * 1000))
    backward_result, backward_time = _time_call(layer.backward, dloss)
    print('conv backward %s time: %f ms' % (label, backward_time * 1000))
    return forward_result, forward_time, backward_result, backward_time


def test_speed_up():
    """Benchmark two ``ConvolutionalLayer`` configurations against each other.

    Both layers get the same random input, upstream gradient, filter and bias.
    The second layer is built with one extra trailing constructor argument
    (``1``) -- presumably the flag selecting the accelerated implementation;
    confirm against ``layers_2.ConvolutionalLayer``.  The run passes when the
    forward and backward results of the two layers agree (MSE < 0.003);
    otherwise the process exits.
    """
    test_data = np.random.rand(1, 256, 24, 40)
    test_dloss = np.random.rand(1, 256, 24, 40)
    test_filter = np.random.rand(256, 3, 3, 256)
    test_bias = np.random.rand(256)

    conv = ConvolutionalLayer(3, 256, 256, 1, 1)
    (conv_forward_result, conv_forward_time,
     conv_backward_result, conv_backward_time) = _benchmark_conv(
        conv, 'raw', test_data, test_dloss, test_filter, test_bias)

    speedup_conv = ConvolutionalLayer(3, 256, 256, 1, 1, 1)
    (speedup_conv_forward_result, speedup_conv_forward_time,
     speedup_conv_backward_result, speedup_conv_backward_time) = _benchmark_conv(
        speedup_conv, 'speedup', test_data, test_dloss, test_filter, test_bias)

    speedup_conv_forward_mse = computeMse(
        conv_forward_result.flatten(), speedup_conv_forward_result.flatten())
    speedup_conv_backward_mse = computeMse(
        conv_backward_result.flatten(), speedup_conv_backward_result.flatten())
    if speedup_conv_forward_mse < 0.003 and speedup_conv_backward_mse < 0.003:
        print('SPEEDUP CONV TEST PASS.')
    else:
        print('SPEEDUP CONV TEST FAILED.')
        exit()

    print('CONV FORWARD SPEEDUP RATIO: %f' % (conv_forward_time / speedup_conv_forward_time))
    print('CONV BACKWARD SPEEDUP RATIO: %f' % (conv_backward_time / speedup_conv_backward_time))


if __name__ == '__main__':
    np.random.seed(1234)
    print('-------------------------')
    # test_speed_up()  # optional: benchmark the convolution implementations first
    print('-------------------------')
    CONTENT_LOSS_LAYERS = ['relu4_2']
    STYLE_LOSS_LAYERS = ['relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1']
    NOISE = 0.5
    ALPHA, BETA = 1, 500
    TRAIN_STEP = 100
    LEARNING_RATE = 1.0
    IMAGE_HEIGHT, IMAGE_WIDTH = 192, 320
    REPORT_EVERY = 1
    OUTPUT_DIR = 'output'

    vgg = VGG19(param_path='../imagenet-vgg-verydeep-19.mat')
    vgg.build_model()
    vgg.init_model()
    vgg.load_model()
    content_loss_layer = ContentLossLayer()
    style_loss_layer = StyleLossLayer()

    content_image, content_shape = vgg.load_image('../weinisi.jpg', IMAGE_HEIGHT, IMAGE_WIDTH)
    style_image, _ = vgg.load_image('../style.jpg', IMAGE_HEIGHT, IMAGE_WIDTH)
    content_layers = vgg.forward(content_image, CONTENT_LOSS_LAYERS)
    style_layers = vgg.forward(style_image, STYLE_LOSS_LAYERS)
    transfer_image = get_random_img(content_image, NOISE)

    # Size the optimizer state from the tensor it will actually update, so it
    # can never disagree with the layout produced by load_image.
    adam_optimizer = AdamOptimizer(LEARNING_RATE, list(transfer_image.shape))
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    start = time.time()
    for step in range(TRAIN_STEP):
        transfer_layers = vgg.forward(transfer_image, CONTENT_LOSS_LAYERS + STYLE_LOSS_LAYERS)
        content_loss = np.array([])
        style_loss = np.array([])
        content_diff = np.zeros(transfer_image.shape)
        style_diff = np.zeros(transfer_image.shape)
        for layer in CONTENT_LOSS_LAYERS:
            # Content loss: forward pass.
            current_loss = content_loss_layer.forward(transfer_layers[layer], content_layers[layer])
            content_loss = np.append(content_loss, current_loss)
            # Content loss: back-propagate from this layer down to the image.
            dloss = content_loss_layer.backward(transfer_layers[layer], content_layers[layer])
            content_diff += vgg.backward(dloss, layer)
        for layer in STYLE_LOSS_LAYERS:
            # Style loss: forward pass.
            current_loss = style_loss_layer.forward(transfer_layers[layer], style_layers[layer])
            style_loss = np.append(style_loss, current_loss)
            # Style loss: back-propagate from this layer down to the image.
            dloss = style_loss_layer.backward(transfer_layers[layer], style_layers[layer])
            style_diff += vgg.backward(dloss, layer)
        total_loss = ALPHA * np.mean(content_loss) + BETA * np.mean(style_loss)
        image_diff = ALPHA * content_diff / len(CONTENT_LOSS_LAYERS) + BETA * style_diff / len(STYLE_LOSS_LAYERS)
        # Update the transfer image with Adam.
        transfer_image = adam_optimizer.update(transfer_image, image_diff)
        if step % REPORT_EVERY == 0:
            print('Step %d, loss = %f' % (step, total_loss), content_loss, style_loss)
            print('cost time: %f' % (time.time() - start))
            vgg.save_image(transfer_image, content_shape, OUTPUT_DIR + '/output_' + str(step) + '.jpg')
            start = time.time()
-------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/output/output_22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/output/output_22.jpg -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/output/output_34.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/output/output_34.jpg -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/output/output_46.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/output/output_46.jpg -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/readme.txt: -------------------------------------------------------------------------------- 1 | 补全 stu_upload 中的 layers_1.py、layers_2.py、layers_3.py、exp_3_3_style_transfer.py 文件,并且补全 main_exp_3_3.py,执行 main_exp_3_3.py 运行实验。
2 | 训练生成的图片保存在 output 目录下。 3 | -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/.DS_Store -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/__init__.py -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/__init__.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/exp_3_3_style_transfer.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/exp_3_3_style_transfer.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/layers_1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/layers_1.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/layers_2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/layers_2.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/layers_3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/__pycache__/layers_3.cpython-36.pyc -------------------------------------------------------------------------------- /code_chap_3_student/exp_3_3_style_transfer/stu_upload/exp_3_3_style_transfer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import struct 3 | 
def show_matrix(mat, name):
    """Debug hook for matrix statistics; intentionally a no-op."""
    #print(name + str(mat.shape) + ' mean %f, std %f' % (mat.mean(), mat.std()))
    pass


class VGG19(object):
    """Convolutional part of VGG-19 (conv1_1 .. pool5) used for style transfer.

    Tensors flowing through the network use the layout
    [N, channel, height, width].
    """

    def __init__(self, param_path='../../imagenet-vgg-verydeep-19.mat'):
        """Remember the parameter file and the ordered list of layer names."""
        self.param_path = param_path
        # The order matters: load_model uses the position of each name as the
        # index into the 'layers' entry of the parameter file.
        self.param_layer_name = [
            'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
            'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
            'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
            'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
            'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5'
        ]

    def build_model(self):
        """Instantiate every layer in ``param_layer_name`` order.

        Fills ``self.layers`` (name -> layer object) and
        ``self.update_layer_list`` (names of the layers that own parameters).
        """
        print('Building vgg-19 model...')

        # Output channel count of every convolution in stage 1..5.
        stage_channels = {'1': 64, '2': 128, '3': 256, '4': 512, '5': 512}

        self.layers = {}
        in_channels = 3  # the network consumes 3-channel images
        for layer_name in self.param_layer_name:
            if layer_name.startswith('conv'):
                # 'convS_k': character 4 is the stage number S.
                out_channels = stage_channels[layer_name[4]]
                self.layers[layer_name] = ConvolutionalLayer(3, in_channels, out_channels, 1, 1)
                in_channels = out_channels
            elif layer_name.startswith('relu'):
                self.layers[layer_name] = ReLULayer()
            else:
                self.layers[layer_name] = MaxPoolingLayer(2, 2)

        self.update_layer_list = [
            layer_name for layer_name in self.layers if 'conv' in layer_name
        ]

    def init_model(self):
        """Call ``init_param`` on every parameterised layer."""
        print('Initializing parameters of each layer in vgg-19...')
        for layer_name in self.update_layer_list:
            self.layers[layer_name].init_param()

    def load_model(self):
        """Load the image mean and all convolution weights from ``param_path``."""
        print('Loading parameters from file ' + self.param_path)
        params = scipy.io.loadmat(self.param_path)
        self.image_mean = params['normalization'][0][0][0]
        # Collapse the per-pixel mean image to one mean value per channel.
        self.image_mean = np.mean(self.image_mean, axis=(0, 1))
        print('Get image mean: ' + str(self.image_mean))
        for idx, layer_name in enumerate(self.param_layer_name):
            if 'conv' in layer_name:
                weight, bias = params['layers'][0][idx][0][0][0][0]
                # matconvnet: weights dim [height, width, in_channel, out_channel]
                # ours:       weights dim [in_channel, height, width, out_channel]
                weight = np.transpose(weight, [2, 0, 1, 3])
                bias = bias.reshape(-1)
                self.layers[layer_name].load_param(weight, bias)

    def load_image(self, image_dir, image_height, image_width):
        """Read an image, resize it and convert it to a network input tensor.

        ``load_model`` must have been called first, because the channel mean
        it loads is subtracted here.

        Args:
            image_dir: Path of the image file.
            image_height: Height of the tensor fed to the network.
            image_width: Width of the tensor fed to the network.

        Returns:
            ``(tensor, image_shape)`` where ``tensor`` is float32 with layout
            [1, 3, image_height, image_width] and ``image_shape`` is the
            ``(height, width, 3)`` shape of the image as stored on disk, which
            ``save_image`` uses to restore the original resolution.
        """
        print('Loading and preprocessing image from ' + image_dir)
        with Image.open(image_dir) as raw_image:
            # Force three channels so grayscale / RGBA / palette files work too.
            rgb_image = raw_image.convert('RGB')
        original_width, original_height = rgb_image.size
        image_shape = (original_height, original_width, 3)

        # PIL expects (width, height); np.array() then yields [height, width, 3].
        resized = rgb_image.resize((image_width, image_height))

        self.input_image = np.array(resized).astype(np.float32)
        self.input_image -= self.image_mean
        self.input_image = np.reshape(self.input_image, [1] + list(self.input_image.shape))
        # [N, height, width, channel] -> [N, channel, height, width]
        self.input_image = self.input_image.transpose(0, 3, 1, 2)
        return self.input_image, image_shape

    def save_image(self, input_image, image_shape, image_dir):
        """Convert a network tensor back to an 8-bit image and write it to disk.

        Args:
            input_image: Tensor with layout [1, 3, height, width].
            image_shape: Target size, either ``(height, width, channels)`` as
                returned by ``load_image`` or a PIL-style ``(width, height)``.
            image_dir: Destination file path.
        """
        #print('Save image at ' + image_dir)
        # [N, channel, height, width] -> [N, height, width, channel]
        input_image = input_image.transpose(0, 2, 3, 1)
        input_image = input_image[0] + self.image_mean
        input_image = np.clip(input_image, 0, 255).astype(np.uint8)
        pillow_image = Image.fromarray(input_image)
        if len(image_shape) == 3:
            # (height, width, channels) -> PIL's (width, height)
            image_shape = (image_shape[1], image_shape[0])
        pillow_image = pillow_image.resize(image_shape)
        pillow_image.save(image_dir)

    def forward(self, input_image, layer_list):
        """Run the whole network and return the outputs of selected layers.

        Args:
            input_image: Input tensor, [N, 3, height, width].
            layer_list: Names of the layers whose outputs should be kept.

        Returns:
            Dict mapping each requested layer name to that layer's output.
        """
        start_time = time.time()
        current = input_image
        layer_forward = {}
        for layer_name in self.param_layer_name:
            current = self.layers[layer_name].forward(current)
            if layer_name in layer_list:
                layer_forward[layer_name] = current
        print('Forward time: %f' % (time.time()-start_time))
        return layer_forward

    def backward(self, dloss, layer_name):
        """Back-propagate ``dloss`` from ``layer_name`` down to the input image.

        Relies on the state every layer cached during the latest ``forward``.

        Returns:
            Gradient of the loss with respect to the network input.
        """
        start_time = time.time()
        layer_idx = self.param_layer_name.index(layer_name)
        for idx in range(layer_idx, -1, -1):
            dloss = self.layers[self.param_layer_name[idx]].backward(dloss)
        print('Backward time: %f' % (time.time()-start_time))
        return dloss


def get_random_img(content_image, noise):
    """Blend uniform noise in [-20, 20) into ``content_image``.

    ``noise`` is the weight of the noise; ``1 - noise`` is the weight of the
    content image.
    """
    noise_image = np.random.uniform(-20, 20, content_image.shape)
    random_img = noise_image * noise + content_image * (1 - noise)
    return random_img


class AdamOptimizer(object):
    """Adam update rule applied directly to an image tensor."""

    def __init__(self, lr, diff_shape):
        """Create the optimizer.

        Args:
            lr: Learning rate.
            diff_shape: Expected shape of the gradients.  It is only a hint: if
                the first gradient cannot be broadcast against it, the moment
                buffers are re-created with the gradient's shape.
        """
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.eps = 1e-8
        self.lr = lr
        self.mt = np.zeros(diff_shape)
        self.vt = np.zeros(diff_shape)
        self.step = 0

    def _sync_moment_shape(self, grad):
        """Before the first step, re-create the moments if ``grad`` is incompatible."""
        if self.step != 0:
            return
        try:
            np.broadcast(self.mt, grad)
        except ValueError:
            # Both buffers are still all-zero, so re-creating them loses nothing.
            self.mt = np.zeros(np.shape(grad))
            self.vt = np.zeros(np.shape(grad))

    def update(self, input, grad):
        """Return ``input`` after one Adam step along ``grad``."""
        self._sync_moment_shape(grad)
        self.step += 1
        self.mt = self.beta1 * self.mt + (1 - self.beta1) * grad
        self.vt = self.beta2 * self.vt + (1 - self.beta2) * (grad ** 2)
        # Bias-corrected first and second moment estimates.
        mt_hat = self.mt / (1 - np.power(self.beta1, self.step))
        vt_hat = self.vt / (1 - np.power(self.beta2, self.step))
        output = input - self.lr * mt_hat / (np.sqrt(vt_hat) + self.eps)
        return output
if __name__ == '__main__':

    CONTENT_LOSS_LAYERS = ['relu4_2']
    STYLE_LOSS_LAYERS = ['relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1']
    NOISE = 0.5
    ALPHA, BETA = 1, 500
    TRAIN_STEP = 50
    LEARNING_RATE = 1.0
    IMAGE_HEIGHT, IMAGE_WIDTH = 192, 320
    OUTPUT_DIR = '../output'

    vgg = VGG19()
    vgg.build_model()
    vgg.init_model()
    vgg.load_model()
    content_loss_layer = ContentLossLayer()
    style_loss_layer = StyleLossLayer()

    content_image, content_shape = vgg.load_image('../../weinisi.jpg', IMAGE_HEIGHT, IMAGE_WIDTH)
    style_image, _ = vgg.load_image('../../style.jpg', IMAGE_HEIGHT, IMAGE_WIDTH)
    content_layers = vgg.forward(content_image, CONTENT_LOSS_LAYERS)
    style_layers = vgg.forward(style_image, STYLE_LOSS_LAYERS)
    transfer_image = get_random_img(content_image, NOISE)

    # Size the optimizer state from the tensor it will actually update rather
    # than from a hand-written [N, C, ., .] list that can drift out of sync
    # with the layout produced by load_image.
    adam_optimizer = AdamOptimizer(LEARNING_RATE, list(transfer_image.shape))
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for step in range(TRAIN_STEP):
        transfer_layers = vgg.forward(transfer_image, CONTENT_LOSS_LAYERS + STYLE_LOSS_LAYERS)
        content_loss = np.array([])
        style_loss = np.array([])
        content_diff = np.zeros(transfer_image.shape)
        style_diff = np.zeros(transfer_image.shape)
        for layer in CONTENT_LOSS_LAYERS:
            # Content loss: forward pass.
            current_loss = content_loss_layer.forward(transfer_layers[layer], content_layers[layer])
            content_loss = np.append(content_loss, current_loss)
            # Content loss: back-propagate from this layer down to the image.
            dloss = content_loss_layer.backward(transfer_layers[layer], content_layers[layer])
            content_diff += vgg.backward(dloss, layer)
        for layer in STYLE_LOSS_LAYERS:
            # Style loss: forward pass.
            current_loss = style_loss_layer.forward(transfer_layers[layer], style_layers[layer])
            style_loss = np.append(style_loss, current_loss)
            # Style loss: back-propagate from this layer down to the image.
            dloss = style_loss_layer.backward(transfer_layers[layer], style_layers[layer])
            style_diff += vgg.backward(dloss, layer)
        total_loss = ALPHA * np.mean(content_loss) + BETA * np.mean(style_loss)
        image_diff = ALPHA * content_diff / len(CONTENT_LOSS_LAYERS) + BETA * style_diff / len(STYLE_LOSS_LAYERS)
        # Update the transfer image with Adam.
        transfer_image = adam_optimizer.update(transfer_image, image_diff)
        if step % 2 == 0:
            print('Step %d, loss = %f' % (step, total_loss), content_loss, style_loss)
            vgg.save_image(transfer_image, content_shape, OUTPUT_DIR + '/output_' + str(step) + '.jpg')
def show_matrix(mat, name):
    """Debug hook for matrix statistics; intentionally a no-op."""
    #print(name + str(mat.shape) + ' mean %f, std %f' % (mat.mean(), mat.std()))
    pass


def show_time(time, name):
    """Debug hook for timing output; intentionally a no-op."""
    #print(name + str(time))
    pass


class FullyConnectedLayer(object):
    """Affine layer computing ``output = input @ weight + bias``."""

    def __init__(self, num_input, num_output):
        """Store the layer dimensions; parameters are created by ``init_param``."""
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))

    def init_param(self, std=0.01):
        """Draw the weights from N(0, std^2) and set the bias to zero."""
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.num_input, self.num_output))
        self.bias = np.zeros([1, self.num_output])

    def forward(self, input):
        """Return ``input @ weight + bias`` and cache ``input`` for ``backward``."""
        self.input = input
        self.output = np.dot(self.input, self.weight) + self.bias
        return self.output

    def backward(self, top_diff):
        """Compute the parameter gradients and return the input gradient.

        Args:
            top_diff: Gradient of the loss w.r.t. this layer's output,
                shape [batch, num_output].

        Returns:
            Gradient of the loss w.r.t. this layer's input.
        """
        self.d_weight = np.matmul(self.input.T, top_diff)
        # Sum over the batch exactly like d_weight does.  Any averaging is the
        # loss layer's job; dividing here as well would scale the bias gradient
        # differently from the weight gradient.
        self.d_bias = np.sum(top_diff, axis=0, keepdims=True)
        bottom_diff = np.dot(top_diff, self.weight.T)
        return bottom_diff

    def get_gradient(self):
        """Return ``(d_weight, d_bias)`` from the latest ``backward`` call."""
        return self.d_weight, self.d_bias

    def update_param(self, lr):
        """Apply one plain gradient-descent step with learning rate ``lr``."""
        self.weight = self.weight - lr * self.d_weight
        self.bias = self.bias - lr * self.d_bias

    def load_param(self, weight, bias):
        """Replace the parameters; the shapes must match the current ones."""
        assert self.weight.shape == weight.shape
        assert self.bias.shape == bias.shape
        self.weight = weight
        self.bias = bias

    def save_param(self):
        """Return ``(weight, bias)``."""
        show_matrix(self.weight, 'fc weight ')
        show_matrix(self.bias, 'fc bias ')
        return self.weight, self.bias


class ReLULayer(object):
    """Element-wise rectified linear unit."""

    def __init__(self):
        print('\tReLU layer.')

    def forward(self, input):
        """Return ``max(input, 0)`` element-wise and cache ``input``."""
        self.input = input
        output = np.maximum(self.input, 0)
        return output

    def backward(self, top_diff):
        """Pass the gradient through only where the cached input was positive."""
        bottom_diff = top_diff * (self.input > 0)
        return bottom_diff


class SoftmaxLossLayer(object):
    """Softmax followed by the mean cross-entropy loss."""

    def __init__(self):
        print('\tSoftmax loss layer.')

    def forward(self, input):
        """Return the row-wise softmax of ``input`` ([batch, classes])."""
        # Subtracting the row maximum keeps exp() from overflowing.
        input_max = np.max(input, axis=1, keepdims=True)
        input_exp = np.exp(input - input_max)
        exp_sum = np.sum(input_exp, axis=1, keepdims=True)
        self.prob = input_exp / exp_sum
        return self.prob

    def get_loss(self, label):
        """Return the mean cross-entropy of the cached probabilities.

        Args:
            label: Integer class index per sample, shape [batch].
        """
        self.batch_size = self.prob.shape[0]
        self.label_onehot = np.zeros_like(self.prob)
        sample_idx = np.arange(self.batch_size)
        self.label_onehot[sample_idx, label] = 1.0
        # Take the log only at the target classes: a probability that
        # underflowed to 0 elsewhere would otherwise give log(0) * 0 = NaN.
        loss = -np.sum(np.log(self.prob[sample_idx, label])) / self.batch_size
        return loss

    def backward(self):
        """Return d(loss)/d(input); ``get_loss`` must have been called first."""
        bottom_diff = (self.prob - self.label_onehot) / self.batch_size
        return bottom_diff
class ContentLossLayer(object):
    """Content loss: half the mean squared difference of two feature maps."""

    def __init__(self):
        print('\tContent loss layer.')

    def forward(self, input_layer, content_layer):
        """Return ``sum((input - content)^2) / (2 * input.size)``."""
        loss = np.square(input_layer - content_layer).sum() / (2 * input_layer.size)
        return loss

    def backward(self, input_layer, content_layer):
        """Return the gradient of the content loss w.r.t. ``input_layer``."""
        bottom_diff = (input_layer - content_layer) / input_layer.size
        return bottom_diff


class StyleLossLayer(object):
    """Style loss based on the difference between Gram matrices.

    Feature maps use the layout [N, C, H, W].  Only the first sample of the
    style feature map is used as the style target.
    """

    def __init__(self):
        print('\tStyle loss layer.')

    def _compute_grams(self, input_layer, style_layer):
        """Compute and cache the Gram matrices and normaliser for these inputs."""
        style_layer_reshape = np.reshape(style_layer, [style_layer.shape[0], style_layer.shape[1], -1])
        self.gram_style = np.dot(style_layer_reshape[0, :, :], style_layer_reshape[0, :, :].T)
        self.input_layer_reshape = np.reshape(input_layer, [input_layer.shape[0], input_layer.shape[1], -1])
        self.gram_input = np.zeros([input_layer.shape[0], input_layer.shape[1], input_layer.shape[1]])
        for idxn in range(input_layer.shape[0]):
            features = self.input_layer_reshape[idxn, :, :]
            self.gram_input[idxn, :, :] = np.dot(features, features.T)
        M = input_layer.shape[2] * input_layer.shape[3]  # spatial positions
        N = input_layer.shape[1]                         # channels
        self.div = M * M * N * N

    def forward(self, input_layer, style_layer):
        """Return ``sum((G_input - G_style)^2) / (4 * batch * M^2 * N^2)``."""
        self._compute_grams(input_layer, style_layer)
        style_diff = np.sum(np.square(self.gram_input - self.gram_style))
        loss = 1.0 / (4 * input_layer.shape[0] * self.div) * style_diff
        return loss

    def backward(self, input_layer, style_layer):
        """Return the gradient of the style loss w.r.t. ``input_layer``.

        The Gram matrices are recomputed from the arguments, so the result does
        not depend on whether (or with which inputs) ``forward`` ran before.
        """
        self._compute_grams(input_layer, style_layer)
        batch_size = input_layer.shape[0]
        bottom_diff = np.zeros([batch_size, input_layer.shape[1], input_layer.shape[2] * input_layer.shape[3]])
        for idxn in range(batch_size):
            diff = self.gram_input[idxn, :, :] - self.gram_style
            bottom_diff[idxn, :, :] = 1.0 / (batch_size * self.div) * np.dot(diff, self.input_layer_reshape[idxn, :, :])
        bottom_diff = np.reshape(bottom_diff, input_layer.shape)
        return bottom_diff
/code_chap_3_student/exp_3_3_style_transfer/stu_upload/layers_3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/exp_3_3_style_transfer/stu_upload/layers_3.pyc -------------------------------------------------------------------------------- /code_chap_3_student/readme.txt: -------------------------------------------------------------------------------- 1 | 实验二 2 | 实验2.1请进入exp_2_1_mnist_mlp目录,请参考目录内readme.txt补全代码并提交。 3 | 实验2.2请进入exp_2_2_mnist_mlp_dlp目录,请参考目录内readme.txt补全代码并提交。 4 | 5 | 实验三 6 | 实验3.1请进入exp_3_1_vgg目录,请参考目录内readme.txt补全代码并提交。 7 | 实验3.2请进入exp_3_2_vgg_dlp目录,请参考目录内readme.txt补全代码并提交。 8 | 实验3.3请进入exp_3_3_style_transfer目录,请参考目录内readme.txt补全代码并提交。 9 | 10 | -------------------------------------------------------------------------------- /code_chap_3_student/style.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/style.jpg -------------------------------------------------------------------------------- /code_chap_3_student/weinisi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_3_student/weinisi.jpg -------------------------------------------------------------------------------- /code_chap_4_student/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/.DS_Store -------------------------------------------------------------------------------- 
/code_chap_4_student/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.defaultFormatter": null 4 | } 5 | } -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_1_vgg19_student/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_1_vgg19_student/data/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/data/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_1_vgg19_student/data/cat1.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/data/strawberries.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_1_vgg19_student/data/strawberries.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/models/.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_1_vgg19_student/models/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/readme.txt: -------------------------------------------------------------------------------- 1 | 1、补全 stu_upload 中的 generate_pth.py、evaluate_cpu.py、evaluate_cnnl_mfus.py 文件; 2 | 3 | 2、执行 bash run_cpu.sh完成CPU上的推理; 4 | 5 | 3、执行 bash run_mlu.sh完成DLP上的推理 6 | 7 | 需要提交的文件为generate_pth.py、evaluate_cpu.py、evaluate_cnnl_mfus.py文件,将以上文件压缩为 zip 包提交。 -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/run_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm models/vgg19.pth 3 | python stu_upload/generate_pth.py 4 | python stu_upload/evaluate_cpu.py 5 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/run_mlu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python stu_upload/evaluate_cnnl_mfus.py 3 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/stu_upload/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_1_vgg19_student/stu_upload/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_1_vgg19_student/stu_upload/evaluate_cnnl_mfus.py: -------------------------------------------------------------------------------- 1 | 
# Compact description of the VGG19 convolutional stack. Nothing in this script
# reads it (vgg19() below spells every layer out by name); presumably the
# integers are conv output channels, 'R' is a ReLU and 'M' a max-pool -- TODO confirm.
cfgs = [64, 'R', 64, 'R', 'M', 128, 'R', 128, 'R', 'M',
        256, 'R', 256, 'R', 256, 'R', 256, 'R', 'M',
        512, 'R', 512, 'R', 512, 'R', 512, 'R', 'M',
        512, 'R', 512, 'R', 512, 'R', 512, 'R', 'M']

IMAGE_PATH = 'data/strawberries.jpg'
VGG_PATH = 'models/vgg19.pth'


def vgg19():
    """Build VGG19 as an ``nn.Sequential`` whose children are named per layer.

    The child names (``conv1_1`` ... ``fc8``) become the prefixes of the
    ``state_dict`` keys, so both the names and their order must stay in sync
    with the checkpoint that is loaded into the returned model.

    Returns:
        nn.Sequential: 16 3x3 convolutions with ReLUs, 5 max-pools, a flatten,
        3 fully connected layers and a final softmax over 1000 classes.
    """
    num_classes = 1000

    layers = [
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
        'flatten', 'fc6', 'relu6', 'fc7', 'relu7', 'fc8', 'softmax'
    ]

    # (in_channels, out_channels) of every convolution.
    conv_map = {
        'conv1_1': (3, 64), 'conv1_2': (64, 64),
        'conv2_1': (64, 128), 'conv2_2': (128, 128),
        'conv3_1': (128, 256), 'conv3_2': (256, 256), 'conv3_3': (256, 256), 'conv3_4': (256, 256),
        'conv4_1': (256, 512), 'conv4_2': (512, 512), 'conv4_3': (512, 512), 'conv4_4': (512, 512),
        'conv5_1': (512, 512), 'conv5_2': (512, 512), 'conv5_3': (512, 512), 'conv5_4': (512, 512),
    }

    # (in_features, out_features) of every fully connected layer.
    # 25088 = 512 channels * 7 * 7: a 224x224 input halved by five 2x2 pools.
    fc_map = {
        'fc6': (25088, 4096),
        'fc7': (4096, 4096),
        'fc8': (4096, num_classes),
    }

    layer_container = nn.Sequential()
    for layer_name in layers:
        if layer_name.startswith('conv'):
            in_ch, out_ch = conv_map[layer_name]
            module = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, padding=1)
        elif layer_name.startswith('relu'):
            module = nn.ReLU(inplace=True)
        elif layer_name.startswith('pool'):
            module = nn.MaxPool2d(kernel_size=2, stride=2)
        elif layer_name == 'flatten':
            module = nn.Flatten()
        elif layer_name in fc_map:
            in_features, out_features = fc_map[layer_name]
            module = nn.Linear(in_features=in_features, out_features=out_features)
        elif layer_name == 'softmax':
            module = nn.Softmax(dim=1)
        else:
            # Unreachable with the list above; guards against future typos.
            raise ValueError("unknown layer name: %r" % layer_name)
        layer_container.add_module(layer_name, module)
    return layer_container


def load_image(path):
    """Read an image file and return a normalised ``(1, 3, 224, 224)`` tensor.

    The image is resized so its shorter side is 256, centre-cropped to
    224x224, converted to a float tensor and normalised with the mean/std
    constants below.

    Args:
        path: Path of the image file to open.

    Returns:
        torch.Tensor: The preprocessed image with a leading batch axis.
    """
    with Image.open(path) as image:
        # convert() loads the pixel data while the file is still open and
        # guarantees three channels, which Normalize below requires. The
        # aspect-ratio-preserving Resize/CenterCrop pair does all resizing,
        # so the image is not squashed to a square beforehand.
        image = image.convert('RGB')
    transform = transforms.Compose([transforms.Resize(256),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                         std=[0.229, 0.224, 0.225])])
    # Apply the preprocessing pipeline, then add the batch dimension.
    image = transform(image)
    image = image.unsqueeze(0)

    return image


if __name__ == '__main__':
    input_image = load_image(IMAGE_PATH)
    # Build VGG19 and load its parameters on the CPU; the model is traced on
    # the CPU before being moved to the MLU.
    net = vgg19()
    state_dict = torch.load(VGG_PATH, map_location='cpu')
    net.load_state_dict(state_dict)
    # Inference mode, float32 parameters.
    net.eval().float()
    example_forward_input = torch.rand((1, 3, 224, 224), dtype=torch.float)
    with torch.no_grad():
        # Trace the dynamic graph into a static one.
        net_trace = torch.jit.trace(net, example_forward_input)
        # Move the input and the traced model to the MLU device.
        input_image = input_image.to("mlu")
        net_trace = net_trace.to("mlu")
        st = time.time()
        # Run inference; .cpu() brings the probabilities back to the host, so
        # the timed region includes that copy.
        prob = net_trace(input_image).cpu()
    print("mlu370 infer time:{:.3f} s".format(time.time()-st))
    with open('./labels/imagenet_classes.txt') as f:
        classes = [line.strip() for line in f]
    _, indices = torch.sort(prob, descending=True)
    top_index = indices[0][0].item()
    print("Classification result: id = %s, prob = %f " % (classes[top_index], prob[0][top_index].item()))
    if classes[top_index] == 'strawberry':
        print('TEST RESULT PASS.')
    else:
        print('TEST RESULT FAILED.')
def load_image(path):
    """Read an image file and return a normalised ``(1, 3, 224, 224)`` tensor.

    The image is resized so its shorter side is 256, centre-cropped to
    224x224, converted to a float tensor and normalised with the mean/std
    constants below.

    Args:
        path: Path of the image file to open.

    Returns:
        torch.Tensor: The preprocessed image with a leading batch axis.
    """
    with Image.open(path) as image:
        # convert() loads the pixel data while the file is still open and
        # guarantees three channels, which Normalize below requires. The
        # aspect-ratio-preserving Resize/CenterCrop pair does all resizing,
        # so the image is not squashed to a square beforehand.
        image = image.convert('RGB')
    transform = transforms.Compose([transforms.Resize(256),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                         std=[0.229, 0.224, 0.225])])
    # Apply the preprocessing pipeline, then add the batch dimension.
    image = transform(image)
    image = image.unsqueeze(0)

    return image


if __name__ == '__main__':
    input_image = load_image(IMAGE_PATH)
    # Build VGG19 and load its parameters.
    net = vgg19()
    state_dict = torch.load(VGG_PATH)
    net.load_state_dict(state_dict)
    # Inference mode.
    net.eval()
    st = time.time()
    # Pure inference: no_grad() keeps autograd from recording the forward
    # pass, which would only cost memory here.
    with torch.no_grad():
        prob = net(input_image)
    print("cpu infer time:{:.3f} s".format(time.time()-st))
    with open('./labels/imagenet_classes.txt') as f:
        classes = [line.strip() for line in f]
    _, indices = torch.sort(prob, descending=True)
    top_index = indices[0][0].item()
    print("Classification result: id = %s, prob = %f " % (classes[top_index], prob[0][top_index].item()))
    if classes[top_index] == 'strawberry':
        print('TEST RESULT PASS.')
    else:
        print('TEST RESULT FAILED.')
    # The script simply ends here; the interactive-only exit() helper that
    # used to follow is not needed to terminate with status 0.
if __name__ == '__main__':
    # Load the VGG19 parameters stored in the .mat file with scipy.
    datas = scipy.io.loadmat(VGG_PATH)

    model = vgg19()
    new_state_dict = OrderedDict()
    # The i-th entry of the model's state_dict is read from the .mat variable
    # whose name is the string form of i.
    for i, param_name in enumerate(model.state_dict()):
        array = datas[str(i)]
        if param_name.rsplit('.', 1)[-1] != 'weight':
            # Non-weight entries (the biases) use row 0 of the stored array;
            # presumably they are saved as 1xN matrices -- TODO confirm.
            array = array[0]
        new_state_dict[param_name] = torch.from_numpy(array).float()
    # Load the converted parameters into the model; this also validates that
    # every key and shape matches.
    model.load_state_dict(new_state_dict)
    print("*** Start Saving pth ***")
    # Save the parameters to models/vgg19.pth, creating the directory first
    # so a fresh checkout without models/ does not crash in torch.save.
    output_path = 'models/vgg19.pth'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    torch.save(model.state_dict(), output_path)
    print('Saving pth PASS.')
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/models/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image0.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image1.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image10.jpg 
-------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image11.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image12.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image13.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image14.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image15.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image15.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image16.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image2.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image3.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image4.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image4.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image5.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image6.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image7.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image8.jpg 
-------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/cpu/image9.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/mlu_cnnl_mfus/image0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/mlu_cnnl_mfus/image0.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/mlu_cnnl_mfus/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/out/mlu_cnnl_mfus/image1.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/readme.txt: -------------------------------------------------------------------------------- 1 | 补全 stu_upload 中的 evaluate_cpu.py、evaluate_mlu.py。 2 | 3 | 执行bash run_cpu.sh 单独执行 cpu 模式。 4 | 执行bash run_mlu.sh 单独执行 mlu 模式。 5 | 6 | 需要提交的文件为 evaluate_cpu.py、evaluate_mlu.py,将以上文件压缩为 zip 包提交。 7 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_2_fast_style_transfer_infer_student/run_cpu.sh: 
class COCODataSet(Dataset):
    """Dataset that serves the ``.jpg`` members of ``./data/train2014_small.zip``.

    Every item is decoded, resized to 512x512, converted to RGB and returned
    as a ``float32`` tensor in CxHxW layout with values in ``[0, 1]``.
    """

    def __init__(self):
        super(COCODataSet, self).__init__()
        # The archive stays open for the lifetime of the dataset because
        # __getitem__ reads members lazily.
        self.zip_files = ZipFile('./data/train2014_small.zip')
        self.data_set = [file_name
                         for file_name in self.zip_files.namelist()
                         if file_name.endswith('.jpg')]

    def __len__(self):
        return len(self.data_set)

    def __getitem__(self, item):
        """Return image number ``item`` as a ``(3, 512, 512)`` float tensor.

        Raises:
            ValueError: If the archive member cannot be decoded as an image.
        """
        file_path = self.data_set[item]
        image = self.zip_files.read(file_path)
        image = numpy.asarray(bytearray(image), dtype='uint8')
        # Decode the in-memory buffer into a colour image.
        image = cv2.imdecode(image, flags=cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals failure by returning None; fail with the file
            # name instead of an opaque error inside cv2.resize.
            raise ValueError(f"cannot decode image {file_path!r}")
        # Scale to 512x512 using area interpolation.
        image = cv2.resize(image, (512, 512), interpolation=cv2.INTER_AREA)
        # Convert from BGR to RGB channel order.
        image = cv2.cvtColor(image, code=cv2.COLOR_BGR2RGB)
        # uint8 array -> float32 tensor normalised to [0, 1].
        image = torch.tensor(image, dtype=torch.float32) / 255
        # HxWxC -> CxHxW.
        image = image.permute(2, 0, 1)
        return image


class ResBlock(nn.Module):
    """Residual block: ``x + (conv, instance norm, ReLU, conv, instance norm)(x)``.

    Both convolutions are 3x3 with padding 1 and no bias, so the channel
    count ``c`` and the spatial size are preserved.
    """

    def __init__(self, c):
        super(ResBlock, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=c, out_channels=c, kernel_size=3, padding=1, bias=False),
            nn.InstanceNorm2d(c),
            nn.ReLU(),
            nn.Conv2d(in_channels=c, out_channels=c, kernel_size=3, padding=1, bias=False),
            nn.InstanceNorm2d(c)
        )

    def forward(self, x):
        # Residual connection.
        return x + self.layer(x)


class TransNet(nn.Module):
    """Image transformation network for fast style transfer.

    Three convolutions (the last two with stride 2) downsample the input,
    five residual blocks process it at 128 channels, two nearest-neighbour
    upsample + convolution stages restore the resolution, and a 9x9
    convolution followed by a sigmoid produces a 3-channel output in (0, 1).
    The position of every module inside ``self.layer`` determines the
    ``state_dict`` keys, so the order must not change.
    """

    def __init__(self):
        super(TransNet, self).__init__()
        self.layer = nn.Sequential(

            # ---------------- downsampling ----------------
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=9, stride=1, padding=4, bias=False),
            nn.InstanceNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(128),
            nn.ReLU(),
            # ---------------- residual blocks ----------------
            ResBlock(128),
            ResBlock(128),
            ResBlock(128),
            ResBlock(128),
            ResBlock(128),
            # ---------------- upsampling ----------------
            nn.Upsample(scale_factor=2, mode='nearest'),
            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.InstanceNorm2d(64),
            nn.ReLU(),
            nn.Upsample(scale_factor=2, mode='nearest'),
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.InstanceNorm2d(32),
            nn.ReLU(),
            # ---------------- output ----------------
            nn.Conv2d(in_channels=32, out_channels=3, kernel_size=9, stride=1, padding=4, bias=True),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.layer(x)


if __name__ == '__main__':
    # Build the transformation network on the CPU and load its parameters.
    g_net = TransNet()
    param = torch.load("./models/fst.pth", map_location='cpu')
    g_net.load_state_dict(param)
    print("g_net build PASS!\n")
    # Inference mode, float32 parameters.
    net = g_net.eval().float()
    data_set = COCODataSet()
    print("load COCODataSet PASS!\n")
    batch_size = 1
    data_group = DataLoader(data_set, batch_size, shuffle=True, drop_last=True)
    example_forward_input = torch.rand((1, 3, 512, 512), dtype=torch.float)
    with torch.no_grad():
        # Trace the model once and move it to the MLU once; neither step
        # depends on the image being processed.
        net_trace = torch.jit.trace(net, example_forward_input)
        net_mlu = net_trace.to("mlu")
        for i, image in enumerate(data_group):
            image_c = image.cpu()
            # Copy the input image to the MLU device.
            input_image_c = image_c.to("mlu")
            start = time.time()
            # Run the traced model (not the eager `net`), then bring the
            # result back to the host; the copy is part of the timed region.
            image_g_mlu = net_mlu(input_image_c)
            image_g_mlu = image_g_mlu.cpu()
            end = time.time()
            delta_time = end - start
            print("Inference (mfus) processing time: %s" % delta_time)
            # Stack generated and input image along the batch axis;
            # save_image lays the two out as a grid in a single jpg.
            images = torch.cat([image_g_mlu, image_c], dim=0)
            save_image(images, f"./out/mlu_cnnl_mfus/image{i}.jpg")

    print("TEST RESULT PASS!\n")
cv2.cvtColor(image, code=cv2.COLOR_BGR2RGB) 34 | # TODO: 将image从numpy形式转换为torch.float32,并将其归一化为[0,1] 35 | image = torch.tensor(image, dtype=torch.float32) / 255 36 | # TODO: 用permute函数将tensor从HxWxC转换为CxHxW 37 | image = image.permute(2, 0, 1) 38 | return image 39 | 40 | class ResBlock(nn.Module): 41 | 42 | def __init__(self, c): 43 | super(ResBlock, self).__init__() 44 | self.layer = nn.Sequential( 45 | #TODO: 进行卷积,卷积核为3*1*1 46 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=3 ,padding=1, bias=False), 47 | #TODO: 执行实例归一化 48 | nn.InstanceNorm2d(c), 49 | #TODO: 执行ReLU 50 | nn.ReLU(), 51 | #TODO: 进行卷积,卷积核为3*1*1 52 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=3 ,padding=1, bias=False), 53 | #TODO: 执行实例归一化 54 | nn.InstanceNorm2d(c) 55 | ) 56 | 57 | def forward(self, x): 58 | #TODO: 返回残差运算的结果 59 | return x + self.layer(x) 60 | 61 | 62 | class TransNet(nn.Module): 63 | 64 | def __init__(self): 65 | super(TransNet, self).__init__() 66 | self.layer = nn.Sequential( 67 | 68 | ###################下采样层################ 69 | # TODO:构建图像转换网络,第一层卷积 70 | nn.Conv2d(in_channels=3, out_channels=32, kernel_size=9, stride=1, padding=4, bias=False), 71 | # TODO:实例归一化 72 | nn.InstanceNorm2d(32), 73 | # TODO:创建激活函数ReLU 74 | nn.ReLU(), 75 | # TODO:第二层卷积 76 | nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, bias=False), 77 | # TODO:实例归一化 78 | nn.InstanceNorm2d(64), 79 | # TODO:创建激活函数ReLU 80 | nn.ReLU(), 81 | # TODO:第三层卷积 82 | nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, bias=False), 83 | # TODO:实例归一化 84 | nn.InstanceNorm2d(128), 85 | # TODO:创建激活函数ReLU 86 | nn.ReLU(), 87 | ##################残差层################## 88 | ResBlock(128), 89 | ResBlock(128), 90 | ResBlock(128), 91 | ResBlock(128), 92 | ResBlock(128), 93 | ################上采样层################## 94 | #TODO: 使用torch.nn.Upsample对特征图进行上采样 95 | nn.Upsample(scale_factor=2, mode='nearest'), 96 | #TODO: 执行卷积操作 97 | nn.Conv2d(in_channels=128, out_channels=64, 
kernel_size=3, stride=1, padding=1, bias=False), 98 | #TODO: 实例归一化 99 | nn.InstanceNorm2d(64), 100 | #TODO: 执行ReLU操作 101 | nn.ReLU(), 102 | #TODO: 使用torch.nn.Upsample对特征图进行上采样 103 | nn.Upsample(scale_factor=2, mode='nearest'), 104 | #TODO: 执行卷积操作 105 | nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False), 106 | #TODO: 实例归一化 107 | nn.InstanceNorm2d(32), 108 | #TODO: 执行ReLU操作 109 | nn.ReLU(), 110 | ###############输出层##################### 111 | #TODO: 执行卷积操作 112 | nn.Conv2d(in_channels=32, out_channels=3, kernel_size=9, stride=1, padding=4, bias=True), 113 | #TODO: sigmoid激活函数 114 | nn.Sigmoid() 115 | ) 116 | 117 | def forward(self, x): 118 | return self.layer(x) 119 | 120 | 121 | 122 | if __name__ == '__main__': 123 | # TODO: 使用cpu生成图像转换网络模型并保存在g_net中 124 | g_net = TransNet() 125 | #print(g_net.layer) 126 | # TODO:从/models文件夹下加载网络参数到g_net中 127 | param = torch.load("./models/fst.pth") 128 | g_net.load_state_dict(param) 129 | print("g_net build PASS!\n") 130 | data_set = COCODataSet() 131 | print("load COCODataSet PASS!\n") 132 | 133 | batch_size = 1 134 | data_group = DataLoader(data_set,batch_size,True,drop_last=True) 135 | 136 | for i, image in enumerate(data_group): 137 | image_c = image.cpu() 138 | #print(image_c.shape) 139 | start = time.time() 140 | # TODO: 计算 g_net,得到image_g 141 | image_g = g_net(image_c) 142 | end = time.time() 143 | delta_time = end - start 144 | print("Inference (CPU) processing time: %s" % delta_time) 145 | #TODO: 利用save_image函数将tensor形式的生成图像image_g以及输入图像image_c以jpg格式左右拼接的形式保存在/out/cpu/文件夹下 146 | images = torch.cat([image_g, image_c], dim=0) 147 | save_image(images, f"./out/cpu/image{i}.jpg") 148 | print("TEST RESULT PASS!\n") -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/data/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/data/udnie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/data/udnie.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/models/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/.DS_Store 
-------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/train/image0_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/train/image0_0.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/train/image0_10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/train/image0_10.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/train/image0_20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/train/image0_20.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/train/image0_30.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/out/train/image0_30.jpg -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/readme.txt: 
-------------------------------------------------------------------------------- 1 | 补全 stu_upload 中的 train-mlu.py、train.py。 2 | 3 | 执行bash run_train_cpu.sh 单独执行 cpu训练模式。 4 | 执行bash run_train_mlu.sh 单独执行 mlu训练模式。 5 | 6 | 需要提交的文件为train-mlu.py、train.py,将以上文件压缩为 zip 包提交。 7 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/run_train_cpu.sh: -------------------------------------------------------------------------------- 1 | #rm ./out/train/* 2 | 3 | python3 ./stu_upload/train.py 4 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/run_train_mlu.sh: -------------------------------------------------------------------------------- 1 | #rm ./out/train/* 2 | 3 | python ./stu_upload/train-mlu.py 4 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/stu_upload/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/stu_upload/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/stu_upload/train-mlu.py: -------------------------------------------------------------------------------- 1 | from torchvision.models import vgg19 2 | from torch import nn 3 | from zipfile import ZipFile 4 | from torch.utils.data import Dataset, DataLoader 5 | from torchvision.utils import save_image 6 | import torch 7 | import torch_mlu 8 | import cv2 9 | import numpy 10 | import os 11 | 12 | class COCODataSet(Dataset): 13 | 14 | def __init__(self): 15 | super(COCODataSet, self).__init__() 16 | 
self.zip_files = ZipFile('./data/train2014.zip') 17 | self.data_set = [] 18 | for file_name in self.zip_files.namelist(): 19 | if file_name.endswith('.jpg'): 20 | self.data_set.append(file_name) 21 | 22 | def __len__(self): 23 | return len(self.data_set) 24 | 25 | def __getitem__(self, item): 26 | file_path = self.data_set[item] 27 | image = self.zip_files.read(file_path) 28 | image = numpy.asarray(bytearray(image), dtype='uint8') 29 | # TODO: 使用cv2.imdecode()函数从指定的内存缓存中读取数据,并把数据转换(解码)成彩色图像格式。 30 | image = cv2.imdecode(image, flags=cv2.IMREAD_COLOR) 31 | # TODO: 使用cv2.resize()将图像缩放为512*512大小,其中所采用的插值方式为:区域插值 32 | image = cv2.resize(image, (512,512), interpolation=cv2.INTER_AREA) 33 | # TODO: 使用cv2.cvtColor将图片从BGR格式转换成RGB格式 34 | image = cv2.cvtColor(image, code=cv2.COLOR_BGR2RGB) 35 | # TODO: 将image从numpy形式转换为torch.float32,并将其归一化为[0,1] 36 | image = torch.tensor(image, dtype=torch.float32) / 255 37 | # TODO: 用permute函数将tensor从HxWxC转换为CxHxW 38 | image = image.permute(2, 0, 1) 39 | return image 40 | 41 | 42 | class VGG19(nn.Module): 43 | def __init__(self): 44 | super(VGG19, self).__init__() 45 | #TODO: 调用vgg19网络 46 | a = vgg19() 47 | a = a.features 48 | #TODO: 定义self.layer1为第2层卷积后对应的特征 49 | self.layer1 = nn.Sequential(*list(a.children())[:2]) 50 | #TODO: 定义self.layer2为第4层卷积后对应的特征 51 | self.layer2 = nn.Sequential(*list(a.children())[2:4]) 52 | #TODO: 定义self.layer3为第8层卷积后对应的特征 53 | self.layer3 = nn.Sequential(*list(a.children())[4:8]) 54 | #TODO: 定义self.layer4为第12层卷积后对应的特征 55 | self.layer4 = nn.Sequential(*list(a.children())[8:12]) 56 | 57 | def forward(self, input_): 58 | out1 = self.layer1(input_) 59 | out2 = self.layer2(out1) 60 | out3 = self.layer3(out2) 61 | out4 = self.layer4(out3) 62 | return out1, out2, out3, out4 63 | 64 | 65 | class ResBlock(nn.Module): 66 | 67 | def __init__(self, c): 68 | super(ResBlock, self).__init__() 69 | self.layer = nn.Sequential( 70 | #TODO: 进行卷积,卷积核为3*1*1 71 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=3 ,padding=1, 
bias=False), 72 | #TODO: 执行实例归一化 73 | nn.InstanceNorm2d(c), 74 | #TODO: 执行ReLU 75 | nn.ReLU(), 76 | #TODO: 进行卷积,卷积核为3*1*1 77 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=3 ,padding=1, bias=False), 78 | #TODO: 执行实例归一化 79 | nn.InstanceNorm2d(c) 80 | ) 81 | 82 | def forward(self, x): 83 | #TODO: 返回残差运算的结果 84 | return x + self.layer(x) 85 | 86 | 87 | class TransNet(nn.Module): 88 | 89 | def __init__(self): 90 | super(TransNet, self).__init__() 91 | self.layer = nn.Sequential( 92 | 93 | ###################下采样层################ 94 | # TODO:构建图像转换网络,第一层卷积 95 | nn.Conv2d(in_channels=3, out_channels=32, kernel_size=9, stride=1, padding=4, bias=False), 96 | # TODO:实例归一化 97 | nn.InstanceNorm2d(32), 98 | # TODO:创建激活函数ReLU 99 | nn.ReLU(), 100 | # TODO:第二层卷积 101 | nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, bias=False), 102 | # TODO:实例归一化 103 | nn.InstanceNorm2d(64), 104 | # TODO:创建激活函数ReLU 105 | nn.ReLU(), 106 | # TODO:第三层卷积 107 | nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, bias=False), 108 | # TODO:实例归一化 109 | nn.InstanceNorm2d(128), 110 | # TODO:创建激活函数ReLU 111 | nn.ReLU(), 112 | ##################残差层################## 113 | ResBlock(128), 114 | ResBlock(128), 115 | ResBlock(128), 116 | ResBlock(128), 117 | ResBlock(128), 118 | ################上采样层################## 119 | #TODO: 使用torch.nn.Upsample对特征图进行上采样 120 | nn.Upsample(scale_factor=2, mode='nearest'), 121 | #TODO: 执行卷积操作 122 | nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False), 123 | #TODO: 实例归一化 124 | nn.InstanceNorm2d(64), 125 | #TODO: 执行ReLU操作 126 | nn.ReLU(), 127 | #TODO: 使用torch.nn.Upsample对特征图进行上采样 128 | nn.Upsample(scale_factor=2, mode='nearest'), 129 | #TODO: 执行卷积操作 130 | nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False), 131 | #TODO: 实例归一化 132 | nn.InstanceNorm2d(32), 133 | #TODO: 执行ReLU操作 134 | nn.ReLU(), 135 | ###############输出层##################### 
136 | #TODO: 执行卷积操作 137 | nn.Conv2d(in_channels=32, out_channels=3, kernel_size=9, stride=1, padding=4, bias=True), 138 | #TODO: sigmoid激活函数 139 | nn.Sigmoid() 140 | ) 141 | 142 | def forward(self, x): 143 | return self.layer(x) 144 | 145 | 146 | def load_image(path): 147 | # TODO: 使用cv2从路径中读取图片 148 | image = cv2.imread(path, cv2.IMREAD_COLOR) 149 | # TODO: 使用cv2.cvtColor将图片从BGR格式转换成RGB格式 150 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 151 | # TODO: 使用cv2.resize()将图像缩放为512*512大小 152 | image = cv2.resize(image, (512, 512), interpolation=cv2.INTER_AREA) 153 | # TODO: 将image从numpy形式转换为torch.float32,并将其归一化为[0,1] 154 | image = torch.tensor(image, dtype=torch.float32) / 255.0 155 | # TODO: 将tensor从HxWxC转换为CxHxW,并对在0维上增加一个维度 156 | image = image.permute(2, 0, 1).unsqueeze(0) 157 | return image 158 | 159 | 160 | def get_gram_matrix(f_map): 161 | """ 162 | """ 163 | n, c, h, w = f_map.shape 164 | if n == 1: 165 | f_map = f_map.reshape(c, h * w) 166 | gram_matrix = torch.mm(f_map, f_map.t()) 167 | return gram_matrix 168 | else: 169 | f_map = f_map.reshape(n, c, h * w) 170 | gram_matrix = torch.matmul(f_map, f_map.transpose(1, 2)) 171 | return gram_matrix 172 | 173 | 174 | if __name__ == '__main__': 175 | image_style = load_image('./data/udnie.jpg').cpu() 176 | #TODO: 将输入的风格图像加载到mlu设备上,得到mlu_iamge_style 177 | mlu_image_style = image_style.to('mlu') 178 | net = VGG19().cpu() 179 | g_net = TransNet().cpu() 180 | #TODO: 将特征网络加载到mlu得到mlu_g_net 181 | mlu_g_net = g_net.to('mlu') 182 | #TODO: 将图像转换网络加载到mlu得到mlu_net 183 | mlu_net = net.to('mlu') 184 | print("mlu_net build PASS!\n") 185 | #TODO: 使用adam优化器对mlu_g_net的参数进行优化 186 | optimizer = torch.optim.Adam(g_net.parameters(), lr=0.001) 187 | #TODO: 在cpu上计算均方误差损失函得到loss_func 188 | loss_func = nn.MSELoss() 189 | #TODO: 将损失函数加载到mlu上得到mlu_loss_func 190 | mlu_loss_func = loss_func.to('mlu') 191 | print("build loss PASS!\n") 192 | data_set = COCODataSet() 193 | print("load COCODataSet PASS!\n") 194 | batch_size = 1 195 | data_loader = 
DataLoader(data_set, batch_size, True, drop_last=True) 196 | #TODO:mlu_iamge_style经过特征提取网络mlu_net生成风格特征s1-s4 197 | s1, s2, s3, s4 = mlu_net(mlu_image_style) 198 | #TODO: 对风格特征s1-s4计算格拉姆矩阵并从当前计算图中分离下来,得到对应的s1-s4 199 | s1, s2, s3, s4 = [get_gram_matrix(s).detach() for s in [s1, s2, s3, s4]] 200 | j = 0 201 | count = 0 202 | epochs = 0 203 | while j <= epochs: 204 | for i, image in enumerate(data_loader): 205 | image_c = image.cpu() 206 | #TODO: 将输入图像拷贝到mlu上得到mlu_image_c 207 | mlu_image_c = image_c.to('mlu') 208 | #TODO: 将mlu_image_c经过mlu_g_net输出生成图像mlu_imge_g 209 | mlu_image_g = mlu_g_net(mlu_image_c) 210 | #TODO: 利用特征提取网络mlu_net提取生成图像mlu_image_g的特征out1-out4 211 | out1, out2, out3, out4 = mlu_net(mlu_image_c) 212 | ##############计算风格损失################# 213 | #TODO: 对生成图像的特征out1-out4计算gram矩阵,并与风格图像的特征求损失,分别得到loss_s1-loss_s4 214 | loss_s1 = mlu_loss_func(get_gram_matrix(out1), s1) 215 | loss_s2 = mlu_loss_func(get_gram_matrix(out2), s2) 216 | loss_s3 = mlu_loss_func(get_gram_matrix(out3), s3) 217 | loss_s4 = mlu_loss_func(get_gram_matrix(out4), s4) 218 | #TODO:loss_s1-loss_s4相加得到风格损失loss_s 219 | loss_s = loss_s1 + loss_s2 + loss_s3 + loss_s4 220 | 221 | ##############计算内容损失################# 222 | #TODO: 将图片mlu_image_c经过特征提取网络mlu_net得到内容特图像的特征c1-c4 223 | c1, c2, c3, c4 = mlu_net(mlu_image_c) 224 | 225 | #TODO: 将内容图像特征c2从计算图中分离并与内容图像特征out2求内容损失loss_c2 226 | loss_c2 = mlu_loss_func(out2, c2.detach()) 227 | loss_c = loss_c2 228 | 229 | ##############计算总损失################# 230 | loss = loss_c + 0.000000005 * loss_s 231 | 232 | ########清空梯度、计算梯度、更新参数###### 233 | #TODO: 梯度初始化为零 234 | optimizer.zero_grad() 235 | #TODO: 反向传播求梯度 236 | loss.backward() 237 | #TODO: 更新所有参数 238 | optimizer.step() 239 | print('j:',j, 'i:',i, 'loss:',loss.item(), 'loss_c:',loss_c.item(), 'loss_s:',loss_s.item()) 240 | count += 1 241 | mlu_image_g = mlu_image_g.cpu() 242 | mlu_image_c = mlu_image_c.cpu() 243 | if i % 10 == 0: 244 | #TODO: 将图像转换网络fst_train_mlu.pth的参数存储在models/文件夹下 245 | 
torch.save(g_net.state_dict(), './models/fst_train_mlu.pth') 246 | #TODO: 利用save_image函数将tensor形式的生成图像mlu_image_g以及输入图像mlu_image_c以jpg左右拼接的形式保存在/out/train_mlu/文件夹下 247 | images = torch.cat([mlu_image_g, mlu_image_c], dim=0) 248 | save_image(images, f"./out/train_mlu/image{j}_{i}.jpg") 249 | j += 1 250 | 251 | print("MLU TRAIN RESULT PASS!\n") 252 | 253 | 254 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_3_fast_style_transfer_infer_student/stu_upload/train.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | from torchvision.models import vgg19 3 | from torch import nn 4 | from zipfile import ZipFile 5 | from torch.utils.data import Dataset, DataLoader 6 | from torchvision.utils import save_image 7 | import torch 8 | import cv2 9 | import numpy 10 | 11 | 12 | class COCODataSet(Dataset): 13 | 14 | def __init__(self): 15 | super(COCODataSet, self).__init__() 16 | self.zip_files = ZipFile('./data/train2014.zip') 17 | self.data_set = [] 18 | for file_name in self.zip_files.namelist(): 19 | if file_name.endswith('.jpg'): 20 | self.data_set.append(file_name) 21 | 22 | def __len__(self): 23 | return len(self.data_set) 24 | 25 | def __getitem__(self, item): 26 | file_path = self.data_set[item] 27 | image = self.zip_files.read(file_path) 28 | image = numpy.asarray(bytearray(image), dtype='uint8') 29 | # TODO: 使用cv2.imdecode()函数从指定的内存缓存中读取数据,并把数据转换(解码)成彩色图像格式。 30 | image = cv2.imdecode(image, flags=cv2.IMREAD_COLOR) 31 | # TODO: 使用cv2.resize()将图像缩放为512*512大小,其中所采用的插值方式为:区域插值 32 | image = cv2.resize(image, (512,512), interpolation=cv2.INTER_AREA) 33 | # TODO: 使用cv2.cvtColor将图片从BGR格式转换成RGB格式 34 | image = cv2.cvtColor(image, code=cv2.COLOR_BGR2RGB) 35 | # TODO: 将image从numpy形式转换为torch.float32,并将其归一化为[0,1] 36 | image = torch.tensor(image, dtype=torch.float32) / 255 37 | # TODO: 用permute函数将tensor从HxWxC转换为CxHxW 38 | image = image.permute(2, 0, 1) 39 | return image 
40 | 41 | 42 | class VGG19(nn.Module): 43 | def __init__(self): 44 | super(VGG19, self).__init__() 45 | #TODO: 调用vgg19网络 46 | a = vgg19() 47 | a = a.features 48 | #TODO: 定义self.layer1为第2层卷积后对应的特征 49 | self.layer1 = nn.Sequential(*list(a.children())[:2]) 50 | #TODO: 定义self.layer2为第4层卷积后对应的特征 51 | self.layer2 = nn.Sequential(*list(a.children())[2:4]) 52 | #TODO: 定义self.layer3为第8层卷积后对应的特征 53 | self.layer3 = nn.Sequential(*list(a.children())[4:8]) 54 | #TODO: 定义self.layer4为第12层卷积后对应的特征 55 | self.layer4 = nn.Sequential(*list(a.children())[8:12]) 56 | 57 | def forward(self, input_): 58 | out1 = self.layer1(input_) 59 | out2 = self.layer2(out1) 60 | out3 = self.layer3(out2) 61 | out4 = self.layer4(out3) 62 | return out1, out2, out3, out4 63 | 64 | 65 | class ResBlock(nn.Module): 66 | 67 | def __init__(self, c): 68 | super(ResBlock, self).__init__() 69 | self.layer = nn.Sequential( 70 | #TODO: 进行卷积,卷积核为3*1*1 71 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=3 ,padding=1, bias=False), 72 | #TODO: 执行实例归一化 73 | nn.InstanceNorm2d(c), 74 | #TODO: 执行ReLU 75 | nn.ReLU(), 76 | #TODO: 进行卷积,卷积核为3*1*1 77 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=3 ,padding=1, bias=False), 78 | #TODO: 执行实例归一化 79 | nn.InstanceNorm2d(c) 80 | ) 81 | 82 | def forward(self, x): 83 | #TODO: 返回残差运算的结果 84 | return x + self.layer(x) 85 | 86 | 87 | class TransNet(nn.Module): 88 | 89 | def __init__(self): 90 | super(TransNet, self).__init__() 91 | self.layer = nn.Sequential( 92 | 93 | ###################下采样层################ 94 | # TODO:构建图像转换网络,第一层卷积 95 | nn.Conv2d(in_channels=3, out_channels=32, kernel_size=9, stride=1, padding=4, bias=False), 96 | # TODO:实例归一化 97 | nn.InstanceNorm2d(32), 98 | # TODO:创建激活函数ReLU 99 | nn.ReLU(), 100 | # TODO:第二层卷积 101 | nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, bias=False), 102 | # TODO:实例归一化 103 | nn.InstanceNorm2d(64), 104 | # TODO:创建激活函数ReLU 105 | nn.ReLU(), 106 | # TODO:第三层卷积 107 | nn.Conv2d(in_channels=64, 
out_channels=128, kernel_size=3, stride=2, padding=1, bias=False), 108 | # TODO:实例归一化 109 | nn.InstanceNorm2d(128), 110 | # TODO:创建激活函数ReLU 111 | nn.ReLU(), 112 | ##################残差层################## 113 | ResBlock(128), 114 | ResBlock(128), 115 | ResBlock(128), 116 | ResBlock(128), 117 | ResBlock(128), 118 | ################上采样层################## 119 | #TODO: 使用torch.nn.Upsample对特征图进行上采样 120 | nn.Upsample(scale_factor=2, mode='nearest'), 121 | #TODO: 执行卷积操作 122 | nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False), 123 | #TODO: 实例归一化 124 | nn.InstanceNorm2d(64), 125 | #TODO: 执行ReLU操作 126 | nn.ReLU(), 127 | #TODO: 使用torch.nn.Upsample对特征图进行上采样 128 | nn.Upsample(scale_factor=2, mode='nearest'), 129 | #TODO: 执行卷积操作 130 | nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False), 131 | #TODO: 实例归一化 132 | nn.InstanceNorm2d(32), 133 | #TODO: 执行ReLU操作 134 | nn.ReLU(), 135 | ###############输出层##################### 136 | #TODO: 执行卷积操作 137 | nn.Conv2d(in_channels=32, out_channels=3, kernel_size=9, stride=1, padding=4, bias=True), 138 | #TODO: sigmoid激活函数 139 | nn.Sigmoid() 140 | ) 141 | 142 | def forward(self, x): 143 | return self.layer(x) 144 | 145 | 146 | def load_image(path): 147 | # TODO: 使用cv2从路径中读取图片 148 | image = cv2.imread(path, cv2.IMREAD_COLOR) 149 | # TODO: 使用cv2.cvtColor将图片从BGR格式转换成RGB格式 150 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 151 | # TODO: 使用cv2.resize()将图像缩放为512*512大小 152 | image = cv2.resize(image, (512, 512), interpolation=cv2.INTER_AREA) 153 | # TODO: 将image从numpy形式转换为torch.float32,并将其归一化为[0,1] 154 | image = torch.tensor(image, dtype=torch.float32) / 255.0 155 | # TODO: 将tensor从HxWxC转换为CxHxW,并对在0维上增加一个维度 156 | image = image.permute(2, 0, 1).unsqueeze(0) 157 | return image 158 | 159 | 160 | def get_gram_matrix(f_map): 161 | """ 162 | """ 163 | n, c, h, w = f_map.shape 164 | if n == 1: 165 | f_map = f_map.reshape(c, h * w) 166 | gram_matrix = torch.mm(f_map, f_map.t()) 
167 | return gram_matrix 168 | else: 169 | f_map = f_map.reshape(n, c, h * w) 170 | gram_matrix = torch.matmul(f_map, f_map.transpose(1, 2)) 171 | return gram_matrix 172 | 173 | 174 | if __name__ == '__main__': 175 | image_style = load_image('./data/udnie.jpg').cpu() 176 | net = VGG19().cpu() 177 | g_net = TransNet().cpu() 178 | print("g_net build PASS!\n") 179 | #TODO: 使用adam优化器对g_net的参数进行优化,得到optimizer 180 | optimizer = torch.optim.Adam(g_net.parameters(), lr=0.001) 181 | #TODO: 在cpu上计算均方误差损失函得到loss_func函数 182 | loss_func = nn.MSELoss() 183 | print("build loss PASS!\n") 184 | data_set = COCODataSet() 185 | print("load COCODataSet PASS!\n") 186 | batch_size = 1 187 | data_loader = DataLoader(data_set, batch_size, True, drop_last=True) 188 | #TODO:输入的风格图像经过特征提取网络生成风格特征s1-s4 189 | s1, s2, s3, s4 = net(image_style) 190 | #TODO: 对风格特征s1-s4计算格拉姆矩阵并从当前计算图中分离下来,得到对应的s1-s4 191 | s1, s2, s3, s4 = [get_gram_matrix(s).detach() for s in [s1, s2, s3, s4]] 192 | j = 0 193 | count = 0 194 | epochs = 0 195 | while j <= epochs: 196 | for i, image in enumerate(data_loader): 197 | image_c = image.cpu() 198 | #TODO: 将输入图像经过图像转化网络输出生成图像image_g 199 | image_g = g_net(image_c) 200 | #TODO: 利用特征提取网络提取生成图像的特征out1、out2、out3、out4 201 | out1, out2, out3, out4 = net(image_g) 202 | 203 | ###############计算风格损失################### 204 | #TODO: 对生成图像的特征out1-out4计算gram矩阵,并与风格图像的特征s1-s4通过loss_func求损失,分别得到loss_s1-loss_s4 205 | loss_s1 = loss_func(get_gram_matrix(out1), s1) 206 | loss_s2 = loss_func(get_gram_matrix(out2), s2) 207 | loss_s3 = loss_func(get_gram_matrix(out3), s3) 208 | loss_s4 = loss_func(get_gram_matrix(out4), s4) 209 | #TODO:loss_s1-loss_s4相加得到风格损失loss_s 210 | loss_s = loss_s1 + loss_s2 + loss_s3 + loss_s4 211 | 212 | ###############计算内容损失################### 213 | #TODO: 将输入图像经过特征提取网络得到内容特图像的特征c1-c4 214 | c1, c2, c3, c4 = net(image_c) 215 | #TODO: 将内容图像特征c2从计算图中分离并与内容图像特征out2通过loss_func得到内容损失loss_c2 216 | loss_c2 = loss_func(out2, c2.detach()) 217 | loss_c = loss_c2 218 | 219 | 
###############计算总损失################### 220 | loss = loss_c + 0.000000005 * loss_s 221 | 222 | #######清空梯度、计算梯度、更新参数########### 223 | #TODO: 梯度初始化为零 224 | optimizer.zero_grad() 225 | #TODO: 反向传播求梯度 226 | loss.backward() 227 | #TODO: 更新所有参数 228 | optimizer.step() 229 | print('j:',j, 'i:',i, 'loss:',loss.item(), 'loss_c:',loss_c.item(), 'loss_s:',loss_s.item()) 230 | count += 1 231 | if i % 10 == 0: 232 | #TODO: 将图像转换网络的参数fst_train.pth存储在models文件夹下 233 | torch.save(g_net.state_dict(), './models/fst_train.pth') 234 | #TODO: 利用save_image函数将tensor形式的生成图像image_g以及输入图像image_c以jpg左右拼接的形式保存在/out/train/文件夹下 235 | images = torch.cat([image_g, image_c], dim=0) 236 | save_image(images, f"./out/train/image{j}_{i}.jpg") 237 | if i >= 10: 238 | break 239 | j += 1 240 | 241 | print("TRAIN RESULT PASS!\n") 242 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_4_custom_pytorch_op_student/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_4_custom_pytorch_op_student/data/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/models/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_4_custom_pytorch_op_student/models/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/out/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/code_chap_4_student/exp_4_4_custom_pytorch_op_student/out/.DS_Store -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/readme.txt: -------------------------------------------------------------------------------- 1 | 1、补全 hsigmoid.cpp、setup.py、test_hsigmoid.py、evluate_cpu.py 文件。 2 | 2、进入op_hsigmoid实验目录,执行python setup.py build_ext --inplace命令进行hsigmoid算子编译。 3 | 3、进入stu_upload目录,执行python test_hsigmoid.py进行算子测试。 4 | 4、在stu_upload目录下,执行python evaluate_cpu.py进行模型推理。 5 | 6 | 需要提交的文件为hsigmoid.cpp、setup.py、test_hsigmoid.py、evluate_cpu.py 文件,将以上文件压缩为 zip 包提交。 7 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/run_cpu.sh: -------------------------------------------------------------------------------- 1 | rm ./out/cpu/* 2 | 3 | python ./stu_upload/evaluate_cpu.py 4 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/stu_upload/evaluate_cpu.py: -------------------------------------------------------------------------------- 1 | from torchvision.models import vgg19 2 | from torch import nn 3 | from zipfile import ZipFile 4 | from torch.utils.data import Dataset, DataLoader 5 | from torchvision.utils import save_image 6 | import torch 7 | import cv2 8 | import numpy 9 | import time 10 | 
#TODO:导入自定义动态链接库 11 | ______________________________________________ 12 | 13 | 14 | 15 | class COCODataSet(Dataset): 16 | 17 | def __init__(self): 18 | super(COCODataSet, self).__init__() 19 | self.zip_files = ZipFile('../data/train2014_small.zip') 20 | self.data_set = [] 21 | for file_name in self.zip_files.namelist(): 22 | if file_name.endswith('.jpg'): 23 | self.data_set.append(file_name) 24 | 25 | def __len__(self): 26 | return len(self.data_set) 27 | 28 | def __getitem__(self, item): 29 | file_path = self.data_set[item] 30 | image = self.zip_files.read(file_path) 31 | image = numpy.asarray(bytearray(image), dtype='uint8') 32 | # TODO: 使用cv2.imdecode()函数从指定的内存缓存中读取数据,并把数据转换(解码)成彩色图像格式。 33 | ______________________________________________ 34 | # TODO: 使用cv2.resize()将图像缩放为512*512大小,其中所采用的插值方式为:区域插值 35 | ______________________________________________ 36 | # TODO: 使用cv2.cvtColor将图片从BGR格式转换成RGB格式 37 | ______________________________________________ 38 | # TODO: 将image从numpy形式转换为torch.float32,并将其归一化为[0,1] 39 | ______________________________________________ 40 | # TODO: 用permute函数将tensor从HxWxC转换为CxHxW 41 | ______________________________________________ 42 | return image 43 | 44 | 45 | class ResBlock(nn.Module): 46 | 47 | def __init__(self, c): 48 | super(ResBlock, self).__init__() 49 | self.layer = nn.Sequential( 50 | 51 | #TODO: 进行卷积,卷积核为3*1*1 52 | ______________________________________________ 53 | #TODO: 执行实例归一化 54 | ______________________________________________ 55 | #TODO: 执行ReLU 56 | ______________________________________________ 57 | #TODO: 进行卷积,卷积核为3*1*1 58 | ______________________________________________ 59 | #TODO: 执行实例归一化 60 | ______________________________________________ 61 | ) 62 | 63 | def forward(self, x): 64 | #TODO: 返回残差运算的结果 65 | _________________________________________ 66 | 67 | 68 | class TransNet(nn.Module): 69 | 70 | def __init__(self): 71 | super(TransNet, self).__init__() 72 | self.layer = nn.Sequential( 73 | 74 | 
###################下采样层################ 75 | # TODO:构建图像转换网络,第一层卷积 76 | _________________________________________ 77 | # TODO:实例归一化 78 | _________________________________________ 79 | # TODO:创建激活函数ReLU 80 | _________________________________________ 81 | # TODO:第二层卷积 82 | _________________________________________ 83 | # TODO:实例归一化 84 | _________________________________________ 85 | # TODO:创建激活函数ReLU 86 | _________________________________________ 87 | # TODO:第三层卷积 88 | _________________________________________ 89 | # TODO:实例归一化 90 | _________________________________________ 91 | # TODO:创建激活函数ReLU 92 | _________________________________________ 93 | 94 | ##################残差层################## 95 | _________________________________________ 96 | _________________________________________ 97 | _________________________________________ 98 | _________________________________________ 99 | _________________________________________ 100 | 101 | ################上采样层################## 102 | #TODO: 使用torch.nn.Upsample对特征图进行上采样 103 | _________________________________________ 104 | #TODO: 执行卷积操作 105 | _________________________________________ 106 | #TODO: 实例归一化 107 | _________________________________________ 108 | #TODO: 执行ReLU操作 109 | _________________________________________ 110 | 111 | #TODO: 使用torch.nn.Upsample对特征图进行上采样 112 | _________________________________________ 113 | #TODO: 执行卷积操作 114 | _________________________________________ 115 | #TODO: 实例归一化 116 | _________________________________________ 117 | #TODO: 执行ReLU操作 118 | _________________________________________ 119 | 120 | ###############输出层##################### 121 | #TODO: 执行卷积操作 122 | _________________________________________ 123 | ) 124 | 125 | 126 | def forward(self, x): 127 | x = self.layer(x) 128 | #调用自定义hsigmoid算子对rand进行处理得到输出结果out 129 | ______________________________________________ 130 | return out 131 | 132 | 133 | if __name__ == '__main__': 134 | # TODO: 使用cpu生成图像转换网络模型并保存在g_net中 135 | 
_________________________________________ 136 | # TODO:从/models文件夹下加载网络参数到g_net中 137 | _________________________________________ 138 | print("g_net build PASS!\n") 139 | data_set = COCODataSet() 140 | print("load COCODataSet PASS!\n") 141 | batch_size = 1 142 | data_group = DataLoader(data_set,batch_size,True,drop_last=True) 143 | 144 | for i, image in enumerate(data_group): 145 | image_c = image.cpu() 146 | #print(image_c.shape) 147 | start = time.time() 148 | # TODO: 计算 g_net,得到image_g 149 | _________________________________________ 150 | end = time.time() 151 | delta_time = end - start 152 | print("Inference (CPU) processing time: %s" % delta_time) 153 | #TODO: 利用save_image函数将tensor形式的生成图像image_g以及输入图像image_c以jpg格式左右拼接的形式保存在/out/cpu/文件夹下 154 | _________________________________________ 155 | break 156 | print("TEST RESULT PASS!\n") 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/stu_upload/op_hsigmoid/hsigmoid.cpp: -------------------------------------------------------------------------------- 1 | //Pytorch扩展头文件的引用 2 | #include 3 | using namespace std; 4 | 5 | //hsigmoid_cpu函数的具体实现 6 | torch::Tensor hsigmoid_cpu(const torch::Tensor & dets) { 7 | //TODO: 将输入的tensor转化为浮点类型的vector 8 | ______________________________________ 9 | int input_size = input_data.size(); 10 | //TODO: 创建一个浮点类型的output_data,output_data为大小与输入相同的vector 11 | ______________________________________ 12 | //TODO: 对于输入向量的每个元素计算hsigmoid 13 | ______________________________________ 14 | //TODO: Create tensor options with dtype float32 15 | auto opts = torch::TensorOptions().dtype(torch::kFloat32); 16 | //TODO: Create a tensor from the output vector 17 | auto foo= torch::from_blob(output_data.data(), {int64_t(output_data.size())}, opts).clone(); 18 | //TODO: 将得到的tensor reshape为所需的大小 19 | ______________________________________ 20 | return output; 21 | } 22 | //TODO: 算子绑定为Pytorch的模块 23 | 
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 24 | ______________________________________ 25 | } 26 | -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/stu_upload/op_hsigmoid/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils import cpp_extension 3 | 4 | setup( 5 | #TODO: 给出编译后的链接库名称 6 | ______________________________________ 7 | ext_modules=[ 8 | cpp_extension.CppExtension( 9 | #TODO:以正确的格式给出编译文件即编译函数 10 | ______________________________________ 11 | ) 12 | ], 13 | # 执行编译命令设置 14 | cmdclass={ 15 | 'build_ext': cpp_extension.BuildExtension 16 | } 17 | ) 18 | print("generate .so PASS!\n") -------------------------------------------------------------------------------- /code_chap_4_student/exp_4_4_custom_pytorch_op_student/stu_upload/test_hsigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torchvision 4 | import numpy as np 5 | #TODO:导入自定义连接库 6 | ______________________________________ 7 | 8 | def hsigmoid_cpu(rand): 9 | rand = rand.contiguous() 10 | #TODO:调用hsigmoid函数对rand进行处理得到输出结果output 11 | ______________________________________ 12 | return output.contiguous() 13 | 14 | def test_hsigmoid(): 15 | torch.manual_seed(12345) 16 | rand = (torch.randn(3, 512, 512, dtype=torch.float32).abs()+1) 17 | #TODO:调用hsigmoid_cpu函数对rand进行处理得到输出结果output_cpu 18 | ______________________________________ 19 | print("------------------hsigmoid test completed----------------------") 20 | print("input: ", rand) 21 | print("input_size:", rand.size()) 22 | print("output: ", output_cpu) 23 | print("output_size:", output_cpu.size()) 24 | 25 | print("TEST hsigmoid PASS!\n") 26 | 27 | test_hsigmoid() 28 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/.DS_Store -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/README.md: -------------------------------------------------------------------------------- 1 | # MLUExtension 编译自定义算子 2 | ## 部分说明 3 | 1. `python setup.py install`:编译并安装包。 4 | 2. 如果实现BangC算子的时候按照头文件和实现分离的模式,需要手动包含头文件所在的路径:`"-I{}".format(os.path.join(cpath, "include"))` 原因在于 cncc 无法正确处理 C++ 头文件,包含torch头文件导致编译出错。注意因为 `include_dirs` 主要是 `gcc` 使用,它可以包含在 `include_dirs` 参数中不会出现编译问题。 5 | 3. 和 CUDA 一样,MLUExtension 实现不仅可以传入参数设置编译架构也可以通过环境变量设置。在 MLU 架构上可以设置两种编译参数: 6 | - `--bang-arch`:其对应着一系列的架构,例如:`--bang-arch=compute_20`表示编译生成`--bang-mlu-arch=mtp_220 --bang-mlu-arch=mtp_270 --bang-mlu-arch=mtp_290` 7 | - `--bang-mlu-arch`:指定特定架构,例如:`--bang-mlu-arch=mtp_372`表示生成MLU370的代码。因为框架`--bang-arch` 主要用在训练卡上,所以默认生成设备相关代码的时候采用`--bang-arch 8 | `模式,这样保证同系列板卡都可用。如果你需要生成特定架构的可以通过传入`--bang-mlu-arch=xxx`,这样默认的板卡参数将失效。 9 | 如果不传入任何架构相关信息,BuildExtension会自动获取板卡架构,保证当前板卡可用,同样采用`--bang-arch`参数设置生成特定compute下的代码。板卡参数`--bang-arch` 或者 `--bang-mlu-arch` 可以通过 `cncc --help`查询到。 10 | 4. 
和CUDA类似,MLU上环境变量`TORCH_BANG_ARCH_LIST`能开启对架构的支持,例如`TORCH_BANG_ARCH_LIST="2.0;3.0"`表示对`--bang-arch`设置为`--bang-arch=compute_20 --bang-arch=compute_30`,其含义如上所述。上述设置中优先级顺序为setup.py中设置>环境变量设置。 11 | - setup.py 中设置,默认认为用户清楚自己需要生成的是哪个架构的代码,这时候参数以用户指定为准。 12 | - `TORCH_BANG_ARCH_LIST`环境变量设置次之,此操作主要用来添加生成新的架构代码但是又不想在setup.py中设置的情况。 13 | - 如果不设置,默认通过runtime获取当前架构,默认情况下不用设置架构信息,执行中运行时会自动获取对应架构。 14 | - 设置环境变量同时传入编译器参数 `--bang-mlu-arch=mtp_220` 则以传入参数为准,环境变量不生效。 15 | - 代码架构和运行设备需要匹配,否则容易出现类似 `Found kernel(xxx) but not load(xxx)`的错误,例如:为2xx生成的代码,在3xx上运行会出现:`Found kernel(_Z19bang_sigmoid_kernelIfEvPT_S1_i) but not load(101315)`。 16 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/build/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/build/.DS_Store -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/build/lib.linux-x86_64-3.7/libmlu_custom_ext.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/build/lib.linux-x86_64-3.7/libmlu_custom_ext.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/build/lib.linux-x86_64-3.7/mlu_custom_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/build/lib.linux-x86_64-3.7/mlu_custom_ext/__init__.py 
-------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/build/lib.linux-x86_64-3.7/mlu_custom_ext/mlu_functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlu_functions import * 2 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/build/lib.linux-x86_64-3.7/mlu_custom_ext/mlu_functions/mlu_functions.py: -------------------------------------------------------------------------------- 1 | from turtle import forward 2 | import torch 3 | import torch.nn as nn 4 | import torch.jit as jit 5 | 6 | from typing import Any 7 | 8 | # TODO: 请补充自定义算子库的名称 9 | from libmlu_custom_ext import * # NOSONAR 10 | 11 | 12 | class sigmoid_function(torch.autograd.Function): 13 | """ 14 | sigmoid for autograd 15 | """ 16 | 17 | @staticmethod 18 | def forward(ctx, x): 19 | # TODO: 请补充自定义算子的python接口函数名 20 | y = active_sigmoid_mlu(x) 21 | ctx.save_for_backward(*[x, y]) 22 | return y 23 | 24 | @staticmethod 25 | def backward(ctx: Any, d_r: Any) -> Any: 26 | d_r = d_r.contiguous() 27 | x, y = ctx.saved_tensors 28 | dx = y * (1 - y) * d_r 29 | return dx 30 | 31 | 32 | @jit.ignore 33 | def sigmoid(x: torch.Tensor) -> torch.Tensor: 34 | """ 35 | sigmoid for forward 36 | """ 37 | return sigmoid_function.apply(x) 38 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/build/temp.linux-x86_64-3.7/opt/code_chap_5_student/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/src/bang_sigmoid.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/build/temp.linux-x86_64-3.7/opt/code_chap_5_student/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/src/bang_sigmoid.o 
-------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/build/temp.linux-x86_64-3.7/opt/code_chap_5_student/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/src/bang_sigmoid_sample.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/build/temp.linux-x86_64-3.7/opt/code_chap_5_student/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/src/bang_sigmoid_sample.o -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/dist/mlu_custom_ext-0.1-py3.7-linux-x86_64.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/dist/mlu_custom_ext-0.1-py3.7-linux-x86_64.egg -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: mlu-custom-ext 3 | Version: 0.1 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | License: UNKNOWN 7 | Platform: UNKNOWN 8 | 9 | UNKNOWN 10 | 11 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | setup.py 3 | /opt/code_chap_5_student/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/src/bang_sigmoid.cpp 4 | /opt/code_chap_5_student/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/src/bang_sigmoid_sample.mlu 5 | mlu_custom_ext/__init__.py 6 | mlu_custom_ext.egg-info/PKG-INFO 7 | 
mlu_custom_ext.egg-info/SOURCES.txt 8 | mlu_custom_ext.egg-info/dependency_links.txt 9 | mlu_custom_ext.egg-info/top_level.txt 10 | mlu_custom_ext/mlu_functions/__init__.py 11 | mlu_custom_ext/mlu_functions/mlu_functions.py -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | libmlu_custom_ext 2 | mlu_custom_ext 3 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/.DS_Store -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/__init__.py -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryli2002/Intelligent-Computing-Systems-labs/adf0f7daba2044e3c8d231dc8d0cc8f5f6e21171/exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/.DS_Store 
-------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/include/bang_sigmoid_sample.h: -------------------------------------------------------------------------------- 1 | #ifndef CAMBRICON_BANG_SIGMOID_SAMPLE_H 2 | #define CAMBRICON_BANG_SIGMOID_SAMPLE_H 3 | #include 4 | template 5 | void bang_sigmoid_kernel_entry(cnrtQueue *queue, T *d_dst, T *d_src, 6 | int elem_count); 7 | #endif 8 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/include/customed_ops.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | #include 4 | #include 5 | torch::Tensor active_sigmoid_mlu(torch::Tensor x); 6 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/include/kernel.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (C) 2021 by Cambricon, Inc. All rights reserved. 3 | * 4 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 5 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 6 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 7 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 8 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 9 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 10 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
11 | *************************************************************************/ 12 | #ifndef KERNELS_KERNEL_H_ 13 | #define KERNELS_KERNEL_H_ 14 | 15 | /****************************************************************************** 16 | * Macros for host and device side 17 | ******************************************************************************/ 18 | #define NFU_ALIGN_SIZE 128 // Byte 19 | #define REM_FOR_STACK (128 * 1024) // 128KB reserved for cncc 20 | #define CEIL_ALIGN(x, align) (((x) + (align)-1) / (align) * (align)) 21 | #define FLOOR_ALIGN(x, align) ((x) / (align) * (align)) 22 | 23 | #if defined(__BANG__) 24 | #include 25 | #endif // defined(__BANG__) 26 | 27 | /****************************************************************************** 28 | * Macros for device side 29 | ******************************************************************************/ 30 | #define CORE_DIM 4 31 | #if defined(__BANG__) 32 | #define MAX_NRAM_SIZE \ 33 | (__MLU_NRAM_SIZE__ * 1024 - 128 * 1024) // 128KB reserved for cncc 34 | #define MAX_SRAM_SIZE \ 35 | (__MLU_SRAM_SIZE__ * 1024 - 128 * 1024) // 128KB reserved for cncc 36 | #define MAX_WRAM_SIZE (__MLU_WRAM_SIZE__ * 1024) 37 | #if __BANG_ARCH == 290 38 | #define BLOCK_SIZE (384 * 1024) 39 | #endif 40 | #if __BANGC_ARCH == 370 41 | #define BLOCK_SIZE (512 * 1024) 42 | #endif 43 | __mlu_func__ void pvLock() { 44 | #if __BANG_ARCH__ == 270 45 | if (coreId != 0x80) { 46 | __bang_lock(0, 0); 47 | } 48 | #endif 49 | } 50 | 51 | __mlu_func__ void pvUnlock() { 52 | #if __BANG_ARCH__ == 270 53 | if (coreId != 0x80) { 54 | __bang_unlock(0, 0); 55 | } 56 | #endif 57 | } 58 | #endif // defined(__BANG__) 59 | 60 | #endif // KERNELS_KERNEL_H_ 61 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/src/bang_sigmoid.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 
"ATen/Tensor.h" 6 | #include "aten/cnnl/cnnlHandle.h" 7 | #include "aten/cnnl/cnnl_util.h" 8 | #include "aten/operators/bang/bang_kernel.h" 9 | #include "aten/operators/bang/internal/bang_internal.h" 10 | #include "aten/util/tensor_util.h" 11 | #include "aten/util/types.h" 12 | 13 | using namespace torch_mlu; 14 | torch::Tensor active_sigmoid_mlu(torch::Tensor x) { 15 | auto x_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous(x); 16 | auto x_impl = getMluTensorImpl(x_contiguous); 17 | auto x_ptr = x_impl->cnnlMalloc(); 18 | 19 | auto y = at::empty_like(x_contiguous); 20 | auto y_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous(y); 21 | auto y_impl = getMluTensorImpl(y_contiguous); 22 | auto y_ptr = y_impl->cnnlMalloc(); 23 | 24 | int32_t size = x_contiguous.numel(); 25 | 26 | cnrtQueue_t queue = getCurQueue(); 27 | // TODO: 请补充Sigmoid主程序函数接口的签名 28 | bang_sigmoid_kernel_entry(queue, reinterpret_cast(y_ptr), 29 | reinterpret_cast(x_ptr), size); 30 | 31 | return y; 32 | } 33 | 34 | PYBIND11_MODULE(libmlu_custom_ext, m) { 35 | // TODO: 请补充pybind函数定义 36 | m.def("active_sigmoid_mlu", &active_sigmoid_mlu); 37 | } 38 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu/src/bang_sigmoid_sample.mlu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | # 4 | __nram__ char NRAM_BUFFER[MAX_NRAM_SIZE]; 5 | 6 | template 7 | __mlu_global__ void bang_sigmoid_kernel(T *d_dst, T *d_src, int N) { 8 | const int NRAM_LIMIT_SIZE = FLOOR_ALIGN(MAX_NRAM_SIZE / 2, 64); 9 | int nram_limit = NRAM_LIMIT_SIZE / sizeof(T); 10 | // 对列数据切分 11 | int32_t num_per_core = N / taskDim; 12 | int32_t repeat = num_per_core / nram_limit; 13 | int32_t rem = num_per_core % nram_limit; 14 | 15 | T *d_input_per_task = d_src + taskId * nram_limit; 16 | T *d_output_per_task = d_dst + taskId * nram_limit; 17 | T *nram_out = (T *)NRAM_BUFFER; 18 | T *nram_in = (T 
*)(NRAM_BUFFER + NRAM_LIMIT_SIZE); 19 | 20 | const int align_rem = CEIL_ALIGN(rem, 64); 21 | 22 | int i = 0; 23 | for (; i < repeat; i++) { 24 | // TODO: 请补充拷贝方向 25 | __memcpy_async(nram_in, d_input_per_task + i * nram_limit, 26 | NRAM_LIMIT_SIZE, GDRAM2NRAM); 27 | __sync_io(); 28 | // TODO: 请补充BANG的sigmoid函数 29 | __bang_active_sigmoid(nram_out, nram_in, nram_limit); 30 | __sync_compute(); 31 | 32 | // TODO: 请补充拷贝方向 33 | __memcpy_async(d_output_per_task + i * nram_limit, nram_out, 34 | NRAM_LIMIT_SIZE, NRAM2GDRAM); 35 | 36 | __sync_io(); 37 | } 38 | if (rem > 0) { 39 | // TODO: 请补充拷贝方向 40 | __memcpy_async(nram_in, d_input_per_task + i * nram_limit, 41 | rem * sizeof(T), GDRAM2NRAM); 42 | __sync_io(); 43 | // TODO: 请补充BANG的sigmoid函数 44 | __bang_active_sigmoid(nram_out, nram_in, align_rem); 45 | __sync_compute(); 46 | // TODO: 请补充拷贝方向 47 | __memcpy_async(d_output_per_task + i * nram_limit, nram_out, 48 | rem * sizeof(T), NRAM2GDRAM); 49 | 50 | __sync_io(); 51 | } 52 | } 53 | template 54 | void bang_sigmoid_kernel_entry(cnrtQueue *queue, T *d_dst, T *d_src, 55 | int elem_count) { 56 | cnrtDim3_t dim = {1, 1, 1}; 57 | int taskDims = dim.x * dim.y * dim.z; 58 | // TODO: 请补充Kernel函数类型 59 | cnrtFunctionType_t c = CNRT_FUNC_TYPE_BLOCK; 60 | if (elem_count < taskDims) { 61 | dim.x = 1; 62 | dim.y = 1; 63 | } 64 | // TODO: 请补充Kernel函数的调用 65 | bang_sigmoid_kernel<<>>(d_dst, d_src, elem_count); 66 | cnrtQueueSync(queue); 67 | } 68 | template 69 | void bang_sigmoid_sample(T *h_dst, T *h_src, const int elem_count) { 70 | 71 | T *d_src, *d_dst; 72 | cnrtQueue_t queue; 73 | cnrtQueueCreate(&queue); 74 | cnrtRet_t ret; 75 | ret = 76 | cnrtMalloc(reinterpret_cast(&d_src), elem_count * sizeof(T)); 77 | ret = 78 | cnrtMalloc(reinterpret_cast(&d_dst), elem_count * sizeof(T)); 79 | 80 | ret = cnrtMemcpy(d_src, h_src, elem_count * sizeof(T), 81 | CNRT_MEM_TRANS_DIR_HOST2DEV); // cnrtMemcpyHostToDev 82 | 83 | bang_sigmoid_kernel_entry(queue, d_dst, d_src, elem_count); 84 | 
cnrtQueueSync(queue); 85 | // TODO: 请补充Host和Device间的内存拷贝方向 86 | ret = cnrtMemcpy(h_dst, d_dst, elem_count * sizeof(T), 87 | CNRT_MEM_TRANS_DIR_DEV2HOST); 88 | 89 | ret = cnrtQueueDestroy(queue); 90 | } 91 | template void bang_sigmoid_sample(float*, float*, int); 92 | template void bang_sigmoid_kernel_entry(cnrtQueue *, float *, float *, int); 93 | 94 | 95 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu_functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlu_functions import * 2 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/mlu_custom_ext/mlu_functions/mlu_functions.py: -------------------------------------------------------------------------------- 1 | from turtle import forward 2 | import torch 3 | import torch.nn as nn 4 | import torch.jit as jit 5 | 6 | from typing import Any 7 | 8 | # TODO: 请补充自定义算子库的名称 9 | from libmlu_custom_ext import * # NOSONAR 10 | 11 | 12 | class sigmoid_function(torch.autograd.Function): 13 | """ 14 | sigmoid for autograd 15 | """ 16 | 17 | @staticmethod 18 | def forward(ctx, x): 19 | # TODO: 请补充自定义算子的python接口函数名 20 | y = active_sigmoid_mlu(x) 21 | ctx.save_for_backward(*[x, y]) 22 | return y 23 | 24 | @staticmethod 25 | def backward(ctx: Any, d_r: Any) -> Any: 26 | d_r = d_r.contiguous() 27 | x, y = ctx.saved_tensors 28 | dx = y * (1 - y) * d_r 29 | return dx 30 | 31 | 32 | @jit.ignore 33 | def sigmoid(x: torch.Tensor) -> torch.Tensor: 34 | """ 35 | sigmoid for forward 36 | """ 37 | return sigmoid_function.apply(x) 38 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from setuptools import setup, find_packages 4 | 5 | from torch.utils import 
cpp_extension 6 | from torch_mlu.utils.cpp_extension import MLUExtension, BuildExtension 7 | import glob 8 | import shutil 9 | import distutils 10 | from setuptools.dist import Distribution 11 | 12 | mlu_custom_src = "mlu_custom_ext" 13 | cpath = os.path.join(os.path.abspath(os.path.dirname(__file__)), 14 | os.path.join(mlu_custom_src, "mlu")) 15 | 16 | 17 | def source(src): 18 | cpp_src = glob.glob("{}/*.cpp".format(src)) 19 | mlu_src = glob.glob("{}/*.mlu".format(src)) 20 | cpp_src.extend(mlu_src) 21 | return cpp_src 22 | 23 | def main(): 24 | mlu_extension = MLUExtension( 25 | name="libmlu_custom_ext", 26 | sources=source(os.path.join(cpath, 'src')), 27 | include_dirs=[os.path.join(cpath, "include")], 28 | verbose=True, 29 | extra_cflags=['-w'], 30 | extra_link_args=['-w'], 31 | extra_compile_args={ 32 | "cxx": [ 33 | "-O3", 34 | "-std=c++14", 35 | ], 36 | "cncc": ["-O3", "-I{}".format(os.path.join(cpath, "include"))] 37 | }) 38 | dist = Distribution() 39 | dist.script_name = os.path.basename(sys.argv[0]) 40 | dist.script_args = sys.argv[1:] 41 | if dist.script_args == ["clean"]: 42 | if os.path.exists(os.path.abspath('build')): 43 | shutil.rmtree('build') 44 | setup(name="mlu_custom_ext", 45 | version="0.1", 46 | packages=find_packages(), 47 | ext_modules=[mlu_extension], 48 | cmdclass={ 49 | "build_ext": 50 | BuildExtension.with_options(no_python_abi_suffix=True) 51 | }) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /exp_5_1_custom_pytorch_mlu_op/tests/test_sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch_mlu 4 | import copy 5 | from mlu_custom_ext import mlu_functions 6 | import unittest 7 | 8 | 9 | class TestSigmoid(unittest.TestCase): 10 | """ 11 | test sigmoid 12 | """ 13 | 14 | def test_forward_with_shapes(self, shapes=[(3, 4)]): 15 | # TODO: 请补充Warm up代码 16 | for 
shape in shapes: 17 | # 执行一些预热操作,比如简单的数据传输和计算 18 | x_warmup = torch.randn(shape, device='mlu') 19 | y_warmup = mlu_functions.sigmoid(x_warmup) 20 | torch.mlu.synchronize() 21 | 22 | 23 | for shape in shapes: 24 | event_start = torch.mlu.Event() 25 | event_end = torch.mlu.Event() 26 | 27 | event_start.record() 28 | x_cpu = torch.randn(shape) 29 | x_mlu = x_cpu.to('mlu') 30 | # TODO: 请补充mlu_custom_ext库的Sigmoid函数调用 31 | y_mlu = mlu_functions.sigmoid(x_mlu) 32 | y_cpu = x_cpu.sigmoid() 33 | np.testing.assert_array_almost_equal(y_mlu.cpu(), y_cpu, decimal=3) 34 | event_end.record() 35 | 36 | torch.mlu.synchronize() 37 | print('forward time: ', event_start.elapsed_time(event_end), 'ms') 38 | 39 | def test_backward_with_shapes(self, shapes=[(3, 4)]): 40 | # TODO: 请补充Warm up代码 41 | for shape in shapes: 42 | # 执行一些预热操作,比如简单的数据传输和计算 43 | x_warmup = torch.randn(shape, device='mlu') 44 | y_warmup = mlu_functions.sigmoid(x_warmup) 45 | torch.mlu.synchronize() 46 | 47 | for shape in shapes: 48 | event_start = torch.mlu.Event() 49 | event_end = torch.mlu.Event() 50 | 51 | event_start.record() 52 | x_mlu = torch.randn(shape, requires_grad=True, device='mlu') 53 | # TODO: 请补充mlu_custom_ext库的Sigmoid函数调用 54 | y_mlu = mlu_functions.sigmoid(x_mlu) 55 | z_mlu = torch.sum(y_mlu) 56 | z_mlu.backward() 57 | grad_mlu = x_mlu.grad 58 | with torch.no_grad(): 59 | grad_cpu = (y_mlu * (1 - y_mlu)).cpu() 60 | np.testing.assert_array_almost_equal(grad_mlu.detach().cpu(), 61 | grad_cpu, 62 | decimal=3) 63 | event_end.record() 64 | 65 | torch.mlu.synchronize() 66 | print('backward time: ', event_start.elapsed_time(event_end), 'ms') 67 | 68 | 69 | if __name__ == '__main__': 70 | unittest.main() 71 | --------------------------------------------------------------------------------