├── .gitignore ├── LICENSE ├── README.md ├── assets ├── CI_test_suite │ ├── .gitignore │ ├── README.md │ ├── ci_utils.py │ ├── profile_utils.py │ └── test_suite.py └── img │ ├── autotuner.png │ ├── pulp-trainlib-mm-flow.png │ ├── pulp-trainlib-primitives.png │ └── trainlib-deployer.png ├── lib ├── README.md ├── include │ ├── mm_manager_list.txt │ ├── mm_manager_list_fp16.txt │ ├── pulp_act_fp16.h │ ├── pulp_act_fp32.h │ ├── pulp_batchnorm_fp32.h │ ├── pulp_conv2d_fp16.h │ ├── pulp_conv2d_fp32.h │ ├── pulp_conv_dw_fp16.h │ ├── pulp_conv_dw_fp32.h │ ├── pulp_conv_naive_fp16.h │ ├── pulp_conv_naive_fp32.h │ ├── pulp_conv_pw_fp16.h │ ├── pulp_conv_pw_fp32.h │ ├── pulp_dropout_fp16.h │ ├── pulp_dropout_fp32.h │ ├── pulp_embedding_fp16.h │ ├── pulp_im2col_fp16.h │ ├── pulp_im2col_fp32.h │ ├── pulp_instnorm_fp16.h │ ├── pulp_instnorm_fp32.h │ ├── pulp_interpolation_fp16.h │ ├── pulp_interpolation_fp32.h │ ├── pulp_layernorm_fp32.h │ ├── pulp_linear_fp16.h │ ├── pulp_linear_fp32.h │ ├── pulp_losses_fp16.h │ ├── pulp_losses_fp32.h │ ├── pulp_matmul_fp16.h │ ├── pulp_matmul_fp32.h │ ├── pulp_mhsa_fp16.h │ ├── pulp_mhsa_fp32.h │ ├── pulp_nonorm_fp16.h │ ├── pulp_nonorm_fp32.h │ ├── pulp_optimizers_fp16.h │ ├── pulp_optimizers_fp32.h │ ├── pulp_pooling_fp16.h │ ├── pulp_pooling_fp32.h │ ├── pulp_random.h │ ├── pulp_residual_fp16.h │ ├── pulp_residual_fp32.h │ ├── pulp_rmsnorm_fp16.h │ ├── pulp_rmsnorm_fp32.h │ ├── pulp_rnn_fp32.h │ ├── pulp_train.h │ ├── pulp_train_defines.h │ ├── pulp_train_utils_fp16.h │ ├── pulp_train_utils_fp32.h │ ├── pulp_transp_conv2d_fp16.h │ ├── pulp_transp_conv2d_fp32.h │ └── tensor_checkers.h └── sources │ ├── pulp_act_fp16.c │ ├── pulp_act_fp32.c │ ├── pulp_batchnorm_fp32.c │ ├── pulp_conv2d_fp16.c │ ├── pulp_conv2d_fp32.c │ ├── pulp_conv_dw_fp16.c │ ├── pulp_conv_dw_fp32.c │ ├── pulp_conv_naive_fp16.c │ ├── pulp_conv_naive_fp32.c │ ├── pulp_conv_pw_fp16.c │ ├── pulp_conv_pw_fp32.c │ ├── pulp_dropout_fp16.c │ ├── pulp_dropout_fp32.c │ ├── 
pulp_embedding_fp16.c │ ├── pulp_im2col_fp16.c │ ├── pulp_im2col_fp32.c │ ├── pulp_instnorm_fp16.c │ ├── pulp_instnorm_fp32.c │ ├── pulp_interpolation_fp16.c │ ├── pulp_interpolation_fp32.c │ ├── pulp_layernorm_fp32.c │ ├── pulp_linear_fp16.c │ ├── pulp_linear_fp32.c │ ├── pulp_losses_fp16.c │ ├── pulp_losses_fp32.c │ ├── pulp_matmul_fp16.c │ ├── pulp_matmul_fp32.c │ ├── pulp_mhsa_fp16.c │ ├── pulp_mhsa_fp32.c │ ├── pulp_nonorm_fp16.c │ ├── pulp_nonorm_fp32.c │ ├── pulp_optimizers_fp16.c │ ├── pulp_optimizers_fp32.c │ ├── pulp_pooling_fp16.c │ ├── pulp_pooling_fp32.c │ ├── pulp_random.c │ ├── pulp_residual_fp16.c │ ├── pulp_residual_fp32.c │ ├── pulp_rmsnorm_fp16.c │ ├── pulp_rmsnorm_fp32.c │ ├── pulp_rnn_fp32.c │ ├── pulp_train_utils_fp16.c │ ├── pulp_train_utils_fp32.c │ ├── pulp_transp_conv2d_fp16.c │ └── pulp_transp_conv2d_fp32.c ├── tests ├── .gitignore ├── README.md ├── mm_manager_list.txt ├── mm_manager_list_fp16.txt ├── test_DMA_tensor │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_ResNet_CIFAR10 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── pulp-sdk-configs │ │ ├── link.ld │ │ └── pulp.json │ ├── readme.txt │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_act │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ └── dump_utils.py ├── test_batchnorm_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_blocktranspose │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_broadcast_add │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ └── GM.py ├── test_broadcast_matmul │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ └── GM.py ├── 
test_conv2d_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_conv2d_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_conv_pw_dw_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_conv_pw_dw_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_cordic │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_dropout │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ └── dump_utils.py ├── test_gelu_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ ├── profile_utils.py │ │ └── test_model.py ├── test_im2col │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_instnorm_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── readme.txt │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_instnorm_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── readme.txt │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── 
test_interpolation │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ └── dump_utils.py ├── test_layernorm_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ ├── tensor_checkers.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_layout_change │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_linear_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_linear_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_losses_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── losses.py ├── test_losses_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── losses.py ├── test_matmul │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── output_eval.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── profile_fastest.py ├── test_mhsa_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mhsa_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── 
profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mhsa_fp32_partialsoftmax_old │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── mhsa_partial_softmax.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mhsa_paper_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net-args.h │ ├── net.c │ ├── net.h │ ├── net_l1.c │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mhsa_paper_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net-args.h │ ├── net.c │ ├── net.h │ ├── net_l1.c │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mobilebert_fp16 │ ├── .gitignore │ ├── Makefile │ ├── binarygen.py │ ├── main.c │ ├── net.c │ ├── net.h │ ├── net_args.h │ ├── net_flash.c │ ├── net_l1.c │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ └── dump_utils.py ├── test_mobilebert_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── net_args.h │ ├── net_l1.c │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ └── dump_utils.py ├── test_pad │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_pooling │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_random │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_reduce_mean │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ └── GM.py ├── 
test_residual │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ ├── utils │ │ ├── GM.py │ │ └── dump_utils.py │ └── variables.h ├── test_rnn_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_tiny_vit_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ ├── tensor_checkers.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── model │ │ ├── Attention.py │ │ ├── BasicLayer.py │ │ ├── Conv2dBN.py │ │ ├── ConvLayer.py │ │ ├── DropPath.py │ │ ├── LocalAttention.py │ │ ├── MBConv.py │ │ ├── Mlp.py │ │ ├── PatchEmbed.py │ │ ├── PatchMerging.py │ │ ├── RotaryEmbedding.py │ │ ├── SinusoidalEmbeddings.py │ │ ├── SparseAttention.py │ │ ├── TinyViT.py │ │ ├── TinyViTBlock.py │ │ └── model_utils.py │ │ ├── model_configs.py │ │ └── writers │ │ ├── component_writers.py │ │ ├── file_writers.py │ │ └── writers_utils.py ├── test_transp_conv2d_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── profile_utils.py ├── test_transp_conv2d_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── profile_utils.py ├── test_transpose │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ └── GM.py └── test_vit_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ ├── tensor_checkers.h │ └── utils │ ├── .gitignore │ ├── GM.py │ ├── dump_utils.py │ ├── torch_to_trainlib.py │ ├── vit_lr │ ├── MultiHeadSelfAttention.py │ ├── PositionWiseFeedForward.py │ ├── PositionalEmbedding1D.py │ ├── README.md │ ├── ResizeProcedure.py │ ├── SoftmaxFastExp.py │ ├── Transformer.py │ ├── 
TransformerBlock.py │ ├── ViTLR_model.py │ └── vit_lr_utils.py │ └── writers │ ├── component_writers.py │ └── file_writers.py └── tools ├── .gitignore ├── AutoTuner ├── autotuner.py ├── server_execution_files │ ├── run_regression.sh │ └── sw │ │ └── bwruntest.py └── tiling_utils.py ├── README.md ├── TrainLib_Deployer ├── TrainLib_Deployer.py └── deployer_utils │ ├── DNN_Composer.py │ ├── DNN_Reader.py │ ├── GM_templates.py │ ├── deployment_utils.py │ ├── deployment_utils_double_buffer.py │ ├── deployment_utils_single_buffer.py │ ├── net_templates.py │ ├── net_templates_double_buffer.py │ ├── net_templates_single_buffer.py │ └── srcfiles │ ├── dump_utils.py │ ├── main.c │ └── stats.h └── memory_footprint_tool └── memory_footprint_eval.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | **/.vscode/ 3 | **/__pycache__/ 4 | .idea/ 5 | -------------------------------------------------------------------------------- /assets/CI_test_suite/.gitignore: -------------------------------------------------------------------------------- 1 | /__pycache__/ 2 | test_suite_results.txt 3 | temp/ -------------------------------------------------------------------------------- /assets/CI_test_suite/README.md: -------------------------------------------------------------------------------- 1 | # Test suite for continuous integration 2 | 3 | By launching the [test suite](test_suite.py), users can verify PULP-TrainLib's primitives. 4 | To extend the test suite, please insert a new section in the Python suite, by following the structure of the other primitives. 5 | 6 | The test suite is designed to create a `temp/` folder which contains all the tests that have been executed. In each test, the output is contained into its respective `log.txt` file, which is filled with the terminal's output. A summary of the execution of each test is then stored into `test_suite_results.txt`. 
Check for the expression `CONTAINS ERRORS` to check for tests which failed. -------------------------------------------------------------------------------- /assets/CI_test_suite/ci_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | import os 17 | import shutil 18 | 19 | # Copy related test folder into temp 20 | def copy_test_folder_ci (test_id, ci_test_folder, test_folder): 21 | 22 | test_dest_folder = str(ci_test_folder)+"/temp/tests/ci_test_"+str(test_id) 23 | if not os.path.exists(test_dest_folder): 24 | os.mkdir(test_dest_folder) 25 | os.chdir(test_dest_folder) 26 | shutil.copytree(test_folder, test_dest_folder, dirs_exist_ok=True) 27 | 28 | 29 | # Copy the trainlib into the suitable position 30 | def copy_trainlib_ci (ci_test_folder, trainlib_folder): 31 | 32 | trainlib_dest_folder = str(ci_test_folder)+"/temp/lib" 33 | os.chdir(trainlib_dest_folder) 34 | shutil.copytree(trainlib_folder, trainlib_dest_folder, dirs_exist_ok=True) 35 | 36 | 37 | -------------------------------------------------------------------------------- /assets/img/autotuner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pulp-platform/pulp-trainlib/784f73e0b7dbd183f742b2750708fb65418d7f23/assets/img/autotuner.png 
-------------------------------------------------------------------------------- /assets/img/pulp-trainlib-mm-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pulp-platform/pulp-trainlib/784f73e0b7dbd183f742b2750708fb65418d7f23/assets/img/pulp-trainlib-mm-flow.png -------------------------------------------------------------------------------- /assets/img/pulp-trainlib-primitives.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pulp-platform/pulp-trainlib/784f73e0b7dbd183f742b2750708fb65418d7f23/assets/img/pulp-trainlib-primitives.png -------------------------------------------------------------------------------- /assets/img/trainlib-deployer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pulp-platform/pulp-trainlib/784f73e0b7dbd183f742b2750708fb65418d7f23/assets/img/trainlib-deployer.png -------------------------------------------------------------------------------- /lib/include/pulp_dropout_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | /** 18 | * Authors: Alberto Dequino 19 | */ 20 | 21 | #include 22 | #include "pulp_train_defines.h" 23 | 24 | /** 25 | * @brief Structure for FP16 dropout 26 | * @param probability the probability of the single value to be dropped 27 | * @param input input to apply the dropout 28 | * @param use_mask flag to choose whether to do a real dropout or just apply a mask (useful for reproducing GM results) 29 | * @param mask vector used for masking (requires use_mask==1, and same size of input vector) 30 | * @param size input/mask vector size 31 | * @param seed initial seed value 32 | */ 33 | struct dropout_args_fp16{ 34 | fp16 probability; 35 | fp16 * input; 36 | int use_mask; 37 | fp16 * mask; 38 | int size; 39 | int seed; 40 | }; 41 | 42 | 43 | /** 44 | * PULP-TrainLib's definitions 45 | */ 46 | 47 | 48 | /** 49 | * @brief FP16 Dropout function 50 | */ 51 | void pulp_dropout_fp16_cl(void * dropout_args); -------------------------------------------------------------------------------- /lib/include/pulp_dropout_fp32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | /** 18 | * Authors: Alberto Dequino 19 | */ 20 | 21 | #include 22 | 23 | /** 24 | * @brief Structure for FP32 dropout 25 | * @param probability the probability of the single value to be dropped 26 | * @param input input to apply the dropout 27 | * @param use_mask flag to choose whether to do a real dropout or just apply a mask (useful for reproducing GM results) 28 | * @param mask vector used for masking (requires use_mask==1, and same size of input vector) 29 | * @param size input/mask vector size 30 | * @param seed initial seed value 31 | */ 32 | struct dropout_args_fp32{ 33 | float probability; 34 | float * input; 35 | int use_mask; 36 | float * mask; 37 | int size; 38 | int seed; 39 | }; 40 | 41 | 42 | /** 43 | * PULP-TrainLib's definitions 44 | */ 45 | 46 | 47 | /** 48 | * @brief FP32 Dropout function 49 | */ 50 | void pulp_dropout_fp32_cl(void * dropout_args); -------------------------------------------------------------------------------- /lib/include/pulp_embedding_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | 18 | /** 19 | * Authors: Alberto Dequino 20 | */ 21 | 22 | struct Embedding_args_fp16{ 23 | fp16* BUFF; // per-core DMA staging buffer, one embed_dim-sized slot per core (see pulp_embedding_fp16.c) 24 | int dim; // number of token ids to embed 25 | int embed_dim; // width of one embedding vector 26 | int *ids; // token ids, length dim 27 | fp16 *embeds; // embedding table (read via EXT2LOC DMA, so external memory; rows of embed_dim fp16) 28 | fp16 *out; // output buffer, dim x embed_dim (written via LOC2EXT DMA) 29 | }; 30 | 31 | void embedding_fw_tiled_fp16(void *embedding_args); -------------------------------------------------------------------------------- /lib/include/pulp_layernorm_fp32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 University of Bologna 3 | * All rights reserved. 4 | * 5 | * This software may be modified and distributed under the terms 6 | * of the BSD license. See the LICENSE file for details. 7 | * 8 | * Authors: Calin Diaconu (calin.diaconu@studio.unibo.it) 9 | */ 10 | 11 | #ifndef PULP_TRAINLIB_PULP_LAYERNORM_FP32_H 12 | #define PULP_TRAINLIB_PULP_LAYERNORM_FP32_H 13 | 14 | #include "math.h" 15 | 16 | /** 17 | * @brief Arguments for the forward pass of the LayerNorm layer. 18 | * @brief x: input tensor 19 | * @brief weight: weight tensor 20 | * @brief bias: bias tensor 21 | * @brief output: output tensor 22 | * @brief eps: epsilon value 23 | * @brief size: size of the tensors 24 | * @brief step_size: step size over which the normalization is performed 25 | */ 26 | struct LayerNorm_args_fp32 { 27 | float *x; 28 | float *weight; 29 | float *bias; 30 | float *output; 31 | float *eps; 32 | int size; 33 | int step_size; 34 | }; 35 | 36 | /** 37 | * @brief Forward function that calls the parallelized version for the LayerNorm layer. 
38 | * @param (void *) (struct LayerNorm_args_fp32 void_args) 39 | */ 40 | void pulp_layerNorm_fp32_fw_cl(void *layer_norm_args); 41 | 42 | #endif //PULP_TRAINLIB_PULP_LAYERNORM_FP32_H 43 | -------------------------------------------------------------------------------- /lib/include/pulp_nonorm_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | /** 19 | * Nonorm training functions, grouped into FW and BW 20 | * 21 | * Authors: Alberto Dequino 22 | */ 23 | 24 | 25 | /** 26 | * Nonorm layer configuration structure 27 | */ 28 | 29 | /** 30 | * @brief Structure for NoNorm Training in FP16 31 | * @param input input for the nonorm layer (from forward perspective) 32 | * @param coeff weight vector 33 | * @param bias bias 34 | * @param output output for the nonorm layer (from forward perspective) 35 | */ 36 | struct Nonorm_args_fp16 { 37 | struct blob_fp16 * input; 38 | struct blob_fp16 * coeff; 39 | struct blob_fp16 * bias; 40 | struct blob_fp16 * output; 41 | }; 42 | 43 | /** 44 | * Nonorm layer training functions, grouped into FW and BW 45 | */ 46 | 47 | // FORWARD FUNCTIONS 48 | 49 | /** 50 | * @brief Forward pass function, forked on PULP cluster. 51 | * @param Nonorm_args_fp16 structure configuring the nonorm layer. 
52 | */ 53 | void pulp_nonorm_fp16_fw_cl( void * Nonorm_args ); 54 | 55 | 56 | -------------------------------------------------------------------------------- /lib/include/pulp_nonorm_fp32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | /** 19 | * Nonorm training functions, grouped into FW and BW 20 | * 21 | * Authors: Alberto Dequino 22 | */ 23 | 24 | 25 | /** 26 | * Nonorm layer configuration structure 27 | */ 28 | 29 | /** 30 | * @brief Structure for NoNorm Training in FP32 31 | * @param input input for the nonorm layer (from forward perspective) 32 | * @param coeff weight vector 33 | * @param bias bias 34 | * @param output output for the nonorm layer (from forward perspective) 35 | */ 36 | struct Nonorm_args { 37 | struct blob * input; 38 | struct blob * coeff; 39 | struct blob * bias; 40 | struct blob * output; 41 | }; 42 | 43 | /** 44 | * Nonorm layer training functions, grouped into FW and BW 45 | */ 46 | 47 | // FORWARD FUNCTIONS 48 | 49 | /** 50 | * @brief Forward pass function, forked on PULP cluster. 51 | * @param Nonorm_args structure configuring the nonorm layer. 
52 | */ 53 | void pulp_nonorm_fp32_fw_cl( void * Nonorm_args ); 54 | 55 | 56 | -------------------------------------------------------------------------------- /lib/include/pulp_optimizers_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Authors: Davide Nadalini, Leonardo Ravaglia 19 | */ 20 | 21 | #include "pulp_train_defines.h" 22 | 23 | /** 24 | * Optimizer configuration structure 25 | */ 26 | 27 | /** 28 | * @brief Parameters for optimizer functions for every single layer 29 | * @param weights blob of the weights (with their gradient inside) 30 | * @param bias blob of the biases (with their gradient inside) 31 | * @param learning_rate the learning rate of the optimizer 32 | * @param use_biases flag: use bias (1) or not use bias (0). 33 | */ 34 | struct optim_args_fp16 { 35 | struct blob_fp16 * weights; 36 | struct blob_fp16 * biases; 37 | fp16 learning_rate; 38 | int use_biases; 39 | }; 40 | 41 | 42 | 43 | /** 44 | * Optimizers 45 | **/ 46 | 47 | /** 48 | * @brief Gradient descent optimizer for a single layer. Use pi_cl_team_fork(NUM_CORES, pulp_gradient_descent_fp16, &args) to parallelize. 
49 | * @param optim_args pointer to optim_args structure (see pulp_train_utils_fp32.h) 50 | */ 51 | void pulp_gradient_descent_fp16( 52 | void * optim_args 53 | ); 54 | -------------------------------------------------------------------------------- /lib/include/pulp_optimizers_fp32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Authors: Davide Nadalini, Leonardo Ravaglia 19 | */ 20 | 21 | 22 | /** 23 | * Optimizer configuration structure 24 | */ 25 | 26 | /** 27 | * @brief Structure for optimizers 28 | * @param weights blob of the weights (with their gradient inside) 29 | * @param bias blob of the biases (with their gradient inside) 30 | * @param learning_rate the learning rate of the optimizer 31 | * @param use_biases flag: use bias (1) or not use bias (0). 32 | */ 33 | struct optim_args { 34 | struct blob * weights; 35 | struct blob * biases; 36 | float learning_rate; 37 | int use_biases; 38 | }; 39 | 40 | 41 | 42 | /** 43 | * Optimizers 44 | **/ 45 | 46 | /** 47 | * @brief Gradient descent optimizer for a single layer. Use pi_cl_team_fork(NUM_CORES, pulp_gradient_descent_fp32, &args) to parallelize. 
48 | * @param optim_args pointer to optim_args structure (see pulp_train_utils_fp32.h) 49 | */ 50 | void pulp_gradient_descent_fp32( 51 | void * optim_args 52 | ); 53 | -------------------------------------------------------------------------------- /lib/include/pulp_rmsnorm_fp16.h: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "pulp_train_defines.h" 3 | 4 | struct weighted_scaling_args_fp16 { 5 | fp16* out; 6 | fp16* in; 7 | fp16* w; 8 | fp16 scaling_factor; 9 | unsigned int size; 10 | }; 11 | 12 | struct sum_of_squares_args_fp16 { 13 | fp16* out; 14 | fp16* in; 15 | unsigned int size; 16 | }; 17 | 18 | void weighted_scaling_fp16_cl(void* weighted_scaling_args_fp16); 19 | 20 | void sum_of_squares_fp16_cl(void* sum_of_squares_args_fp16); 21 | 22 | void rmsnorm_parallelized_fp16(fp16* o, fp16* x, fp16* weight, fp16* buffer_n_cores, int size); 23 | -------------------------------------------------------------------------------- /lib/sources/pulp_embedding_fp16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 University of Bologna 3 | * All rights reserved. 4 | * 5 | * This software may be modified and distributed under the terms 6 | * of the BSD license. See the LICENSE file for details. 
7 | * 8 | * Authors: Alberto Dequino (alberto.dequino@unibo.it) 9 | */ 10 | 11 | #include "pulp_train_utils_fp16.h" 12 | #include "pulp_embedding_fp16.h" 13 | #include "pulp_train_defines.h" 14 | #include "pmsis.h" 15 | 16 | // FORWARD, TILED 17 | 18 | void embedding_fw_tiled_fp16(void *embedding_args){ 19 | struct Embedding_args_fp16 *args = (struct Embedding_args_fp16*) embedding_args; 20 | 21 | fp16 *BUFF = args->BUFF; 22 | 23 | int dim = args->dim; 24 | int embed_dim = args->embed_dim; 25 | 26 | pi_cl_dma_cmd_t * cmd_store; 27 | pi_cl_dma_cmd_t * cmd_load; 28 | 29 | const int blockSize=(dim+NUM_CORES-1)/NUM_CORES; 30 | const int start = pi_core_id()*blockSize; 31 | const int stop = start + blockSize > dim ? dim : start+blockSize; 32 | 33 | for(int i = start; i < stop; i++){ 34 | int id = (args->ids)[i]; 35 | pi_cl_dma_cmd((uint32_t) (args->embeds + id * embed_dim), (uint32_t) (BUFF + (int) (pi_core_id()) * embed_dim), 2 * embed_dim, PI_CL_DMA_DIR_EXT2LOC, cmd_load); 36 | pi_cl_dma_cmd_wait(cmd_load); 37 | pi_cl_dma_cmd((uint32_t) (args->out + i * embed_dim), (uint32_t) (BUFF + (int) (pi_core_id()) * embed_dim), 2 * embed_dim, PI_CL_DMA_DIR_LOC2EXT, cmd_store); 38 | pi_cl_dma_cmd_wait(cmd_store); 39 | } 40 | } -------------------------------------------------------------------------------- /lib/sources/pulp_nonorm_fp16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Authors: Alberto Dequino 19 | */ 20 | 21 | #include "pulp_train_utils_fp16.h" 22 | #include "pulp_nonorm_fp16.h" 23 | 24 | void pulp_nonorm_fp16_fw_cl( void * Nonorm_args ) 25 | { 26 | struct Nonorm_args_fp16 * NN_args = (struct Nonorm_args_fp16 *) Nonorm_args; 27 | fp16 *coeffData = NN_args->coeff->data; 28 | fp16 *biasData = NN_args->bias->data; 29 | fp16 *outData = NN_args->output->data; 30 | fp16 *inputData = NN_args->input->data; 31 | fp16 temp; 32 | 33 | int N = (NN_args->input)->H; // Sequence Length (we parallelize on this) 34 | int W = (NN_args->input)->W; // Embedding size 35 | 36 | const uint32_t blockSize = (N+NUM_CORES-1) / NUM_CORES; 37 | const uint32_t start = pi_core_id()*blockSize; 38 | const uint32_t stop = start+blockSize > N ? N : start+blockSize; 39 | 40 | for(uint32_t i = start; i < stop; i++){ 41 | int row = i * W; 42 | for(uint32_t j = 0; j < W; j++){ 43 | temp = inputData[row + j] * coeffData[j]; 44 | outData[row + j] = temp + biasData[j]; 45 | } 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /lib/sources/pulp_nonorm_fp32.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Authors: Alberto Dequino 19 | */ 20 | 21 | #include "pulp_train_utils_fp32.h" 22 | #include "pulp_nonorm_fp32.h" 23 | 24 | void pulp_nonorm_fp32_fw_cl( void * Nonorm_args ) 25 | { 26 | struct Nonorm_args * NN_args = (struct Nonorm_args *) Nonorm_args; 27 | float *coeffData = NN_args->coeff->data; 28 | float *biasData = NN_args->bias->data; 29 | float *outData = NN_args->output->data; 30 | float *inputData = NN_args->input->data; 31 | float temp; 32 | 33 | int N = (NN_args->input)->H; // Sequence Length (we parallelize on this) 34 | int W = (NN_args->input)->W; // Embedding size 35 | 36 | const uint32_t blockSize = (N+NUM_CORES-1) / NUM_CORES; 37 | const uint32_t start = pi_core_id()*blockSize; 38 | const uint32_t stop = start+blockSize > N ? N : start+blockSize; 39 | 40 | for(uint32_t i = start; i < stop; i++){ 41 | int row = i * W; 42 | for(uint32_t j = 0; j < W; j++){ 43 | temp = inputData[row + j] * coeffData[j]; 44 | outData[row + j] = temp + biasData[j]; 45 | } 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | build_every_layer_optim.py 2 | allprof.txt 3 | build_every_matmul.py 4 | all_matmul.txt 5 | start_pulp.sh 6 | v2_start_pulp.sh 7 | license_header.txt 8 | -------------------------------------------------------------------------------- /tests/mm_manager_list_fp16.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------ 2 | -- Copyright (C) 2021-2022 ETH Zurich and University of Bologna -- 3 | -- -- 4 | -- Licensed under the Apache License, Version 2.0 (the "License"); -- 5 | -- you may not use this file except in compliance with the License. 
-- 6 | -- You may obtain a copy of the License at -- 7 | -- -- 8 | -- http://www.apache.org/licenses/LICENSE-2.0 -- 9 | -- -- 10 | -- Unless required by applicable law or agreed to in writing, software -- 11 | -- distributed under the License is distributed on an "AS IS" BASIS, -- 12 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- 13 | -- See the License for the specific language governing permissions and -- 14 | -- limitations under the License. -- 15 | ------------------------------------------------------------------------------ 16 | 17 | 18 | ---------------------------------------------------- 19 | --- MM_MANAGER MATMUL LIST (for layer profiling) --- 20 | ---------------------------------------------------- 21 | 22 | STANDARD MATMULS: 23 | 24 | // Naives 25 | matmul_type == 0 26 | mm_fp16 27 | matmul_type == 1 28 | mm_M_fp16 29 | 30 | // Parallelism on N 31 | matmul_type == 2 32 | mm_fp16_SIMD_2x4 33 | matmul_type == 3 34 | mm_fp16_SIMD_4x8 35 | 36 | // Parallelism on M 37 | matmul_type == 4 38 | mm_M_fp16_SIMD_2x4 39 | matmul_type == 5 40 | mm_M_fp16_SIMD_4x8 41 | 42 | END STANDARD 43 | -------------------------------------------------------------------------------- /tests/test_DMA_tensor/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_DMA_tensor/Makefile: -------------------------------------------------------------------------------- 1 | APP = dma_transfer 2 | 3 | # User settings 4 | NUM_CORES?=1 5 | BITS?=16 6 | # Layer sizes 7 | HEIGHT?=4 8 | WIDTH?=4 9 | CHANNELS?=64*12 10 | #APP_CFLAGS += -DMERGE_PARALLEL 11 | #APP_CFLAGS += -DDEBUG_APP 12 | #APP_CFLAGS += -DPRINT_OUTPUT 13 | # End of user settings 14 | 15 | TRAIN_LIB=../../lib 16 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 17 | APP_SRCS = main.c net.c 18 | 19 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 20 | APP_CFLAGS += -O3 -g3 -mno-memcpy 21 | APP_CFLAGS += -DFABRIC 22 | APP_CFLAGS += -DCLUSTER 23 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 24 | APP_CFLAGS += -DBITS=$(BITS) 25 | APP_CFLAGS += -DPROF_NET 26 | APP_CFLAGS += -DHEIGHT=$(HEIGHT) 27 | APP_CFLAGS += -DWIDTH=$(WIDTH) 28 | APP_CFLAGS += -DCHANNELS=$(CHANNELS) 29 | #APP_CFLAGS += -DDEBUG 30 | APP_CFLAGS += -mhwloopalign 31 | APP_LDFLAGS += -lm 32 | 33 | # STATISTICS 34 | APP_CFLAGS += -DSTATS 35 | 36 | # Sources 37 | # APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 38 | 39 | include $(RULES_DIR)/pmsis_rules.mk 40 | -------------------------------------------------------------------------------- /tests/test_DMA_tensor/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching training procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Net training successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_DMA_tensor/net.h: -------------------------------------------------------------------------------- 1 | // Tensor sizes 2 | #define TENSOR_SIZE (WIDTH*HEIGHT*CHANNELS) 3 | 4 | // PULP DEFINES 5 | #define STACK_SIZE 4096 6 | #define MOUNT 1 7 | #define UNMOUNT 0 8 | #define CID 0 9 | 10 | void net_step (); 
-------------------------------------------------------------------------------- /tests/test_ResNet_CIFAR10/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | cifar* 3 | log.csv 4 | init-defines.h 5 | io_data.h 6 | -------------------------------------------------------------------------------- /tests/test_ResNet_CIFAR10/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /** 21 | * Configures cluster, then calls net_step() 22 | **/ 23 | 24 | int main (void) { 25 | 26 | 27 | printf("\nHello sir.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Exiting DNN Training.\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_ResNet_CIFAR10/net.h: -------------------------------------------------------------------------------- 1 | // PULP Defines 2 | #define STACK_SIZE 4096 3 | 4 | // Tolerance to check updated output 5 | #define TOLERANCE 1e-12 6 | 7 | // Training functions 8 | void DNN_init(); 9 | void compute_loss(int idx); 10 | void update_weights(); 11 | void forward(); 12 | void backward(); 13 | void net_step(); 14 | 15 | // Print and check functions 16 | void print_output(); 17 | void check_post_training_output(); 18 | 19 | // DMA managment functions 20 | void load_input(void * src_blob, uint8_t data_diff_both); 21 | void load_output(void * src_blob, uint8_t data_diff_both); 22 | void load_coeff(void * src_blob, uint8_t data_diff_both); 23 | void store_output(void * dest_blob, uint8_t data_diff_both); 24 | void store_input(void * dest_blob, uint8_t data_diff_both); 25 | void store_coeff(void * dest_blob, uint8_t data_diff_both); 26 | void copy_struct_param(unsigned int from, unsigned int to, int size); 27 | void get_input_dim(void * b); 28 | void get_output_dim(void * b); 29 | void get_weight_dim(void * b); 30 | void 
reset_arguments(); 31 | void update_blob(); 32 | void PrintBlob(void * b, int step); 33 | void reset_dim(); 34 | #define MAX_IN_SIZE 6400 35 | #define MAX_WGT_SIZE 6400 36 | #define MAX_SIZE 27664 37 | -------------------------------------------------------------------------------- /tests/test_ResNet_CIFAR10/readme.txt: -------------------------------------------------------------------------------- 1 | To compile the application, run "make clean get_golden all run". 2 | To modify the hyperparameters (learning rate, epochs, batch size still not implemented), edit the variables inside "utils/GM.py". 3 | 4 | N.B: this project needs to have an L2 of at least 8 MB in GVSoC, please edit GVSoC's memory map to fit this requirement. 5 | To do so, copy & paste the content of "pulp-sdk-configs/"'s files in the respective files of your pulp.sdk (THIS VERSION IS PREFERABLE: https://github.com/pulp-platform/pulp-sdk/releases/tag/2021.09.15): 6 | 7 | 1) pulp-sdk/rtos/pulpos/kernel/chips/pulp/link.ld 8 | 9 | 2) pulp-sdk/tools/gap-configs/configs/config/pulp.json 10 | -------------------------------------------------------------------------------- /tests/test_act/.gitignore: -------------------------------------------------------------------------------- 1 | init_defines.h 2 | act_data.h 3 | log.txt 4 | BUILD/ -------------------------------------------------------------------------------- /tests/test_act/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_act 2 | 3 | # ~~~~~~~~~~ User settings ~~~~~~~~~~ 4 | # Standard matmul arguments 5 | IN_H?=4 6 | IN_W?=4 7 | IN_C?=8 8 | VALUE?=0.05 9 | 10 | # General arguments 11 | DATA_TYPE?='FP16' # FP32 or FP16 12 | NUM_CORES?=8 13 | 14 | # ~~~~~~~~~~ End of user settings ~~~~~~~~~~ 15 | 16 | TRAIN_LIB=../../lib 17 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 18 | APP_SRCS += main.c net.c 19 | 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 21 | APP_SRCS += 
$(TRAIN_LIB_SRCS)/pulp_act_fp32.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp16.c 24 | 25 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 26 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 27 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 28 | APP_CFLAGS += -DPROF_NET 29 | APP_CFLAGS += -DIN_H=$(IN_H) 30 | APP_CFLAGS += -DIN_W=$(IN_W) 31 | APP_CFLAGS += -DIN_C=$(IN_C) 32 | APP_CFLAGS += -DVALUE=$(VALUE) 33 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 34 | #APP_CFLAGS += -DDEBUG 35 | 36 | APP_LDFLAGS += -lm 37 | 38 | # STATISTICS 39 | APP_CFLAGS += -DSTATS 40 | 41 | get_golden: 42 | rm -rf BUILD/ 43 | python3 ./utils/GM.py --in_c $(IN_C) --in_h $(IN_H) --in_w $(IN_W) --value $(VALUE) --data_type $(DATA_TYPE) 44 | 45 | include $(RULES_DIR)/pmsis_rules.mk 46 | -------------------------------------------------------------------------------- /tests/test_act/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | printf("\nHello there.\nConfiguring cluster..\n"); 26 | 27 | // Configure cluster 28 | struct pi_device cluster_dev; 29 | struct pi_cluster_conf cl_conf; 30 | struct pi_cluster_task cl_task; 31 | 32 | pi_cluster_conf_init(&cl_conf); 33 | pi_open_from_conf(&cluster_dev, &cl_conf); 34 | if (pi_cluster_open(&cluster_dev)) { 35 | return -1; 36 | } 37 | 38 | printf("\nLaunching activations evaluation...\n\n"); 39 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 40 | 41 | printf("\nActivation function evaluation successfully terminated :)\n"); 42 | pi_cluster_close(&cluster_dev); 43 | 44 | pmsis_exit(0); 45 | } 46 | -------------------------------------------------------------------------------- /tests/test_act/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
import torch
from torch.autograd import Function


def fastexp_gist(x: torch.Tensor) -> torch.Tensor:
    """Approximate elementwise exp() via the float32 bit-trick.

    Scales and offsets the input so that, when the resulting float value is
    reinterpreted as raw IEEE-754 float32 bits, it approximates exp(x)
    (Schraudolph-style fast exponential). Values are clamped below 2**23 to 0
    (flushing would-be denormal bit patterns) and above 0x7F800000 to avoid
    producing inf/NaN bit patterns.

    NOTE(review): the uint32 round-trip assumes the running torch build
    supports casting to ``torch.uint32`` — confirm on the target setup.
    """
    x_copy = x.type(torch.float32)
    x_copy = x_copy * 12102203.17133801 + 1064986823.010288
    x_copy = torch.where(x_copy < 8388608, 0, x_copy).type(torch.float32)
    x_copy = torch.where(x_copy > 2139095040, 2139095040, x_copy).type(torch.float32)

    # Reinterpret the integer-valued float as raw float32 bits.
    return x_copy.type(torch.uint32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax over the last dimension using the fast-exp approximation."""

    @staticmethod
    def forward(ctx, input):
        """Numerically-stabilized softmax: exp(x - max) / sum(exp(x - max))."""
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes))
        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Softmax gradient: (g - sum(g * y, -1, keepdim)) * y.

        Generalization: the original reduced over a hard-coded dim 2 and
        materialized the sum with .repeat(), which only worked for 3-D
        inputs. Reducing over the last dim with keepdim and relying on
        broadcasting yields the identical result for 3-D tensors and
        extends to inputs of any rank.
        """
        out_data = ctx.saved_tensors[0]
        sums = torch.sum(grad_output * out_data, -1, keepdim=True)
        grad_input = (grad_output - sums) * out_data

        return grad_input
-------------------------------------------------------------------------------- /tests/test_batchnorm_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | data.h 3 | init-defines.h 4 | io_data.h 5 | readme.txt 6 | log.txt -------------------------------------------------------------------------------- /tests/test_batchnorm_fp32/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_batchnorm_fp32 2 | 3 | # User Section 4 | CI?=4 5 | HI?=4 6 | WI?=4 7 | 8 | BATCH_SIZE?=2 9 | NUM_CORES?=2 10 | HWC?=0 11 | 12 | STEP?='FORWARD' # 'FORWARD' or 'BACKWARD_GRAD' or 'BACKWARD_ERROR' 13 | # End of User Section 14 | 15 | TRAIN_LIB=../../lib 16 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 17 | APP_SRCS = main.c net.c 18 | 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_batchnorm_fp32.c 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp16.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 26 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 30 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp32.c 31 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp16.c 32 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c 33 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp16.c 34 | 35 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 36 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 37 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 38 | APP_CFLAGS += -DPROF_NET 39 | APP_CFLAGS += -DOPTIMIZE 40 | APP_CFLAGS += -DDEBUG 41 | 42 | APP_LDFLAGS += -lm 43 | 44 | # STATISTICS 45 | APP_CFLAGS += -DSTATS 46 | 47 | get_golden: 48 | python3 ./utils/GM.py -CI ${CI} -HI ${HI} -WI ${WI} -NUM_CORES ${NUM_CORES} -STEP ${STEP} -BATCH_SIZE ${BATCH_SIZE} 49 | 50 | include $(RULES_DIR)/pmsis_rules.mk 51 | -------------------------------------------------------------------------------- /tests/test_batchnorm_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2025 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "stdio.h" 19 | #include "stdlib.h" 20 | #include "net.h" 21 | 22 | /** 23 | * Configures cluster, then calls net_step() 24 | **/ 25 | int main(void) { 26 | printf("\nConfiguring cluster..\n"); 27 | 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | 36 | if (pi_cluster_open(&cluster_dev)) { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Exiting DNN Training.\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_batchnorm_fp32/net.h: -------------------------------------------------------------------------------- 1 | // PULP Defines 2 | #define STACK_SIZE 4096 3 | 4 | // Tolerance to check updated output 5 | #define TOLERANCE 1e-6 6 | 7 | 8 | // Training functions 9 | void DNN_init(); 10 | 11 | void compute_loss(); 12 | 13 | void update_weights(); 14 | 15 | void forward(); 16 | 17 | void backward(); 18 | 19 | void net_step(); 20 | 21 | 22 | // Print and check functions 23 | void print_output(); 24 | 25 | void check_post_training_output(); 26 | -------------------------------------------------------------------------------- /tests/test_blocktranspose/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_blocktranspose/Makefile: -------------------------------------------------------------------------------- 1 | APP = blocktransposer 2 | 3 | # User code 4 | NUM_CORES?=8 5 | CH_IN?=2 6 | CH_OUT?=3 7 | HK?=2 8 | WK?=2 9 | HWC_LAY?=1 # =0 
use CHW layout, =1 use HWC layout for the weights 10 | APP_CFLAGS += -DPRINT_MATS 11 | # End of user code 12 | 13 | 14 | TRAIN_LIB=../../lib 15 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 16 | APP_SRCS = main.c net.c 17 | #APP_CFLAGS += -DDEBUG 18 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -O3 -g3 -mno-memcpy 20 | APP_CFLAGS += -DFABRIC 21 | APP_CFLAGS += -DCLUSTER 22 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 23 | APP_CFLAGS += -DPROF_NET 24 | APP_CFLAGS += -mhwloopalign 25 | APP_CFLAGS += -DTin_Cout=$(CH_OUT) 26 | APP_CFLAGS += -DTin_Cin=$(CH_IN) 27 | APP_CFLAGS += -DTin_Hk=$(HK) 28 | APP_CFLAGS += -DTin_Wk=$(WK) 29 | APP_CFLAGS += -DHWC_LAYOUT=$(HWC_LAY) 30 | APP_LDFLAGS += -lm 31 | 32 | # STATISTICS 33 | APP_CFLAGS += -DSTATS 34 | 35 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 36 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 37 | 38 | include $(RULES_DIR)/pmsis_rules.mk 39 | -------------------------------------------------------------------------------- /tests/test_blocktranspose/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching transposition procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Transposition successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- 
/tests/test_blocktranspose/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | // // In data 8 | // #define Tin_Cout 16 9 | // #define Tin_Cin 3 10 | // #define Tin_Hk 3 11 | // #define Tin_Wk 3 12 | 13 | void net_step (); -------------------------------------------------------------------------------- /tests/test_broadcast_add/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | test_data.h 4 | -------------------------------------------------------------------------------- /tests/test_broadcast_add/Makefile: -------------------------------------------------------------------------------- 1 | APP = transposer 2 | 3 | # User code 4 | DIMS_1 = 17 1 31 5 | DIMS_2 = 23 1 6 | 7 | NUM_CORES = 8 8 | DATA_TYPE = 32 9 | 10 | #APP_CFLAGS += -DPRINT_MATS 11 | # End of user code 12 | 13 | 14 | TRAIN_LIB=../../lib 15 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 16 | APP_SRCS = main.c net.c 17 | #APP_CFLAGS += -DDEBUG 18 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -O3 -g3 -mno-memcpy 20 | APP_CFLAGS += -DFABRIC 21 | APP_CFLAGS += -DCLUSTER 22 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 23 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 24 | APP_CFLAGS += -DPROF_NET 25 | APP_CFLAGS += -mhwloopalign 26 | APP_LDFLAGS += -lm 27 | 28 | # STATISTICS 29 | APP_CFLAGS += -DSTATS 30 | 31 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 32 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 33 | 34 | include $(RULES_DIR)/pmsis_rules.mk 35 | 36 | get_golden: 37 | rm -rf BUILD/ 38 | python3 utils/GM.py --dims_1 $(DIMS_1) --dims_2 $(DIMS_2) --dtype $(DATA_TYPE) 39 | -------------------------------------------------------------------------------- /tests/test_broadcast_add/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main(void) { 9 | printf("\nHello there.\nConfiguring cluster..\n"); 10 | 11 | // Configure cluster 12 | struct pi_device cluster_dev; 13 | struct pi_cluster_conf cl_conf; 14 | struct pi_cluster_task cl_task; 15 | 16 | pi_cluster_conf_init(&cl_conf); 17 | pi_open_from_conf(&cluster_dev, &cl_conf); 18 | 19 | if (pi_cluster_open(&cluster_dev)) { 20 | return -1; 21 | } 22 | 23 | printf("\nLaunching broadcast addition procedure...\n"); 24 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, broadcast_add_test, NULL)); 25 | 26 | printf("Broadcast addition finished!\n"); 27 | pi_cluster_close(&cluster_dev); 28 | 29 | pmsis_exit(0); 30 | } 31 | -------------------------------------------------------------------------------- /tests/test_broadcast_add/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "net.h" 4 | #include "stats.h" 5 | 6 | #include "test_data.h" 7 | 8 | #include "tensor_checkers.h" 9 | 10 | 11 | 
void broadcast_add_test() { 12 | #ifdef PROF_NET 13 | INIT_STATS(); 14 | PRE_START_STATS(); 15 | #endif 16 | printf("Executing on %d cores.\n", NUM_CORES); 17 | 18 | #if DATA_TYPE == 32 19 | struct array_broadcast_sum_fp32_args args; 20 | printf("WORKING ON FP32\n"); 21 | #elif DATA_TYPE == 16 22 | struct array_broadcast_sum_fp16_args args; 23 | printf("WORKING ON FP16\n"); 24 | #endif 25 | 26 | // Get arguments 27 | args.op_1 = IN_MATRIX_1; 28 | args.op_2 = IN_MATRIX_2; 29 | args.dest = OUT_MATRIX; 30 | 31 | args.op_1_dims = DIMS_1; 32 | args.op_2_dims = DIMS_2; 33 | 34 | args.op_1_dims_len = N_DIMS_1; 35 | args.op_2_dims_len = N_DIMS_2; 36 | 37 | #ifdef PROF_NET 38 | START_STATS(); 39 | #endif 40 | 41 | // Perform transposition 42 | #if DATA_TYPE == 32 43 | pi_cl_team_fork(NUM_CORES, array_broadcast_sum_fp32, &args); 44 | #elif DATA_TYPE == 16 45 | pi_cl_team_fork(NUM_CORES, array_broadcast_sum_fp16, &args); 46 | #endif 47 | 48 | // Stop stats 49 | #ifdef PROF_NET 50 | STOP_STATS(); 51 | #endif 52 | 53 | mean_error_checker(args.dest, TEST_OUT, TOTAL_SIZE_OUT); 54 | elementwise_checker(args.dest, TEST_OUT, TOTAL_SIZE_OUT); 55 | 56 | return; 57 | } 58 | -------------------------------------------------------------------------------- /tests/test_broadcast_add/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | void broadcast_add_test(); 8 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | matmul_data.h 4 | net_args.h 5 | dis.S 6 | fastest_matmul.txt 7 | test_data.h 8 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/Makefile: 
-------------------------------------------------------------------------------- 1 | APP = test_broadcast_matmul 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | DIMS_1 = 2 1 3 2 5 6 | DIMS_2 = 5 1 5 3 7 | 8 | NUM_CORES = 8 9 | DATA_TYPE = 16 # 32 for fp32, 16 for fp16 10 | # End of user settings 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 18 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 20 | 21 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 22 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 23 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 24 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 25 | APP_CFLAGS += -DPROF_NET 26 | 27 | APP_LDFLAGS += -lm 28 | 29 | # STATISTICS 30 | APP_CFLAGS += -DSTATS 31 | 32 | get_golden: 33 | rm -rf BUILD/ 34 | python3 utils/GM.py --dims_1 $(DIMS_1) --dims_2 $(DIMS_2) --dtype $(DATA_TYPE) 35 | 36 | include $(RULES_DIR)/pmsis_rules.mk 37 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | 5 | int main(void) { 6 | printf("\nHello there.\nConfiguring cluster..\n"); 7 | 8 | // Configure cluster 9 | struct pi_device cluster_dev; 10 | struct pi_cluster_conf cl_conf; 11 | struct pi_cluster_task cl_task; 12 | 13 | pi_cluster_conf_init(&cl_conf); 14 | pi_open_from_conf(&cluster_dev, &cl_conf); 15 | 16 | if (pi_cluster_open(&cluster_dev)) { 17 | return -1; 18 | } 19 | 20 | printf("\nLaunching broadcast matmul evaluation...\n\n"); 21 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, broadcast_matmul_test, NULL)); 22 | 23 | printf("\nMatmul evaluation successfully terminated :)\n"); 24 | 
pi_cluster_close(&cluster_dev); 25 | 26 | pmsis_exit(0); 27 | } 28 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "net.h" 4 | #include "stats.h" 5 | 6 | #include "test_data.h" 7 | 8 | #include "tensor_checkers.h" 9 | 10 | 11 | void broadcast_matmul_test() { 12 | #ifdef PROF_NET 13 | INIT_STATS(); 14 | PRE_START_STATS(); 15 | #endif 16 | printf("Executing on %d cores.\n", NUM_CORES); 17 | 18 | #if DATA_TYPE == 32 19 | struct broadcastMatMul_args_fp32 args; 20 | printf("WORKING ON FP32\n"); 21 | #elif DATA_TYPE == 16 22 | struct broadcastMatMul_args_fp16 args; 23 | printf("WORKING ON FP16\n"); 24 | #endif 25 | 26 | // Get arguments 27 | args.A = IN_MATRIX_1; 28 | args.B = IN_MATRIX_2; 29 | args.C = OUT_MATRIX; 30 | 31 | args.A_dims = DIMS_1; 32 | args.B_dims = DIMS_2; 33 | 34 | args.A_dims_len = N_DIMS_1; 35 | args.B_dims_len = N_DIMS_2; 36 | 37 | #ifdef PROF_NET 38 | START_STATS(); 39 | #endif 40 | 41 | // Perform transposition 42 | #if DATA_TYPE == 32 43 | mm_broadcast_fp32(&args); 44 | #elif DATA_TYPE == 16 45 | mm_broadcast_fp16(&args); 46 | #endif 47 | 48 | // Stop stats 49 | #ifdef PROF_NET 50 | STOP_STATS(); 51 | #endif 52 | 53 | mean_error_checker(args.C, TEST_OUT, TOTAL_SIZE_OUT); 54 | elementwise_checker(args.C, TEST_OUT, TOTAL_SIZE_OUT); 55 | 56 | return; 57 | } 58 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | void broadcast_matmul_test(); 8 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/utils/.gitignore: 
-------------------------------------------------------------------------------- 1 | __pycache__/ 2 | -------------------------------------------------------------------------------- /tests/test_conv2d_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | conv2d-grads.h 4 | conv2d-output.h 5 | init-defines.h 6 | input-image.h 7 | step-check.h 8 | runs.txt -------------------------------------------------------------------------------- /tests/test_conv2d_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_conv2d_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pulp_train_defines.h" 18 | #include "step-check.h" 19 | 20 | // User profiling flags 21 | 22 | #if defined(FORWARD) && !defined(DEBUG) 23 | #define PROF_FWD 24 | #endif 25 | 26 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG) 27 | #define PROF_BKWD 28 | #endif 29 | 30 | // Net sizes 31 | 32 | // CONV2D 33 | #define Tout_H_l1 ((Tin_H_l1-Tker_H_l1+PAD_U+PAD_D)/STRIDE_H + 1) 34 | #define Tout_W_l1 ((Tin_W_l1-Tker_W_l1+PAD_L+PAD_R)/STRIDE_W + 1) 35 | 36 | // Tensor checksum definition 37 | #define CHECK_TOLERANCE 1e-3 38 | #define ERROR_TOLERANCE 1e-3 39 | 40 | // PULP DEFINES 41 | #define STACK_SIZE 4096 42 | #define MOUNT 1 43 | #define UNMOUNT 0 44 | #define CID 0 45 | 46 | // Support functions 47 | static inline void forward(); 48 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 49 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 50 | static inline void train(); 51 | // Main function 52 | void net_step (); 53 | 54 | -------------------------------------------------------------------------------- /tests/test_conv2d_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_conv2d_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | conv2d-grads.h 4 | conv2d-output.h 5 | init-defines.h 6 | input-image.h 7 | step-check.h 8 | runs.txt 9 | log.c -------------------------------------------------------------------------------- /tests/test_conv2d_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_conv2d_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "step-check.h" 18 | 19 | // User profiling flags 20 | 21 | #if defined(FORWARD) && !defined(DEBUG) 22 | #define PROF_FWD 23 | #endif 24 | 25 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG) 26 | #define PROF_BKWD 27 | #endif 28 | 29 | // Net sizes 30 | 31 | // CONV2D 32 | #define Tout_H_l1 ((Tin_H_l1-Tker_H_l1+PAD_U+PAD_D)/STRIDE_H + 1) 33 | #define Tout_W_l1 ((Tin_W_l1-Tker_W_l1+PAD_L+PAD_R)/STRIDE_W + 1) 34 | 35 | // Tensor checksum definition 36 | #define CHECK_TOLERANCE 1e-4 37 | #define ERROR_TOLERANCE 1e-4 38 | 39 | // PULP DEFINES 40 | #define STACK_SIZE 4096 41 | #define MOUNT 1 42 | #define UNMOUNT 0 43 | #define CID 0 44 | 45 | // Support functions 46 | static inline void forward(); 47 | static inline void compare_tensors(float *A, float *B, int length); 48 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 49 | static inline void train(); 50 | // Main function 51 | void net_step (); 52 | 53 | -------------------------------------------------------------------------------- /tests/test_conv_pw_dw_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | pylog.txt 4 | dw-grads.h 5 | dw-output.h 6 | init-defines.h 7 | input-image.h 8 | pw-grads.h 9 | pw-output.h 10 | step-check.h 11 | utils/GM_old.py 12 | README.md 13 | runs.txt -------------------------------------------------------------------------------- /tests/test_conv_pw_dw_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_conv_pw_dw_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_conv_pw_dw_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | pylog.txt 4 | dw-grads.h 5 | dw-output.h 6 | init-defines.h 7 | input-image.h 8 | pw-grads.h 9 | pw-output.h 10 | step-check.h 11 | utils/GM_old.py 12 | README.md 13 | runs.txt 14 | utils/__pycache__/ -------------------------------------------------------------------------------- 
/tests/test_conv_pw_dw_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_cordic/.gitignore: -------------------------------------------------------------------------------- 1 | cordic_data.h 2 | log.txt 3 | BUILD/ -------------------------------------------------------------------------------- /tests/test_cordic/Makefile: 
-------------------------------------------------------------------------------- 1 | APP = test_cordic 2 | 3 | # User settings 4 | 5 | # General arguments 6 | N_TEST ?= 200 7 | NUM_CORES ?= 8 8 | # End of user settings 9 | 10 | TRAIN_LIB=../../lib 11 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 12 | APP_SRCS += main.c net.c 13 | 14 | 15 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 17 | 18 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 20 | APP_CFLAGS += -DN_TEST=$(N_TEST) 21 | APP_CFLAGS += -DNUM_CORES 22 | 23 | APP_LDFLAGS += -lm 24 | 25 | # STATISTICS 26 | APP_CFLAGS += -DSTATS 27 | 28 | get_golden: 29 | python3 ./utils/GM.py --n_test=$(N_TEST) 30 | 31 | include $(RULES_DIR)/pmsis_rules.mk 32 | -------------------------------------------------------------------------------- /tests/test_cordic/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching cordic function evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nCordic function evaluation successfully terminated\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_cordic/net.h: -------------------------------------------------------------------------------- 1 | void net_step(); 2 | -------------------------------------------------------------------------------- /tests/test_cordic/utils/GM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import dump_utils as dump 3 | import argparse 4 | import math 5 | 6 | parser = argparse.ArgumentParser("Cordic test") 7 | parser.add_argument( '--n_test', type=int, default=300) 8 | args = parser.parse_args() 9 | 10 | n_test = args.n_test 11 | 12 | # angles = torch.empty(n_test).uniform_(-math.pi/2, math.pi/2) 13 | angles = torch.empty(n_test).uniform_(0, 10) 14 | 15 | cos = torch.cos(angles) 16 | sin = torch.sin(angles) 17 | 18 | # Write data to file 19 | f = open("cordic_data.h", "w") 20 | # f.write("#define N_TEST "+str(n_test)+"\n") 21 | f.write("PI_L1 float gm_angles["+str(n_test)+"] = {"+dump.tensor_to_string(angles)+"};\n") 22 | f.write("PI_L2 float gm_cos["+str(n_test)+"] = {"+dump.tensor_to_string(cos)+"};\n") 23 | 
f.write("PI_L2 float gm_sin["+str(n_test)+"] = {"+dump.tensor_to_string(sin)+"};\n") 24 | 25 | f.close() 26 | 27 | 28 | def print_constant(N): 29 | print("atan_pow_2: \n") 30 | for i in range(0, N): 31 | print(f"{math.atan(2**(-i))}, ") 32 | 33 | sf = 1 34 | for i in range(0, N): 35 | sf *= math.cos(math.atan(2**(-i))) 36 | 37 | print(f"\nscaling factor: {sf}") -------------------------------------------------------------------------------- /tests/test_dropout/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | dropout_data.h 4 | net_args.h -------------------------------------------------------------------------------- /tests/test_dropout/Makefile: -------------------------------------------------------------------------------- 1 | APP = rng 2 | 3 | # User code 4 | NUM_CORES?=8 5 | PROBABILITY?=0.23 6 | SEED?=0 7 | SIZE?=100 8 | USE_MASK?=0 9 | DATA_TYPE?=float # 'float' or 'fp16' 10 | # End of user code 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS = main.c net.c 15 | #APP_CFLAGS += -DDEBUG 16 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 17 | APP_CFLAGS += -O3 -g3 -mno-memcpy 18 | APP_CFLAGS += -DFABRIC 19 | APP_CFLAGS += -DCLUSTER 20 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 21 | APP_CFLAGS += -DPROF_NET 22 | APP_CFLAGS += -mhwloopalign 23 | APP_CFLAGS += -DPROBABILITY=$(PROBABILITY) 24 | APP_CFLAGS += -DSEED=$(SEED) #sneed 25 | APP_CFLAGS += -DSIZE=$(SIZE) 26 | APP_CFLAGS += -DUSE_MASK=$(USE_MASK) 27 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 28 | APP_LDFLAGS += -lm 29 | 30 | 31 | # STATISTICS 32 | APP_CFLAGS += -DSTATS 33 | 34 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 35 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 36 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_random.c 37 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_dropout_fp32.c 38 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_dropout_fp16.c 39 | 40 | include $(RULES_DIR)/pmsis_rules.mk 41 | 42 | get_golden: 43 | python3 utils/GM.py --in_size $(SIZE) --type $(DATA_TYPE) --prob $(PROBABILITY) 44 | -------------------------------------------------------------------------------- /tests/test_dropout/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching random number generation procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Dropout successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | 
-------------------------------------------------------------------------------- /tests/test_dropout/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "stats.h" 4 | #include "net.h" 5 | 6 | #include "net_args.h" 7 | #include "dropout_data.h" 8 | #include 9 | 10 | 11 | // Main function 12 | void net_step () 13 | { 14 | #ifdef PROF_NET 15 | INIT_STATS(); 16 | PRE_START_STATS(); 17 | #endif 18 | 19 | #ifdef FLOAT16 20 | struct dropout_args_fp16 args; 21 | args.seed = SEED; 22 | args.probability = PROBABILITY; 23 | args.input = input; 24 | args.mask = mask; 25 | args.use_mask = USE_MASK; 26 | args.size = IN_SIZE; 27 | 28 | printf("Dropout function:\n"); 29 | #ifdef PROF_NET 30 | START_STATS(); 31 | #endif 32 | 33 | pi_cl_team_fork(NUM_CORES, pulp_dropout_fp16_cl, &args); 34 | 35 | #ifdef PROF_NET 36 | STOP_STATS(); 37 | #endif 38 | #endif 39 | 40 | #ifdef FLOAT32 41 | struct dropout_args_fp32 args; 42 | args.seed = SEED; 43 | args.probability = PROBABILITY; 44 | args.input = input; 45 | args.mask = mask; 46 | args.use_mask = USE_MASK; 47 | args.size = IN_SIZE; 48 | 49 | printf("Dropout function:\n"); 50 | #ifdef PROF_NET 51 | START_STATS(); 52 | #endif 53 | 54 | pi_cl_team_fork(NUM_CORES, pulp_dropout_fp32_cl, &args); 55 | 56 | #ifdef PROF_NET 57 | STOP_STATS(); 58 | #endif 59 | #endif 60 | 61 | int count = 0; 62 | 63 | for(int i = 0; i < IN_SIZE; i++){ 64 | //printf("%f\n", input[i]); 65 | if(input[i]==0.0f) 66 | count++; 67 | } 68 | 69 | printf("%d\n", count); 70 | printf("Percentage of dropped out values: %f\%\n", (count*100.0f/SIZE)); 71 | 72 | return; 73 | } 74 | -------------------------------------------------------------------------------- /tests/test_dropout/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | #include 
"pulp_train_defines.h" 8 | 9 | void net_step (); -------------------------------------------------------------------------------- /tests/test_dropout/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_gelu_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | gelu-output.h 3 | init-defines.h 4 | input-sequence.h 5 | mhsa-grads.h 6 | mhsa-output.h 7 | step-check.h 8 | dis.S -------------------------------------------------------------------------------- /tests/test_gelu_fp16/Makefile: -------------------------------------------------------------------------------- 1 | APP = gelu_fp16 2 | 3 | # User settings 4 | IN_H?=7 5 | IN_W?=7 6 | IN_CH?=160 7 | OUT_CH?=160 8 | NUM_CORES?=8 9 | STEP?='FORWARD' # Possible steps: 'FORWARD', 'BACKWARD' 10 | APP_CFLAGS += -DOPTIMIZE 11 | MATMUL_TYPE?=3 12 | NUM_MATMULS?=24 # When profiling with multiple matmul algorithms 13 | NUM_SIZES?=3 # When profiling multiple sizes of the network 14 | # End of user settings 15 | 16 | TRAIN_LIB=../../lib 17 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 18 | APP_SRCS = main.c net.c 19 | 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp16.c 22 | 23 | DATA_TYPE?='fp16' 24 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 25 | APP_CFLAGS += -O3 -g 26 | APP_CFLAGS += -DFABRIC 27 | APP_CFLAGS += -DCLUSTER 28 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 29 | APP_CFLAGS += -DPROF_NET 30 | APP_CFLAGS += -DMEMOCC_COMP 31 | APP_CFLAGS += -mhwloopalign 32 | APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE} 33 | #APP_CFLAGS += -DDEBUG 34 | APP_LDFLAGS += -lm 35 | 36 | # STATISTICS 37 | APP_CFLAGS += -DSTATS 38 | 39 | get_golden: 40 | python3 ./utils/GM.py --step $(STEP) --in_width $(IN_W) --in_height $(IN_H) --ch_in ${IN_CH} --ch_out ${OUT_CH} 41 | 42 | profile_all_optim: 43 | python3 ./utils/profile_optimized.py --num_matmuls ${NUM_MATMULS} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --in_width $(IN_W) --in_height $(IN_H) --ch_in ${IN_CH} --ch_out ${OUT_CH} --n_heads $(N_HEADS) --att_dim $(ATT_DIM) 44 | 45 | profile_all_sizes: 46 | python3 ./utils/profile_sizes.py --num_sizes ${NUM_SIZES} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --matmul_type ${MATMUL_TYPE} 47 | 48 | include $(RULES_DIR)/pmsis_rules.mk 49 | -------------------------------------------------------------------------------- /tests/test_gelu_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | pi_cluster_conf_init(&cl_conf); 36 | pi_open_from_conf(&cluster_dev, &cl_conf); 37 | if (pi_cluster_open(&cluster_dev)) 38 | { 39 | return -1; 40 | } 41 | 42 | printf("\nLaunching training procedure...\n"); 43 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 44 | 45 | 46 | printf("\nNet training successful!\n"); 47 | pi_cluster_close(&cluster_dev); 48 | 49 | pmsis_exit(0); 50 | } 51 | -------------------------------------------------------------------------------- /tests/test_gelu_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pulp_train_defines.h" 18 | #include "step-check.h" 19 | 20 | // User profiling flags 21 | 22 | //#define DEBUG 23 | 24 | #if defined(FORWARD) && !defined(DEBUG) 25 | #define PROF_FWD 26 | #endif 27 | 28 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD) || defined(BACKWARD)) && !defined(DEBUG) 29 | #define PROF_BCKWD 30 | #endif 31 | 32 | // Net sizes 33 | 34 | #define Tker_l0 (Tin_l0*Tout_l0) 35 | 36 | // Tensor checksum definition 37 | #define CHECK_TOLERANCE 0.001 38 | #define ERROR_TOLERANCE 0.001 39 | 40 | // PULP DEFINES 41 | #define STACK_SIZE 4096 42 | #define MOUNT 1 43 | #define UNMOUNT 0 44 | #define CID 0 45 | 46 | // Support functions 47 | static inline void forward(); 48 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 49 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 50 | static inline void train(); 51 | // Main function 52 | void net_step (); 53 | 54 | -------------------------------------------------------------------------------- /tests/test_gelu_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_gelu_fp16/utils/test_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import float32 3 | from torch import nn 4 | from torch import Tensor 5 | from torch import cuda 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | class TestModel(nn.Module): 10 | """Just testing the GELU activation""" 11 | def __init__(self): 12 | super().__init__() 13 | self.act = nn.GELU(approximate='tanh') 14 | self.scores = None # for visualization 15 | 16 | def forward(self, x): 17 | x = self.act(x) 18 | return x -------------------------------------------------------------------------------- /tests/test_im2col/.gitignore: 
-------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_im2col/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching training procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Net training successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_im2col/net.h: -------------------------------------------------------------------------------- 1 | // Tensor sizes 2 | #define Tker_H_l1 Tker_W_l1 3 | 4 | #define Tout_W_l1 ((int)(Tin_W_l1-Tker_W_l1+LPAD+RPAD+WSTR)/WSTR) 5 | #define Tout_H_l1 ((int)(Tin_H_l1-Tker_H_l1+UPAD+DPAD+HSTR)/HSTR) 6 | 7 | #define weight_init 0.1 8 | 9 | #define PAD_BW (Tker_W_l1-1) 10 | 11 | #define i2c_b_size (Tker_H_l1*Tker_W_l1*Tin_C_l1*(Tin_H_l1-Tker_H_l1+UPAD+DPAD+HSTR)/HSTR*(Tin_W_l1-Tker_W_l1+LPAD+RPAD+WSTR)/WSTR) 12 | #define i2c_b_size_bw (Tker_H_l1*Tker_W_l1*Tout_C_l1*Tin_H_l1*Tin_W_l1) 13 | 14 | // Tensor checksum definition 15 | #define ABS(x) ((x)>0?(x):(-(x))) 16 | #define CHECK_TOLERANCE 1e-3 17 | #define ERROR_TOLERANCE 0.01 18 | 19 | // PULP DEFINES 20 | #define STACK_SIZE 4096 21 | #define MOUNT 1 22 | #define UNMOUNT 0 
23 | #define CID 0 24 | 25 | void net_step (); -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | data.h 3 | init-defines.h 4 | io_data.h 5 | readme.txt -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_instnorm_fp16 2 | 3 | # User Section 4 | CI?=8 5 | HI?=4 6 | WI?=4 7 | NUM_CORES?=8 8 | HWC?=0 9 | STEP?='FORWARD' # 'FORWARD' or 'BACKWARD_GRAD' or 'BACKWARD_ERROR' 10 | # End of User Section 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp16.c 18 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 26 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp32.c 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp16.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp16.c 30 | 31 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 32 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 33 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 34 | APP_CFLAGS += -DPROF_NET 35 | APP_CFLAGS += -DOPTIMIZE 36 | 37 | 38 | 39 | APP_LDFLAGS += -lm 40 | 41 | # STATISTICS 42 | APP_CFLAGS += -DSTATS 43 | 44 | get_golden: 45 | python3 ./utils/GM.py -CI ${CI} -HI ${HI} -WI ${WI} -NUM_CORES ${NUM_CORES} -STEP ${STEP} 46 | 47 | include $(RULES_DIR)/pmsis_rules.mk 48 | 49 | 50 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /** 21 | * Configures cluster, then calls net_step() 22 | **/ 23 | 24 | int main (void) { 25 | 26 | 27 | printf("\nHello sir.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Exiting DNN Training.\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/net.h: -------------------------------------------------------------------------------- 1 | // PULP Defines 2 | #define STACK_SIZE 4096 3 | 4 | // Tolerance to check updated output 5 | #define TOLERANCE 1e-6 6 | 7 | // Training functions 8 | void DNN_init(); 9 | void compute_loss(); 10 | void update_weights(); 11 | void forward(); 12 | void backward(); 13 | void net_step(); 14 | 15 | // Print and check functions 16 | void print_output(); 17 | void check_post_training_output(); 18 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/readme.txt: -------------------------------------------------------------------------------- 1 | To compile the application, run "make clean get_golden all run > log.txt". 2 | If running on a board (not GVSoC), add "APP_CFLAGS += -DBOARD" to the user section of the Makefile (profiling of cycles only). 3 | To modify the hyperparameters (learning rate, epochs, batch size still not implemented), 4 | edit the variables inside "utils/GM.py". 
5 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | data.h 3 | init-defines.h 4 | io_data.h 5 | readme.txt 6 | log.txt -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_instnorm_fp32 2 | 3 | # User Section 4 | CI?=8 5 | HI?=4 6 | WI?=4 7 | NUM_CORES?=8 8 | HWC?=0 9 | STEP?='FORWARD' # 'FORWARD' or 'BACKWARD_GRAD' or 'BACKWARD_ERROR' 10 | # End of User Section 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp16.c 18 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 26 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp32.c 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp16.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp16.c 30 | 31 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 32 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 33 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 34 | APP_CFLAGS += -DPROF_NET 35 | APP_CFLAGS += -DOPTIMIZE 36 | 37 | 38 | 39 | APP_LDFLAGS += -lm 40 | 41 | # STATISTICS 42 | APP_CFLAGS += -DSTATS 43 | 44 | get_golden: 45 | python3 ./utils/GM.py -CI ${CI} -HI ${HI} -WI ${WI} -NUM_CORES ${NUM_CORES} -STEP ${STEP} 46 | 47 | include $(RULES_DIR)/pmsis_rules.mk 48 | 49 | 50 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /** 21 | * Configures cluster, then calls net_step() 22 | **/ 23 | 24 | int main (void) { 25 | 26 | 27 | printf("\nHello sir.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Exiting DNN Training.\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/net.h: -------------------------------------------------------------------------------- 1 | // PULP Defines 2 | #define STACK_SIZE 4096 3 | 4 | // Tolerance to check updated output 5 | #define TOLERANCE 1e-6 6 | 7 | // Training functions 8 | void DNN_init(); 9 | void compute_loss(); 10 | void update_weights(); 11 | void forward(); 12 | void backward(); 13 | void net_step(); 14 | 15 | // Print and check functions 16 | void print_output(); 17 | void check_post_training_output(); 18 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/readme.txt: -------------------------------------------------------------------------------- 1 | To compile the application, run "make clean get_golden all run > log.txt". 2 | If running on a board (not GVSoC), add "APP_CFLAGS += -DBOARD" to the user section of the Makefile (profiling of cycles only). 3 | To modify the hyperparameters (learning rate, epochs, batch size still not implemented), 4 | edit the variables inside "utils/GM.py". 
5 | -------------------------------------------------------------------------------- /tests/test_interpolation/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | intp_data.h 4 | net_args.h -------------------------------------------------------------------------------- /tests/test_interpolation/Makefile: -------------------------------------------------------------------------------- 1 | APP = interpolation_fp32 2 | 3 | # User code 4 | NUM_CORES?=1 5 | DATA_TYPE?='float' # 'float' or 'bfloat16' 6 | INTP_TYPE?=0 # 0='NEAREST', 1='BILINEAR' 7 | CH?=1 8 | IN_H?=8 9 | IN_W?=8 10 | OUT_H?=48 11 | OUT_W?=48 12 | # End of user code 13 | 14 | TRAIN_LIB=../../lib 15 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 16 | APP_SRCS = main.c net.c 17 | #APP_CFLAGS += -DDEBUG 18 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -O3 -g3 -mno-memcpy 20 | APP_CFLAGS += -DFABRIC 21 | APP_CFLAGS += -DCLUSTER 22 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 23 | APP_CFLAGS += -DPROF_NET 24 | APP_CFLAGS += -mhwloopalign 25 | APP_CFLAGS += -DCH=$(CH) 26 | APP_CFLAGS += -DIN_H=$(IN_H) 27 | APP_CFLAGS += -DIN_W=$(IN_W) 28 | APP_CFLAGS += -DOUT_H=$(OUT_H) 29 | APP_CFLAGS += -DOUT_W=$(OUT_W) 30 | APP_CFLAGS += -DINTP_TYPE=$(INTP_TYPE) 31 | APP_LDFLAGS += -lm 32 | 33 | # STATISTICS 34 | APP_CFLAGS += -DSTATS 35 | 36 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 37 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 38 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_interpolation_fp32.c 39 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_interpolation_fp16.c 40 | 41 | include $(RULES_DIR)/pmsis_rules.mk 42 | 43 | get_golden: 44 | python3 utils/GM.py --in_ch $(CH) --in_height $(IN_H) --in_width $(IN_W) --out_height $(OUT_H) --out_width $(OUT_W) --type $(DATA_TYPE) 45 | -------------------------------------------------------------------------------- /tests/test_interpolation/main.c: 
-------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching interpolation procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Interpolation successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_interpolation/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | #include "pulp_train_defines.h" 8 | #include "net_args.h" 9 | 10 | // Tensor checksum definition 11 | #define CHECK_TOLERANCE 1e-6 12 | #define ERROR_TOLERANCE 1e-6 13 | 14 | #ifdef FLOAT32 15 | static inline void compare_tensors(float *A, float *B, int length); 16 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 17 | #elif defined(BFLOAT16) 18 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 19 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 20 | #endif 21 | void net_step (); 22 | -------------------------------------------------------------------------------- /tests/test_interpolation/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 
-------------------------------------------------------------------------------- /tests/test_layernorm_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | layer_norm_init_defines.h 3 | layer_norm_input.h 4 | layer_norm_output.h 5 | layer_norm_wb.h 6 | -------------------------------------------------------------------------------- /tests/test_layernorm_fp32/Makefile: -------------------------------------------------------------------------------- 1 | APP = layernorm_fp32 2 | 3 | # User code 4 | NUM_CORES?=8 5 | DATA_TYPE?=fp32 # 'fp32' 6 | 7 | INPUT_WIDTH?=16 8 | INPUT_HEIGHT?=16 9 | # End of user code 10 | 11 | TASK_NAME=sst-2 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS = main.c net.c 15 | 16 | #APP_CFLAGS += -DDEBUG 17 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 18 | APP_CFLAGS += -O3 -g3 -mno-memcpy 19 | APP_CFLAGS += -DFABRIC 20 | APP_CFLAGS += -DCLUSTER 21 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 22 | APP_CFLAGS += -DN_HEADS=$(N_HEADS) 23 | APP_CFLAGS += -DPROF_NET 24 | APP_CFLAGS += -mhwloopalign 25 | APP_LDFLAGS += -lm 26 | 27 | APP_CFLAGS += -DTILE_H=$(TILE_H) 28 | APP_CFLAGS += -DTILE_W=$(TILE_W) 29 | APP_CFLAGS += -DTILE_DIM=$(TILE_DIM) 30 | 31 | APP_CFLAGS += -DOPTIMIZE 32 | APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE} 33 | 34 | # STATISTICS 35 | APP_CFLAGS += -DSTATS 36 | 37 | # =============== SOURCES =============== 38 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_layernorm_fp32.c 39 | 40 | include $(RULES_DIR)/pmsis_rules.mk 41 | 42 | get_golden: 43 | rm -rf BUILD/ 44 | python3 utils/GM.py --data_type $(DATA_TYPE) --input_shape_height $(INPUT_HEIGHT) --input_shape_width $(INPUT_WIDTH) 45 | -------------------------------------------------------------------------------- /tests/test_layernorm_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * 
Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * Configures cluster, then calls net_step() 25 | */ 26 | int main() { 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | 29 | // Configure cluster 30 | struct pi_device cluster_dev; 31 | struct pi_cluster_conf cl_conf; 32 | struct pi_cluster_task cl_task; 33 | 34 | pi_cluster_conf_init(&cl_conf); 35 | pi_open_from_conf(&cluster_dev, &cl_conf); 36 | if (pi_cluster_open(&cluster_dev)) { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nNet training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_layernorm_fp32/net.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by diaco on 26/10/2024. 
3 | // 4 | 5 | #ifndef PULP_TRAINLIB_NET_H 6 | #define PULP_TRAINLIB_NET_H 7 | 8 | // PULP DEFINES 9 | #define STACK_SIZE 40960 10 | #define MOUNT 1 11 | #define UNMOUNT 0 12 | #define CID 0 13 | #define MAX_SIZE 25104 14 | 15 | #include "pulp_train_defines.h" 16 | 17 | // net functions 18 | void init_and_connect_blobs(); 19 | void forward(); 20 | void net_step(); 21 | 22 | // DMA managment functions 23 | void load_input(void * src_blob, uint8_t data_diff_both); 24 | void load_output(void * src_blob, uint8_t data_diff_both); 25 | void load_coeff(void * src_blob, uint8_t data_diff_both); 26 | void store_output(void * dest_blob, uint8_t data_diff_both); 27 | void store_input(void * dest_blob, uint8_t data_diff_both); 28 | void store_coeff(void * dest_blob, uint8_t data_diff_both); 29 | void copy_struct_param(unsigned int from, unsigned int to, int size); 30 | void get_input_dim(void * b); 31 | void get_output_dim(void * b); 32 | void get_weight_dim(void * b); 33 | void reset_arguments(); 34 | void update_blob(); 35 | void reset_dim(); 36 | 37 | #endif //PULP_TRAINLIB_NET_H 38 | -------------------------------------------------------------------------------- /tests/test_layout_change/.gitignore: -------------------------------------------------------------------------------- 1 | log.txt 2 | BUILD/ -------------------------------------------------------------------------------- /tests/test_layout_change/Makefile: -------------------------------------------------------------------------------- 1 | APP = layout_change 2 | 3 | # User code 4 | NUM_CORES?=8 5 | T_C?=2 6 | T_H?=3 7 | T_W?=3 8 | #APP_CFLAGS += -DPRINT_MATS 9 | # End of user code 10 | 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS = main.c net.c 15 | #APP_CFLAGS += -DDEBUG 16 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 17 | APP_CFLAGS += -O3 -g3 -mno-memcpy 18 | APP_CFLAGS += -DFABRIC 19 | APP_CFLAGS += -DCLUSTER 20 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 21 | APP_CFLAGS += -DPROF_NET 22 | APP_CFLAGS += -mhwloopalign 23 | APP_CFLAGS += -DT_C=$(T_C) 24 | APP_CFLAGS += -DT_H=$(T_H) 25 | APP_CFLAGS += -DT_W=$(T_W) 26 | APP_LDFLAGS += -lm 27 | 28 | # STATISTICS 29 | APP_CFLAGS += -DSTATS 30 | 31 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 32 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 33 | 34 | include $(RULES_DIR)/pmsis_rules.mk 35 | -------------------------------------------------------------------------------- /tests/test_layout_change/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching transposition procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, change_layout, NULL)); 26 | 27 | printf("Transposition successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_layout_change/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | void change_layout (); -------------------------------------------------------------------------------- 
/tests/test_linear_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | linear-data.h 4 | step-check.h 5 | output_eval.h 6 | runs.txt -------------------------------------------------------------------------------- /tests/test_linear_fp16/Makefile: -------------------------------------------------------------------------------- 1 | APP = linear_test_fp16 2 | 3 | # User settings 4 | IN_CH?=8 5 | OUT_CH?=8 6 | NUM_CORES?=8 7 | STEP?='FORWARD' # Possible steps: 'FORWARD', 'BACKWARD_GRAD', 'BACKWARD_ERROR' 8 | #APP_CFLAGS += -DDEBUG 9 | APP_CFLAGS += -DOPTIMIZE 10 | MATMUL_TYPE?=0 11 | USE_BIASES_LINEAR?=0 # Allocate biases (1) or not (0) 12 | NUM_MATMULS?=6 # When profiling with multiple matmul algorithms 13 | NUM_SIZES?=3 # When profiling multiple sizes of the network 14 | # End of user settings 15 | 16 | TRAIN_LIB=../../lib 17 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 18 | APP_SRCS = main.c net.c 19 | 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_linear_fp16.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 24 | 25 | DATA_TYPE?='fp16' 26 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 27 | APP_CFLAGS += -O3 -g3 28 | APP_CFLAGS += -DFABRIC 29 | APP_CFLAGS += -DCLUSTER 30 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 31 | APP_CFLAGS += -DPROF_NET 32 | APP_CFLAGS += -DMEMOCC_COMP 33 | APP_CFLAGS += -mhwloopalign 34 | APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE} 35 | APP_CFLAGS += -DUSE_BIASES_LINEAR=${USE_BIASES_LINEAR} 36 | APP_LDFLAGS += -lm 37 | 38 | # STATISTICS 39 | APP_CFLAGS += -DSTATS 40 | 41 | get_golden: 42 | python3 utils/GM.py --in_size $(IN_CH) --out_size $(OUT_CH) --step $(STEP) --use_bias $(USE_BIASES_LINEAR) 43 | 44 | profile_all_optim: 45 | python3 ./utils/profile_optimized.py --num_matmuls ${NUM_MATMULS} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --in_size ${IN_CH} --out_size ${OUT_CH} --use_bias ${USE_BIASES_LINEAR} 46 | 47 | profile_all_sizes: 48 | python3 ./utils/profile_sizes.py --num_sizes ${NUM_SIZES} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --matmul_type ${MATMUL_TYPE} --use_bias ${USE_BIASES_LINEAR} 49 | 50 | include $(RULES_DIR)/pmsis_rules.mk 51 | -------------------------------------------------------------------------------- /tests/test_linear_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_linear_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pulp_train_defines.h" 18 | #include "step-check.h" 19 | 20 | // User profiling flags 21 | 22 | #if defined(FORWARD) && !defined(DEBUG) 23 | #define PROF_FWD 24 | #endif 25 | 26 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG) 27 | #define PROF_BCKWD 28 | #endif 29 | 30 | // Net sizes 31 | 32 | #define Tker_l0 (Tin_l0*Tout_l0) 33 | 34 | // Tensor checksum definition 35 | #define CHECK_TOLERANCE 1e-3 36 | #define ERROR_TOLERANCE 0.01 37 | 38 | // PULP DEFINES 39 | #define STACK_SIZE 4096 40 | #define MOUNT 1 41 | #define UNMOUNT 0 42 | #define CID 0 43 | 44 | // Support functions 45 | static inline void forward(); 46 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 47 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 48 | static inline void train(); 49 | // Main function 50 | void net_step (); 51 | 52 | -------------------------------------------------------------------------------- /tests/test_linear_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_linear_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | linear-data.h 4 | step-check.h 5 | output_eval.h 6 | runs.txt 7 | gapsdk_script.sh 8 | **/__pycache__/ 9 | log_bkp.txt 10 | runs_bkp.txt 11 | BUILD_bkp/ 12 | -------------------------------------------------------------------------------- /tests/test_linear_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_linear_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "step-check.h" 18 | 19 | // User profiling flags 20 | 21 | #if defined(FORWARD) && !defined(DEBUG) 22 | #define PROF_FWD 23 | #endif 24 | 25 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG) 26 | #define PROF_BCKWD 27 | #endif 28 | 29 | // Net sizes 30 | 31 | #define Tker_l0 (Tin_l0*Tout_l0) 32 | 33 | // Tensor checksum definition 34 | #define CHECK_TOLERANCE 1e-3 35 | #define ERROR_TOLERANCE 0.01 36 | 37 | // PULP DEFINES 38 | #define STACK_SIZE 4096 39 | #define MOUNT 1 40 | #define UNMOUNT 0 41 | #define CID 0 42 | 43 | // Support functions 44 | static inline void forward(); 45 | static inline void compare_tensors(float *A, float *B, int length); 46 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 47 | static inline void train(); 48 | // Main function 49 | void net_step (); 50 | 51 | -------------------------------------------------------------------------------- /tests/test_losses_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | loss_values.h -------------------------------------------------------------------------------- /tests/test_losses_fp16/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_loss_fp16 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | OUT_SIZE?=10 6 | VALUE?=0.1 7 | LOSS_FN?=0 # Available options: 0='L1Loss', 1='MSE', 2='CrossEntropy', 3='berHuLoss' 8 | # General arguments 9 | NUM_CORES?=1 10 | FP16_FORMAT?=1 # Available formats: 0='FP16', 1='bfloat16' 11 | # End of user settings 12 | 13 | TRAIN_LIB=../../lib 14 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 15 | APP_SRCS += main.c net.c 16 | 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 18 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 19 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 20 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 21 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 22 | APP_CFLAGS += -DPROF_NET 23 | APP_CFLAGS += -DWGT_SIZE=$(OUT_SIZE) 24 | APP_CFLAGS += -DLOSS_FN=$(LOSS_FN) 25 | 26 | APP_LDFLAGS += -lm 27 | 28 | # STATISTICS 29 | APP_CFLAGS += -DSTATS 30 | 31 | get_golden: 32 | python3 ./utils/GM.py --out_size $(OUT_SIZE) --value $(VALUE) --loss_fn $(LOSS_FN) --format $(FP16_FORMAT) 33 | 34 | include $(RULES_DIR)/pmsis_rules.mk 35 | -------------------------------------------------------------------------------- /tests/test_losses_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching matmul evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nOptimizer evaluation successfully terminated :)\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_losses_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // User profiling flags 18 | #define FLOAT32 19 | // Tensor checksum definition 20 | #define CHECK_TOLERANCE 1e-4 21 | #define ERROR_TOLERANCE 1e-4 22 | 23 | // PULP DEFINES 24 | #define STACK_SIZE 4096 25 | #define MOUNT 1 26 | #define UNMOUNT 0 27 | #define CID 0 28 | 29 | // Loss defines 30 | #define L1Loss 0 31 | #define MSE 1 32 | #define CrossEntropy 2 33 | #define berHuLoss 3 34 | 35 | void net_step(); 36 | -------------------------------------------------------------------------------- /tests/test_losses_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | loss_values.h -------------------------------------------------------------------------------- /tests/test_losses_fp32/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_loss 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | OUT_SIZE?=10 6 | VALUE?=0.5 7 | LOSS_FN?=3 # Available options: 0='L1Loss', 1='MSE', 2='CrossEntropy', 3='berHuLoss' 8 | # General arguments 9 | NUM_CORES?=1 10 | # End of user settings 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 18 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 20 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 21 | APP_CFLAGS += -DPROF_NET 22 | APP_CFLAGS += -DWGT_SIZE=$(OUT_SIZE) 23 | APP_CFLAGS += -DLOSS_FN=$(LOSS_FN) 24 | 25 | APP_LDFLAGS += -lm 26 | 27 | # STATISTICS 28 | APP_CFLAGS += -DSTATS 29 | 30 | get_golden: 31 | python3 ./utils/GM.py --out_size $(OUT_SIZE) --value $(VALUE) --loss_fn $(LOSS_FN) 32 | 33 | include $(RULES_DIR)/pmsis_rules.mk 34 | -------------------------------------------------------------------------------- /tests/test_losses_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching loss function evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nOptimizer evaluation successfully terminated :)\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_losses_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // User profiling flags 18 | #define FLOAT32 19 | // Tensor checksum definition 20 | #define CHECK_TOLERANCE 1e-6 21 | #define ERROR_TOLERANCE 1e-6 22 | 23 | // PULP DEFINES 24 | #define STACK_SIZE 4096 25 | #define MOUNT 1 26 | #define UNMOUNT 0 27 | #define CID 0 28 | 29 | // Loss defines 30 | #define L1Loss 0 31 | #define MSE 1 32 | #define CrossEntropy 2 33 | #define berHuLoss 3 34 | 35 | void net_step(); 36 | -------------------------------------------------------------------------------- /tests/test_matmul/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | matmul_data.h 4 | net_args.h 5 | dis.S 6 | fastest_matmul.txt -------------------------------------------------------------------------------- /tests/test_matmul/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_matmul 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | IN_CH?=32 # Used also to define the number of channels of DW Conv 6 | MID_CH?=32 7 | OUT_CH?=32 8 | # General arguments 9 | DATA_TYPE?='float' # float, fp16 (=>float16), bf16 (=>float16alt) to select the desired format 10 | DIVIDER?=100000000 # Scaling factor for data initialization in golden model 11 | TRANSP?=0 # Matrix B is transposed if = 1, not transposed if = 0. 12 | NUM_CORES?=8 13 | # End of user settings 14 | 15 | TRAIN_LIB=../../lib 16 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 17 | APP_SRCS += main.c net.c 18 | 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 23 | 24 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 25 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 26 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 27 | APP_CFLAGS += -DPROF_NET 28 | 29 | APP_LDFLAGS += -lm 30 | 31 | # STATISTICS 32 | APP_CFLAGS += -DSTATS 33 | 34 | get_golden: 35 | python3 utils/GM.py --in_size $(IN_CH) --out_size $(OUT_CH) --mid_size $(MID_CH) --type $(DATA_TYPE) --init_value_div $(DIVIDER) --transpose $(TRANSP) 36 | 37 | profile_fastest: 38 | python3 utils/profile_fastest.py 39 | 40 | include $(RULES_DIR)/pmsis_rules.mk 41 | -------------------------------------------------------------------------------- /tests/test_matmul/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching matmul evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nMatmul evaluation successfully terminated :)\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_matmul/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "net_args.h" 18 | 19 | // User profiling flags 20 | 21 | #if (defined(STANDARD)) && !defined(DEBUG) 22 | #define PROF_MM 23 | #endif 24 | 25 | // Tensor checksum definition 26 | #ifdef FLOAT32 27 | #define CHECK_TOLERANCE 1e-3 28 | #define ERROR_TOLERANCE 0.01 29 | #endif 30 | #ifdef FLOAT16 31 | #define CHECK_TOLERANCE 1e0 32 | #define ERROR_TOLERANCE 0.05 33 | #endif 34 | #ifdef BFLOAT16 35 | #define CHECK_TOLERANCE 1e-3 36 | #define ERROR_TOLERANCE 0.05 37 | #endif 38 | 39 | // PULP DEFINES 40 | #define STACK_SIZE 4096 41 | #define MOUNT 1 42 | #define UNMOUNT 0 43 | #define CID 0 44 | 45 | void net_step(); 46 | -------------------------------------------------------------------------------- /tests/test_matmul/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_mhsa_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | attention_scores.h 3 | init-defines.h 4 | input-sequence.h 5 | mhsa-grads.h 6 | mhsa-output.h 7 | step-check.h 8 | dis.S -------------------------------------------------------------------------------- /tests/test_mhsa_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/*
 * DUMMY MAIN
 * Configures cluster, then calls net_step()
 */
int main () {

    printf("\nHello there.\nConfiguring cluster..\n");
    // Configure cluster
    struct pi_device cluster_dev;       // handle to the PULP cluster device
    struct pi_cluster_conf cl_conf;     // cluster configuration (default values)
    struct pi_cluster_task cl_task;     // descriptor for the task offloaded to the cluster

    // Fill the configuration with defaults and bind it to the device
    pi_cluster_conf_init(&cl_conf);
    pi_open_from_conf(&cluster_dev, &cl_conf);
    // Power on / open the cluster; non-zero return means it is unavailable
    if (pi_cluster_open(&cluster_dev))
    {
        return -1;
    }

    // Offload net_step() to the cluster and block until it completes
    printf("\nLaunching training procedure...\n");
    pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL));


    printf("\nNet training successful!\n");
    pi_cluster_close(&cluster_dev);

    // Terminate the PMSIS runtime with exit code 0
    pmsis_exit(0);
}
import torch
from torch.autograd import Function


def fastexp_gist(x):
    """Fast exp(x) approximation via IEEE-754 bit manipulation (Schraudolph).

    Computes a*x + b, where a = 2^23/ln(2) and b ~ 127*2^23, so that the
    integer part of the result is the float32 bit pattern of ~exp(x)
    (relative error up to a few percent). Mirrors the on-device fastexp
    so the golden model matches the PULP kernel numerics.
    """
    x_copy = x.type(torch.float32)
    # a*x + b: target float32 bit pattern of exp(x), as a real number
    x_copy = x_copy * 12102203.17133801 + 1064986823.010288
    # Underflow guard: bit patterns below 2^23 (denormal region) flush to 0
    x_copy = torch.where(x_copy < 8388608, 0, x_copy).type(torch.float32)
    # Overflow guard: clamp just below the +inf bit pattern (0x7F800000)
    x_copy = torch.where(x_copy > 2139095040, 2139095040, x_copy).type(torch.float32)
    # Reinterpret the integer bit pattern as float32.
    # BUGFIX: cast through int32 instead of uint32 -- torch.uint32 does not
    # exist before PyTorch 2.3 (AttributeError) and has limited op support
    # after; the clamped values lie in [0, 2139095040], well inside int32.
    return x_copy.type(torch.int32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax autograd Function built on the fast-exp approximation.

    Golden model for the fp16 MHSA softmax kernel; `bf16_format` selects the
    half-precision format used for the exp/normalization intermediates.
    """

    @staticmethod
    def forward(ctx, input, bf16_format):
        """Numerically-stable softmax over the last dimension.

        bf16_format: 0 -> float16 intermediates, otherwise bfloat16.
        """
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes).to(torch.float32))

        if bf16_format == 0:
            x_exp = x_exp.half()
        else:
            x_exp = x_exp.bfloat16()

        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Softmax backward: (g - sum(g*y)) * y; None for the bf16_format arg."""
        out_data = ctx.saved_tensors[0]
        sums = torch.sum(grad_output * out_data, 2, keepdim=True).repeat(1, 1, grad_output.shape[-1])
        grad_input = (grad_output - sums) * out_data

        return grad_input, None
15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | 36 | pi_cluster_conf_init(&cl_conf); 37 | pi_open_from_conf(&cluster_dev, &cl_conf); 38 | if (pi_cluster_open(&cluster_dev)) 39 | { 40 | return -1; 41 | } 42 | 43 | printf("\nLaunching training procedure...\n"); 44 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 45 | 46 | 47 | printf("\nNet training successful!\n"); 48 | pi_cluster_close(&cluster_dev); 49 | 50 | pmsis_exit(0); 51 | } 52 | -------------------------------------------------------------------------------- /tests/test_mhsa_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
import torch
from torch.autograd import Function


def fastexp_gist(x):
    """Fast exp(x) approximation via IEEE-754 bit manipulation (Schraudolph).

    Computes a*x + b, where a = 2^23/ln(2) and b ~ 127*2^23, so that the
    integer part of the result is the float32 bit pattern of ~exp(x)
    (relative error up to a few percent). Mirrors the on-device fastexp
    so the golden model matches the PULP kernel numerics.
    """
    x_copy = x.type(torch.float32)
    # a*x + b: target float32 bit pattern of exp(x), as a real number
    x_copy = x_copy * 12102203.17133801 + 1064986823.010288
    # Underflow guard: bit patterns below 2^23 (denormal region) flush to 0
    x_copy = torch.where(x_copy < 8388608, 0, x_copy).type(torch.float32)
    # Overflow guard: clamp just below the +inf bit pattern (0x7F800000)
    x_copy = torch.where(x_copy > 2139095040, 2139095040, x_copy).type(torch.float32)
    # Reinterpret the integer bit pattern as float32.
    # BUGFIX: cast through int32 instead of uint32 -- torch.uint32 does not
    # exist before PyTorch 2.3 (AttributeError) and has limited op support
    # after; the clamped values lie in [0, 2139095040], well inside int32.
    return x_copy.type(torch.int32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax autograd Function built on the fast-exp approximation.

    Golden model for the fp32 MHSA softmax kernel.
    """

    @staticmethod
    def forward(ctx, input):
        """Numerically-stable softmax over the last dimension."""
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes))

        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Softmax backward: (g - sum(g*y)) * y."""
        out_data = ctx.saved_tensors[0]
        sums = torch.sum(grad_output * out_data, 2, keepdim=True).repeat(1, 1, grad_output.shape[-1])
        grad_input = (grad_output - sums) * out_data

        return grad_input
15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | pi_cluster_conf_init(&cl_conf); 36 | pi_open_from_conf(&cluster_dev, &cl_conf); 37 | if (pi_cluster_open(&cluster_dev)) 38 | { 39 | return -1; 40 | } 41 | 42 | printf("\nLaunching training procedure...\n"); 43 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 44 | 45 | 46 | printf("\nNet training successful!\n"); 47 | pi_cluster_close(&cluster_dev); 48 | 49 | pmsis_exit(0); 50 | } 51 | -------------------------------------------------------------------------------- /tests/test_mhsa_fp32_partialsoftmax_old/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "step-check.h" 18 | 19 | // User profiling flags 20 | 21 | //#define DEBUG 22 | 23 | #if defined(FORWARD) && !defined(DEBUG) 24 | #define PROF_FWD 25 | #endif 26 | 27 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD) || defined(BACKWARD)) && !defined(DEBUG) 28 | #define PROF_BCKWD 29 | #endif 30 | 31 | // Net sizes 32 | 33 | #define Tker_l0 (Tin_l0*Tout_l0) 34 | 35 | // Tensor checksum definition 36 | #define CHECK_TOLERANCE 0.001 37 | #define ERROR_TOLERANCE 0.001 38 | 39 | // PULP DEFINES 40 | #define STACK_SIZE 4096 41 | #define MOUNT 1 42 | #define UNMOUNT 0 43 | #define CID 0 44 | 45 | // Support functions 46 | static inline void forward(); 47 | static inline void compare_tensors(float *A, float *B, int length); 48 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 49 | static inline void train(); 50 | // Main function 51 | void net_step (); 52 | 53 | -------------------------------------------------------------------------------- /tests/test_mhsa_fp32_partialsoftmax_old/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | attention_scores.h 3 | init-defines.h 4 | input-sequence.h 5 | mhsa-grads.h 6 | mhsa-output.h 7 | step-check.h 8 | output-defines.h 9 | output-sequence.h 10 | attention-defines.h 11 | dis.S -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/*
 * DUMMY MAIN
 * Configures cluster, then calls net_step()
 */
int main () {

    printf("\nHello there.\nConfiguring cluster..\n");
    // Configure cluster
    struct pi_device cluster_dev;       // handle to the PULP cluster device
    struct pi_cluster_conf cl_conf;     // cluster configuration (default values)
    struct pi_cluster_task cl_task;     // descriptor for the task offloaded to the cluster


    // Fill the configuration with defaults and bind it to the device
    pi_cluster_conf_init(&cl_conf);
    pi_open_from_conf(&cluster_dev, &cl_conf);
    // Power on / open the cluster; non-zero return means it is unavailable
    if (pi_cluster_open(&cluster_dev))
    {
        return -1;
    }

    // Offload net_step() to the cluster and block until it completes
    printf("\nLaunching training procedure...\n");
    pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL));


    printf("\nNet training successful!\n");
    pi_cluster_close(&cluster_dev);

    // Terminate the PMSIS runtime with exit code 0
    pmsis_exit(0);
}
compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "step-check.h" 18 | #include "pulp_train_defines.h" 19 | 20 | // User profiling flags 21 | 22 | //#define DEBUG 23 | 24 | #if defined(FORWARD) && !defined(DEBUG) 25 | #define PROF_FWD 26 | #endif 27 | 28 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD) || defined(BACKWARD)) && !defined(DEBUG) 29 | #define PROF_BCKWD 30 | #endif 31 | 32 | // Net sizes 33 | 34 | #define Tker_l0 (Tin_l0*Tout_l0) 35 | 36 | // Tensor checksum definition 37 | #define CHECK_TOLERANCE 0.001 38 | #define ERROR_TOLERANCE 0.001 39 | 40 | // PULP DEFINES 41 | #define STACK_SIZE 4096 42 | #define MOUNT 1 43 | #define UNMOUNT 0 44 | #define CID 0 45 | #define MAX_SIZE 51264 46 | 47 | // Support functions 48 | static inline void forward(); 49 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 50 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 51 | static inline void train(); 52 | 53 | // Netowork functions 54 | void DNN_init(); 55 | void forward(); 56 | void net_step(); 57 | 58 | // DMA managment functions 59 | void reset_arguments(); 60 | void update_blob(); 61 | void reset_dim(); 62 | -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp16/utils/SoftmaxFastExp.py: 
import torch
from torch.autograd import Function


def fastexp_gist(x):
    """Approximate ``exp(x)`` element-wise with the Schraudolph bit trick.

    Scales and offsets x so that, truncated to a 32-bit integer and
    reinterpreted as an IEEE-754 float32 bit pattern, the result
    approximates exp(x) (relative error of a few percent).
    """
    y = x.type(torch.float32)
    # exp(x) ~ bits_to_float(x * 2^23 / ln(2) + 127 * 2^23 - correction)
    y = y * 12102203.17133801 + 1064986823.010288
    # Underflow guard: bit patterns below 2^23 would decode to denormals -> flush to 0.
    y = torch.where(y < 8388608, 0, y).type(torch.float32)
    # Overflow guard: clamp to 0x7F800000 (+inf bit pattern).
    y = torch.where(y > 2139095040, 2139095040, y).type(torch.float32)

    # FIX: reinterpret through int32, not uint32. torch.uint32 does not exist
    # before PyTorch 2.3 and has only partial support after; int32 is safe
    # because the clamped values never exceed 2139095040 < 2^31.
    return y.type(torch.int32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax over the last axis using the fastexp approximation, in bfloat16.

    Golden-model counterpart of the fp16 PULP softmax kernel.
    """

    @staticmethod
    def forward(ctx, input):
        # Subtract the row-wise max for numerical stability before exponentiating.
        maxes = torch.max(input, -1, keepdim=True)[0].bfloat16()
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes).to(torch.float32))
        x_exp = x_exp.bfloat16()
        x_exp_sum = torch.sum(x_exp, -1, keepdim=True).bfloat16()
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output.bfloat16()

    @staticmethod
    def backward(ctx, grad_output):
        out_data = ctx.saved_tensors[0]
        # Softmax gradient: (g - sum(g * y)) * y, reduced over the softmax axis.
        # FIX: reduce over dim=-1 (the axis forward normalizes) instead of the
        # hard-coded dim=2, and rely on broadcasting instead of repeat() —
        # identical for the 3D case, correct for any rank.
        sums = torch.sum(grad_output * out_data, -1, keepdim=True)
        grad_input = (grad_output - sums) * out_data

        return grad_input
not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | 36 | pi_cluster_conf_init(&cl_conf); 37 | pi_open_from_conf(&cluster_dev, &cl_conf); 38 | if (pi_cluster_open(&cluster_dev)) 39 | { 40 | return -1; 41 | } 42 | 43 | printf("\nLaunching training procedure...\n"); 44 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 45 | 46 | 47 | printf("\nNet training successful!\n"); 48 | pi_cluster_close(&cluster_dev); 49 | 50 | pmsis_exit(0); 51 | } 52 | -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp32/net-args.h: -------------------------------------------------------------------------------- 1 | #define EMBED_SIZE 512 2 | #define HIDDEN_SIZE 512 3 | #define SEQ_LEN 64 4 | #define N_HEADS 4 5 | -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 
(the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "step-check.h" 18 | 19 | // User profiling flags 20 | 21 | //#define DEBUG 22 | 23 | #if defined(FORWARD) && !defined(DEBUG) 24 | #define PROF_FWD 25 | #endif 26 | 27 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD) || defined(BACKWARD)) && !defined(DEBUG) 28 | #define PROF_BCKWD 29 | #endif 30 | 31 | // Net sizes 32 | 33 | #define Tker_l0 (Tin_l0*Tout_l0) 34 | 35 | // Tensor checksum definition 36 | #define CHECK_TOLERANCE 0.001 37 | #define ERROR_TOLERANCE 0.001 38 | 39 | // PULP DEFINES 40 | #define STACK_SIZE 4096 41 | #define MOUNT 1 42 | #define UNMOUNT 0 43 | #define CID 0 44 | #define MAX_SIZE 25120 45 | 46 | // Support functions 47 | static inline void forward(); 48 | static inline void compare_tensors(float *A, float *B, int length); 49 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 50 | static inline void train(); 51 | 52 | // Netowork functions 53 | void DNN_init_forward(); 54 | void DNN_init_backward(); 55 | void forward(); 56 | void net_step(); 57 | 58 | // DMA managment functions 59 | void reset_arguments(); 60 | void update_blob(); 61 | void reset_dim(); 62 | -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp32/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- 
import torch
from torch.autograd import Function


def fastexp_gist(x):
    """Approximate ``exp(x)`` element-wise with the Schraudolph bit trick.

    Scales and offsets x so that, truncated to a 32-bit integer and
    reinterpreted as an IEEE-754 float32 bit pattern, the result
    approximates exp(x) (relative error of a few percent).
    """
    y = x.type(torch.float32)
    # exp(x) ~ bits_to_float(x * 2^23 / ln(2) + 127 * 2^23 - correction)
    y = y * 12102203.17133801 + 1064986823.010288
    # Underflow guard: bit patterns below 2^23 would decode to denormals -> flush to 0.
    y = torch.where(y < 8388608, 0, y).type(torch.float32)
    # Overflow guard: clamp to 0x7F800000 (+inf bit pattern).
    y = torch.where(y > 2139095040, 2139095040, y).type(torch.float32)

    # FIX: reinterpret through int32, not uint32. torch.uint32 does not exist
    # before PyTorch 2.3 and has only partial support after; int32 is safe
    # because the clamped values never exceed 2139095040 < 2^31.
    return y.type(torch.int32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax over the last axis using the fastexp approximation (fp32).

    Golden-model counterpart of the fp32 PULP softmax kernel.
    """

    @staticmethod
    def forward(ctx, input):
        # Subtract the row-wise max for numerical stability before exponentiating.
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes))
        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        out_data = ctx.saved_tensors[0]
        # Softmax gradient: (g - sum(g * y)) * y, reduced over the softmax axis.
        # FIX: reduce over dim=-1 (the axis forward normalizes) instead of the
        # hard-coded dim=2, and rely on broadcasting instead of repeat() —
        # identical for the 3D case, correct for any rank.
        sums = torch.sum(grad_output * out_data, -1, keepdim=True)
        grad_input = (grad_output - sums) * out_data

        return grad_input
* Configures cluster, then calls a simple net_step() 7 | */ 8 | int test_kickoff (void) { 9 | printf("\nHello there.\nConfiguring cluster..\n"); 10 | // Configure cluster 11 | struct pi_device cluster_dev; 12 | struct pi_cluster_conf cl_conf; 13 | struct pi_cluster_task cl_task; 14 | 15 | pi_cluster_conf_init(&cl_conf); 16 | pi_open_from_conf(&cluster_dev, &cl_conf); 17 | if (pi_cluster_open(&cluster_dev)) 18 | { 19 | return -1; 20 | } 21 | 22 | printf("\nMobilebert procedure...\n"); 23 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 24 | 25 | printf("Done, successful!\n"); 26 | pi_cluster_close(&cluster_dev); 27 | 28 | pmsis_exit(0); 29 | } 30 | 31 | int main(){ 32 | return pmsis_kickoff((void *) test_kickoff); 33 | } 34 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp16/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | #define MAX_SIZE 33824 7 | #define MAX_SIZE_L2 180736 8 | 9 | #include "pulp_train_defines.h" 10 | #include "pmsis.h" 11 | #include 12 | #include "bsp/ram/hyperram.h" 13 | #include "bsp/ram/spiram.h" 14 | #include "bsp/flash/hyperflash.h" 15 | #include "bsp/flash/spiflash.h" 16 | #include 17 | #include "bsp/fs.h" 18 | 19 | // Tensor checksum definition 20 | #define CHECK_TOLERANCE 0.001 21 | #define ERROR_TOLERANCE 0.001 22 | 23 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 24 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 25 | 26 | // Netowork functions 27 | void DNN_init(); 28 | void forward(); 29 | void net_step(); 30 | //void tiled_matmul(void* matmul_args, int flash_input); 31 | void tiled_matmul(void* matmul_args); 32 | void tiled_norm(void* nonorm_args); 33 | // void tiled_skip(void* residual_args, int flash_lout); 34 | void tiled_skip(void* 
residual_args); 35 | void tiled_relu(void* Relu_args); 36 | 37 | // DMA managment functions 38 | void reset_arguments(); 39 | void update_blob(); 40 | void reset_dim(); 41 | 42 | //utility struct and functions for reading from a file inside GAP9 43 | 44 | typedef struct{ 45 | struct pi_device fs; 46 | struct pi_device flash; 47 | pi_fs_file_t *file; 48 | } AT_FLASH_FS_T; 49 | 50 | static inline void __at_flash_fs_open(AT_FLASH_FS_T *file, int is_write, struct pi_readfs_conf *conf, const char *filename, int *err); 51 | static inline void __at_default_flash_fs_open(AT_FLASH_FS_T *file, int is_write, struct pi_readfs_conf *conf, const char *filename, int *err); 52 | static inline void __at_flash_fs_close(AT_FLASH_FS_T *file); 53 | static inline void __at_default_flash_file_open(AT_FLASH_FS_T *file, int is_write, const char *filename, int *err); 54 | 55 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp16/net_args.h: -------------------------------------------------------------------------------- 1 | // Float16 Mobilebert 2 | #define FLOAT16 3 | 4 | #define VOCAB_SIZE 30522 5 | 6 | #define EMBED_SIZE 128 7 | 8 | #define HIDDEN_SIZE 512 9 | 10 | #define INTERMEDIATE_SIZE 512 11 | 12 | #define NUM_HEADS 4 13 | 14 | #define N_HIDDEN_LAYERS 1 15 | 16 | #define N_FFN 4 17 | 18 | #define BOTTLENECK_SIZE 128 19 | 20 | #define ATTENTION_DROPOUT 0.0 21 | 22 | #define HIDDEN_DROPOUT 0.0 23 | 24 | #define TYPE_VOCAB_SIZE 2 25 | 26 | #define SEQ_LEN 128 27 | 28 | #define INPUT_SIZE 65536 29 | 30 | #define OUTPUT_SIZE 65536 31 | 32 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | MobileBert_PyTorch/ 3 | params.txt/ 4 | MobileBert_PyTorch.7z -------------------------------------------------------------------------------- 
/tests/test_mobilebert_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | attention-defines.h 3 | bottleneck-defines.h 4 | ffn-defines.h 5 | input-sequence.h 6 | intermediate-defines.h 7 | net-args.h 8 | output-defines.h 9 | output-sequence.h 10 | vocabulary.h 11 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp32/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nMobilebert procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Done, successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp32/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | #define MAX_SIZE 25104 7 | 8 | #include "pulp_train_defines.h" 9 | 10 | // Tensor checksum definition 11 | #define CHECK_TOLERANCE 0.001 12 | #define ERROR_TOLERANCE 0.001 13 | 14 | static inline void compare_tensors(float *A, float *B, int length); 15 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 16 | 17 | // Netowork functions 18 | void DNN_init(); 19 | void 
forward(); 20 | void net_step(); 21 | void tiled_matmul(void* matmul_args); 22 | void tiled_norm(void* nonorm_args); 23 | void tiled_skip(void* residual_args); 24 | void tiled_relu(void* Relu_args); 25 | 26 | // DMA managment functions 27 | void load_input(void * src_blob, uint8_t data_diff_both); 28 | void load_output(void * src_blob, uint8_t data_diff_both); 29 | void load_coeff(void * src_blob, uint8_t data_diff_both); 30 | void store_output(void * dest_blob, uint8_t data_diff_both); 31 | void store_input(void * dest_blob, uint8_t data_diff_both); 32 | void store_coeff(void * dest_blob, uint8_t data_diff_both); 33 | void copy_struct_param(unsigned int from, unsigned int to, int size); 34 | void get_input_dim(void * b); 35 | void get_output_dim(void * b); 36 | void get_weight_dim(void * b); 37 | void reset_arguments(); 38 | void update_blob(); 39 | void reset_dim(); -------------------------------------------------------------------------------- /tests/test_mobilebert_fp32/net_args.h: -------------------------------------------------------------------------------- 1 | // Float32 Mobilebert 2 | #define FLOAT32 3 | 4 | #define VOCAB_SIZE 30522 5 | 6 | #define EMBED_SIZE 128 7 | 8 | #define HIDDEN_SIZE 512 9 | 10 | #define INTERMEDIATE_SIZE 512 11 | 12 | #define NUM_HEADS 4 13 | 14 | #define N_HIDDEN_LAYERS 1 15 | 16 | #define N_FFN 4 17 | 18 | #define BOTTLENECK_SIZE 128 19 | 20 | #define ATTENTION_DROPOUT 0.0 21 | 22 | #define HIDDEN_DROPOUT 0.0 23 | 24 | #define TYPE_VOCAB_SIZE 2 25 | 26 | #define SEQ_LEN 128 27 | 28 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp32/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | MobileBert_PyTorch/ 3 | params.txt/ 4 | MobileBert_PyTorch.7z -------------------------------------------------------------------------------- /tests/test_pad/.gitignore: 
-------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_pad/Makefile: -------------------------------------------------------------------------------- 1 | APP = padder 2 | 3 | # User code 4 | NUM_CORES?=8 5 | CH_IN?=2 6 | H_IN?=3 7 | W_IN?=3 8 | LPAD?=1 9 | RPAD?=1 10 | UPAD?=1 11 | DPAD?=1 12 | HWC_LAY?=0 # =0 use CHW layout, =1 use HWC layout for the weights 13 | APP_CFLAGS += -DPRINT_MATS 14 | # End of user code 15 | 16 | 17 | TRAIN_LIB=../../lib 18 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 19 | APP_SRCS = main.c net.c 20 | #APP_CFLAGS += -DDEBUG 21 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 22 | APP_CFLAGS += -O3 -g3 -mno-memcpy 23 | APP_CFLAGS += -DFABRIC 24 | APP_CFLAGS += -DCLUSTER 25 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 26 | APP_CFLAGS += -DPROF_NET 27 | APP_CFLAGS += -mhwloopalign 28 | APP_CFLAGS += -DTin_C=$(CH_IN) 29 | APP_CFLAGS += -DTin_H=$(H_IN) 30 | APP_CFLAGS += -DTin_W=$(W_IN) 31 | APP_CFLAGS += -DLPAD=$(LPAD) 32 | APP_CFLAGS += -DRPAD=$(RPAD) 33 | APP_CFLAGS += -DUPAD=$(UPAD) 34 | APP_CFLAGS += -DDPAD=$(DPAD) 35 | APP_CFLAGS += -DHWC_LAYOUT=$(HWC_LAY) 36 | APP_LDFLAGS += -lm 37 | 38 | # STATISTICS 39 | APP_CFLAGS += -DSTATS 40 | 41 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 42 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 43 | 44 | include $(RULES_DIR)/pmsis_rules.mk 45 | -------------------------------------------------------------------------------- /tests/test_pad/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct 
pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching transposition procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Transposition successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_pad/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | // Padded sizes 8 | #define Tout_H (Tin_H+UPAD+DPAD) 9 | #define Tout_W (Tin_W+RPAD+LPAD) 10 | 11 | void net_step (); -------------------------------------------------------------------------------- /tests/test_pooling/.gitignore: -------------------------------------------------------------------------------- 1 | init_defines.h 2 | pool_data.h 3 | log.txt 4 | dis.S 5 | BUILD/ -------------------------------------------------------------------------------- /tests/test_pooling/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_pooling 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | IN_H?=6 6 | IN_W?=6 7 | IN_C?=1 8 | KER_H?=6 9 | KER_W?=6 10 | H_STR?=1 11 | W_STR?=1 12 | VALUE?=0.5 13 | # General arguments 14 | NUM_CORES?=8 15 | # End of user settings 16 | 17 | TRAIN_LIB=../../lib 18 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 19 | APP_SRCS += main.c net.c 20 | 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp32.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_linear_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c 26 | APP_SRCS += 
$(TRAIN_LIB_SRCS)/pulp_conv_dw_fp32.c 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c 30 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_pooling_fp32.c 31 | 32 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 33 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 34 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 35 | APP_CFLAGS += -DPROF_NET 36 | APP_CFLAGS += -DIN_H=$(IN_H) 37 | APP_CFLAGS += -DIN_W=$(IN_W) 38 | APP_CFLAGS += -DIN_C=$(IN_C) 39 | APP_CFLAGS += -DKER_H=$(KER_H) 40 | APP_CFLAGS += -DKER_W=$(KER_W) 41 | APP_CFLAGS += -DH_STR=$(H_STR) 42 | APP_CFLAGS += -DW_STR=$(W_STR) 43 | APP_CFLAGS += -DVALUE=$(VALUE) 44 | 45 | APP_LDFLAGS += -lm 46 | 47 | # STATISTICS 48 | APP_CFLAGS += -DSTATS 49 | 50 | get_golden: 51 | python3 ./utils/GM.py --in_c $(IN_C) --in_h $(IN_H) --in_w $(IN_W) --ker_h $(KER_H) --ker_w $(KER_W) --stride_h $(H_STR) --stride_w $(W_STR) --value $(VALUE) 52 | 53 | include $(RULES_DIR)/pmsis_rules.mk 54 | -------------------------------------------------------------------------------- /tests/test_pooling/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching pooling evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nPooling evaluation successfully terminated :)\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_pooling/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // User profiling flags 18 | #define FLOAT32 19 | // Tensor checksum definition 20 | #define CHECK_TOLERANCE 1e-12 21 | #define ERROR_TOLERANCE 1e-12 22 | 23 | // PULP DEFINES 24 | #define STACK_SIZE 4096 25 | #define MOUNT 1 26 | #define UNMOUNT 0 27 | #define CID 0 28 | 29 | void net_step(); 30 | -------------------------------------------------------------------------------- /tests/test_random/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_random/Makefile: -------------------------------------------------------------------------------- 1 | APP = rng 2 | 3 | # User code 4 | NUM_CORES?=8 5 | PROBABILITY?=0.5 6 | SEED?=10 7 | # End of user code 8 | 9 | TRAIN_LIB=../../lib 10 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 11 | APP_SRCS = main.c net.c 12 | #APP_CFLAGS += -DDEBUG 13 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 14 | APP_CFLAGS += -O3 -g3 -mno-memcpy 15 | APP_CFLAGS += -DFABRIC 16 | APP_CFLAGS += -DCLUSTER 17 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 18 | APP_CFLAGS += -DPROF_NET 19 | APP_CFLAGS += -mhwloopalign 20 | APP_CFLAGS += -DPROBABILITY=$(PROBABILITY) 21 | APP_CFLAGS += -DSEED=$(SEED) 22 | APP_LDFLAGS += -lm 23 | 24 | # STATISTICS 25 | APP_CFLAGS += -DSTATS 26 | 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_random.c 30 | 31 | include $(RULES_DIR)/pmsis_rules.mk 32 | -------------------------------------------------------------------------------- /tests/test_random/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 
12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching random number generation procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Transposition successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_random/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "stats.h" 4 | #include "net.h" 5 | 6 | // ----------------- FP32 data ---------------------- 7 | PI_L1 float probability; 8 | PI_L1 int output; 9 | 10 | // Main function 11 | void net_step () 12 | { 13 | #ifdef PROF_NET 14 | INIT_STATS(); 15 | PRE_START_STATS(); 16 | #endif 17 | 18 | struct integer_random_args args_fp32; 19 | probability = PROBABILITY; 20 | args_fp32.seed = SEED; 21 | args_fp32.probability = probability; 22 | args_fp32.output = &output; 23 | 24 | printf("Bernoulli Random Number Generator (from seed):\n"); 25 | #ifdef PROF_NET 26 | START_STATS(); 27 | #endif 28 | 29 | pulp_random_bernoulli(&args_fp32); 30 | 31 | #ifdef PROF_NET 32 | STOP_STATS(); 33 | #endif 34 | 35 | printf("First run output: %d\n", output); 36 | 37 | printf("\nTest random number generation stats:"); 38 | float mean = 0; 39 | int acc = 0; 40 | float var = 0; 41 | for (int i=0; i<1000; i++) { 42 | pulp_random_bernoulli(&args_fp32); 43 | acc += output; 44 | } 45 | mean = (float) acc / 1000.0; 46 | printf("Mean: %f\n", mean); 47 | 48 | return; 49 | } 50 | -------------------------------------------------------------------------------- /tests/test_random/net.h: 
-------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | 8 | void net_step (); -------------------------------------------------------------------------------- /tests/test_reduce_mean/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | matmul_data.h 4 | net_args.h 5 | dis.S 6 | fastest_matmul.txt 7 | test_data.h 8 | -------------------------------------------------------------------------------- /tests/test_reduce_mean/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_reduce_mean 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | DIMS = 1 7 800 2 6 | REDUCE_AXIS = 2 7 | 8 | NUM_CORES = 8 9 | DATA_TYPE = 32 # 32 for fp32, 16 for fp16 10 | # End of user settings 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 18 | 19 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 20 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 21 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 22 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 23 | APP_CFLAGS += -DPROF_NET 24 | 25 | APP_LDFLAGS += -lm 26 | 27 | # STATISTICS 28 | APP_CFLAGS += -DSTATS 29 | 30 | get_golden: 31 | rm -rf BUILD/ 32 | python3 utils/GM.py --input_dims $(DIMS) --reduce_axis $(REDUCE_AXIS) --dtype $(DATA_TYPE) 33 | 34 | include $(RULES_DIR)/pmsis_rules.mk 35 | -------------------------------------------------------------------------------- /tests/test_reduce_mean/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | 5 | int main(void) { 6 | printf("\nHello there.\nConfiguring cluster..\n"); 7 | 8 | // Configure cluster 9 | struct pi_device cluster_dev; 10 | struct pi_cluster_conf cl_conf; 11 | struct pi_cluster_task cl_task; 12 | 13 | pi_cluster_conf_init(&cl_conf); 14 | pi_open_from_conf(&cluster_dev, &cl_conf); 15 | 16 | if (pi_cluster_open(&cluster_dev)) { 17 | return -1; 18 | } 19 | 20 | printf("\nLaunching broadcast matmul evaluation...\n\n"); 21 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, reduce_mean_test, NULL)); 22 | 23 | printf("\nMatmul evaluation successfully terminated :)\n"); 24 | pi_cluster_close(&cluster_dev); 25 | 26 | pmsis_exit(0); 27 | } 28 | -------------------------------------------------------------------------------- /tests/test_reduce_mean/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "net.h" 4 | #include "stats.h" 5 | 6 | #include "test_data.h" 7 | 8 | #include "tensor_checkers.h" 9 | 10 | 11 | void reduce_mean_test() { 12 | #ifdef PROF_NET 13 | INIT_STATS(); 14 | PRE_START_STATS(); 15 | #endif 16 | printf("Executing on %d cores.\n", NUM_CORES); 17 | 18 | #if DATA_TYPE == 32 19 | struct reduce_mean_args_fp32 args; 20 | printf("WORKING ON FP32\n"); 21 | 
# Makefile for the residual-connection (skip/add) test.
APP = test_residual

# --- User settings (all overridable from the command line, e.g. `make CI=32`) ---
CI?=64
HI?=56
WI?=56
KER?=1
NUM_CORES?=8
HWC?=0
DEBUG_INFO?=0
STEP?='BACKWARD'
DATA_TYPE?='FLOAT32'
USE_IM2COL?=1
USE_DMA?=0
MATMUL_TYPE?=0

# Location of the PULP-TrainLib kernels compiled into this test.
TRAIN_LIB=../../lib
TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources
APP_SRCS += main.c net.c


# Both FP32 and FP16 kernel variants are compiled; DATA_TYPE selects at runtime
# generation (GM.py) which golden data / step the test exercises.
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_residual_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_residual_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp16.c


APP_CFLAGS += -I. -I$(TRAIN_LIB)/include
APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3
APP_CFLAGS += -DNUM_CORES=$(NUM_CORES)
APP_CFLAGS += -DPROF_NET
APP_CFLAGS += -DOPTIMIZE



APP_LDFLAGS += -lm

# STATISTICS
APP_CFLAGS += -DSTATS

# Regenerate the golden-model headers with the current settings.
get_golden:
	python3 ./utils/GM.py -CI ${CI} -HI ${HI} -WI ${WI} -KER ${KER} -NUM_CORES ${NUM_CORES} -HWC ${HWC} -DEBUG_INFO ${DEBUG_INFO} -STEP ${STEP} -DATA_TYPE ${DATA_TYPE} -USE_IM2COL ${USE_IM2COL} -USE_DMA ${USE_DMA} -MATMUL_TYPE ${MATMUL_TYPE}

include $(RULES_DIR)/pmsis_rules.mk
import torch
import numpy as np


def TensorToArray(T, hwc):
    """Flatten a rank-1 or rank-3 torch tensor into a list of Python floats.

    Args:
        T: torch tensor of rank 1, or rank 3 laid out as (C, H, W).
        hwc: for rank-3 input, truthy emits HWC order, falsy emits CHW.

    Returns:
        list[float]: the flattened values.

    Raises:
        ValueError: for any other rank (the old code silently returned None,
            which then crashed later inside WriteArray).
    """
    dim = len(T.size())

    if dim == 1:
        # FIX: cast each element to float — the old code appended the raw
        # 0-dim tensor elements, so WriteArray printed "tensor(...)" into
        # the generated C header instead of a plain number.
        return [float(v) for v in T]

    if dim == 3:
        res = []
        if hwc:
            for h in range(T.size(1)):
                for w in range(T.size(2)):
                    for c in range(T.size(0)):
                        res.append(float(T[c][h][w]))
        else:
            for c in range(T.size(0)):
                for h in range(T.size(1)):
                    for w in range(T.size(2)):
                        res.append(float(T[c][h][w]))
        return res

    raise ValueError(f"TensorToArray supports rank 1 or 3 tensors, got rank {dim}")


def WriteArray(array, name, f, d):
    """Write `array` into the open file `f` as a C array `d name[len]`.

    When d == "fp16" every value is narrowed to np.float16 first, so the
    generated header matches on-device half precision.
    """
    name = str(name)
    # Build " val1, val2, ..." exactly as the element-by-element writer did.
    if d == "fp16":
        values = [f" {np.float16(v)}" for v in array]
    else:
        values = [f" {v}" for v in array]
    f.write(f"\n{d} {name}[{len(array)}] = ")
    f.write(" {" + ",".join(values) + "};\n")
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp32.c 26 | 27 | DATA_TYPE?='fp32' 28 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 29 | APP_CFLAGS += -O3 -g 30 | APP_CFLAGS += -DFABRIC 31 | APP_CFLAGS += -DCLUSTER 32 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 33 | APP_CFLAGS += -DPROF_NET 34 | APP_CFLAGS += -DMEMOCC_COMP 35 | APP_CFLAGS += -mhwloopalign 36 | APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE} 37 | #APP_CFLAGS += -DDEBUG 38 | APP_LDFLAGS += -lm 39 | 40 | # STATISTICS 41 | APP_CFLAGS += -DSTATS 42 | 43 | get_golden: 44 | python3 ./utils/GM.py --step $(STEP) --in_width $(IN_W) --in_height $(IN_H) --ch_in ${IN_CH} --ch_out ${OUT_CH} --out_width $(OUT_W) 45 | 46 | profile_all_optim: 47 | python3 ./utils/profile_optimized.py --num_matmuls ${NUM_MATMULS} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --in_width $(IN_W) --in_height $(IN_H) --ch_in ${IN_CH} --ch_out ${OUT_CH} --out_width $(OUT_W) 48 | 49 | profile_all_sizes: 50 | python3 ./utils/profile_sizes.py --num_sizes ${NUM_SIZES} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --matmul_type ${MATMUL_TYPE} 51 | 52 | include $(RULES_DIR)/pmsis_rules.mk 53 | -------------------------------------------------------------------------------- /tests/test_rnn_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | pi_cluster_conf_init(&cl_conf); 36 | pi_open_from_conf(&cluster_dev, &cl_conf); 37 | if (pi_cluster_open(&cluster_dev)) 38 | { 39 | return -1; 40 | } 41 | 42 | printf("\nLaunching training procedure...\n"); 43 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 44 | 45 | 46 | printf("\nNet training successful!\n"); 47 | pi_cluster_close(&cluster_dev); 48 | 49 | pmsis_exit(0); 50 | } 51 | -------------------------------------------------------------------------------- /tests/test_rnn_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
# Makefile for the Tiny ViT FP32 forward-pass test.
APP = vit_fp32

# User code
NUM_CORES = 8
MATMUL_TYPE?=9
DATA_TYPE?=32

# Model preset; must be a key of MODEL_CONFIGS in utils/model_configs.py.
CONFIG_NAME = "TINY_VIT_5M"
# End of user code

TASK_NAME=sst-2
TRAIN_LIB=../../lib
TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources

#APP_CFLAGS += -DDEBUG
APP_CFLAGS += -I. -I$(TRAIN_LIB)/include
APP_CFLAGS += -O3 -g
APP_CFLAGS += -DFABRIC
APP_CFLAGS += -DCLUSTER
APP_CFLAGS += -DNUM_CORES=$(NUM_CORES)
# NOTE(review): N_HEADS is not assigned anywhere in this Makefile, so this
# expands to "-DN_HEADS=" unless it is passed on the command line — confirm.
APP_CFLAGS += -DN_HEADS=$(N_HEADS)
APP_CFLAGS += -DPROF_NET
APP_CFLAGS += -mhwloopalign
APP_LDFLAGS += -lm
APP_CFLAGS += -DMEMOCC_COMP

# NOTE(review): TILE_H / TILE_W / TILE_DIM are likewise only defined when
# supplied externally — verify the intended invocation.
APP_CFLAGS += -DTILE_H=$(TILE_H)
APP_CFLAGS += -DTILE_W=$(TILE_W)
APP_CFLAGS += -DTILE_DIM=$(TILE_DIM)

APP_CFLAGS += -DOPTIMIZE
APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE}

# STATISTICS
APP_CFLAGS += -DSTATS

# =============== SOURCES ===============
APP_SRCS = main.c net.c

# For Conv2D
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_naive_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_dw_fp32.c

# For GELU
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp32.c

# For LayerNorm
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_layernorm_fp32.c


include $(RULES_DIR)/pmsis_rules.mk

# Regenerate the golden model / headers for the selected configuration.
get_golden:
	rm -rf BUILD/
	python3 utils/GM.py --config $(CONFIG_NAME)
pi_open_from_conf(&cluster_dev, &cl_conf); 14 | if (pi_cluster_open(&cluster_dev)) { 15 | return -1; 16 | } 17 | 18 | printf("Launching ViT procedure...\n"); 19 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 20 | 21 | printf("Done, successful!\n"); 22 | pi_cluster_close(&cluster_dev); 23 | 24 | pmsis_exit(0); 25 | } 26 | -------------------------------------------------------------------------------- /tests/test_tiny_vit_fp32/net.c: -------------------------------------------------------------------------------- 1 | // ~~~~~~~~~~ INCLUDES ~~~~~~~~~~ 2 | #include "pulp_train.h" 3 | 4 | #include "model_components.h" 5 | 6 | #include "stats.h" 7 | #include "net.h" 8 | 9 | 10 | // Main function 11 | void net_step() { 12 | // Initialize performance counters 13 | #ifdef PROF_NET 14 | INIT_STATS(); 15 | PRE_START_STATS(); 16 | #endif 17 | 18 | // Initialize model components 19 | printf("Tiny ViT test:\n"); 20 | printf("Initializing components...\n"); 21 | init_and_connect_blobs(); 22 | 23 | // Forward pass 24 | printf("Forward pass...\n"); 25 | #ifdef PROF_NET 26 | START_STATS(); 27 | #endif 28 | forward(); 29 | #ifdef PROF_NET 30 | STOP_STATS(); 31 | #endif 32 | 33 | // Perform forward check 34 | printf("\nChecking forward step results: \n"); 35 | 36 | // Check the output 37 | check_output(); 38 | 39 | return; 40 | } 41 | -------------------------------------------------------------------------------- /tests/test_tiny_vit_fp32/net.h: -------------------------------------------------------------------------------- 1 | #ifndef NET_H 2 | #define NET_H 3 | 4 | // PULP DEFINES 5 | #define STACK_SIZE 40960 6 | #define MOUNT 1 7 | #define UNMOUNT 0 8 | #define CID 0 9 | #define MAX_SIZE 25104 10 | 11 | #include "pulp_train_defines.h" 12 | 13 | // net functions 14 | void forward(); 15 | void net_step(); 16 | 17 | // DMA managment functions 18 | void load_input(void * src_blob, uint8_t data_diff_both); 19 | void load_output(void * src_blob, 
import torch


class Conv2dBN(torch.nn.Sequential):
    """Bias-free Conv2d followed by BatchNorm2d, foldable into a single conv.

    Submodules are registered under the names "c" (conv) and "bn"
    (batch-norm), which the rest of the model relies on.
    """

    def __init__(
        self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1.0
    ):
        super().__init__()

        conv = torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False)
        self.add_module("c", conv)

        # Deterministic BN start: weight = bn_weight_init, bias = 0.
        norm = torch.nn.BatchNorm2d(b)
        torch.nn.init.constant_(norm.weight, bn_weight_init)
        torch.nn.init.constant_(norm.bias, 0)
        self.add_module("bn", norm)

    @torch.no_grad()
    def fuse(self):
        """Fold the BN running statistics into the conv; return the fused Conv2d."""
        conv, norm = self.c, self.bn

        # Per-channel multiplier that BN applies in eval mode.
        scale = norm.weight / (norm.running_var + norm.eps) ** 0.5
        fused_w = conv.weight * scale[:, None, None, None]

        fused_b = norm.bias - norm.running_mean * norm.weight / (norm.running_var + norm.eps) ** 0.5

        fused = torch.nn.Conv2d(
            fused_w.size(1) * conv.groups,
            fused_w.size(0),
            fused_w.shape[2:],
            stride=conv.stride,
            padding=conv.padding,
            dilation=conv.dilation,
            groups=conv.groups,
        )

        fused.weight.data.copy_(fused_w)
        fused.bias.data.copy_(fused_b)

        return fused
from torch import nn


class Mlp(nn.Module):
    """LayerNorm -> Linear -> activation -> dropout -> Linear -> dropout.

    hidden_features and out_features fall back to in_features when not given
    (falsy values such as None or 0 trigger the fallback, mirroring the
    original `or` semantics).
    """

    def __init__(
        self,
        in_features,
        hidden_features=None,
        out_features=None,
        act_layer=nn.GELU,
        drop=0.0,
    ):
        super().__init__()

        # Default both widths to the input width.
        if not out_features:
            out_features = in_features
        if not hidden_features:
            hidden_features = in_features

        # Creation order kept stable so seeded parameter init is reproducible.
        self.norm = nn.LayerNorm(in_features)
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.act = act_layer()
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.drop(self.act(self.fc1(self.norm(x))))
        return self.drop(self.fc2(hidden))
import torch
from einops import rearrange
from torch import nn
from torch.amp import autocast


class SinusoidalEmbeddings(nn.Module):
    """Sinusoidal position frequencies with optional xpos length scaling.

    forward() returns (freqs, scale); `scale` is a single 1.0 unless
    use_xpos is set, in which case it varies with distance from the
    sequence midpoint.
    """

    def __init__(self, dim, scale_base=None, use_xpos=False, theta=10000):
        super().__init__()

        # Inverse-frequency spectrum: theta^(-2k/dim) for k = 0 .. dim/2 - 1.
        inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2).float() / dim))

        self.register_buffer("inv_freq", inv_freq)

        # xpos related
        self.use_xpos = use_xpos
        self.scale_base = scale_base

        assert not (
            use_xpos and (scale_base is None)
        ), "scale base must be defined if using xpos"

        # Per-frequency base for the xpos scaling; persistent=False keeps it
        # out of checkpoints since it is recomputable from dim.
        scale = (torch.arange(0, dim, 2) + 0.4 * dim) / (1.4 * dim)
        self.register_buffer("scale", scale, persistent=False)

    # autocast disabled so the frequency math stays in full precision.
    @autocast("cuda", enabled=False)
    def forward(self, x):
        # NOTE(review): assumes x is (..., seq_len, dim) — the sequence
        # length is read from the second-to-last axis; confirm with callers.
        seq_len, device = x.shape[-2], x.device

        t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)

        # Outer product of positions and inverse frequencies, duplicated
        # along the last axis (shape (seq_len, dim)).
        freqs = torch.einsum("i , j -> i j", t, self.inv_freq)
        freqs = torch.cat((freqs, freqs), dim=-1)

        if not self.use_xpos:
            return freqs, torch.ones(1, device=device)

        # xpos path: scale^power, with power growing away from the center.
        power = (t - (seq_len // 2)) / self.scale_base
        scale = self.scale ** rearrange(power, "n -> n 1")
        scale = torch.cat((scale, scale), dim=-1)

        return freqs, scale
# NOTE(review): this span of the packed dump holds two small utility modules
# (utils/model_configs.py and utils/writers/writers_utils.py); both are
# reproduced here so neither is lost.

# --- model_configs.py: Tiny ViT architecture presets (selected by name) -----
MODEL_CONFIGS = {
    # Reduced configuration for quick sanity runs.
    "DEMO_TINY_VIT_CONFIG": {
        "IN_IMG_SIZE": 32,
        "IN_CHANS": 3,
        "NUM_CLASSES": 10,
        "EMBED_DIMS": [4, 4, 4],
        "DEPTHS": [1, 2, 2],
        "NUM_HEADS": [2, 4, 4],
        "WINDOW_SIZES": [7, 1, 1],
        "MLP_RATIO": 4.0,
        "DROP_RATE": 0.0,
        "DROP_PATH_RATE": 0.0,
        "USE_CHECKPOINT": False,
        "MBCONV_EXPAND_RATIO": 2.0,
        "LOCAL_CONV_SIZE": 3,
    },
    # Standard TinyViT-5M configuration.
    "TINY_VIT_5M": {
        "IN_IMG_SIZE": 224,
        "IN_CHANS": 3,
        "NUM_CLASSES": 1000,
        "EMBED_DIMS": [64, 128, 160, 320],
        "DEPTHS": [2, 2, 6, 2],
        "NUM_HEADS": [2, 4, 5, 10],
        "WINDOW_SIZES": [7, 7, 14, 7],
        "MLP_RATIO": 4.0,
        "DROP_RATE": 0.0,
        "DROP_PATH_RATE": 0.0,
        "USE_CHECKPOINT": False,
        "MBCONV_EXPAND_RATIO": 4.0,
        "LOCAL_CONV_SIZE": 3,
    },
}


# --- writers_utils.py: helpers used by the C-header writers -----------------

def get_initialization_text(dim, data_name, filler):
    """Return C code filling `data_name[0:dim]` from `filler`.

    `filler` is either another C array (copied element-wise) or one of the
    scalar initializers "zero_init" / "min_float" (assigned directly).
    """
    suffix = ";\n" if filler in ("zero_init", "min_float") else "[i];\n"
    return f"\tfor (int i = 0; i < {dim}; i++) {data_name}[i] = {filler}{suffix}"


def get_connect_text(blob_name, elements):
    """Return C statements assigning every key/value in `elements` to the
    corresponding field of the blob struct named `blob_name`."""
    text = "".join(
        f"\t{blob_name}.{field} = {value};\n" for field, value in elements.items()
    )
    return text + "\n"


def adapt_onnx_name(name):
    """Turn an ONNX node name into a valid C identifier (prefix + sanitize)."""
    return "_" + str(name).replace("/", "_").replace(".", "_").replace(":", "_")


def extract_input_information(node):
    """Return (data, shape) for a dict-style node or an ONNX initializer.

    Raises:
        NotImplementedError: when `node` is neither a dict with
            "data"/"shape" keys nor an object exposing `.name` and `.dims`.
    """
    if isinstance(node, dict):
        return node["data"], tuple(node["shape"])

    # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt and
    # SystemExit; only attribute/iteration failures mean "unknown structure".
    try:
        return node.name, tuple(node.dims)
    except (AttributeError, TypeError) as err:
        raise NotImplementedError("Node structure not recognized") from err
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_transp_conv2d_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/*
 * Test configuration header for the FP16 transposed Conv2D test.
 * Layer output sizes, check tolerances and PULP runtime constants.
 */

#include "pulp_train_defines.h"
#include "step-check.h"

// User profiling flags
// Profiling is enabled only when not compiling with DEBUG, so debug prints
// do not pollute the performance counters.

#if defined(FORWARD) && !defined(DEBUG)
#define PROF_FWD
#endif

#if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG)
#define PROF_BKWD
#endif

// Net sizes

// TRANSPOSED CONV2D
// Output size of a transposed convolution:
//   (in - 1) * stride - total_padding + (kernel - 1) + 1
// Tin_*/Tker_*/STRIDE_*/PAD_* come from the generated step-check / init headers.
#define Tout_H_l1 ((Tin_H_l1-1)*STRIDE_H-(PAD_U+PAD_D)+(Tker_H_l1-1)+1)
#define Tout_W_l1 ((Tin_W_l1-1)*STRIDE_W-(PAD_L+PAD_R)+(Tker_W_l1-1)+1)

// Tensor checksum definition
// Looser than the FP32 test (1e-6 there) because of half-precision rounding.
#define CHECK_TOLERANCE 1e-3
#define ERROR_TOLERANCE 1e-3

// PULP DEFINES
#define STACK_SIZE 4096
#define MOUNT 1
#define UNMOUNT 0
#define CID 0

// Support functions (defined in net.c)
static inline void forward();                                       // single forward step of the layer under test
static inline void compare_tensors(fp16 *A, fp16 *B, int length);   // compare result against golden reference
int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size);   // elementwise check against the reference
static inline void train();                                         // training step (fwd and/or bkwd per step-check.h)
// Main function, offloaded to the cluster by main.c
void net_step ();
/*
 * DUMMY MAIN
 * Configures cluster, then calls net_step()
 */
int main (void) {

  printf("\nHello there.\nConfiguring cluster..\n");
  // Configure cluster: device handle, configuration and task descriptors
  struct pi_device cluster_dev;
  struct pi_cluster_conf cl_conf;
  struct pi_cluster_task cl_task;

  pi_cluster_conf_init(&cl_conf);
  pi_open_from_conf(&cluster_dev, &cl_conf);
  // Abort if the cluster cannot be opened
  if (pi_cluster_open(&cluster_dev))
  {
    return -1;
  }

  // Offload net_step() to the cluster as a blocking task
  printf("\nLaunching training procedure...\n");
  pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL));

  // Task returned: shut the cluster down and exit
  printf("Net training successful!\n");
  pi_cluster_close(&cluster_dev);

  pmsis_exit(0);
}
/*
 * Test configuration header for the FP32 transposed Conv2D test.
 * Layer output sizes, check tolerances and PULP runtime constants.
 */

#include "step-check.h"

// User profiling flags
// Profiling is enabled only when not compiling with DEBUG, so debug prints
// do not pollute the performance counters.

#if defined(FORWARD) && !defined(DEBUG)
#define PROF_FWD
#endif

#if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG)
#define PROF_BKWD
#endif

// Net sizes

// TRANSPOSED CONV2D
// Output size of a transposed convolution:
//   (in - 1) * stride - total_padding + (kernel - 1) + 1
// Tin_*/Tker_*/STRIDE_*/PAD_* come from the generated step-check / init headers.
#define Tout_H_l1 ((Tin_H_l1-1)*STRIDE_H-(PAD_U+PAD_D)+(Tker_H_l1-1)+1)
#define Tout_W_l1 ((Tin_W_l1-1)*STRIDE_W-(PAD_L+PAD_R)+(Tker_W_l1-1)+1)

// Tensor checksum definition
// Tighter than the FP16 test (1e-3 there): full single precision.
#define CHECK_TOLERANCE 1e-6
#define ERROR_TOLERANCE 1e-6

// PULP DEFINES
#define STACK_SIZE 4096
#define MOUNT 1
#define UNMOUNT 0
#define CID 0

// Support functions (defined in net.c)
static inline void forward();                                        // single forward step of the layer under test
static inline void compare_tensors(float *A, float *B, int length);  // compare result against golden reference
int check_tensor(float * tensor_out, float * tensor_ref, int size);  // elementwise check against the reference
static inline void train();                                          // training step (fwd and/or bkwd per step-check.h)
// Main function, offloaded to the cluster by main.c
void net_step ();
NUM_CORES = 8 8 | DATA_TYPE = 32 9 | # End of user code 10 | 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS = main.c net.c 15 | #APP_CFLAGS += -DDEBUG 16 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 17 | APP_CFLAGS += -O3 -g3 -mno-memcpy 18 | APP_CFLAGS += -DFABRIC 19 | APP_CFLAGS += -DCLUSTER 20 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 21 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 22 | APP_CFLAGS += -DPROF_NET 23 | APP_CFLAGS += -mhwloopalign 24 | APP_LDFLAGS += -lm 25 | 26 | # STATISTICS 27 | APP_CFLAGS += -DSTATS 28 | 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 30 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 31 | 32 | include $(RULES_DIR)/pmsis_rules.mk 33 | 34 | get_golden: 35 | rm -rf BUILD/ 36 | python3 utils/GM.py --dims $(DIMS) --transposed_axes $(TRANSPOSED_AXES) --dtype $(DATA_TYPE) 37 | -------------------------------------------------------------------------------- /tests/test_transpose/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main(void) { 9 | printf("\nHello there.\nConfiguring cluster..\n"); 10 | 11 | // Configure cluster 12 | struct pi_device cluster_dev; 13 | struct pi_cluster_conf cl_conf; 14 | struct pi_cluster_task cl_task; 15 | 16 | pi_cluster_conf_init(&cl_conf); 17 | pi_open_from_conf(&cluster_dev, &cl_conf); 18 | 19 | if (pi_cluster_open(&cluster_dev)) { 20 | return -1; 21 | } 22 | 23 | printf("\nLaunching transposition procedure...\n"); 24 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, transpose_matrices_fp32, NULL)); 25 | 26 | printf("Transposition successful!\n"); 27 | pi_cluster_close(&cluster_dev); 28 | 29 | pmsis_exit(0); 30 | } 31 | -------------------------------------------------------------------------------- /tests/test_transpose/net.c: 
/*
 * Runs the N-dimensional transpose kernel on the cluster and checks the
 * result against the golden output from test_data.h.
 * DATA_TYPE (32 or 16) selects the FP32 or FP16 kernel at compile time.
 */
void transpose_matrices_fp32() {
#ifdef PROF_NET
    INIT_STATS();
    PRE_START_STATS();
#endif
    printf("Executing on %d cores.\n", NUM_CORES);

    // Argument struct type depends on the selected precision
#if DATA_TYPE == 32
    struct transp_args args;
    printf("WORKING ON FP32\n");
#elif DATA_TYPE == 16
    struct transp_args_fp16 args;
    printf("WORKING ON FP16\n");
#endif

    // Get arguments: input/output buffers, dims and permutation come from
    // the generated test_data.h
    args.in_matrix = IN_M;
    args.out_matrix = OUT_M;
    args.dim = DIMS;
    args.transposed_axes = TRANSPOSED_AXES;
    args.n_dim = N_DIMS;

    // Start counters just before the kernel so only the transpose is profiled
#ifdef PROF_NET
    START_STATS();
#endif

    // Perform transposition in parallel on NUM_CORES cluster cores
#if DATA_TYPE == 32
    pi_cl_team_fork(NUM_CORES, transpose, &args);
#elif DATA_TYPE == 16
    pi_cl_team_fork(NUM_CORES, transpose_fp16, &args);
#endif

    // Stop stats
#ifdef PROF_NET
    STOP_STATS();
#endif

    // Verify the result against the golden reference (mean and elementwise)
    mean_error_checker(OUT_M, TEST_TRANSPOSE_OUT, TOTAL_SIZE);
    elementwise_checker(OUT_M, TEST_TRANSPOSE_OUT, TOTAL_SIZE);

    return;
}
// Main function: runs one profiled ViT forward pass and checks the final
// fully-connected output against the golden OUTPUT tensor.
void net_step() {
    // Initialize performance counters
#ifdef PROF_NET
    INIT_STATS();
    PRE_START_STATS();
#endif

    // Initialize model components (blobs and their connections)
    printf("ViT test:\n");
    printf("Initializing components...\n");
    init_and_connect_blobs();

    // Forward pass, profiled between START_STATS and STOP_STATS so only
    // the inference itself is measured
    printf("Forward pass...\n");
#ifdef PROF_NET
    START_STATS();
#endif
    forward();
#ifdef PROF_NET
    STOP_STATS();
#endif

    // Perform forward check: mean error plus elementwise comparison
    printf("\nChecking forward step results: \n");

    mean_error_checker(fc_output_data, OUTPUT, OUTPUT_SIZE);
    elementwise_checker(fc_output_data, OUTPUT, OUTPUT_SIZE);

    return;
}
#ifndef NET_H
#define NET_H

// PULP DEFINES
#define STACK_SIZE 40960
#define MOUNT 1
#define UNMOUNT 0
#define CID 0
// Buffer size cap used by the DMA transfer helpers below
// NOTE(review): 25104 looks layer-derived — confirm against the generated model
#define MAX_SIZE 25104

#include "pulp_train_defines.h"

// net functions
void forward();     // full ViT forward pass
void net_step();    // test entry point, offloaded to the cluster by main.c

// DMA management functions
// data_diff_both selects what to move: data, gradient, or both
void load_input(void * src_blob, uint8_t data_diff_both);
void load_output(void * src_blob, uint8_t data_diff_both);
void load_coeff(void * src_blob, uint8_t data_diff_both);
void store_output(void * dest_blob, uint8_t data_diff_both);
void store_input(void * dest_blob, uint8_t data_diff_both);
void store_coeff(void * dest_blob, uint8_t data_diff_both);
void copy_struct_param(unsigned int from, unsigned int to, int size);
void get_input_dim(void * b);
void get_output_dim(void * b);
void get_weight_dim(void * b);
void reset_arguments();
void update_blob();
void reset_dim();
#endif
# Writers shared by every transformer block, keyed by the component-name
# suffix. Registered once per block below; tuple order matters, as it fixes
# the insertion order of VIT_COMPONENTS_WRITERS.
_BLOCK_WRITERS = (
    ("norm1", layer_norm_writer),
    ("pre_attn_transpose", transpose_writer),
    ("attn", mhsa_writer),
    ("post_attn_transpose", transpose_writer),
    ("proj", linear_writer),
    ("add_1", vector_sum_writer),
    ("norm2", layer_norm_writer),
    ("pwff_fc1", linear_writer),
    ("pwff_gelu", gelu_writer),
    ("pwff_fc2", linear_writer),
    ("add_2", vector_sum_writer),
)

# Register the writers for each of the 12 transformer blocks.
for block_idx in range(12):
    for suffix, writer in _BLOCK_WRITERS:
        VIT_COMPONENTS_WRITERS[f"transformer_blocks_{block_idx}_{suffix}"] = writer
class PositionalEmbedding1D(nn.Module):
    """Learnable 1-D positional embedding added to a token sequence.

    Holds a single (1, seq_len, dim) parameter, initialized to zeros, that is
    broadcast-added over the batch dimension of the input.
    """

    def __init__(self, seq_len, dim):
        """Create a zero-initialized embedding for `seq_len` tokens of size `dim`."""
        super().__init__()
        self.pos_embedding = nn.Parameter(torch.zeros((1, seq_len, dim)))

    def forward(self, x):
        """Return `x` with the positional embedding added (broadcast over batch)."""
        shifted = x + self.pos_embedding
        return shifted
class SoftmaxFastExp(Function):
    """Custom autograd softmax over the last axis using a fast exponential.

    Uses `fastexp_gist` instead of `torch.exp` — presumably to match the
    fast-exp softmax numerics of the target implementation (TODO confirm).
    """

    @staticmethod
    def forward(ctx, input):
        # Subtract the per-row max before exponentiating for numerical
        # stability; keepdim so it broadcasts back over the last axis.
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes))
        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        # Save the softmax output: backward needs it for the Jacobian product.
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        # Softmax gradient: grad_in = (g - sum(g * y, axis=-1)) * y,
        # where y is the saved forward output.
        out_data = ctx.saved_tensors[0]
        # NOTE(review): repeat(1, 1, 1, n) hard-codes a 4-D input; a keepdim
        # broadcast would be rank-agnostic — confirm callers are always 4-D.
        sums = torch.sum(grad_output * out_data, -1, keepdim=True).repeat(
            1, 1, 1, grad_output.shape[-1]
        )
        grad_input = (grad_output - sums) * out_data

        return grad_input
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # priority timeout out_xml proc in_yaml 4 | nice -n10 python sw/bwruntest.py --report-junit -t 1800 --yaml -o ne16_tests.xml -p 32 ./basic.yml 5 | -------------------------------------------------------------------------------- /tools/TrainLib_Deployer/deployer_utils/DNN_Reader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | ''' 18 | Authors: Davide Nadalini 19 | ''' -------------------------------------------------------------------------------- /tools/TrainLib_Deployer/deployer_utils/srcfiles/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Configures cluster, then calls net_step()
 **/

int main (void) {

  printf("\nHello sir.\nConfiguring cluster..\n");
  // Configure cluster: device handle, configuration and task descriptors
  struct pi_device cluster_dev;
  struct pi_cluster_conf cl_conf;
  struct pi_cluster_task cl_task;

  pi_cluster_conf_init(&cl_conf);
  pi_open_from_conf(&cluster_dev, &cl_conf);
  // Abort if the cluster cannot be opened
  if (pi_cluster_open(&cluster_dev))
  {
    return -1;
  }

  // Offload the generated net_step() to the cluster as a blocking task
  printf("\nLaunching training procedure...\n");
  pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL));

  // Task returned: shut the cluster down and exit
  printf("Exiting DNN Training.\n");
  pi_cluster_close(&cluster_dev);

  pmsis_exit(0);
}