├── .gitignore ├── LICENSE ├── README.md ├── assets ├── CI_test_suite │ ├── .gitignore │ ├── README.md │ ├── ci_utils.py │ ├── profile_utils.py │ └── test_suite.py └── img │ ├── autotuner.png │ ├── pulp-trainlib-mm-flow.png │ ├── pulp-trainlib-primitives.png │ └── trainlib-deployer.png ├── lib ├── README.md ├── include │ ├── mm_manager_list.txt │ ├── mm_manager_list_fp16.txt │ ├── pulp_act_fp16.h │ ├── pulp_act_fp32.h │ ├── pulp_batchnorm_fp32.h │ ├── pulp_conv2d_fp16.h │ ├── pulp_conv2d_fp32.h │ ├── pulp_conv_dw_fp16.h │ ├── pulp_conv_dw_fp32.h │ ├── pulp_conv_naive_fp16.h │ ├── pulp_conv_naive_fp32.h │ ├── pulp_conv_pw_fp16.h │ ├── pulp_conv_pw_fp32.h │ ├── pulp_dropout_fp16.h │ ├── pulp_dropout_fp32.h │ ├── pulp_embedding_fp16.h │ ├── pulp_im2col_fp16.h │ ├── pulp_im2col_fp32.h │ ├── pulp_instnorm_fp16.h │ ├── pulp_instnorm_fp32.h │ ├── pulp_interpolation_fp16.h │ ├── pulp_interpolation_fp32.h │ ├── pulp_layernorm_fp32.h │ ├── pulp_linear_fp16.h │ ├── pulp_linear_fp32.h │ ├── pulp_losses_fp16.h │ ├── pulp_losses_fp32.h │ ├── pulp_matmul_fp16.h │ ├── pulp_matmul_fp32.h │ ├── pulp_mhsa_fp16.h │ ├── pulp_mhsa_fp32.h │ ├── pulp_nonorm_fp16.h │ ├── pulp_nonorm_fp32.h │ ├── pulp_optimizers_fp16.h │ ├── pulp_optimizers_fp32.h │ ├── pulp_pooling_fp16.h │ ├── pulp_pooling_fp32.h │ ├── pulp_random.h │ ├── pulp_residual_fp16.h │ ├── pulp_residual_fp32.h │ ├── pulp_rmsnorm_fp16.h │ ├── pulp_rmsnorm_fp32.h │ ├── pulp_rnn_fp32.h │ ├── pulp_train.h │ ├── pulp_train_defines.h │ ├── pulp_train_utils_fp16.h │ ├── pulp_train_utils_fp32.h │ ├── pulp_transp_conv2d_fp16.h │ ├── pulp_transp_conv2d_fp32.h │ └── tensor_checkers.h └── sources │ ├── pulp_act_fp16.c │ ├── pulp_act_fp32.c │ ├── pulp_batchnorm_fp32.c │ ├── pulp_conv2d_fp16.c │ ├── pulp_conv2d_fp32.c │ ├── pulp_conv_dw_fp16.c │ ├── pulp_conv_dw_fp32.c │ ├── pulp_conv_naive_fp16.c │ ├── pulp_conv_naive_fp32.c │ ├── pulp_conv_pw_fp16.c │ ├── pulp_conv_pw_fp32.c │ ├── pulp_dropout_fp16.c │ ├── pulp_dropout_fp32.c │ ├── 
pulp_embedding_fp16.c │ ├── pulp_im2col_fp16.c │ ├── pulp_im2col_fp32.c │ ├── pulp_instnorm_fp16.c │ ├── pulp_instnorm_fp32.c │ ├── pulp_interpolation_fp16.c │ ├── pulp_interpolation_fp32.c │ ├── pulp_layernorm_fp32.c │ ├── pulp_linear_fp16.c │ ├── pulp_linear_fp32.c │ ├── pulp_losses_fp16.c │ ├── pulp_losses_fp32.c │ ├── pulp_matmul_fp16.c │ ├── pulp_matmul_fp32.c │ ├── pulp_mhsa_fp16.c │ ├── pulp_mhsa_fp32.c │ ├── pulp_nonorm_fp16.c │ ├── pulp_nonorm_fp32.c │ ├── pulp_optimizers_fp16.c │ ├── pulp_optimizers_fp32.c │ ├── pulp_pooling_fp16.c │ ├── pulp_pooling_fp32.c │ ├── pulp_random.c │ ├── pulp_residual_fp16.c │ ├── pulp_residual_fp32.c │ ├── pulp_rmsnorm_fp16.c │ ├── pulp_rmsnorm_fp32.c │ ├── pulp_rnn_fp32.c │ ├── pulp_train_utils_fp16.c │ ├── pulp_train_utils_fp32.c │ ├── pulp_transp_conv2d_fp16.c │ └── pulp_transp_conv2d_fp32.c ├── tests ├── .gitignore ├── README.md ├── mm_manager_list.txt ├── mm_manager_list_fp16.txt ├── test_DMA_tensor │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_ResNet_CIFAR10 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── pulp-sdk-configs │ │ ├── link.ld │ │ └── pulp.json │ ├── readme.txt │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_act │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ └── dump_utils.py ├── test_batchnorm_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_blocktranspose │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_broadcast_add │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ └── GM.py ├── test_broadcast_matmul │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ └── GM.py ├── 
test_conv2d_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_conv2d_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_conv_pw_dw_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_conv_pw_dw_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_cordic │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_dropout │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ └── dump_utils.py ├── test_gelu_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ ├── profile_utils.py │ │ └── test_model.py ├── test_im2col │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_instnorm_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── readme.txt │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_instnorm_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── readme.txt │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── 
test_interpolation │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ └── dump_utils.py ├── test_layernorm_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ ├── tensor_checkers.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_layout_change │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_linear_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_linear_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_losses_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── losses.py ├── test_losses_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── losses.py ├── test_matmul │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── output_eval.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── profile_fastest.py ├── test_mhsa_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mhsa_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── 
profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mhsa_fp32_partialsoftmax_old │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── mhsa_partial_softmax.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mhsa_paper_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net-args.h │ ├── net.c │ ├── net.h │ ├── net_l1.c │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mhsa_paper_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net-args.h │ ├── net.c │ ├── net.h │ ├── net_l1.c │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── SoftmaxFastExp.py │ │ ├── dump_utils.py │ │ ├── mhsa.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_mobilebert_fp16 │ ├── .gitignore │ ├── Makefile │ ├── binarygen.py │ ├── main.c │ ├── net.c │ ├── net.h │ ├── net_args.h │ ├── net_flash.c │ ├── net_l1.c │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ └── dump_utils.py ├── test_mobilebert_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── net_args.h │ ├── net_l1.c │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ └── dump_utils.py ├── test_pad │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_pooling │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ └── dump_utils.py ├── test_random │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ └── stats.h ├── test_reduce_mean │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── .gitignore │ │ └── GM.py ├── 
test_residual │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ ├── utils │ │ ├── GM.py │ │ └── dump_utils.py │ └── variables.h ├── test_rnn_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── profile_optimized.py │ │ ├── profile_sizes.py │ │ └── profile_utils.py ├── test_tiny_vit_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ ├── tensor_checkers.h │ └── utils │ │ ├── .gitignore │ │ ├── GM.py │ │ ├── dump_utils.py │ │ ├── model │ │ ├── Attention.py │ │ ├── BasicLayer.py │ │ ├── Conv2dBN.py │ │ ├── ConvLayer.py │ │ ├── DropPath.py │ │ ├── LocalAttention.py │ │ ├── MBConv.py │ │ ├── Mlp.py │ │ ├── PatchEmbed.py │ │ ├── PatchMerging.py │ │ ├── RotaryEmbedding.py │ │ ├── SinusoidalEmbeddings.py │ │ ├── SparseAttention.py │ │ ├── TinyViT.py │ │ ├── TinyViTBlock.py │ │ └── model_utils.py │ │ ├── model_configs.py │ │ └── writers │ │ ├── component_writers.py │ │ ├── file_writers.py │ │ └── writers_utils.py ├── test_transp_conv2d_fp16 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── profile_utils.py ├── test_transp_conv2d_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ ├── GM.py │ │ ├── dump_utils.py │ │ └── profile_utils.py ├── test_transpose │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ └── utils │ │ └── GM.py └── test_vit_fp32 │ ├── .gitignore │ ├── Makefile │ ├── main.c │ ├── net.c │ ├── net.h │ ├── stats.h │ ├── tensor_checkers.h │ └── utils │ ├── .gitignore │ ├── GM.py │ ├── dump_utils.py │ ├── torch_to_trainlib.py │ ├── vit_lr │ ├── MultiHeadSelfAttention.py │ ├── PositionWiseFeedForward.py │ ├── PositionalEmbedding1D.py │ ├── README.md │ ├── ResizeProcedure.py │ ├── SoftmaxFastExp.py │ ├── Transformer.py │ ├── 
TransformerBlock.py │ ├── ViTLR_model.py │ └── vit_lr_utils.py │ └── writers │ ├── component_writers.py │ └── file_writers.py └── tools ├── .gitignore ├── AutoTuner ├── autotuner.py ├── server_execution_files │ ├── run_regression.sh │ └── sw │ │ └── bwruntest.py └── tiling_utils.py ├── README.md ├── TrainLib_Deployer ├── TrainLib_Deployer.py └── deployer_utils │ ├── DNN_Composer.py │ ├── DNN_Reader.py │ ├── GM_templates.py │ ├── deployment_utils.py │ ├── deployment_utils_double_buffer.py │ ├── deployment_utils_single_buffer.py │ ├── net_templates.py │ ├── net_templates_double_buffer.py │ ├── net_templates_single_buffer.py │ └── srcfiles │ ├── dump_utils.py │ ├── main.c │ └── stats.h └── memory_footprint_tool └── memory_footprint_eval.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | **/.vscode/ 3 | **/__pycache__/ 4 | .idea/ 5 | -------------------------------------------------------------------------------- /assets/CI_test_suite/.gitignore: -------------------------------------------------------------------------------- 1 | /__pycache__/ 2 | test_suite_results.txt 3 | temp/ -------------------------------------------------------------------------------- /assets/CI_test_suite/README.md: -------------------------------------------------------------------------------- 1 | # Test suite for continuous integration 2 | 3 | By launching the [test suite](test_suite.py), users can verify PULP-TrainLib's primitives. 4 | To extend the test suite, please insert a new section in the Python suite, by following the structure of the other primitives. 5 | 6 | The test suite is designed to create a `temp/` folder which contains all the tests that have been executed. In each test, the output is contained into its respective `log.txt` file, which is filled with the terminal's output. A summary of the execution of each test is then stored into `test_suite_results.txt`. 
Check for the expression `CONTAINS ERRORS` to check for tests which failed. -------------------------------------------------------------------------------- /assets/CI_test_suite/ci_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | import os 17 | import shutil 18 | 19 | # Copy related test folder into temp 20 | def copy_test_folder_ci (test_id, ci_test_folder, test_folder): 21 | 22 | test_dest_folder = str(ci_test_folder)+"/temp/tests/ci_test_"+str(test_id) 23 | if not os.path.exists(test_dest_folder): 24 | os.mkdir(test_dest_folder) 25 | os.chdir(test_dest_folder) 26 | shutil.copytree(test_folder, test_dest_folder, dirs_exist_ok=True) 27 | 28 | 29 | # Copy the trainlib into the suitable position 30 | def copy_trainlib_ci (ci_test_folder, trainlib_folder): 31 | 32 | trainlib_dest_folder = str(ci_test_folder)+"/temp/lib" 33 | os.chdir(trainlib_dest_folder) 34 | shutil.copytree(trainlib_folder, trainlib_dest_folder, dirs_exist_ok=True) 35 | 36 | 37 | -------------------------------------------------------------------------------- /assets/img/autotuner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pulp-platform/pulp-trainlib/784f73e0b7dbd183f742b2750708fb65418d7f23/assets/img/autotuner.png 
-------------------------------------------------------------------------------- /assets/img/pulp-trainlib-mm-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pulp-platform/pulp-trainlib/784f73e0b7dbd183f742b2750708fb65418d7f23/assets/img/pulp-trainlib-mm-flow.png -------------------------------------------------------------------------------- /assets/img/pulp-trainlib-primitives.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pulp-platform/pulp-trainlib/784f73e0b7dbd183f742b2750708fb65418d7f23/assets/img/pulp-trainlib-primitives.png -------------------------------------------------------------------------------- /assets/img/trainlib-deployer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pulp-platform/pulp-trainlib/784f73e0b7dbd183f742b2750708fb65418d7f23/assets/img/trainlib-deployer.png -------------------------------------------------------------------------------- /lib/include/pulp_dropout_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | /** 18 | * Authors: Alberto Dequino 19 | */ 20 | 21 | #include 22 | #include "pulp_train_defines.h" 23 | 24 | /** 25 | * @brief Structure for FP16 dropout 26 | * @param probability the probability of the single value to be dropped 27 | * @param input input to apply the dropout 28 | * @param use_mask flag to choose whether to do a real dropout or just apply a mask (useful for reproducing GM results) 29 | * @param mask vector used for masking (requires use_mask==1, and same size of input vector) 30 | * @param size input/mask vector size 31 | * @param seed initial seed value 32 | */ 33 | struct dropout_args_fp16{ 34 | fp16 probability; 35 | fp16 * input; 36 | int use_mask; 37 | fp16 * mask; 38 | int size; 39 | int seed; 40 | }; 41 | 42 | 43 | /** 44 | * PULP-TrainLib's definitions 45 | */ 46 | 47 | 48 | /** 49 | * @brief FP16 Dropout function 50 | */ 51 | void pulp_dropout_fp16_cl(void * dropout_args); -------------------------------------------------------------------------------- /lib/include/pulp_dropout_fp32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | /** 18 | * Authors: Alberto Dequino 19 | */ 20 | 21 | #include 22 | 23 | /** 24 | * @brief Structure for FP32 dropout 25 | * @param probability the probability of the single value to be dropped 26 | * @param input input to apply the dropout 27 | * @param use_mask flag to choose whether to do a real dropout or just apply a mask (useful for reproducing GM results) 28 | * @param mask vector used for masking (requires use_mask==1, and same size of input vector) 29 | * @param size input/mask vector size 30 | * @param seed initial seed value 31 | */ 32 | struct dropout_args_fp32{ 33 | float probability; 34 | float * input; 35 | int use_mask; 36 | float * mask; 37 | int size; 38 | int seed; 39 | }; 40 | 41 | 42 | /** 43 | * PULP-TrainLib's definitions 44 | */ 45 | 46 | 47 | /** 48 | * @brief FP32 Dropout function 49 | */ 50 | void pulp_dropout_fp32_cl(void * dropout_args); -------------------------------------------------------------------------------- /lib/include/pulp_embedding_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | 18 | /** 19 | * Authors: Alberto Dequino 20 | */ 21 | 22 | struct Embedding_args_fp16{ 23 | fp16* BUFF; // per-core DMA staging buffer, one embed_dim-sized slot per core (see pulp_embedding_fp16.c) 24 | int dim; // number of token ids to embed 25 | int embed_dim; // width of one embedding vector 26 | int *ids; // token ids, length dim 27 | fp16 *embeds; // embedding table (read via EXT2LOC DMA, so external memory; rows of embed_dim fp16) 28 | fp16 *out; // output buffer, dim x embed_dim (written via LOC2EXT DMA) 29 | }; 30 | 31 | void embedding_fw_tiled_fp16(void *embedding_args); -------------------------------------------------------------------------------- /lib/include/pulp_layernorm_fp32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 University of Bologna 3 | * All rights reserved. 4 | * 5 | * This software may be modified and distributed under the terms 6 | * of the BSD license. See the LICENSE file for details. 7 | * 8 | * Authors: Calin Diaconu (calin.diaconu@studio.unibo.it) 9 | */ 10 | 11 | #ifndef PULP_TRAINLIB_PULP_LAYERNORM_FP32_H 12 | #define PULP_TRAINLIB_PULP_LAYERNORM_FP32_H 13 | 14 | #include "math.h" 15 | 16 | /** 17 | * @brief Arguments for the forward pass of the LayerNorm layer. 18 | * @brief x: input tensor 19 | * @brief weight: weight tensor 20 | * @brief bias: bias tensor 21 | * @brief output: output tensor 22 | * @brief eps: epsilon value 23 | * @brief size: size of the tensors 24 | * @brief step_size: step size over which the normalization is performed 25 | */ 26 | struct LayerNorm_args_fp32 { 27 | float *x; 28 | float *weight; 29 | float *bias; 30 | float *output; 31 | float *eps; 32 | int size; 33 | int step_size; 34 | }; 35 | 36 | /** 37 | * @brief Forward function that calls the parallelized version for the LayerNorm layer. 
38 | * @param (void *) (struct LayerNorm_args_fp32 void_args) 39 | */ 40 | void pulp_layerNorm_fp32_fw_cl(void *layer_norm_args); 41 | 42 | #endif //PULP_TRAINLIB_PULP_LAYERNORM_FP32_H 43 | -------------------------------------------------------------------------------- /lib/include/pulp_nonorm_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | /** 19 | * Nonorm training functions, grouped into FW and BW 20 | * 21 | * Authors: Alberto Dequino 22 | */ 23 | 24 | 25 | /** 26 | * Nonorm layer configuration structure 27 | */ 28 | 29 | /** 30 | * @brief Structure for NoNorm Training in FP16 31 | * @param input input for the nonorm layer (from forward perspective) 32 | * @param coeff weight vector 33 | * @param bias bias 34 | * @param output output for the nonorm layer (from forward perspective) 35 | */ 36 | struct Nonorm_args_fp16 { 37 | struct blob_fp16 * input; 38 | struct blob_fp16 * coeff; 39 | struct blob_fp16 * bias; 40 | struct blob_fp16 * output; 41 | }; 42 | 43 | /** 44 | * Nonorm layer training functions, grouped into FW and BW 45 | */ 46 | 47 | // FORWARD FUNCTIONS 48 | 49 | /** 50 | * @brief Forward pass function, forked on PULP cluster. 51 | * @param Nonorm_args_fp16 structure configuring the nonorm layer. 
52 | */ 53 | void pulp_nonorm_fp16_fw_cl( void * Nonorm_args ); 54 | 55 | 56 | -------------------------------------------------------------------------------- /lib/include/pulp_nonorm_fp32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | /** 19 | * Nonorm training functions, grouped into FW and BW 20 | * 21 | * Authors: Alberto Dequino 22 | */ 23 | 24 | 25 | /** 26 | * Nonorm layer configuration structure 27 | */ 28 | 29 | /** 30 | * @brief Structure for NoNorm Training in FP32 31 | * @param input input for the nonorm layer (from forward perspective) 32 | * @param coeff weight vector 33 | * @param bias bias 34 | * @param output output for the nonorm layer (from forward perspective) 35 | */ 36 | struct Nonorm_args { 37 | struct blob * input; 38 | struct blob * coeff; 39 | struct blob * bias; 40 | struct blob * output; 41 | }; 42 | 43 | /** 44 | * Nonorm layer training functions, grouped into FW and BW 45 | */ 46 | 47 | // FORWARD FUNCTIONS 48 | 49 | /** 50 | * @brief Forward pass function, forked on PULP cluster. 51 | * @param Nonorm_args structure configuring the nonorm layer. 
52 | */ 53 | void pulp_nonorm_fp32_fw_cl( void * Nonorm_args ); 54 | 55 | 56 | -------------------------------------------------------------------------------- /lib/include/pulp_optimizers_fp16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Authors: Davide Nadalini, Leonardo Ravaglia 19 | */ 20 | 21 | #include "pulp_train_defines.h" 22 | 23 | /** 24 | * Optimizer configuration structure 25 | */ 26 | 27 | /** 28 | * @brief Parameters for optimizer functions for every single layer 29 | * @param weights blob of the weights (with their gradient inside) 30 | * @param bias blob of the biases (with their gradient inside) 31 | * @param learning_rate the learning rate of the optimizer 32 | * @param use_biases flag: use bias (1) or not use bias (0). 33 | */ 34 | struct optim_args_fp16 { 35 | struct blob_fp16 * weights; 36 | struct blob_fp16 * biases; 37 | fp16 learning_rate; 38 | int use_biases; 39 | }; 40 | 41 | 42 | 43 | /** 44 | * Optimizers 45 | **/ 46 | 47 | /** 48 | * @brief Gradient descent optimizer for a single layer. Use pi_cl_team_fork(NUM_CORES, pulp_gradient_descent_fp16, &args) to parallelize. 
49 | * @param optim_args pointer to optim_args structure (see pulp_train_utils_fp32.h) 50 | */ 51 | void pulp_gradient_descent_fp16( 52 | void * optim_args 53 | ); 54 | -------------------------------------------------------------------------------- /lib/include/pulp_optimizers_fp32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Authors: Davide Nadalini, Leonardo Ravaglia 19 | */ 20 | 21 | 22 | /** 23 | * Optimizer configuration structure 24 | */ 25 | 26 | /** 27 | * @brief Structure for optimizers 28 | * @param weights blob of the weights (with their gradient inside) 29 | * @param bias blob of the biases (with their gradient inside) 30 | * @param learning_rate the learning rate of the optimizer 31 | * @param use_biases flag: use bias (1) or not use bias (0). 32 | */ 33 | struct optim_args { 34 | struct blob * weights; 35 | struct blob * biases; 36 | float learning_rate; 37 | int use_biases; 38 | }; 39 | 40 | 41 | 42 | /** 43 | * Optimizers 44 | **/ 45 | 46 | /** 47 | * @brief Gradient descent optimizer for a single layer. Use pi_cl_team_fork(NUM_CORES, pulp_gradient_descent_fp32, &args) to parallelize. 
48 | * @param optim_args pointer to optim_args structure (see pulp_train_utils_fp32.h) 49 | */ 50 | void pulp_gradient_descent_fp32( 51 | void * optim_args 52 | ); 53 | -------------------------------------------------------------------------------- /lib/include/pulp_rmsnorm_fp16.h: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "pulp_train_defines.h" 3 | 4 | struct weighted_scaling_args_fp16 { 5 | fp16* out; 6 | fp16* in; 7 | fp16* w; 8 | fp16 scaling_factor; 9 | unsigned int size; 10 | }; 11 | 12 | struct sum_of_squares_args_fp16 { 13 | fp16* out; 14 | fp16* in; 15 | unsigned int size; 16 | }; 17 | 18 | void weighted_scaling_fp16_cl(void* weighted_scaling_args_fp16); 19 | 20 | void sum_of_squares_fp16_cl(void* sum_of_squares_args_fp16); 21 | 22 | void rmsnorm_parallelized_fp16(fp16* o, fp16* x, fp16* weight, fp16* buffer_n_cores, int size); 23 | -------------------------------------------------------------------------------- /lib/sources/pulp_embedding_fp16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2024 University of Bologna 3 | * All rights reserved. 4 | * 5 | * This software may be modified and distributed under the terms 6 | * of the BSD license. See the LICENSE file for details. 
7 | * 8 | * Authors: Alberto Dequino (alberto.dequino@unibo.it) 9 | */ 10 | 11 | #include "pulp_train_utils_fp16.h" 12 | #include "pulp_embedding_fp16.h" 13 | #include "pulp_train_defines.h" 14 | #include "pmsis.h" 15 | 16 | // FORWARD, TILED 17 | 18 | void embedding_fw_tiled_fp16(void *embedding_args){ 19 | struct Embedding_args_fp16 *args = (struct Embedding_args_fp16*) embedding_args; 20 | 21 | fp16 *BUFF = args->BUFF; 22 | 23 | int dim = args->dim; 24 | int embed_dim = args->embed_dim; 25 | 26 | pi_cl_dma_cmd_t * cmd_store; 27 | pi_cl_dma_cmd_t * cmd_load; 28 | 29 | const int blockSize=(dim+NUM_CORES-1)/NUM_CORES; 30 | const int start = pi_core_id()*blockSize; 31 | const int stop = start + blockSize > dim ? dim : start+blockSize; 32 | 33 | for(int i = start; i < stop; i++){ 34 | int id = (args->ids)[i]; 35 | pi_cl_dma_cmd((uint32_t) (args->embeds + id * embed_dim), (uint32_t) (BUFF + (int) (pi_core_id()) * embed_dim), 2 * embed_dim, PI_CL_DMA_DIR_EXT2LOC, cmd_load); 36 | pi_cl_dma_cmd_wait(cmd_load); 37 | pi_cl_dma_cmd((uint32_t) (args->out + i * embed_dim), (uint32_t) (BUFF + (int) (pi_core_id()) * embed_dim), 2 * embed_dim, PI_CL_DMA_DIR_LOC2EXT, cmd_store); 38 | pi_cl_dma_cmd_wait(cmd_store); 39 | } 40 | } -------------------------------------------------------------------------------- /lib/sources/pulp_nonorm_fp16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Authors: Alberto Dequino 19 | */ 20 | 21 | #include "pulp_train_utils_fp16.h" 22 | #include "pulp_nonorm_fp16.h" 23 | 24 | void pulp_nonorm_fp16_fw_cl( void * Nonorm_args ) 25 | { 26 | struct Nonorm_args_fp16 * NN_args = (struct Nonorm_args_fp16 *) Nonorm_args; 27 | fp16 *coeffData = NN_args->coeff->data; 28 | fp16 *biasData = NN_args->bias->data; 29 | fp16 *outData = NN_args->output->data; 30 | fp16 *inputData = NN_args->input->data; 31 | fp16 temp; 32 | 33 | int N = (NN_args->input)->H; // Sequence Length (we parallelize on this) 34 | int W = (NN_args->input)->W; // Embedding size 35 | 36 | const uint32_t blockSize = (N+NUM_CORES-1) / NUM_CORES; 37 | const uint32_t start = pi_core_id()*blockSize; 38 | const uint32_t stop = start+blockSize > N ? N : start+blockSize; 39 | 40 | for(uint32_t i = start; i < stop; i++){ 41 | int row = i * W; 42 | for(uint32_t j = 0; j < W; j++){ 43 | temp = inputData[row + j] * coeffData[j]; 44 | outData[row + j] = temp + biasData[j]; 45 | } 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /lib/sources/pulp_nonorm_fp32.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2024 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /** 18 | * Authors: Alberto Dequino 19 | */ 20 | 21 | #include "pulp_train_utils_fp32.h" 22 | #include "pulp_nonorm_fp32.h" 23 | 24 | void pulp_nonorm_fp32_fw_cl( void * Nonorm_args ) 25 | { 26 | struct Nonorm_args * NN_args = (struct Nonorm_args *) Nonorm_args; 27 | float *coeffData = NN_args->coeff->data; 28 | float *biasData = NN_args->bias->data; 29 | float *outData = NN_args->output->data; 30 | float *inputData = NN_args->input->data; 31 | float temp; 32 | 33 | int N = (NN_args->input)->H; // Sequence Length (we parallelize on this) 34 | int W = (NN_args->input)->W; // Embedding size 35 | 36 | const uint32_t blockSize = (N+NUM_CORES-1) / NUM_CORES; 37 | const uint32_t start = pi_core_id()*blockSize; 38 | const uint32_t stop = start+blockSize > N ? N : start+blockSize; 39 | 40 | for(uint32_t i = start; i < stop; i++){ 41 | int row = i * W; 42 | for(uint32_t j = 0; j < W; j++){ 43 | temp = inputData[row + j] * coeffData[j]; 44 | outData[row + j] = temp + biasData[j]; 45 | } 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | build_every_layer_optim.py 2 | allprof.txt 3 | build_every_matmul.py 4 | all_matmul.txt 5 | start_pulp.sh 6 | v2_start_pulp.sh 7 | license_header.txt 8 | -------------------------------------------------------------------------------- /tests/mm_manager_list_fp16.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------ 2 | -- Copyright (C) 2021-2022 ETH Zurich and University of Bologna -- 3 | -- -- 4 | -- Licensed under the Apache License, Version 2.0 (the "License"); -- 5 | -- you may not use this file except in compliance with the License. 
-- 6 | -- You may obtain a copy of the License at -- 7 | -- -- 8 | -- http://www.apache.org/licenses/LICENSE-2.0 -- 9 | -- -- 10 | -- Unless required by applicable law or agreed to in writing, software -- 11 | -- distributed under the License is distributed on an "AS IS" BASIS, -- 12 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- 13 | -- See the License for the specific language governing permissions and -- 14 | -- limitations under the License. -- 15 | ------------------------------------------------------------------------------ 16 | 17 | 18 | ---------------------------------------------------- 19 | --- MM_MANAGER MATMUL LIST (for layer profiling) --- 20 | ---------------------------------------------------- 21 | 22 | STANDARD MATMULS: 23 | 24 | // Naives 25 | matmul_type == 0 26 | mm_fp16 27 | matmul_type == 1 28 | mm_M_fp16 29 | 30 | // Parallelism on N 31 | matmul_type == 2 32 | mm_fp16_SIMD_2x4 33 | matmul_type == 3 34 | mm_fp16_SIMD_4x8 35 | 36 | // Parallelism on M 37 | matmul_type == 4 38 | mm_M_fp16_SIMD_2x4 39 | matmul_type == 5 40 | mm_M_fp16_SIMD_4x8 41 | 42 | END STANDARD 43 | -------------------------------------------------------------------------------- /tests/test_DMA_tensor/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_DMA_tensor/Makefile: -------------------------------------------------------------------------------- 1 | APP = dma_transfer 2 | 3 | # User settings 4 | NUM_CORES?=1 5 | BITS?=16 6 | # Layer sizes 7 | HEIGHT?=4 8 | WIDTH?=4 9 | CHANNELS?=64*12 10 | #APP_CFLAGS += -DMERGE_PARALLEL 11 | #APP_CFLAGS += -DDEBUG_APP 12 | #APP_CFLAGS += -DPRINT_OUTPUT 13 | # End of user settings 14 | 15 | TRAIN_LIB=../../lib 16 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 17 | APP_SRCS = main.c net.c 18 | 19 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 20 | APP_CFLAGS += -O3 -g3 -mno-memcpy 21 | APP_CFLAGS += -DFABRIC 22 | APP_CFLAGS += -DCLUSTER 23 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 24 | APP_CFLAGS += -DBITS=$(BITS) 25 | APP_CFLAGS += -DPROF_NET 26 | APP_CFLAGS += -DHEIGHT=$(HEIGHT) 27 | APP_CFLAGS += -DWIDTH=$(WIDTH) 28 | APP_CFLAGS += -DCHANNELS=$(CHANNELS) 29 | #APP_CFLAGS += -DDEBUG 30 | APP_CFLAGS += -mhwloopalign 31 | APP_LDFLAGS += -lm 32 | 33 | # STATISTICS 34 | APP_CFLAGS += -DSTATS 35 | 36 | # Sources 37 | # APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 38 | 39 | include $(RULES_DIR)/pmsis_rules.mk 40 | -------------------------------------------------------------------------------- /tests/test_DMA_tensor/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching training procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Net training successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_DMA_tensor/net.h: -------------------------------------------------------------------------------- 1 | // Tensor sizes 2 | #define TENSOR_SIZE (WIDTH*HEIGHT*CHANNELS) 3 | 4 | // PULP DEFINES 5 | #define STACK_SIZE 4096 6 | #define MOUNT 1 7 | #define UNMOUNT 0 8 | #define CID 0 9 | 10 | void net_step (); 
-------------------------------------------------------------------------------- /tests/test_ResNet_CIFAR10/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | cifar* 3 | log.csv 4 | init-defines.h 5 | io_data.h 6 | -------------------------------------------------------------------------------- /tests/test_ResNet_CIFAR10/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /** 21 | * Configures cluster, then calls net_step() 22 | **/ 23 | 24 | int main (void) { 25 | 26 | 27 | printf("\nHello sir.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Exiting DNN Training.\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_ResNet_CIFAR10/net.h: -------------------------------------------------------------------------------- 1 | // PULP Defines 2 | #define STACK_SIZE 4096 3 | 4 | // Tolerance to check updated output 5 | #define TOLERANCE 1e-12 6 | 7 | // Training functions 8 | void DNN_init(); 9 | void compute_loss(int idx); 10 | void update_weights(); 11 | void forward(); 12 | void backward(); 13 | void net_step(); 14 | 15 | // Print and check functions 16 | void print_output(); 17 | void check_post_training_output(); 18 | 19 | // DMA managment functions 20 | void load_input(void * src_blob, uint8_t data_diff_both); 21 | void load_output(void * src_blob, uint8_t data_diff_both); 22 | void load_coeff(void * src_blob, uint8_t data_diff_both); 23 | void store_output(void * dest_blob, uint8_t data_diff_both); 24 | void store_input(void * dest_blob, uint8_t data_diff_both); 25 | void store_coeff(void * dest_blob, uint8_t data_diff_both); 26 | void copy_struct_param(unsigned int from, unsigned int to, int size); 27 | void get_input_dim(void * b); 28 | void get_output_dim(void * b); 29 | void get_weight_dim(void * b); 30 | void 
reset_arguments(); 31 | void update_blob(); 32 | void PrintBlob(void * b, int step); 33 | void reset_dim(); 34 | #define MAX_IN_SIZE 6400 35 | #define MAX_WGT_SIZE 6400 36 | #define MAX_SIZE 27664 37 | -------------------------------------------------------------------------------- /tests/test_ResNet_CIFAR10/readme.txt: -------------------------------------------------------------------------------- 1 | To compile the application, run "make clean get_golden all run". 2 | To modify the hyperparameters (learning rate, epochs, batch size still not implemented), edit the variables inside "utils/GM.py". 3 | 4 | N.B: this project needs to have an L2 of at least 8 MB in GVSoC, please edit GVSoC's memory map to fit this requirement. 5 | To do so, copy & paste the content of "pulp-sdk-configs/"'s files in the respective files of your pulp.sdk (THIS VERSION IS PREFERABLE: https://github.com/pulp-platform/pulp-sdk/releases/tag/2021.09.15): 6 | 7 | 1) pulp-sdk/rtos/pulpos/kernel/chips/pulp/link.ld 8 | 9 | 2) pulp-sdk/tools/gap-configs/configs/config/pulp.json 10 | -------------------------------------------------------------------------------- /tests/test_act/.gitignore: -------------------------------------------------------------------------------- 1 | init_defines.h 2 | act_data.h 3 | log.txt 4 | BUILD/ -------------------------------------------------------------------------------- /tests/test_act/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_act 2 | 3 | # ~~~~~~~~~~ User settings ~~~~~~~~~~ 4 | # Standard matmul arguments 5 | IN_H?=4 6 | IN_W?=4 7 | IN_C?=8 8 | VALUE?=0.05 9 | 10 | # General arguments 11 | DATA_TYPE?='FP16' # FP32 or FP16 12 | NUM_CORES?=8 13 | 14 | # ~~~~~~~~~~ End of user settings ~~~~~~~~~~ 15 | 16 | TRAIN_LIB=../../lib 17 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 18 | APP_SRCS += main.c net.c 19 | 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 21 | APP_SRCS += 
$(TRAIN_LIB_SRCS)/pulp_act_fp32.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp16.c 24 | 25 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 26 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 27 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 28 | APP_CFLAGS += -DPROF_NET 29 | APP_CFLAGS += -DIN_H=$(IN_H) 30 | APP_CFLAGS += -DIN_W=$(IN_W) 31 | APP_CFLAGS += -DIN_C=$(IN_C) 32 | APP_CFLAGS += -DVALUE=$(VALUE) 33 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 34 | #APP_CFLAGS += -DDEBUG 35 | 36 | APP_LDFLAGS += -lm 37 | 38 | # STATISTICS 39 | APP_CFLAGS += -DSTATS 40 | 41 | get_golden: 42 | rm -rf BUILD/ 43 | python3 ./utils/GM.py --in_c $(IN_C) --in_h $(IN_H) --in_w $(IN_W) --value $(VALUE) --data_type $(DATA_TYPE) 44 | 45 | include $(RULES_DIR)/pmsis_rules.mk 46 | -------------------------------------------------------------------------------- /tests/test_act/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | printf("\nHello there.\nConfiguring cluster..\n"); 26 | 27 | // Configure cluster 28 | struct pi_device cluster_dev; 29 | struct pi_cluster_conf cl_conf; 30 | struct pi_cluster_task cl_task; 31 | 32 | pi_cluster_conf_init(&cl_conf); 33 | pi_open_from_conf(&cluster_dev, &cl_conf); 34 | if (pi_cluster_open(&cluster_dev)) { 35 | return -1; 36 | } 37 | 38 | printf("\nLaunching activations evaluation...\n\n"); 39 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 40 | 41 | printf("\nActivation function evaluation successfully terminated :)\n"); 42 | pi_cluster_close(&cluster_dev); 43 | 44 | pmsis_exit(0); 45 | } 46 | -------------------------------------------------------------------------------- /tests/test_act/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
import torch
from torch.autograd import Function


def fastexp_gist(x: torch.Tensor) -> torch.Tensor:
    """Approximate elementwise exp() via the float32 bit-trick.

    Scales and offsets the input so that, when the resulting float value is
    reinterpreted as raw IEEE-754 float32 bits, it approximates exp(x)
    (Schraudolph-style fast exponential). Values are clamped below 2**23 to 0
    (flushing would-be denormal bit patterns) and above 0x7F800000 to avoid
    producing inf/NaN bit patterns.

    NOTE(review): the uint32 round-trip assumes the running torch build
    supports casting to ``torch.uint32`` — confirm on the target setup.
    """
    x_copy = x.type(torch.float32)
    x_copy = x_copy * 12102203.17133801 + 1064986823.010288
    x_copy = torch.where(x_copy < 8388608, 0, x_copy).type(torch.float32)
    x_copy = torch.where(x_copy > 2139095040, 2139095040, x_copy).type(torch.float32)

    # Reinterpret the integer-valued float as raw float32 bits.
    return x_copy.type(torch.uint32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax over the last dimension using the fast-exp approximation."""

    @staticmethod
    def forward(ctx, input):
        """Numerically-stabilized softmax: exp(x - max) / sum(exp(x - max))."""
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes))
        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Softmax gradient: (g - sum(g * y, -1, keepdim)) * y.

        Generalization: the original reduced over a hard-coded dim 2 and
        materialized the sum with .repeat(), which only worked for 3-D
        inputs. Reducing over the last dim with keepdim and relying on
        broadcasting yields the identical result for 3-D tensors and
        extends to inputs of any rank.
        """
        out_data = ctx.saved_tensors[0]
        sums = torch.sum(grad_output * out_data, -1, keepdim=True)
        grad_input = (grad_output - sums) * out_data

        return grad_input
-------------------------------------------------------------------------------- /tests/test_batchnorm_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | data.h 3 | init-defines.h 4 | io_data.h 5 | readme.txt 6 | log.txt -------------------------------------------------------------------------------- /tests/test_batchnorm_fp32/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_batchnorm_fp32 2 | 3 | # User Section 4 | CI?=4 5 | HI?=4 6 | WI?=4 7 | 8 | BATCH_SIZE?=2 9 | NUM_CORES?=2 10 | HWC?=0 11 | 12 | STEP?='FORWARD' # 'FORWARD' or 'BACKWARD_GRAD' or 'BACKWARD_ERROR' 13 | # End of User Section 14 | 15 | TRAIN_LIB=../../lib 16 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 17 | APP_SRCS = main.c net.c 18 | 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_batchnorm_fp32.c 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp16.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 26 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 30 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp32.c 31 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp16.c 32 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c 33 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp16.c 34 | 35 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 36 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 37 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 38 | APP_CFLAGS += -DPROF_NET 39 | APP_CFLAGS += -DOPTIMIZE 40 | APP_CFLAGS += -DDEBUG 41 | 42 | APP_LDFLAGS += -lm 43 | 44 | # STATISTICS 45 | APP_CFLAGS += -DSTATS 46 | 47 | get_golden: 48 | python3 ./utils/GM.py -CI ${CI} -HI ${HI} -WI ${WI} -NUM_CORES ${NUM_CORES} -STEP ${STEP} -BATCH_SIZE ${BATCH_SIZE} 49 | 50 | include $(RULES_DIR)/pmsis_rules.mk 51 | -------------------------------------------------------------------------------- /tests/test_batchnorm_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2025 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "stdio.h" 19 | #include "stdlib.h" 20 | #include "net.h" 21 | 22 | /** 23 | * Configures cluster, then calls net_step() 24 | **/ 25 | int main(void) { 26 | printf("\nConfiguring cluster..\n"); 27 | 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | 36 | if (pi_cluster_open(&cluster_dev)) { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Exiting DNN Training.\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_batchnorm_fp32/net.h: -------------------------------------------------------------------------------- 1 | // PULP Defines 2 | #define STACK_SIZE 4096 3 | 4 | // Tolerance to check updated output 5 | #define TOLERANCE 1e-6 6 | 7 | 8 | // Training functions 9 | void DNN_init(); 10 | 11 | void compute_loss(); 12 | 13 | void update_weights(); 14 | 15 | void forward(); 16 | 17 | void backward(); 18 | 19 | void net_step(); 20 | 21 | 22 | // Print and check functions 23 | void print_output(); 24 | 25 | void check_post_training_output(); 26 | -------------------------------------------------------------------------------- /tests/test_blocktranspose/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_blocktranspose/Makefile: -------------------------------------------------------------------------------- 1 | APP = blocktransposer 2 | 3 | # User code 4 | NUM_CORES?=8 5 | CH_IN?=2 6 | CH_OUT?=3 7 | HK?=2 8 | WK?=2 9 | HWC_LAY?=1 # =0 
use CHW layout, =1 use HWC layout for the weights 10 | APP_CFLAGS += -DPRINT_MATS 11 | # End of user code 12 | 13 | 14 | TRAIN_LIB=../../lib 15 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 16 | APP_SRCS = main.c net.c 17 | #APP_CFLAGS += -DDEBUG 18 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -O3 -g3 -mno-memcpy 20 | APP_CFLAGS += -DFABRIC 21 | APP_CFLAGS += -DCLUSTER 22 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 23 | APP_CFLAGS += -DPROF_NET 24 | APP_CFLAGS += -mhwloopalign 25 | APP_CFLAGS += -DTin_Cout=$(CH_OUT) 26 | APP_CFLAGS += -DTin_Cin=$(CH_IN) 27 | APP_CFLAGS += -DTin_Hk=$(HK) 28 | APP_CFLAGS += -DTin_Wk=$(WK) 29 | APP_CFLAGS += -DHWC_LAYOUT=$(HWC_LAY) 30 | APP_LDFLAGS += -lm 31 | 32 | # STATISTICS 33 | APP_CFLAGS += -DSTATS 34 | 35 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 36 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 37 | 38 | include $(RULES_DIR)/pmsis_rules.mk 39 | -------------------------------------------------------------------------------- /tests/test_blocktranspose/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching transposition procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Transposition successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- 
/tests/test_blocktranspose/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | // // In data 8 | // #define Tin_Cout 16 9 | // #define Tin_Cin 3 10 | // #define Tin_Hk 3 11 | // #define Tin_Wk 3 12 | 13 | void net_step (); -------------------------------------------------------------------------------- /tests/test_broadcast_add/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | test_data.h 4 | -------------------------------------------------------------------------------- /tests/test_broadcast_add/Makefile: -------------------------------------------------------------------------------- 1 | APP = transposer 2 | 3 | # User code 4 | DIMS_1 = 17 1 31 5 | DIMS_2 = 23 1 6 | 7 | NUM_CORES = 8 8 | DATA_TYPE = 32 9 | 10 | #APP_CFLAGS += -DPRINT_MATS 11 | # End of user code 12 | 13 | 14 | TRAIN_LIB=../../lib 15 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 16 | APP_SRCS = main.c net.c 17 | #APP_CFLAGS += -DDEBUG 18 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -O3 -g3 -mno-memcpy 20 | APP_CFLAGS += -DFABRIC 21 | APP_CFLAGS += -DCLUSTER 22 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 23 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 24 | APP_CFLAGS += -DPROF_NET 25 | APP_CFLAGS += -mhwloopalign 26 | APP_LDFLAGS += -lm 27 | 28 | # STATISTICS 29 | APP_CFLAGS += -DSTATS 30 | 31 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 32 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 33 | 34 | include $(RULES_DIR)/pmsis_rules.mk 35 | 36 | get_golden: 37 | rm -rf BUILD/ 38 | python3 utils/GM.py --dims_1 $(DIMS_1) --dims_2 $(DIMS_2) --dtype $(DATA_TYPE) 39 | -------------------------------------------------------------------------------- /tests/test_broadcast_add/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main(void) { 9 | printf("\nHello there.\nConfiguring cluster..\n"); 10 | 11 | // Configure cluster 12 | struct pi_device cluster_dev; 13 | struct pi_cluster_conf cl_conf; 14 | struct pi_cluster_task cl_task; 15 | 16 | pi_cluster_conf_init(&cl_conf); 17 | pi_open_from_conf(&cluster_dev, &cl_conf); 18 | 19 | if (pi_cluster_open(&cluster_dev)) { 20 | return -1; 21 | } 22 | 23 | printf("\nLaunching broadcast addition procedure...\n"); 24 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, broadcast_add_test, NULL)); 25 | 26 | printf("Broadcast addition finished!\n"); 27 | pi_cluster_close(&cluster_dev); 28 | 29 | pmsis_exit(0); 30 | } 31 | -------------------------------------------------------------------------------- /tests/test_broadcast_add/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "net.h" 4 | #include "stats.h" 5 | 6 | #include "test_data.h" 7 | 8 | #include "tensor_checkers.h" 9 | 10 | 11 | 
void broadcast_add_test() { 12 | #ifdef PROF_NET 13 | INIT_STATS(); 14 | PRE_START_STATS(); 15 | #endif 16 | printf("Executing on %d cores.\n", NUM_CORES); 17 | 18 | #if DATA_TYPE == 32 19 | struct array_broadcast_sum_fp32_args args; 20 | printf("WORKING ON FP32\n"); 21 | #elif DATA_TYPE == 16 22 | struct array_broadcast_sum_fp16_args args; 23 | printf("WORKING ON FP16\n"); 24 | #endif 25 | 26 | // Get arguments 27 | args.op_1 = IN_MATRIX_1; 28 | args.op_2 = IN_MATRIX_2; 29 | args.dest = OUT_MATRIX; 30 | 31 | args.op_1_dims = DIMS_1; 32 | args.op_2_dims = DIMS_2; 33 | 34 | args.op_1_dims_len = N_DIMS_1; 35 | args.op_2_dims_len = N_DIMS_2; 36 | 37 | #ifdef PROF_NET 38 | START_STATS(); 39 | #endif 40 | 41 | // Perform transposition 42 | #if DATA_TYPE == 32 43 | pi_cl_team_fork(NUM_CORES, array_broadcast_sum_fp32, &args); 44 | #elif DATA_TYPE == 16 45 | pi_cl_team_fork(NUM_CORES, array_broadcast_sum_fp16, &args); 46 | #endif 47 | 48 | // Stop stats 49 | #ifdef PROF_NET 50 | STOP_STATS(); 51 | #endif 52 | 53 | mean_error_checker(args.dest, TEST_OUT, TOTAL_SIZE_OUT); 54 | elementwise_checker(args.dest, TEST_OUT, TOTAL_SIZE_OUT); 55 | 56 | return; 57 | } 58 | -------------------------------------------------------------------------------- /tests/test_broadcast_add/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | void broadcast_add_test(); 8 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | matmul_data.h 4 | net_args.h 5 | dis.S 6 | fastest_matmul.txt 7 | test_data.h 8 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/Makefile: 
-------------------------------------------------------------------------------- 1 | APP = test_broadcast_matmul 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | DIMS_1 = 2 1 3 2 5 6 | DIMS_2 = 5 1 5 3 7 | 8 | NUM_CORES = 8 9 | DATA_TYPE = 16 # 32 for fp32, 16 for fp16 10 | # End of user settings 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 18 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 20 | 21 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 22 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 23 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 24 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 25 | APP_CFLAGS += -DPROF_NET 26 | 27 | APP_LDFLAGS += -lm 28 | 29 | # STATISTICS 30 | APP_CFLAGS += -DSTATS 31 | 32 | get_golden: 33 | rm -rf BUILD/ 34 | python3 utils/GM.py --dims_1 $(DIMS_1) --dims_2 $(DIMS_2) --dtype $(DATA_TYPE) 35 | 36 | include $(RULES_DIR)/pmsis_rules.mk 37 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | 5 | int main(void) { 6 | printf("\nHello there.\nConfiguring cluster..\n"); 7 | 8 | // Configure cluster 9 | struct pi_device cluster_dev; 10 | struct pi_cluster_conf cl_conf; 11 | struct pi_cluster_task cl_task; 12 | 13 | pi_cluster_conf_init(&cl_conf); 14 | pi_open_from_conf(&cluster_dev, &cl_conf); 15 | 16 | if (pi_cluster_open(&cluster_dev)) { 17 | return -1; 18 | } 19 | 20 | printf("\nLaunching broadcast matmul evaluation...\n\n"); 21 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, broadcast_matmul_test, NULL)); 22 | 23 | printf("\nMatmul evaluation successfully terminated :)\n"); 24 | 
pi_cluster_close(&cluster_dev); 25 | 26 | pmsis_exit(0); 27 | } 28 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "net.h" 4 | #include "stats.h" 5 | 6 | #include "test_data.h" 7 | 8 | #include "tensor_checkers.h" 9 | 10 | 11 | void broadcast_matmul_test() { 12 | #ifdef PROF_NET 13 | INIT_STATS(); 14 | PRE_START_STATS(); 15 | #endif 16 | printf("Executing on %d cores.\n", NUM_CORES); 17 | 18 | #if DATA_TYPE == 32 19 | struct broadcastMatMul_args_fp32 args; 20 | printf("WORKING ON FP32\n"); 21 | #elif DATA_TYPE == 16 22 | struct broadcastMatMul_args_fp16 args; 23 | printf("WORKING ON FP16\n"); 24 | #endif 25 | 26 | // Get arguments 27 | args.A = IN_MATRIX_1; 28 | args.B = IN_MATRIX_2; 29 | args.C = OUT_MATRIX; 30 | 31 | args.A_dims = DIMS_1; 32 | args.B_dims = DIMS_2; 33 | 34 | args.A_dims_len = N_DIMS_1; 35 | args.B_dims_len = N_DIMS_2; 36 | 37 | #ifdef PROF_NET 38 | START_STATS(); 39 | #endif 40 | 41 | // Perform transposition 42 | #if DATA_TYPE == 32 43 | mm_broadcast_fp32(&args); 44 | #elif DATA_TYPE == 16 45 | mm_broadcast_fp16(&args); 46 | #endif 47 | 48 | // Stop stats 49 | #ifdef PROF_NET 50 | STOP_STATS(); 51 | #endif 52 | 53 | mean_error_checker(args.C, TEST_OUT, TOTAL_SIZE_OUT); 54 | elementwise_checker(args.C, TEST_OUT, TOTAL_SIZE_OUT); 55 | 56 | return; 57 | } 58 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | void broadcast_matmul_test(); 8 | -------------------------------------------------------------------------------- /tests/test_broadcast_matmul/utils/.gitignore: 
-------------------------------------------------------------------------------- 1 | __pycache__/ 2 | -------------------------------------------------------------------------------- /tests/test_conv2d_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | conv2d-grads.h 4 | conv2d-output.h 5 | init-defines.h 6 | input-image.h 7 | step-check.h 8 | runs.txt -------------------------------------------------------------------------------- /tests/test_conv2d_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_conv2d_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pulp_train_defines.h" 18 | #include "step-check.h" 19 | 20 | // User profiling flags 21 | 22 | #if defined(FORWARD) && !defined(DEBUG) 23 | #define PROF_FWD 24 | #endif 25 | 26 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG) 27 | #define PROF_BKWD 28 | #endif 29 | 30 | // Net sizes 31 | 32 | // CONV2D 33 | #define Tout_H_l1 ((Tin_H_l1-Tker_H_l1+PAD_U+PAD_D)/STRIDE_H + 1) 34 | #define Tout_W_l1 ((Tin_W_l1-Tker_W_l1+PAD_L+PAD_R)/STRIDE_W + 1) 35 | 36 | // Tensor checksum definition 37 | #define CHECK_TOLERANCE 1e-3 38 | #define ERROR_TOLERANCE 1e-3 39 | 40 | // PULP DEFINES 41 | #define STACK_SIZE 4096 42 | #define MOUNT 1 43 | #define UNMOUNT 0 44 | #define CID 0 45 | 46 | // Support functions 47 | static inline void forward(); 48 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 49 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 50 | static inline void train(); 51 | // Main function 52 | void net_step (); 53 | 54 | -------------------------------------------------------------------------------- /tests/test_conv2d_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_conv2d_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | conv2d-grads.h 4 | conv2d-output.h 5 | init-defines.h 6 | input-image.h 7 | step-check.h 8 | runs.txt 9 | log.c -------------------------------------------------------------------------------- /tests/test_conv2d_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_conv2d_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "step-check.h" 18 | 19 | // User profiling flags 20 | 21 | #if defined(FORWARD) && !defined(DEBUG) 22 | #define PROF_FWD 23 | #endif 24 | 25 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG) 26 | #define PROF_BKWD 27 | #endif 28 | 29 | // Net sizes 30 | 31 | // CONV2D 32 | #define Tout_H_l1 ((Tin_H_l1-Tker_H_l1+PAD_U+PAD_D)/STRIDE_H + 1) 33 | #define Tout_W_l1 ((Tin_W_l1-Tker_W_l1+PAD_L+PAD_R)/STRIDE_W + 1) 34 | 35 | // Tensor checksum definition 36 | #define CHECK_TOLERANCE 1e-4 37 | #define ERROR_TOLERANCE 1e-4 38 | 39 | // PULP DEFINES 40 | #define STACK_SIZE 4096 41 | #define MOUNT 1 42 | #define UNMOUNT 0 43 | #define CID 0 44 | 45 | // Support functions 46 | static inline void forward(); 47 | static inline void compare_tensors(float *A, float *B, int length); 48 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 49 | static inline void train(); 50 | // Main function 51 | void net_step (); 52 | 53 | -------------------------------------------------------------------------------- /tests/test_conv_pw_dw_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | pylog.txt 4 | dw-grads.h 5 | dw-output.h 6 | init-defines.h 7 | input-image.h 8 | pw-grads.h 9 | pw-output.h 10 | step-check.h 11 | utils/GM_old.py 12 | README.md 13 | runs.txt -------------------------------------------------------------------------------- /tests/test_conv_pw_dw_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_conv_pw_dw_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_conv_pw_dw_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | pylog.txt 4 | dw-grads.h 5 | dw-output.h 6 | init-defines.h 7 | input-image.h 8 | pw-grads.h 9 | pw-output.h 10 | step-check.h 11 | utils/GM_old.py 12 | README.md 13 | runs.txt 14 | utils/__pycache__/ -------------------------------------------------------------------------------- 
/tests/test_conv_pw_dw_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_cordic/.gitignore: -------------------------------------------------------------------------------- 1 | cordic_data.h 2 | log.txt 3 | BUILD/ -------------------------------------------------------------------------------- /tests/test_cordic/Makefile: 
-------------------------------------------------------------------------------- 1 | APP = test_cordic 2 | 3 | # User settings 4 | 5 | # General arguments 6 | N_TEST ?= 200 7 | NUM_CORES ?= 8 8 | # End of user settings 9 | 10 | TRAIN_LIB=../../lib 11 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 12 | APP_SRCS += main.c net.c 13 | 14 | 15 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 17 | 18 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 20 | APP_CFLAGS += -DN_TEST=$(N_TEST) 21 | APP_CFLAGS += -DNUM_CORES 22 | 23 | APP_LDFLAGS += -lm 24 | 25 | # STATISTICS 26 | APP_CFLAGS += -DSTATS 27 | 28 | get_golden: 29 | python3 ./utils/GM.py --n_test=$(N_TEST) 30 | 31 | include $(RULES_DIR)/pmsis_rules.mk 32 | -------------------------------------------------------------------------------- /tests/test_cordic/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching cordic function evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nCordic function evaluation successfully terminated\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_cordic/net.h: -------------------------------------------------------------------------------- 1 | void net_step(); 2 | -------------------------------------------------------------------------------- /tests/test_cordic/utils/GM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import dump_utils as dump 3 | import argparse 4 | import math 5 | 6 | parser = argparse.ArgumentParser("Cordic test") 7 | parser.add_argument( '--n_test', type=int, default=300) 8 | args = parser.parse_args() 9 | 10 | n_test = args.n_test 11 | 12 | # angles = torch.empty(n_test).uniform_(-math.pi/2, math.pi/2) 13 | angles = torch.empty(n_test).uniform_(0, 10) 14 | 15 | cos = torch.cos(angles) 16 | sin = torch.sin(angles) 17 | 18 | # Write data to file 19 | f = open("cordic_data.h", "w") 20 | # f.write("#define N_TEST "+str(n_test)+"\n") 21 | f.write("PI_L1 float gm_angles["+str(n_test)+"] = {"+dump.tensor_to_string(angles)+"};\n") 22 | f.write("PI_L2 float gm_cos["+str(n_test)+"] = {"+dump.tensor_to_string(cos)+"};\n") 23 | 
f.write("PI_L2 float gm_sin["+str(n_test)+"] = {"+dump.tensor_to_string(sin)+"};\n") 24 | 25 | f.close() 26 | 27 | 28 | def print_constant(N): 29 | print("atan_pow_2: \n") 30 | for i in range(0, N): 31 | print(f"{math.atan(2**(-i))}, ") 32 | 33 | sf = 1 34 | for i in range(0, N): 35 | sf *= math.cos(math.atan(2**(-i))) 36 | 37 | print(f"\nscaling factor: {sf}") -------------------------------------------------------------------------------- /tests/test_dropout/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | dropout_data.h 4 | net_args.h -------------------------------------------------------------------------------- /tests/test_dropout/Makefile: -------------------------------------------------------------------------------- 1 | APP = rng 2 | 3 | # User code 4 | NUM_CORES?=8 5 | PROBABILITY?=0.23 6 | SEED?=0 7 | SIZE?=100 8 | USE_MASK?=0 9 | DATA_TYPE?=float # 'float' or 'fp16' 10 | # End of user code 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS = main.c net.c 15 | #APP_CFLAGS += -DDEBUG 16 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 17 | APP_CFLAGS += -O3 -g3 -mno-memcpy 18 | APP_CFLAGS += -DFABRIC 19 | APP_CFLAGS += -DCLUSTER 20 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 21 | APP_CFLAGS += -DPROF_NET 22 | APP_CFLAGS += -mhwloopalign 23 | APP_CFLAGS += -DPROBABILITY=$(PROBABILITY) 24 | APP_CFLAGS += -DSEED=$(SEED) #sneed 25 | APP_CFLAGS += -DSIZE=$(SIZE) 26 | APP_CFLAGS += -DUSE_MASK=$(USE_MASK) 27 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 28 | APP_LDFLAGS += -lm 29 | 30 | 31 | # STATISTICS 32 | APP_CFLAGS += -DSTATS 33 | 34 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 35 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 36 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_random.c 37 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_dropout_fp32.c 38 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_dropout_fp16.c 39 | 40 | include $(RULES_DIR)/pmsis_rules.mk 41 | 42 | get_golden: 43 | python3 utils/GM.py --in_size $(SIZE) --type $(DATA_TYPE) --prob $(PROBABILITY) 44 | -------------------------------------------------------------------------------- /tests/test_dropout/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching random number generation procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Dropout successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | 
-------------------------------------------------------------------------------- /tests/test_dropout/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "stats.h" 4 | #include "net.h" 5 | 6 | #include "net_args.h" 7 | #include "dropout_data.h" 8 | #include 9 | 10 | 11 | // Main function 12 | void net_step () 13 | { 14 | #ifdef PROF_NET 15 | INIT_STATS(); 16 | PRE_START_STATS(); 17 | #endif 18 | 19 | #ifdef FLOAT16 20 | struct dropout_args_fp16 args; 21 | args.seed = SEED; 22 | args.probability = PROBABILITY; 23 | args.input = input; 24 | args.mask = mask; 25 | args.use_mask = USE_MASK; 26 | args.size = IN_SIZE; 27 | 28 | printf("Dropout function:\n"); 29 | #ifdef PROF_NET 30 | START_STATS(); 31 | #endif 32 | 33 | pi_cl_team_fork(NUM_CORES, pulp_dropout_fp16_cl, &args); 34 | 35 | #ifdef PROF_NET 36 | STOP_STATS(); 37 | #endif 38 | #endif 39 | 40 | #ifdef FLOAT32 41 | struct dropout_args_fp32 args; 42 | args.seed = SEED; 43 | args.probability = PROBABILITY; 44 | args.input = input; 45 | args.mask = mask; 46 | args.use_mask = USE_MASK; 47 | args.size = IN_SIZE; 48 | 49 | printf("Dropout function:\n"); 50 | #ifdef PROF_NET 51 | START_STATS(); 52 | #endif 53 | 54 | pi_cl_team_fork(NUM_CORES, pulp_dropout_fp32_cl, &args); 55 | 56 | #ifdef PROF_NET 57 | STOP_STATS(); 58 | #endif 59 | #endif 60 | 61 | int count = 0; 62 | 63 | for(int i = 0; i < IN_SIZE; i++){ 64 | //printf("%f\n", input[i]); 65 | if(input[i]==0.0f) 66 | count++; 67 | } 68 | 69 | printf("%d\n", count); 70 | printf("Percentage of dropped out values: %f\%\n", (count*100.0f/SIZE)); 71 | 72 | return; 73 | } 74 | -------------------------------------------------------------------------------- /tests/test_dropout/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | #include 
"pulp_train_defines.h" 8 | 9 | void net_step (); -------------------------------------------------------------------------------- /tests/test_dropout/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_gelu_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | gelu-output.h 3 | init-defines.h 4 | input-sequence.h 5 | mhsa-grads.h 6 | mhsa-output.h 7 | step-check.h 8 | dis.S -------------------------------------------------------------------------------- /tests/test_gelu_fp16/Makefile: -------------------------------------------------------------------------------- 1 | APP = gelu_fp16 2 | 3 | # User settings 4 | IN_H?=7 5 | IN_W?=7 6 | IN_CH?=160 7 | OUT_CH?=160 8 | NUM_CORES?=8 9 | STEP?='FORWARD' # Possible steps: 'FORWARD', 'BACKWARD' 10 | APP_CFLAGS += -DOPTIMIZE 11 | MATMUL_TYPE?=3 12 | NUM_MATMULS?=24 # When profiling with multiple matmul algorithms 13 | NUM_SIZES?=3 # When profiling multiple sizes of the network 14 | # End of user settings 15 | 16 | TRAIN_LIB=../../lib 17 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 18 | APP_SRCS = main.c net.c 19 | 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp16.c 22 | 23 | DATA_TYPE?='fp16' 24 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 25 | APP_CFLAGS += -O3 -g 26 | APP_CFLAGS += -DFABRIC 27 | APP_CFLAGS += -DCLUSTER 28 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 29 | APP_CFLAGS += -DPROF_NET 30 | APP_CFLAGS += -DMEMOCC_COMP 31 | APP_CFLAGS += -mhwloopalign 32 | APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE} 33 | #APP_CFLAGS += -DDEBUG 34 | APP_LDFLAGS += -lm 35 | 36 | # STATISTICS 37 | APP_CFLAGS += -DSTATS 38 | 39 | get_golden: 40 | python3 ./utils/GM.py --step $(STEP) --in_width $(IN_W) --in_height $(IN_H) --ch_in ${IN_CH} --ch_out ${OUT_CH} 41 | 42 | profile_all_optim: 43 | python3 ./utils/profile_optimized.py --num_matmuls ${NUM_MATMULS} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --in_width $(IN_W) --in_height $(IN_H) --ch_in ${IN_CH} --ch_out ${OUT_CH} --n_heads $(N_HEADS) --att_dim $(ATT_DIM) 44 | 45 | profile_all_sizes: 46 | python3 ./utils/profile_sizes.py --num_sizes ${NUM_SIZES} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --matmul_type ${MATMUL_TYPE} 47 | 48 | include $(RULES_DIR)/pmsis_rules.mk 49 | -------------------------------------------------------------------------------- /tests/test_gelu_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | pi_cluster_conf_init(&cl_conf); 36 | pi_open_from_conf(&cluster_dev, &cl_conf); 37 | if (pi_cluster_open(&cluster_dev)) 38 | { 39 | return -1; 40 | } 41 | 42 | printf("\nLaunching training procedure...\n"); 43 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 44 | 45 | 46 | printf("\nNet training successful!\n"); 47 | pi_cluster_close(&cluster_dev); 48 | 49 | pmsis_exit(0); 50 | } 51 | -------------------------------------------------------------------------------- /tests/test_gelu_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pulp_train_defines.h" 18 | #include "step-check.h" 19 | 20 | // User profiling flags 21 | 22 | //#define DEBUG 23 | 24 | #if defined(FORWARD) && !defined(DEBUG) 25 | #define PROF_FWD 26 | #endif 27 | 28 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD) || defined(BACKWARD)) && !defined(DEBUG) 29 | #define PROF_BCKWD 30 | #endif 31 | 32 | // Net sizes 33 | 34 | #define Tker_l0 (Tin_l0*Tout_l0) 35 | 36 | // Tensor checksum definition 37 | #define CHECK_TOLERANCE 0.001 38 | #define ERROR_TOLERANCE 0.001 39 | 40 | // PULP DEFINES 41 | #define STACK_SIZE 4096 42 | #define MOUNT 1 43 | #define UNMOUNT 0 44 | #define CID 0 45 | 46 | // Support functions 47 | static inline void forward(); 48 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 49 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 50 | static inline void train(); 51 | // Main function 52 | void net_step (); 53 | 54 | -------------------------------------------------------------------------------- /tests/test_gelu_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_gelu_fp16/utils/test_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import float32 3 | from torch import nn 4 | from torch import Tensor 5 | from torch import cuda 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | class TestModel(nn.Module): 10 | """Just testing the GELU activation""" 11 | def __init__(self): 12 | super().__init__() 13 | self.act = nn.GELU(approximate='tanh') 14 | self.scores = None # for visualization 15 | 16 | def forward(self, x): 17 | x = self.act(x) 18 | return x -------------------------------------------------------------------------------- /tests/test_im2col/.gitignore: 
-------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_im2col/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching training procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Net training successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_im2col/net.h: -------------------------------------------------------------------------------- 1 | // Tensor sizes 2 | #define Tker_H_l1 Tker_W_l1 3 | 4 | #define Tout_W_l1 ((int)(Tin_W_l1-Tker_W_l1+LPAD+RPAD+WSTR)/WSTR) 5 | #define Tout_H_l1 ((int)(Tin_H_l1-Tker_H_l1+UPAD+DPAD+HSTR)/HSTR) 6 | 7 | #define weight_init 0.1 8 | 9 | #define PAD_BW (Tker_W_l1-1) 10 | 11 | #define i2c_b_size (Tker_H_l1*Tker_W_l1*Tin_C_l1*(Tin_H_l1-Tker_H_l1+UPAD+DPAD+HSTR)/HSTR*(Tin_W_l1-Tker_W_l1+LPAD+RPAD+WSTR)/WSTR) 12 | #define i2c_b_size_bw (Tker_H_l1*Tker_W_l1*Tout_C_l1*Tin_H_l1*Tin_W_l1) 13 | 14 | // Tensor checksum definition 15 | #define ABS(x) ((x)>0?(x):(-(x))) 16 | #define CHECK_TOLERANCE 1e-3 17 | #define ERROR_TOLERANCE 0.01 18 | 19 | // PULP DEFINES 20 | #define STACK_SIZE 4096 21 | #define MOUNT 1 22 | #define UNMOUNT 0 
23 | #define CID 0 24 | 25 | void net_step (); -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | data.h 3 | init-defines.h 4 | io_data.h 5 | readme.txt -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_instnorm_fp16 2 | 3 | # User Section 4 | CI?=8 5 | HI?=4 6 | WI?=4 7 | NUM_CORES?=8 8 | HWC?=0 9 | STEP?='FORWARD' # 'FORWARD' or 'BACKWARD_GRAD' or 'BACKWARD_ERROR' 10 | # End of User Section 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp16.c 18 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 26 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp32.c 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp16.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp16.c 30 | 31 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 32 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 33 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 34 | APP_CFLAGS += -DPROF_NET 35 | APP_CFLAGS += -DOPTIMIZE 36 | 37 | 38 | 39 | APP_LDFLAGS += -lm 40 | 41 | # STATISTICS 42 | APP_CFLAGS += -DSTATS 43 | 44 | get_golden: 45 | python3 ./utils/GM.py -CI ${CI} -HI ${HI} -WI ${WI} -NUM_CORES ${NUM_CORES} -STEP ${STEP} 46 | 47 | include $(RULES_DIR)/pmsis_rules.mk 48 | 49 | 50 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /** 21 | * Configures cluster, then calls net_step() 22 | **/ 23 | 24 | int main (void) { 25 | 26 | 27 | printf("\nHello sir.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Exiting DNN Training.\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/net.h: -------------------------------------------------------------------------------- 1 | // PULP Defines 2 | #define STACK_SIZE 4096 3 | 4 | // Tolerance to check updated output 5 | #define TOLERANCE 1e-6 6 | 7 | // Training functions 8 | void DNN_init(); 9 | void compute_loss(); 10 | void update_weights(); 11 | void forward(); 12 | void backward(); 13 | void net_step(); 14 | 15 | // Print and check functions 16 | void print_output(); 17 | void check_post_training_output(); 18 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp16/readme.txt: -------------------------------------------------------------------------------- 1 | To compile the application, run "make clean get_golden all run > log.txt". 2 | If running on a board (not GVSoC), add "APP_CFLAGS += -DBOARD" to the user section of the Makefile (profiling of cycles only). 3 | To modify the hyperparameters (learning rate, epochs, batch size still not implemented), 4 | edit the variables inside "utils/GM.py". 
5 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | data.h 3 | init-defines.h 4 | io_data.h 5 | readme.txt 6 | log.txt -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_instnorm_fp32 2 | 3 | # User Section 4 | CI?=8 5 | HI?=4 6 | WI?=4 7 | NUM_CORES?=8 8 | HWC?=0 9 | STEP?='FORWARD' # 'FORWARD' or 'BACKWARD_GRAD' or 'BACKWARD_ERROR' 10 | # End of User Section 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp16.c 18 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 26 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp32.c 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_instnorm_fp16.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp16.c 30 | 31 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 32 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 33 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 34 | APP_CFLAGS += -DPROF_NET 35 | APP_CFLAGS += -DOPTIMIZE 36 | 37 | 38 | 39 | APP_LDFLAGS += -lm 40 | 41 | # STATISTICS 42 | APP_CFLAGS += -DSTATS 43 | 44 | get_golden: 45 | python3 ./utils/GM.py -CI ${CI} -HI ${HI} -WI ${WI} -NUM_CORES ${NUM_CORES} -STEP ${STEP} 46 | 47 | include $(RULES_DIR)/pmsis_rules.mk 48 | 49 | 50 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /** 21 | * Configures cluster, then calls net_step() 22 | **/ 23 | 24 | int main (void) { 25 | 26 | 27 | printf("\nHello sir.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Exiting DNN Training.\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/net.h: -------------------------------------------------------------------------------- 1 | // PULP Defines 2 | #define STACK_SIZE 4096 3 | 4 | // Tolerance to check updated output 5 | #define TOLERANCE 1e-6 6 | 7 | // Training functions 8 | void DNN_init(); 9 | void compute_loss(); 10 | void update_weights(); 11 | void forward(); 12 | void backward(); 13 | void net_step(); 14 | 15 | // Print and check functions 16 | void print_output(); 17 | void check_post_training_output(); 18 | -------------------------------------------------------------------------------- /tests/test_instnorm_fp32/readme.txt: -------------------------------------------------------------------------------- 1 | To compile the application, run "make clean get_golden all run > log.txt". 2 | If running on a board (not GVSoC), add "APP_CFLAGS += -DBOARD" to the user section of the Makefile (profiling of cycles only). 3 | To modify the hyperparameters (learning rate, epochs, batch size still not implemented), 4 | edit the variables inside "utils/GM.py". 
5 | -------------------------------------------------------------------------------- /tests/test_interpolation/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | intp_data.h 4 | net_args.h -------------------------------------------------------------------------------- /tests/test_interpolation/Makefile: -------------------------------------------------------------------------------- 1 | APP = interpolation_fp32 2 | 3 | # User code 4 | NUM_CORES?=1 5 | DATA_TYPE?='float' # 'float' or 'bfloat16' 6 | INTP_TYPE?=0 # 0='NEAREST', 1='BILINEAR' 7 | CH?=1 8 | IN_H?=8 9 | IN_W?=8 10 | OUT_H?=48 11 | OUT_W?=48 12 | # End of user code 13 | 14 | TRAIN_LIB=../../lib 15 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 16 | APP_SRCS = main.c net.c 17 | #APP_CFLAGS += -DDEBUG 18 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -O3 -g3 -mno-memcpy 20 | APP_CFLAGS += -DFABRIC 21 | APP_CFLAGS += -DCLUSTER 22 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 23 | APP_CFLAGS += -DPROF_NET 24 | APP_CFLAGS += -mhwloopalign 25 | APP_CFLAGS += -DCH=$(CH) 26 | APP_CFLAGS += -DIN_H=$(IN_H) 27 | APP_CFLAGS += -DIN_W=$(IN_W) 28 | APP_CFLAGS += -DOUT_H=$(OUT_H) 29 | APP_CFLAGS += -DOUT_W=$(OUT_W) 30 | APP_CFLAGS += -DINTP_TYPE=$(INTP_TYPE) 31 | APP_LDFLAGS += -lm 32 | 33 | # STATISTICS 34 | APP_CFLAGS += -DSTATS 35 | 36 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 37 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 38 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_interpolation_fp32.c 39 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_interpolation_fp16.c 40 | 41 | include $(RULES_DIR)/pmsis_rules.mk 42 | 43 | get_golden: 44 | python3 utils/GM.py --in_ch $(CH) --in_height $(IN_H) --in_width $(IN_W) --out_height $(OUT_H) --out_width $(OUT_W) --type $(DATA_TYPE) 45 | -------------------------------------------------------------------------------- /tests/test_interpolation/main.c: 
-------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching interpolation procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Interpolation successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_interpolation/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | #include "pulp_train_defines.h" 8 | #include "net_args.h" 9 | 10 | // Tensor checksum definition 11 | #define CHECK_TOLERANCE 1e-6 12 | #define ERROR_TOLERANCE 1e-6 13 | 14 | #ifdef FLOAT32 15 | static inline void compare_tensors(float *A, float *B, int length); 16 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 17 | #elif defined(BFLOAT16) 18 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 19 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 20 | #endif 21 | void net_step (); 22 | -------------------------------------------------------------------------------- /tests/test_interpolation/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 
-------------------------------------------------------------------------------- /tests/test_layernorm_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | layer_norm_init_defines.h 3 | layer_norm_input.h 4 | layer_norm_output.h 5 | layer_norm_wb.h 6 | -------------------------------------------------------------------------------- /tests/test_layernorm_fp32/Makefile: -------------------------------------------------------------------------------- 1 | APP = layernorm_fp32 2 | 3 | # User code 4 | NUM_CORES?=8 5 | DATA_TYPE?=fp32 # 'fp32' 6 | 7 | INPUT_WIDTH?=16 8 | INPUT_HEIGHT?=16 9 | # End of user code 10 | 11 | TASK_NAME=sst-2 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS = main.c net.c 15 | 16 | #APP_CFLAGS += -DDEBUG 17 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 18 | APP_CFLAGS += -O3 -g3 -mno-memcpy 19 | APP_CFLAGS += -DFABRIC 20 | APP_CFLAGS += -DCLUSTER 21 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 22 | APP_CFLAGS += -DN_HEADS=$(N_HEADS) 23 | APP_CFLAGS += -DPROF_NET 24 | APP_CFLAGS += -mhwloopalign 25 | APP_LDFLAGS += -lm 26 | 27 | APP_CFLAGS += -DTILE_H=$(TILE_H) 28 | APP_CFLAGS += -DTILE_W=$(TILE_W) 29 | APP_CFLAGS += -DTILE_DIM=$(TILE_DIM) 30 | 31 | APP_CFLAGS += -DOPTIMIZE 32 | APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE} 33 | 34 | # STATISTICS 35 | APP_CFLAGS += -DSTATS 36 | 37 | # =============== SOURCES =============== 38 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_layernorm_fp32.c 39 | 40 | include $(RULES_DIR)/pmsis_rules.mk 41 | 42 | get_golden: 43 | rm -rf BUILD/ 44 | python3 utils/GM.py --data_type $(DATA_TYPE) --input_shape_height $(INPUT_HEIGHT) --input_shape_width $(INPUT_WIDTH) 45 | -------------------------------------------------------------------------------- /tests/test_layernorm_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * 
Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * Configures cluster, then calls net_step() 25 | */ 26 | int main() { 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | 29 | // Configure cluster 30 | struct pi_device cluster_dev; 31 | struct pi_cluster_conf cl_conf; 32 | struct pi_cluster_task cl_task; 33 | 34 | pi_cluster_conf_init(&cl_conf); 35 | pi_open_from_conf(&cluster_dev, &cl_conf); 36 | if (pi_cluster_open(&cluster_dev)) { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nNet training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_layernorm_fp32/net.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by diaco on 26/10/2024. 
3 | // 4 | 5 | #ifndef PULP_TRAINLIB_NET_H 6 | #define PULP_TRAINLIB_NET_H 7 | 8 | // PULP DEFINES 9 | #define STACK_SIZE 40960 10 | #define MOUNT 1 11 | #define UNMOUNT 0 12 | #define CID 0 13 | #define MAX_SIZE 25104 14 | 15 | #include "pulp_train_defines.h" 16 | 17 | // net functions 18 | void init_and_connect_blobs(); 19 | void forward(); 20 | void net_step(); 21 | 22 | // DMA managment functions 23 | void load_input(void * src_blob, uint8_t data_diff_both); 24 | void load_output(void * src_blob, uint8_t data_diff_both); 25 | void load_coeff(void * src_blob, uint8_t data_diff_both); 26 | void store_output(void * dest_blob, uint8_t data_diff_both); 27 | void store_input(void * dest_blob, uint8_t data_diff_both); 28 | void store_coeff(void * dest_blob, uint8_t data_diff_both); 29 | void copy_struct_param(unsigned int from, unsigned int to, int size); 30 | void get_input_dim(void * b); 31 | void get_output_dim(void * b); 32 | void get_weight_dim(void * b); 33 | void reset_arguments(); 34 | void update_blob(); 35 | void reset_dim(); 36 | 37 | #endif //PULP_TRAINLIB_NET_H 38 | -------------------------------------------------------------------------------- /tests/test_layout_change/.gitignore: -------------------------------------------------------------------------------- 1 | log.txt 2 | BUILD/ -------------------------------------------------------------------------------- /tests/test_layout_change/Makefile: -------------------------------------------------------------------------------- 1 | APP = layout_change 2 | 3 | # User code 4 | NUM_CORES?=8 5 | T_C?=2 6 | T_H?=3 7 | T_W?=3 8 | #APP_CFLAGS += -DPRINT_MATS 9 | # End of user code 10 | 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS = main.c net.c 15 | #APP_CFLAGS += -DDEBUG 16 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 17 | APP_CFLAGS += -O3 -g3 -mno-memcpy 18 | APP_CFLAGS += -DFABRIC 19 | APP_CFLAGS += -DCLUSTER 20 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 21 | APP_CFLAGS += -DPROF_NET 22 | APP_CFLAGS += -mhwloopalign 23 | APP_CFLAGS += -DT_C=$(T_C) 24 | APP_CFLAGS += -DT_H=$(T_H) 25 | APP_CFLAGS += -DT_W=$(T_W) 26 | APP_LDFLAGS += -lm 27 | 28 | # STATISTICS 29 | APP_CFLAGS += -DSTATS 30 | 31 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 32 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 33 | 34 | include $(RULES_DIR)/pmsis_rules.mk 35 | -------------------------------------------------------------------------------- /tests/test_layout_change/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching transposition procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, change_layout, NULL)); 26 | 27 | printf("Transposition successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_layout_change/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | void change_layout (); -------------------------------------------------------------------------------- 
/tests/test_linear_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | linear-data.h 4 | step-check.h 5 | output_eval.h 6 | runs.txt -------------------------------------------------------------------------------- /tests/test_linear_fp16/Makefile: -------------------------------------------------------------------------------- 1 | APP = linear_test_fp16 2 | 3 | # User settings 4 | IN_CH?=8 5 | OUT_CH?=8 6 | NUM_CORES?=8 7 | STEP?='FORWARD' # Possible steps: 'FORWARD', 'BACKWARD_GRAD', 'BACKWARD_ERROR' 8 | #APP_CFLAGS += -DDEBUG 9 | APP_CFLAGS += -DOPTIMIZE 10 | MATMUL_TYPE?=0 11 | USE_BIASES_LINEAR?=0 # Allocate biases (1) or not (0) 12 | NUM_MATMULS?=6 # When profiling with multiple matmul algorithms 13 | NUM_SIZES?=3 # When profiling multiple sizes of the network 14 | # End of user settings 15 | 16 | TRAIN_LIB=../../lib 17 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 18 | APP_SRCS = main.c net.c 19 | 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_linear_fp16.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 24 | 25 | DATA_TYPE?='fp16' 26 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 27 | APP_CFLAGS += -O3 -g3 28 | APP_CFLAGS += -DFABRIC 29 | APP_CFLAGS += -DCLUSTER 30 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 31 | APP_CFLAGS += -DPROF_NET 32 | APP_CFLAGS += -DMEMOCC_COMP 33 | APP_CFLAGS += -mhwloopalign 34 | APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE} 35 | APP_CFLAGS += -DUSE_BIASES_LINEAR=${USE_BIASES_LINEAR} 36 | APP_LDFLAGS += -lm 37 | 38 | # STATISTICS 39 | APP_CFLAGS += -DSTATS 40 | 41 | get_golden: 42 | python3 utils/GM.py --in_size $(IN_CH) --out_size $(OUT_CH) --step $(STEP) --use_bias $(USE_BIASES_LINEAR) 43 | 44 | profile_all_optim: 45 | python3 ./utils/profile_optimized.py --num_matmuls ${NUM_MATMULS} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --in_size ${IN_CH} --out_size ${OUT_CH} --use_bias ${USE_BIASES_LINEAR} 46 | 47 | profile_all_sizes: 48 | python3 ./utils/profile_sizes.py --num_sizes ${NUM_SIZES} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --matmul_type ${MATMUL_TYPE} --use_bias ${USE_BIASES_LINEAR} 49 | 50 | include $(RULES_DIR)/pmsis_rules.mk 51 | -------------------------------------------------------------------------------- /tests/test_linear_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_linear_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pulp_train_defines.h" 18 | #include "step-check.h" 19 | 20 | // User profiling flags 21 | 22 | #if defined(FORWARD) && !defined(DEBUG) 23 | #define PROF_FWD 24 | #endif 25 | 26 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG) 27 | #define PROF_BCKWD 28 | #endif 29 | 30 | // Net sizes 31 | 32 | #define Tker_l0 (Tin_l0*Tout_l0) 33 | 34 | // Tensor checksum definition 35 | #define CHECK_TOLERANCE 1e-3 36 | #define ERROR_TOLERANCE 0.01 37 | 38 | // PULP DEFINES 39 | #define STACK_SIZE 4096 40 | #define MOUNT 1 41 | #define UNMOUNT 0 42 | #define CID 0 43 | 44 | // Support functions 45 | static inline void forward(); 46 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 47 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 48 | static inline void train(); 49 | // Main function 50 | void net_step (); 51 | 52 | -------------------------------------------------------------------------------- /tests/test_linear_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_linear_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | linear-data.h 4 | step-check.h 5 | output_eval.h 6 | runs.txt 7 | gapsdk_script.sh 8 | **/__pycache__/ 9 | log_bkp.txt 10 | runs_bkp.txt 11 | BUILD_bkp/ 12 | -------------------------------------------------------------------------------- /tests/test_linear_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_linear_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "step-check.h" 18 | 19 | // User profiling flags 20 | 21 | #if defined(FORWARD) && !defined(DEBUG) 22 | #define PROF_FWD 23 | #endif 24 | 25 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG) 26 | #define PROF_BCKWD 27 | #endif 28 | 29 | // Net sizes 30 | 31 | #define Tker_l0 (Tin_l0*Tout_l0) 32 | 33 | // Tensor checksum definition 34 | #define CHECK_TOLERANCE 1e-3 35 | #define ERROR_TOLERANCE 0.01 36 | 37 | // PULP DEFINES 38 | #define STACK_SIZE 4096 39 | #define MOUNT 1 40 | #define UNMOUNT 0 41 | #define CID 0 42 | 43 | // Support functions 44 | static inline void forward(); 45 | static inline void compare_tensors(float *A, float *B, int length); 46 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 47 | static inline void train(); 48 | // Main function 49 | void net_step (); 50 | 51 | -------------------------------------------------------------------------------- /tests/test_losses_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | loss_values.h -------------------------------------------------------------------------------- /tests/test_losses_fp16/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_loss_fp16 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | OUT_SIZE?=10 6 | VALUE?=0.1 7 | LOSS_FN?=0 # Available options: 0='L1Loss', 1='MSE', 2='CrossEntropy', 3='berHuLoss' 8 | # General arguments 9 | NUM_CORES?=1 10 | FP16_FORMAT?=1 # Available formats: 0='FP16', 1='bfloat16' 11 | # End of user settings 12 | 13 | TRAIN_LIB=../../lib 14 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 15 | APP_SRCS += main.c net.c 16 | 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c 18 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 19 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 20 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 21 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 22 | APP_CFLAGS += -DPROF_NET 23 | APP_CFLAGS += -DWGT_SIZE=$(OUT_SIZE) 24 | APP_CFLAGS += -DLOSS_FN=$(LOSS_FN) 25 | 26 | APP_LDFLAGS += -lm 27 | 28 | # STATISTICS 29 | APP_CFLAGS += -DSTATS 30 | 31 | get_golden: 32 | python3 ./utils/GM.py --out_size $(OUT_SIZE) --value $(VALUE) --loss_fn $(LOSS_FN) --format $(FP16_FORMAT) 33 | 34 | include $(RULES_DIR)/pmsis_rules.mk 35 | -------------------------------------------------------------------------------- /tests/test_losses_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching matmul evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nOptimizer evaluation successfully terminated :)\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_losses_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // User profiling flags 18 | #define FLOAT32 19 | // Tensor checksum definition 20 | #define CHECK_TOLERANCE 1e-4 21 | #define ERROR_TOLERANCE 1e-4 22 | 23 | // PULP DEFINES 24 | #define STACK_SIZE 4096 25 | #define MOUNT 1 26 | #define UNMOUNT 0 27 | #define CID 0 28 | 29 | // Loss defines 30 | #define L1Loss 0 31 | #define MSE 1 32 | #define CrossEntropy 2 33 | #define berHuLoss 3 34 | 35 | void net_step(); 36 | -------------------------------------------------------------------------------- /tests/test_losses_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | loss_values.h -------------------------------------------------------------------------------- /tests/test_losses_fp32/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_loss 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | OUT_SIZE?=10 6 | VALUE?=0.5 7 | LOSS_FN?=3 # Available options: 0='L1Loss', 1='MSE', 2='CrossEntropy', 3='berHuLoss' 8 | # General arguments 9 | NUM_CORES?=1 10 | # End of user settings 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 18 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 19 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 20 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 21 | APP_CFLAGS += -DPROF_NET 22 | APP_CFLAGS += -DWGT_SIZE=$(OUT_SIZE) 23 | APP_CFLAGS += -DLOSS_FN=$(LOSS_FN) 24 | 25 | APP_LDFLAGS += -lm 26 | 27 | # STATISTICS 28 | APP_CFLAGS += -DSTATS 29 | 30 | get_golden: 31 | python3 ./utils/GM.py --out_size $(OUT_SIZE) --value $(VALUE) --loss_fn $(LOSS_FN) 32 | 33 | include $(RULES_DIR)/pmsis_rules.mk 34 | -------------------------------------------------------------------------------- /tests/test_losses_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching loss function evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nOptimizer evaluation successfully terminated :)\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_losses_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // User profiling flags 18 | #define FLOAT32 19 | // Tensor checksum definition 20 | #define CHECK_TOLERANCE 1e-6 21 | #define ERROR_TOLERANCE 1e-6 22 | 23 | // PULP DEFINES 24 | #define STACK_SIZE 4096 25 | #define MOUNT 1 26 | #define UNMOUNT 0 27 | #define CID 0 28 | 29 | // Loss defines 30 | #define L1Loss 0 31 | #define MSE 1 32 | #define CrossEntropy 2 33 | #define berHuLoss 3 34 | 35 | void net_step(); 36 | -------------------------------------------------------------------------------- /tests/test_matmul/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | matmul_data.h 4 | net_args.h 5 | dis.S 6 | fastest_matmul.txt -------------------------------------------------------------------------------- /tests/test_matmul/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_matmul 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | IN_CH?=32 # Used also to define the number of channels of DW Conv 6 | MID_CH?=32 7 | OUT_CH?=32 8 | # General arguments 9 | DATA_TYPE?='float' # float, fp16 (=>float16), bf16 (=>float16alt) to select the desired format 10 | DIVIDER?=100000000 # Scaling factor for data initialization in golden model 11 | TRANSP?=0 # Matrix B is transposed if = 1, not transposed if = 0. 12 | NUM_CORES?=8 13 | # End of user settings 14 | 15 | TRAIN_LIB=../../lib 16 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 17 | APP_SRCS += main.c net.c 18 | 19 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 20 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c 23 | 24 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 25 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 26 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 27 | APP_CFLAGS += -DPROF_NET 28 | 29 | APP_LDFLAGS += -lm 30 | 31 | # STATISTICS 32 | APP_CFLAGS += -DSTATS 33 | 34 | get_golden: 35 | python3 utils/GM.py --in_size $(IN_CH) --out_size $(OUT_CH) --mid_size $(MID_CH) --type $(DATA_TYPE) --init_value_div $(DIVIDER) --transpose $(TRANSP) 36 | 37 | profile_fastest: 38 | python3 utils/profile_fastest.py 39 | 40 | include $(RULES_DIR)/pmsis_rules.mk 41 | -------------------------------------------------------------------------------- /tests/test_matmul/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching matmul evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nMatmul evaluation successfully terminated :)\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_matmul/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "net_args.h" 18 | 19 | // User profiling flags 20 | 21 | #if (defined(STANDARD)) && !defined(DEBUG) 22 | #define PROF_MM 23 | #endif 24 | 25 | // Tensor checksum definition 26 | #ifdef FLOAT32 27 | #define CHECK_TOLERANCE 1e-3 28 | #define ERROR_TOLERANCE 0.01 29 | #endif 30 | #ifdef FLOAT16 31 | #define CHECK_TOLERANCE 1e0 32 | #define ERROR_TOLERANCE 0.05 33 | #endif 34 | #ifdef BFLOAT16 35 | #define CHECK_TOLERANCE 1e-3 36 | #define ERROR_TOLERANCE 0.05 37 | #endif 38 | 39 | // PULP DEFINES 40 | #define STACK_SIZE 4096 41 | #define MOUNT 1 42 | #define UNMOUNT 0 43 | #define CID 0 44 | 45 | void net_step(); 46 | -------------------------------------------------------------------------------- /tests/test_matmul/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_mhsa_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | attention_scores.h 3 | init-defines.h 4 | input-sequence.h 5 | mhsa-grads.h 6 | mhsa-output.h 7 | step-check.h 8 | dis.S -------------------------------------------------------------------------------- /tests/test_mhsa_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/*
 * DUMMY MAIN
 * Configures cluster, then calls net_step()
 */
int main () {

    printf("\nHello there.\nConfiguring cluster..\n");
    // Configure cluster
    struct pi_device cluster_dev;       // handle to the PULP cluster device
    struct pi_cluster_conf cl_conf;     // cluster configuration (default values)
    struct pi_cluster_task cl_task;     // descriptor for the task offloaded to the cluster

    // Fill the configuration with defaults and bind it to the device
    pi_cluster_conf_init(&cl_conf);
    pi_open_from_conf(&cluster_dev, &cl_conf);
    // Power on / open the cluster; non-zero return means it is unavailable
    if (pi_cluster_open(&cluster_dev))
    {
        return -1;
    }

    // Offload net_step() to the cluster and block until it completes
    printf("\nLaunching training procedure...\n");
    pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL));


    printf("\nNet training successful!\n");
    pi_cluster_close(&cluster_dev);

    // Terminate the PMSIS runtime with exit code 0
    pmsis_exit(0);
}
import torch
from torch.autograd import Function


def fastexp_gist(x):
    """Fast exp(x) approximation via IEEE-754 bit manipulation (Schraudolph).

    Computes a*x + b, where a = 2^23/ln(2) and b ~ 127*2^23, so that the
    integer part of the result is the float32 bit pattern of ~exp(x)
    (relative error up to a few percent). Mirrors the on-device fastexp
    so the golden model matches the PULP kernel numerics.
    """
    x_copy = x.type(torch.float32)
    # a*x + b: target float32 bit pattern of exp(x), as a real number
    x_copy = x_copy * 12102203.17133801 + 1064986823.010288
    # Underflow guard: bit patterns below 2^23 (denormal region) flush to 0
    x_copy = torch.where(x_copy < 8388608, 0, x_copy).type(torch.float32)
    # Overflow guard: clamp just below the +inf bit pattern (0x7F800000)
    x_copy = torch.where(x_copy > 2139095040, 2139095040, x_copy).type(torch.float32)
    # Reinterpret the integer bit pattern as float32.
    # BUGFIX: cast through int32 instead of uint32 -- torch.uint32 does not
    # exist before PyTorch 2.3 (AttributeError) and has limited op support
    # after; the clamped values lie in [0, 2139095040], well inside int32.
    return x_copy.type(torch.int32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax autograd Function built on the fast-exp approximation.

    Golden model for the fp16 MHSA softmax kernel; `bf16_format` selects the
    half-precision format used for the exp/normalization intermediates.
    """

    @staticmethod
    def forward(ctx, input, bf16_format):
        """Numerically-stable softmax over the last dimension.

        bf16_format: 0 -> float16 intermediates, otherwise bfloat16.
        """
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes).to(torch.float32))

        if bf16_format == 0:
            x_exp = x_exp.half()
        else:
            x_exp = x_exp.bfloat16()

        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Softmax backward: (g - sum(g*y)) * y; None for the bf16_format arg."""
        out_data = ctx.saved_tensors[0]
        sums = torch.sum(grad_output * out_data, 2, keepdim=True).repeat(1, 1, grad_output.shape[-1])
        grad_input = (grad_output - sums) * out_data

        return grad_input, None
15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | 36 | pi_cluster_conf_init(&cl_conf); 37 | pi_open_from_conf(&cluster_dev, &cl_conf); 38 | if (pi_cluster_open(&cluster_dev)) 39 | { 40 | return -1; 41 | } 42 | 43 | printf("\nLaunching training procedure...\n"); 44 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 45 | 46 | 47 | printf("\nNet training successful!\n"); 48 | pi_cluster_close(&cluster_dev); 49 | 50 | pmsis_exit(0); 51 | } 52 | -------------------------------------------------------------------------------- /tests/test_mhsa_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
import torch
from torch.autograd import Function


def fastexp_gist(x):
    """Fast exp(x) approximation via IEEE-754 bit manipulation (Schraudolph).

    Computes a*x + b, where a = 2^23/ln(2) and b ~ 127*2^23, so that the
    integer part of the result is the float32 bit pattern of ~exp(x)
    (relative error up to a few percent). Mirrors the on-device fastexp
    so the golden model matches the PULP kernel numerics.
    """
    x_copy = x.type(torch.float32)
    # a*x + b: target float32 bit pattern of exp(x), as a real number
    x_copy = x_copy * 12102203.17133801 + 1064986823.010288
    # Underflow guard: bit patterns below 2^23 (denormal region) flush to 0
    x_copy = torch.where(x_copy < 8388608, 0, x_copy).type(torch.float32)
    # Overflow guard: clamp just below the +inf bit pattern (0x7F800000)
    x_copy = torch.where(x_copy > 2139095040, 2139095040, x_copy).type(torch.float32)
    # Reinterpret the integer bit pattern as float32.
    # BUGFIX: cast through int32 instead of uint32 -- torch.uint32 does not
    # exist before PyTorch 2.3 (AttributeError) and has limited op support
    # after; the clamped values lie in [0, 2139095040], well inside int32.
    return x_copy.type(torch.int32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax autograd Function built on the fast-exp approximation.

    Golden model for the fp32 MHSA softmax kernel.
    """

    @staticmethod
    def forward(ctx, input):
        """Numerically-stable softmax over the last dimension."""
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes))

        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Softmax backward: (g - sum(g*y)) * y."""
        out_data = ctx.saved_tensors[0]
        sums = torch.sum(grad_output * out_data, 2, keepdim=True).repeat(1, 1, grad_output.shape[-1])
        grad_input = (grad_output - sums) * out_data

        return grad_input
15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | pi_cluster_conf_init(&cl_conf); 36 | pi_open_from_conf(&cluster_dev, &cl_conf); 37 | if (pi_cluster_open(&cluster_dev)) 38 | { 39 | return -1; 40 | } 41 | 42 | printf("\nLaunching training procedure...\n"); 43 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 44 | 45 | 46 | printf("\nNet training successful!\n"); 47 | pi_cluster_close(&cluster_dev); 48 | 49 | pmsis_exit(0); 50 | } 51 | -------------------------------------------------------------------------------- /tests/test_mhsa_fp32_partialsoftmax_old/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "step-check.h" 18 | 19 | // User profiling flags 20 | 21 | //#define DEBUG 22 | 23 | #if defined(FORWARD) && !defined(DEBUG) 24 | #define PROF_FWD 25 | #endif 26 | 27 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD) || defined(BACKWARD)) && !defined(DEBUG) 28 | #define PROF_BCKWD 29 | #endif 30 | 31 | // Net sizes 32 | 33 | #define Tker_l0 (Tin_l0*Tout_l0) 34 | 35 | // Tensor checksum definition 36 | #define CHECK_TOLERANCE 0.001 37 | #define ERROR_TOLERANCE 0.001 38 | 39 | // PULP DEFINES 40 | #define STACK_SIZE 4096 41 | #define MOUNT 1 42 | #define UNMOUNT 0 43 | #define CID 0 44 | 45 | // Support functions 46 | static inline void forward(); 47 | static inline void compare_tensors(float *A, float *B, int length); 48 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 49 | static inline void train(); 50 | // Main function 51 | void net_step (); 52 | 53 | -------------------------------------------------------------------------------- /tests/test_mhsa_fp32_partialsoftmax_old/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp16/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | attention_scores.h 3 | init-defines.h 4 | input-sequence.h 5 | mhsa-grads.h 6 | mhsa-output.h 7 | step-check.h 8 | output-defines.h 9 | output-sequence.h 10 | attention-defines.h 11 | dis.S -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp16/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/*
 * DUMMY MAIN
 * Configures cluster, then calls net_step()
 */
int main () {

    printf("\nHello there.\nConfiguring cluster..\n");
    // Configure cluster
    struct pi_device cluster_dev;       // handle to the PULP cluster device
    struct pi_cluster_conf cl_conf;     // cluster configuration (default values)
    struct pi_cluster_task cl_task;     // descriptor for the task offloaded to the cluster


    // Fill the configuration with defaults and bind it to the device
    pi_cluster_conf_init(&cl_conf);
    pi_open_from_conf(&cluster_dev, &cl_conf);
    // Power on / open the cluster; non-zero return means it is unavailable
    if (pi_cluster_open(&cluster_dev))
    {
        return -1;
    }

    // Offload net_step() to the cluster and block until it completes
    printf("\nLaunching training procedure...\n");
    pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL));


    printf("\nNet training successful!\n");
    pi_cluster_close(&cluster_dev);

    // Terminate the PMSIS runtime with exit code 0
    pmsis_exit(0);
}
compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "step-check.h" 18 | #include "pulp_train_defines.h" 19 | 20 | // User profiling flags 21 | 22 | //#define DEBUG 23 | 24 | #if defined(FORWARD) && !defined(DEBUG) 25 | #define PROF_FWD 26 | #endif 27 | 28 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD) || defined(BACKWARD)) && !defined(DEBUG) 29 | #define PROF_BCKWD 30 | #endif 31 | 32 | // Net sizes 33 | 34 | #define Tker_l0 (Tin_l0*Tout_l0) 35 | 36 | // Tensor checksum definition 37 | #define CHECK_TOLERANCE 0.001 38 | #define ERROR_TOLERANCE 0.001 39 | 40 | // PULP DEFINES 41 | #define STACK_SIZE 4096 42 | #define MOUNT 1 43 | #define UNMOUNT 0 44 | #define CID 0 45 | #define MAX_SIZE 51264 46 | 47 | // Support functions 48 | static inline void forward(); 49 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 50 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 51 | static inline void train(); 52 | 53 | // Netowork functions 54 | void DNN_init(); 55 | void forward(); 56 | void net_step(); 57 | 58 | // DMA managment functions 59 | void reset_arguments(); 60 | void update_blob(); 61 | void reset_dim(); 62 | -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp16/utils/SoftmaxFastExp.py: 
import torch
from torch.autograd import Function


def fastexp_gist(x):
    """Approximate ``exp(x)`` element-wise with the Schraudolph bit trick.

    Scales and offsets x so that, truncated to a 32-bit integer and
    reinterpreted as an IEEE-754 float32 bit pattern, the result
    approximates exp(x) (relative error of a few percent).
    """
    y = x.type(torch.float32)
    # exp(x) ~ bits_to_float(x * 2^23 / ln(2) + 127 * 2^23 - correction)
    y = y * 12102203.17133801 + 1064986823.010288
    # Underflow guard: bit patterns below 2^23 would decode to denormals -> flush to 0.
    y = torch.where(y < 8388608, 0, y).type(torch.float32)
    # Overflow guard: clamp to 0x7F800000 (+inf bit pattern).
    y = torch.where(y > 2139095040, 2139095040, y).type(torch.float32)

    # FIX: reinterpret through int32, not uint32. torch.uint32 does not exist
    # before PyTorch 2.3 and has only partial support after; int32 is safe
    # because the clamped values never exceed 2139095040 < 2^31.
    return y.type(torch.int32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax over the last axis using the fastexp approximation, in bfloat16.

    Golden-model counterpart of the fp16 PULP softmax kernel.
    """

    @staticmethod
    def forward(ctx, input):
        # Subtract the row-wise max for numerical stability before exponentiating.
        maxes = torch.max(input, -1, keepdim=True)[0].bfloat16()
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes).to(torch.float32))
        x_exp = x_exp.bfloat16()
        x_exp_sum = torch.sum(x_exp, -1, keepdim=True).bfloat16()
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output.bfloat16()

    @staticmethod
    def backward(ctx, grad_output):
        out_data = ctx.saved_tensors[0]
        # Softmax gradient: (g - sum(g * y)) * y, reduced over the softmax axis.
        # FIX: reduce over dim=-1 (the axis forward normalizes) instead of the
        # hard-coded dim=2, and rely on broadcasting instead of repeat() —
        # identical for the 3D case, correct for any rank.
        sums = torch.sum(grad_output * out_data, -1, keepdim=True)
        grad_input = (grad_output - sums) * out_data

        return grad_input
not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | 36 | pi_cluster_conf_init(&cl_conf); 37 | pi_open_from_conf(&cluster_dev, &cl_conf); 38 | if (pi_cluster_open(&cluster_dev)) 39 | { 40 | return -1; 41 | } 42 | 43 | printf("\nLaunching training procedure...\n"); 44 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 45 | 46 | 47 | printf("\nNet training successful!\n"); 48 | pi_cluster_close(&cluster_dev); 49 | 50 | pmsis_exit(0); 51 | } 52 | -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp32/net-args.h: -------------------------------------------------------------------------------- 1 | #define EMBED_SIZE 512 2 | #define HIDDEN_SIZE 512 3 | #define SEQ_LEN 64 4 | #define N_HEADS 4 5 | -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 
(the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "step-check.h" 18 | 19 | // User profiling flags 20 | 21 | //#define DEBUG 22 | 23 | #if defined(FORWARD) && !defined(DEBUG) 24 | #define PROF_FWD 25 | #endif 26 | 27 | #if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD) || defined(BACKWARD)) && !defined(DEBUG) 28 | #define PROF_BCKWD 29 | #endif 30 | 31 | // Net sizes 32 | 33 | #define Tker_l0 (Tin_l0*Tout_l0) 34 | 35 | // Tensor checksum definition 36 | #define CHECK_TOLERANCE 0.001 37 | #define ERROR_TOLERANCE 0.001 38 | 39 | // PULP DEFINES 40 | #define STACK_SIZE 4096 41 | #define MOUNT 1 42 | #define UNMOUNT 0 43 | #define CID 0 44 | #define MAX_SIZE 25120 45 | 46 | // Support functions 47 | static inline void forward(); 48 | static inline void compare_tensors(float *A, float *B, int length); 49 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 50 | static inline void train(); 51 | 52 | // Netowork functions 53 | void DNN_init_forward(); 54 | void DNN_init_backward(); 55 | void forward(); 56 | void net_step(); 57 | 58 | // DMA managment functions 59 | void reset_arguments(); 60 | void update_blob(); 61 | void reset_dim(); 62 | -------------------------------------------------------------------------------- /tests/test_mhsa_paper_fp32/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ -------------------------------------------------------------------------------- 
import torch
from torch.autograd import Function


def fastexp_gist(x):
    """Approximate ``exp(x)`` element-wise with the Schraudolph bit trick.

    Scales and offsets x so that, truncated to a 32-bit integer and
    reinterpreted as an IEEE-754 float32 bit pattern, the result
    approximates exp(x) (relative error of a few percent).
    """
    y = x.type(torch.float32)
    # exp(x) ~ bits_to_float(x * 2^23 / ln(2) + 127 * 2^23 - correction)
    y = y * 12102203.17133801 + 1064986823.010288
    # Underflow guard: bit patterns below 2^23 would decode to denormals -> flush to 0.
    y = torch.where(y < 8388608, 0, y).type(torch.float32)
    # Overflow guard: clamp to 0x7F800000 (+inf bit pattern).
    y = torch.where(y > 2139095040, 2139095040, y).type(torch.float32)

    # FIX: reinterpret through int32, not uint32. torch.uint32 does not exist
    # before PyTorch 2.3 and has only partial support after; int32 is safe
    # because the clamped values never exceed 2139095040 < 2^31.
    return y.type(torch.int32).view(torch.float32)


class SoftmaxFastExp(Function):
    """Softmax over the last axis using the fastexp approximation (fp32).

    Golden-model counterpart of the fp32 PULP softmax kernel.
    """

    @staticmethod
    def forward(ctx, input):
        # Subtract the row-wise max for numerical stability before exponentiating.
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes))
        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        out_data = ctx.saved_tensors[0]
        # Softmax gradient: (g - sum(g * y)) * y, reduced over the softmax axis.
        # FIX: reduce over dim=-1 (the axis forward normalizes) instead of the
        # hard-coded dim=2, and rely on broadcasting instead of repeat() —
        # identical for the 3D case, correct for any rank.
        sums = torch.sum(grad_output * out_data, -1, keepdim=True)
        grad_input = (grad_output - sums) * out_data

        return grad_input
* Configures cluster, then calls a simple net_step() 7 | */ 8 | int test_kickoff (void) { 9 | printf("\nHello there.\nConfiguring cluster..\n"); 10 | // Configure cluster 11 | struct pi_device cluster_dev; 12 | struct pi_cluster_conf cl_conf; 13 | struct pi_cluster_task cl_task; 14 | 15 | pi_cluster_conf_init(&cl_conf); 16 | pi_open_from_conf(&cluster_dev, &cl_conf); 17 | if (pi_cluster_open(&cluster_dev)) 18 | { 19 | return -1; 20 | } 21 | 22 | printf("\nMobilebert procedure...\n"); 23 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 24 | 25 | printf("Done, successful!\n"); 26 | pi_cluster_close(&cluster_dev); 27 | 28 | pmsis_exit(0); 29 | } 30 | 31 | int main(){ 32 | return pmsis_kickoff((void *) test_kickoff); 33 | } 34 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp16/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | #define MAX_SIZE 33824 7 | #define MAX_SIZE_L2 180736 8 | 9 | #include "pulp_train_defines.h" 10 | #include "pmsis.h" 11 | #include 12 | #include "bsp/ram/hyperram.h" 13 | #include "bsp/ram/spiram.h" 14 | #include "bsp/flash/hyperflash.h" 15 | #include "bsp/flash/spiflash.h" 16 | #include 17 | #include "bsp/fs.h" 18 | 19 | // Tensor checksum definition 20 | #define CHECK_TOLERANCE 0.001 21 | #define ERROR_TOLERANCE 0.001 22 | 23 | static inline void compare_tensors(fp16 *A, fp16 *B, int length); 24 | int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size); 25 | 26 | // Netowork functions 27 | void DNN_init(); 28 | void forward(); 29 | void net_step(); 30 | //void tiled_matmul(void* matmul_args, int flash_input); 31 | void tiled_matmul(void* matmul_args); 32 | void tiled_norm(void* nonorm_args); 33 | // void tiled_skip(void* residual_args, int flash_lout); 34 | void tiled_skip(void* 
residual_args); 35 | void tiled_relu(void* Relu_args); 36 | 37 | // DMA managment functions 38 | void reset_arguments(); 39 | void update_blob(); 40 | void reset_dim(); 41 | 42 | //utility struct and functions for reading from a file inside GAP9 43 | 44 | typedef struct{ 45 | struct pi_device fs; 46 | struct pi_device flash; 47 | pi_fs_file_t *file; 48 | } AT_FLASH_FS_T; 49 | 50 | static inline void __at_flash_fs_open(AT_FLASH_FS_T *file, int is_write, struct pi_readfs_conf *conf, const char *filename, int *err); 51 | static inline void __at_default_flash_fs_open(AT_FLASH_FS_T *file, int is_write, struct pi_readfs_conf *conf, const char *filename, int *err); 52 | static inline void __at_flash_fs_close(AT_FLASH_FS_T *file); 53 | static inline void __at_default_flash_file_open(AT_FLASH_FS_T *file, int is_write, const char *filename, int *err); 54 | 55 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp16/net_args.h: -------------------------------------------------------------------------------- 1 | // Float16 Mobilebert 2 | #define FLOAT16 3 | 4 | #define VOCAB_SIZE 30522 5 | 6 | #define EMBED_SIZE 128 7 | 8 | #define HIDDEN_SIZE 512 9 | 10 | #define INTERMEDIATE_SIZE 512 11 | 12 | #define NUM_HEADS 4 13 | 14 | #define N_HIDDEN_LAYERS 1 15 | 16 | #define N_FFN 4 17 | 18 | #define BOTTLENECK_SIZE 128 19 | 20 | #define ATTENTION_DROPOUT 0.0 21 | 22 | #define HIDDEN_DROPOUT 0.0 23 | 24 | #define TYPE_VOCAB_SIZE 2 25 | 26 | #define SEQ_LEN 128 27 | 28 | #define INPUT_SIZE 65536 29 | 30 | #define OUTPUT_SIZE 65536 31 | 32 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp16/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | MobileBert_PyTorch/ 3 | params.txt/ 4 | MobileBert_PyTorch.7z -------------------------------------------------------------------------------- 
/tests/test_mobilebert_fp32/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | attention-defines.h 3 | bottleneck-defines.h 4 | ffn-defines.h 5 | input-sequence.h 6 | intermediate-defines.h 7 | net-args.h 8 | output-defines.h 9 | output-sequence.h 10 | vocabulary.h 11 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp32/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nMobilebert procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Done, successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp32/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | #define MAX_SIZE 25104 7 | 8 | #include "pulp_train_defines.h" 9 | 10 | // Tensor checksum definition 11 | #define CHECK_TOLERANCE 0.001 12 | #define ERROR_TOLERANCE 0.001 13 | 14 | static inline void compare_tensors(float *A, float *B, int length); 15 | int check_tensor(float * tensor_out, float * tensor_ref, int size); 16 | 17 | // Netowork functions 18 | void DNN_init(); 19 | void 
forward(); 20 | void net_step(); 21 | void tiled_matmul(void* matmul_args); 22 | void tiled_norm(void* nonorm_args); 23 | void tiled_skip(void* residual_args); 24 | void tiled_relu(void* Relu_args); 25 | 26 | // DMA managment functions 27 | void load_input(void * src_blob, uint8_t data_diff_both); 28 | void load_output(void * src_blob, uint8_t data_diff_both); 29 | void load_coeff(void * src_blob, uint8_t data_diff_both); 30 | void store_output(void * dest_blob, uint8_t data_diff_both); 31 | void store_input(void * dest_blob, uint8_t data_diff_both); 32 | void store_coeff(void * dest_blob, uint8_t data_diff_both); 33 | void copy_struct_param(unsigned int from, unsigned int to, int size); 34 | void get_input_dim(void * b); 35 | void get_output_dim(void * b); 36 | void get_weight_dim(void * b); 37 | void reset_arguments(); 38 | void update_blob(); 39 | void reset_dim(); -------------------------------------------------------------------------------- /tests/test_mobilebert_fp32/net_args.h: -------------------------------------------------------------------------------- 1 | // Float32 Mobilebert 2 | #define FLOAT32 3 | 4 | #define VOCAB_SIZE 30522 5 | 6 | #define EMBED_SIZE 128 7 | 8 | #define HIDDEN_SIZE 512 9 | 10 | #define INTERMEDIATE_SIZE 512 11 | 12 | #define NUM_HEADS 4 13 | 14 | #define N_HIDDEN_LAYERS 1 15 | 16 | #define N_FFN 4 17 | 18 | #define BOTTLENECK_SIZE 128 19 | 20 | #define ATTENTION_DROPOUT 0.0 21 | 22 | #define HIDDEN_DROPOUT 0.0 23 | 24 | #define TYPE_VOCAB_SIZE 2 25 | 26 | #define SEQ_LEN 128 27 | 28 | -------------------------------------------------------------------------------- /tests/test_mobilebert_fp32/utils/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | MobileBert_PyTorch/ 3 | params.txt/ 4 | MobileBert_PyTorch.7z -------------------------------------------------------------------------------- /tests/test_pad/.gitignore: 
-------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_pad/Makefile: -------------------------------------------------------------------------------- 1 | APP = padder 2 | 3 | # User code 4 | NUM_CORES?=8 5 | CH_IN?=2 6 | H_IN?=3 7 | W_IN?=3 8 | LPAD?=1 9 | RPAD?=1 10 | UPAD?=1 11 | DPAD?=1 12 | HWC_LAY?=0 # =0 use CHW layout, =1 use HWC layout for the weights 13 | APP_CFLAGS += -DPRINT_MATS 14 | # End of user code 15 | 16 | 17 | TRAIN_LIB=../../lib 18 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 19 | APP_SRCS = main.c net.c 20 | #APP_CFLAGS += -DDEBUG 21 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 22 | APP_CFLAGS += -O3 -g3 -mno-memcpy 23 | APP_CFLAGS += -DFABRIC 24 | APP_CFLAGS += -DCLUSTER 25 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 26 | APP_CFLAGS += -DPROF_NET 27 | APP_CFLAGS += -mhwloopalign 28 | APP_CFLAGS += -DTin_C=$(CH_IN) 29 | APP_CFLAGS += -DTin_H=$(H_IN) 30 | APP_CFLAGS += -DTin_W=$(W_IN) 31 | APP_CFLAGS += -DLPAD=$(LPAD) 32 | APP_CFLAGS += -DRPAD=$(RPAD) 33 | APP_CFLAGS += -DUPAD=$(UPAD) 34 | APP_CFLAGS += -DDPAD=$(DPAD) 35 | APP_CFLAGS += -DHWC_LAYOUT=$(HWC_LAY) 36 | APP_LDFLAGS += -lm 37 | 38 | # STATISTICS 39 | APP_CFLAGS += -DSTATS 40 | 41 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 42 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 43 | 44 | include $(RULES_DIR)/pmsis_rules.mk 45 | -------------------------------------------------------------------------------- /tests/test_pad/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct 
pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching transposition procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Transposition successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_pad/net.h: -------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | // Padded sizes 8 | #define Tout_H (Tin_H+UPAD+DPAD) 9 | #define Tout_W (Tin_W+RPAD+LPAD) 10 | 11 | void net_step (); -------------------------------------------------------------------------------- /tests/test_pooling/.gitignore: -------------------------------------------------------------------------------- 1 | init_defines.h 2 | pool_data.h 3 | log.txt 4 | dis.S 5 | BUILD/ -------------------------------------------------------------------------------- /tests/test_pooling/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_pooling 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | IN_H?=6 6 | IN_W?=6 7 | IN_C?=1 8 | KER_H?=6 9 | KER_W?=6 10 | H_STR?=1 11 | W_STR?=1 12 | VALUE?=0.5 13 | # General arguments 14 | NUM_CORES?=8 15 | # End of user settings 16 | 17 | TRAIN_LIB=../../lib 18 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 19 | APP_SRCS += main.c net.c 20 | 21 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c 22 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c 23 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp32.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_linear_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c 26 | APP_SRCS += 
$(TRAIN_LIB_SRCS)/pulp_conv_dw_fp32.c 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c 30 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_pooling_fp32.c 31 | 32 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 33 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 34 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 35 | APP_CFLAGS += -DPROF_NET 36 | APP_CFLAGS += -DIN_H=$(IN_H) 37 | APP_CFLAGS += -DIN_W=$(IN_W) 38 | APP_CFLAGS += -DIN_C=$(IN_C) 39 | APP_CFLAGS += -DKER_H=$(KER_H) 40 | APP_CFLAGS += -DKER_W=$(KER_W) 41 | APP_CFLAGS += -DH_STR=$(H_STR) 42 | APP_CFLAGS += -DW_STR=$(W_STR) 43 | APP_CFLAGS += -DVALUE=$(VALUE) 44 | 45 | APP_LDFLAGS += -lm 46 | 47 | # STATISTICS 48 | APP_CFLAGS += -DSTATS 49 | 50 | get_golden: 51 | python3 ./utils/GM.py --in_c $(IN_C) --in_h $(IN_H) --in_w $(IN_W) --ker_h $(KER_H) --ker_w $(KER_W) --stride_h $(H_STR) --stride_w $(W_STR) --value $(VALUE) 52 | 53 | include $(RULES_DIR)/pmsis_rules.mk 54 | -------------------------------------------------------------------------------- /tests/test_pooling/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching pooling evaluation...\n\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("\nPooling evaluation successfully terminated :)\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } -------------------------------------------------------------------------------- /tests/test_pooling/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | // User profiling flags 18 | #define FLOAT32 19 | // Tensor checksum definition 20 | #define CHECK_TOLERANCE 1e-12 21 | #define ERROR_TOLERANCE 1e-12 22 | 23 | // PULP DEFINES 24 | #define STACK_SIZE 4096 25 | #define MOUNT 1 26 | #define UNMOUNT 0 27 | #define CID 0 28 | 29 | void net_step(); 30 | -------------------------------------------------------------------------------- /tests/test_random/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt -------------------------------------------------------------------------------- /tests/test_random/Makefile: -------------------------------------------------------------------------------- 1 | APP = rng 2 | 3 | # User code 4 | NUM_CORES?=8 5 | PROBABILITY?=0.5 6 | SEED?=10 7 | # End of user code 8 | 9 | TRAIN_LIB=../../lib 10 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 11 | APP_SRCS = main.c net.c 12 | #APP_CFLAGS += -DDEBUG 13 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 14 | APP_CFLAGS += -O3 -g3 -mno-memcpy 15 | APP_CFLAGS += -DFABRIC 16 | APP_CFLAGS += -DCLUSTER 17 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 18 | APP_CFLAGS += -DPROF_NET 19 | APP_CFLAGS += -mhwloopalign 20 | APP_CFLAGS += -DPROBABILITY=$(PROBABILITY) 21 | APP_CFLAGS += -DSEED=$(SEED) 22 | APP_LDFLAGS += -lm 23 | 24 | # STATISTICS 25 | APP_CFLAGS += -DSTATS 26 | 27 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 28 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_random.c 30 | 31 | include $(RULES_DIR)/pmsis_rules.mk 32 | -------------------------------------------------------------------------------- /tests/test_random/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main (void) { 9 | 10 | 11 | printf("\nHello there.\nConfiguring cluster..\n"); 
12 | // Configure cluster 13 | struct pi_device cluster_dev; 14 | struct pi_cluster_conf cl_conf; 15 | struct pi_cluster_task cl_task; 16 | 17 | pi_cluster_conf_init(&cl_conf); 18 | pi_open_from_conf(&cluster_dev, &cl_conf); 19 | if (pi_cluster_open(&cluster_dev)) 20 | { 21 | return -1; 22 | } 23 | 24 | printf("\nLaunching random number generation procedure...\n"); 25 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 26 | 27 | printf("Transposition successful!\n"); 28 | pi_cluster_close(&cluster_dev); 29 | 30 | pmsis_exit(0); 31 | } 32 | -------------------------------------------------------------------------------- /tests/test_random/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "stats.h" 4 | #include "net.h" 5 | 6 | // ----------------- FP32 data ---------------------- 7 | PI_L1 float probability; 8 | PI_L1 int output; 9 | 10 | // Main function 11 | void net_step () 12 | { 13 | #ifdef PROF_NET 14 | INIT_STATS(); 15 | PRE_START_STATS(); 16 | #endif 17 | 18 | struct integer_random_args args_fp32; 19 | probability = PROBABILITY; 20 | args_fp32.seed = SEED; 21 | args_fp32.probability = probability; 22 | args_fp32.output = &output; 23 | 24 | printf("Bernoulli Random Number Generator (from seed):\n"); 25 | #ifdef PROF_NET 26 | START_STATS(); 27 | #endif 28 | 29 | pulp_random_bernoulli(&args_fp32); 30 | 31 | #ifdef PROF_NET 32 | STOP_STATS(); 33 | #endif 34 | 35 | printf("First run output: %d\n", output); 36 | 37 | printf("\nTest random number generation stats:"); 38 | float mean = 0; 39 | int acc = 0; 40 | float var = 0; 41 | for (int i=0; i<1000; i++) { 42 | pulp_random_bernoulli(&args_fp32); 43 | acc += output; 44 | } 45 | mean = (float) acc / 1000.0; 46 | printf("Mean: %f\n", mean); 47 | 48 | return; 49 | } 50 | -------------------------------------------------------------------------------- /tests/test_random/net.h: 
-------------------------------------------------------------------------------- 1 | // PULP DEFINES 2 | #define STACK_SIZE 4096 3 | #define MOUNT 1 4 | #define UNMOUNT 0 5 | #define CID 0 6 | 7 | 8 | void net_step (); -------------------------------------------------------------------------------- /tests/test_reduce_mean/.gitignore: -------------------------------------------------------------------------------- 1 | BUILD/ 2 | log.txt 3 | matmul_data.h 4 | net_args.h 5 | dis.S 6 | fastest_matmul.txt 7 | test_data.h 8 | -------------------------------------------------------------------------------- /tests/test_reduce_mean/Makefile: -------------------------------------------------------------------------------- 1 | APP = test_reduce_mean 2 | 3 | # User settings 4 | # Standard matmul arguments 5 | DIMS = 1 7 800 2 6 | REDUCE_AXIS = 2 7 | 8 | NUM_CORES = 8 9 | DATA_TYPE = 32 # 32 for fp32, 16 for fp16 10 | # End of user settings 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS += main.c net.c 15 | 16 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 17 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 18 | 19 | APP_CFLAGS += -I. 
-I$(TRAIN_LIB)/include 20 | APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3 21 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 22 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 23 | APP_CFLAGS += -DPROF_NET 24 | 25 | APP_LDFLAGS += -lm 26 | 27 | # STATISTICS 28 | APP_CFLAGS += -DSTATS 29 | 30 | get_golden: 31 | rm -rf BUILD/ 32 | python3 utils/GM.py --input_dims $(DIMS) --reduce_axis $(REDUCE_AXIS) --dtype $(DATA_TYPE) 33 | 34 | include $(RULES_DIR)/pmsis_rules.mk 35 | -------------------------------------------------------------------------------- /tests/test_reduce_mean/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | 5 | int main(void) { 6 | printf("\nHello there.\nConfiguring cluster..\n"); 7 | 8 | // Configure cluster 9 | struct pi_device cluster_dev; 10 | struct pi_cluster_conf cl_conf; 11 | struct pi_cluster_task cl_task; 12 | 13 | pi_cluster_conf_init(&cl_conf); 14 | pi_open_from_conf(&cluster_dev, &cl_conf); 15 | 16 | if (pi_cluster_open(&cluster_dev)) { 17 | return -1; 18 | } 19 | 20 | printf("\nLaunching broadcast matmul evaluation...\n\n"); 21 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, reduce_mean_test, NULL)); 22 | 23 | printf("\nMatmul evaluation successfully terminated :)\n"); 24 | pi_cluster_close(&cluster_dev); 25 | 26 | pmsis_exit(0); 27 | } 28 | -------------------------------------------------------------------------------- /tests/test_reduce_mean/net.c: -------------------------------------------------------------------------------- 1 | #include "pulp_train.h" 2 | 3 | #include "net.h" 4 | #include "stats.h" 5 | 6 | #include "test_data.h" 7 | 8 | #include "tensor_checkers.h" 9 | 10 | 11 | void reduce_mean_test() { 12 | #ifdef PROF_NET 13 | INIT_STATS(); 14 | PRE_START_STATS(); 15 | #endif 16 | printf("Executing on %d cores.\n", NUM_CORES); 17 | 18 | #if DATA_TYPE == 32 19 | struct reduce_mean_args_fp32 args; 20 | printf("WORKING ON FP32\n"); 21 | 
# Makefile for the residual-connection (skip/add) test.
APP = test_residual

# --- User settings (all overridable from the command line, e.g. `make CI=32`) ---
CI?=64
HI?=56
WI?=56
KER?=1
NUM_CORES?=8
HWC?=0
DEBUG_INFO?=0
STEP?='BACKWARD'
DATA_TYPE?='FLOAT32'
USE_IM2COL?=1
USE_DMA?=0
MATMUL_TYPE?=0

# Location of the PULP-TrainLib kernels compiled into this test.
TRAIN_LIB=../../lib
TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources
APP_SRCS += main.c net.c


# Both FP32 and FP16 kernel variants are compiled; DATA_TYPE selects at runtime
# generation (GM.py) which golden data / step the test exercises.
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_residual_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_residual_fp16.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp16.c


APP_CFLAGS += -I. -I$(TRAIN_LIB)/include
APP_CFLAGS += -DCLUSTER -DFABRIC -O3 -g3
APP_CFLAGS += -DNUM_CORES=$(NUM_CORES)
APP_CFLAGS += -DPROF_NET
APP_CFLAGS += -DOPTIMIZE



APP_LDFLAGS += -lm

# STATISTICS
APP_CFLAGS += -DSTATS

# Regenerate the golden-model headers with the current settings.
get_golden:
	python3 ./utils/GM.py -CI ${CI} -HI ${HI} -WI ${WI} -KER ${KER} -NUM_CORES ${NUM_CORES} -HWC ${HWC} -DEBUG_INFO ${DEBUG_INFO} -STEP ${STEP} -DATA_TYPE ${DATA_TYPE} -USE_IM2COL ${USE_IM2COL} -USE_DMA ${USE_DMA} -MATMUL_TYPE ${MATMUL_TYPE}

include $(RULES_DIR)/pmsis_rules.mk
import torch
import numpy as np


def TensorToArray(T, hwc):
    """Flatten a rank-1 or rank-3 torch tensor into a list of Python floats.

    Args:
        T: torch tensor of rank 1, or rank 3 laid out as (C, H, W).
        hwc: for rank-3 input, truthy emits HWC order, falsy emits CHW.

    Returns:
        list[float]: the flattened values.

    Raises:
        ValueError: for any other rank (the old code silently returned None,
            which then crashed later inside WriteArray).
    """
    dim = len(T.size())

    if dim == 1:
        # FIX: cast each element to float — the old code appended the raw
        # 0-dim tensor elements, so WriteArray printed "tensor(...)" into
        # the generated C header instead of a plain number.
        return [float(v) for v in T]

    if dim == 3:
        res = []
        if hwc:
            for h in range(T.size(1)):
                for w in range(T.size(2)):
                    for c in range(T.size(0)):
                        res.append(float(T[c][h][w]))
        else:
            for c in range(T.size(0)):
                for h in range(T.size(1)):
                    for w in range(T.size(2)):
                        res.append(float(T[c][h][w]))
        return res

    raise ValueError(f"TensorToArray supports rank 1 or 3 tensors, got rank {dim}")


def WriteArray(array, name, f, d):
    """Write `array` into the open file `f` as a C array `d name[len]`.

    When d == "fp16" every value is narrowed to np.float16 first, so the
    generated header matches on-device half precision.
    """
    name = str(name)
    # Build " val1, val2, ..." exactly as the element-by-element writer did.
    if d == "fp16":
        values = [f" {np.float16(v)}" for v in array]
    else:
        values = [f" {v}" for v in array]
    f.write(f"\n{d} {name}[{len(array)}] = ")
    f.write(" {" + ",".join(values) + "};\n")
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c 24 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 25 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp32.c 26 | 27 | DATA_TYPE?='fp32' 28 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 29 | APP_CFLAGS += -O3 -g 30 | APP_CFLAGS += -DFABRIC 31 | APP_CFLAGS += -DCLUSTER 32 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 33 | APP_CFLAGS += -DPROF_NET 34 | APP_CFLAGS += -DMEMOCC_COMP 35 | APP_CFLAGS += -mhwloopalign 36 | APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE} 37 | #APP_CFLAGS += -DDEBUG 38 | APP_LDFLAGS += -lm 39 | 40 | # STATISTICS 41 | APP_CFLAGS += -DSTATS 42 | 43 | get_golden: 44 | python3 ./utils/GM.py --step $(STEP) --in_width $(IN_W) --in_height $(IN_H) --ch_in ${IN_CH} --ch_out ${OUT_CH} --out_width $(OUT_W) 45 | 46 | profile_all_optim: 47 | python3 ./utils/profile_optimized.py --num_matmuls ${NUM_MATMULS} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --in_width $(IN_W) --in_height $(IN_H) --ch_in ${IN_CH} --ch_out ${OUT_CH} --out_width $(OUT_W) 48 | 49 | profile_all_sizes: 50 | python3 ./utils/profile_sizes.py --num_sizes ${NUM_SIZES} --step ${STEP} --cores ${NUM_CORES} --data_type ${DATA_TYPE} --matmul_type ${MATMUL_TYPE} 51 | 52 | include $(RULES_DIR)/pmsis_rules.mk 53 | -------------------------------------------------------------------------------- /tests/test_rnn_fp32/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | #include "pmsis.h" 19 | #include "stdio.h" 20 | #include "stdlib.h" 21 | #include "net.h" 22 | 23 | /* 24 | * DUMMY MAIN 25 | * Configures cluster, then calls net_step() 26 | */ 27 | int main () { 28 | 29 | printf("\nHello there.\nConfiguring cluster..\n"); 30 | // Configure cluster 31 | struct pi_device cluster_dev; 32 | struct pi_cluster_conf cl_conf; 33 | struct pi_cluster_task cl_task; 34 | 35 | pi_cluster_conf_init(&cl_conf); 36 | pi_open_from_conf(&cluster_dev, &cl_conf); 37 | if (pi_cluster_open(&cluster_dev)) 38 | { 39 | return -1; 40 | } 41 | 42 | printf("\nLaunching training procedure...\n"); 43 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 44 | 45 | 46 | printf("\nNet training successful!\n"); 47 | pi_cluster_close(&cluster_dev); 48 | 49 | pmsis_exit(0); 50 | } 51 | -------------------------------------------------------------------------------- /tests/test_rnn_fp32/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
# Makefile for the Tiny ViT FP32 forward-pass test.
APP = vit_fp32

# User code
NUM_CORES = 8
MATMUL_TYPE?=9
DATA_TYPE?=32

# Model preset; must be a key of MODEL_CONFIGS in utils/model_configs.py.
CONFIG_NAME = "TINY_VIT_5M"
# End of user code

TASK_NAME=sst-2
TRAIN_LIB=../../lib
TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources

#APP_CFLAGS += -DDEBUG
APP_CFLAGS += -I. -I$(TRAIN_LIB)/include
APP_CFLAGS += -O3 -g
APP_CFLAGS += -DFABRIC
APP_CFLAGS += -DCLUSTER
APP_CFLAGS += -DNUM_CORES=$(NUM_CORES)
# NOTE(review): N_HEADS is not assigned anywhere in this Makefile, so this
# expands to "-DN_HEADS=" unless it is passed on the command line — confirm.
APP_CFLAGS += -DN_HEADS=$(N_HEADS)
APP_CFLAGS += -DPROF_NET
APP_CFLAGS += -mhwloopalign
APP_LDFLAGS += -lm
APP_CFLAGS += -DMEMOCC_COMP

# NOTE(review): TILE_H / TILE_W / TILE_DIM are likewise only defined when
# supplied externally — verify the intended invocation.
APP_CFLAGS += -DTILE_H=$(TILE_H)
APP_CFLAGS += -DTILE_W=$(TILE_W)
APP_CFLAGS += -DTILE_DIM=$(TILE_DIM)

APP_CFLAGS += -DOPTIMIZE
APP_CFLAGS += -DMATMUL_TYPE=${MATMUL_TYPE}

# STATISTICS
APP_CFLAGS += -DSTATS

# =============== SOURCES ===============
APP_SRCS = main.c net.c

# For Conv2D
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_naive_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_dw_fp32.c

# For GELU
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp32.c

# For LayerNorm
APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_layernorm_fp32.c


include $(RULES_DIR)/pmsis_rules.mk

# Regenerate the golden model / headers for the selected configuration.
get_golden:
	rm -rf BUILD/
	python3 utils/GM.py --config $(CONFIG_NAME)
pi_open_from_conf(&cluster_dev, &cl_conf); 14 | if (pi_cluster_open(&cluster_dev)) { 15 | return -1; 16 | } 17 | 18 | printf("Launching ViT procedure...\n"); 19 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 20 | 21 | printf("Done, successful!\n"); 22 | pi_cluster_close(&cluster_dev); 23 | 24 | pmsis_exit(0); 25 | } 26 | -------------------------------------------------------------------------------- /tests/test_tiny_vit_fp32/net.c: -------------------------------------------------------------------------------- 1 | // ~~~~~~~~~~ INCLUDES ~~~~~~~~~~ 2 | #include "pulp_train.h" 3 | 4 | #include "model_components.h" 5 | 6 | #include "stats.h" 7 | #include "net.h" 8 | 9 | 10 | // Main function 11 | void net_step() { 12 | // Initialize performance counters 13 | #ifdef PROF_NET 14 | INIT_STATS(); 15 | PRE_START_STATS(); 16 | #endif 17 | 18 | // Initialize model components 19 | printf("Tiny ViT test:\n"); 20 | printf("Initializing components...\n"); 21 | init_and_connect_blobs(); 22 | 23 | // Forward pass 24 | printf("Forward pass...\n"); 25 | #ifdef PROF_NET 26 | START_STATS(); 27 | #endif 28 | forward(); 29 | #ifdef PROF_NET 30 | STOP_STATS(); 31 | #endif 32 | 33 | // Perform forward check 34 | printf("\nChecking forward step results: \n"); 35 | 36 | // Check the output 37 | check_output(); 38 | 39 | return; 40 | } 41 | -------------------------------------------------------------------------------- /tests/test_tiny_vit_fp32/net.h: -------------------------------------------------------------------------------- 1 | #ifndef NET_H 2 | #define NET_H 3 | 4 | // PULP DEFINES 5 | #define STACK_SIZE 40960 6 | #define MOUNT 1 7 | #define UNMOUNT 0 8 | #define CID 0 9 | #define MAX_SIZE 25104 10 | 11 | #include "pulp_train_defines.h" 12 | 13 | // net functions 14 | void forward(); 15 | void net_step(); 16 | 17 | // DMA managment functions 18 | void load_input(void * src_blob, uint8_t data_diff_both); 19 | void load_output(void * src_blob, 
import torch


class Conv2dBN(torch.nn.Sequential):
    """Bias-free Conv2d followed by BatchNorm2d, foldable into a single conv.

    Submodules are registered under the names "c" (conv) and "bn"
    (batch-norm), which the rest of the model relies on.
    """

    def __init__(
        self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1.0
    ):
        super().__init__()

        conv = torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False)
        self.add_module("c", conv)

        # Deterministic BN start: weight = bn_weight_init, bias = 0.
        norm = torch.nn.BatchNorm2d(b)
        torch.nn.init.constant_(norm.weight, bn_weight_init)
        torch.nn.init.constant_(norm.bias, 0)
        self.add_module("bn", norm)

    @torch.no_grad()
    def fuse(self):
        """Fold the BN running statistics into the conv; return the fused Conv2d."""
        conv, norm = self.c, self.bn

        # Per-channel multiplier that BN applies in eval mode.
        scale = norm.weight / (norm.running_var + norm.eps) ** 0.5
        fused_w = conv.weight * scale[:, None, None, None]

        fused_b = norm.bias - norm.running_mean * norm.weight / (norm.running_var + norm.eps) ** 0.5

        fused = torch.nn.Conv2d(
            fused_w.size(1) * conv.groups,
            fused_w.size(0),
            fused_w.shape[2:],
            stride=conv.stride,
            padding=conv.padding,
            dilation=conv.dilation,
            groups=conv.groups,
        )

        fused.weight.data.copy_(fused_w)
        fused.bias.data.copy_(fused_b)

        return fused
from torch import nn


class Mlp(nn.Module):
    """LayerNorm -> Linear -> activation -> dropout -> Linear -> dropout.

    hidden_features and out_features fall back to in_features when not given
    (falsy values such as None or 0 trigger the fallback, mirroring the
    original `or` semantics).
    """

    def __init__(
        self,
        in_features,
        hidden_features=None,
        out_features=None,
        act_layer=nn.GELU,
        drop=0.0,
    ):
        super().__init__()

        # Default both widths to the input width.
        if not out_features:
            out_features = in_features
        if not hidden_features:
            hidden_features = in_features

        # Creation order kept stable so seeded parameter init is reproducible.
        self.norm = nn.LayerNorm(in_features)
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.act = act_layer()
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.drop(self.act(self.fc1(self.norm(x))))
        return self.drop(self.fc2(hidden))
import torch
from einops import rearrange
from torch import nn
from torch.amp import autocast


class SinusoidalEmbeddings(nn.Module):
    """Sinusoidal position frequencies with optional xpos length scaling.

    forward() returns (freqs, scale); `scale` is a single 1.0 unless
    use_xpos is set, in which case it varies with distance from the
    sequence midpoint.
    """

    def __init__(self, dim, scale_base=None, use_xpos=False, theta=10000):
        super().__init__()

        # Inverse-frequency spectrum: theta^(-2k/dim) for k = 0 .. dim/2 - 1.
        inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2).float() / dim))

        self.register_buffer("inv_freq", inv_freq)

        # xpos related
        self.use_xpos = use_xpos
        self.scale_base = scale_base

        assert not (
            use_xpos and (scale_base is None)
        ), "scale base must be defined if using xpos"

        # Per-frequency base for the xpos scaling; persistent=False keeps it
        # out of checkpoints since it is recomputable from dim.
        scale = (torch.arange(0, dim, 2) + 0.4 * dim) / (1.4 * dim)
        self.register_buffer("scale", scale, persistent=False)

    # autocast disabled so the frequency math stays in full precision.
    @autocast("cuda", enabled=False)
    def forward(self, x):
        # NOTE(review): assumes x is (..., seq_len, dim) — the sequence
        # length is read from the second-to-last axis; confirm with callers.
        seq_len, device = x.shape[-2], x.device

        t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)

        # Outer product of positions and inverse frequencies, duplicated
        # along the last axis (shape (seq_len, dim)).
        freqs = torch.einsum("i , j -> i j", t, self.inv_freq)
        freqs = torch.cat((freqs, freqs), dim=-1)

        if not self.use_xpos:
            return freqs, torch.ones(1, device=device)

        # xpos path: scale^power, with power growing away from the center.
        power = (t - (seq_len // 2)) / self.scale_base
        scale = self.scale ** rearrange(power, "n -> n 1")
        scale = torch.cat((scale, scale), dim=-1)

        return freqs, scale
# NOTE(review): this span of the packed dump holds two small utility modules
# (utils/model_configs.py and utils/writers/writers_utils.py); both are
# reproduced here so neither is lost.

# --- model_configs.py: Tiny ViT architecture presets (selected by name) -----
MODEL_CONFIGS = {
    # Reduced configuration for quick sanity runs.
    "DEMO_TINY_VIT_CONFIG": {
        "IN_IMG_SIZE": 32,
        "IN_CHANS": 3,
        "NUM_CLASSES": 10,
        "EMBED_DIMS": [4, 4, 4],
        "DEPTHS": [1, 2, 2],
        "NUM_HEADS": [2, 4, 4],
        "WINDOW_SIZES": [7, 1, 1],
        "MLP_RATIO": 4.0,
        "DROP_RATE": 0.0,
        "DROP_PATH_RATE": 0.0,
        "USE_CHECKPOINT": False,
        "MBCONV_EXPAND_RATIO": 2.0,
        "LOCAL_CONV_SIZE": 3,
    },
    # Standard TinyViT-5M configuration.
    "TINY_VIT_5M": {
        "IN_IMG_SIZE": 224,
        "IN_CHANS": 3,
        "NUM_CLASSES": 1000,
        "EMBED_DIMS": [64, 128, 160, 320],
        "DEPTHS": [2, 2, 6, 2],
        "NUM_HEADS": [2, 4, 5, 10],
        "WINDOW_SIZES": [7, 7, 14, 7],
        "MLP_RATIO": 4.0,
        "DROP_RATE": 0.0,
        "DROP_PATH_RATE": 0.0,
        "USE_CHECKPOINT": False,
        "MBCONV_EXPAND_RATIO": 4.0,
        "LOCAL_CONV_SIZE": 3,
    },
}


# --- writers_utils.py: helpers used by the C-header writers -----------------

def get_initialization_text(dim, data_name, filler):
    """Return C code filling `data_name[0:dim]` from `filler`.

    `filler` is either another C array (copied element-wise) or one of the
    scalar initializers "zero_init" / "min_float" (assigned directly).
    """
    suffix = ";\n" if filler in ("zero_init", "min_float") else "[i];\n"
    return f"\tfor (int i = 0; i < {dim}; i++) {data_name}[i] = {filler}{suffix}"


def get_connect_text(blob_name, elements):
    """Return C statements assigning every key/value in `elements` to the
    corresponding field of the blob struct named `blob_name`."""
    text = "".join(
        f"\t{blob_name}.{field} = {value};\n" for field, value in elements.items()
    )
    return text + "\n"


def adapt_onnx_name(name):
    """Turn an ONNX node name into a valid C identifier (prefix + sanitize)."""
    return "_" + str(name).replace("/", "_").replace(".", "_").replace(":", "_")


def extract_input_information(node):
    """Return (data, shape) for a dict-style node or an ONNX initializer.

    Raises:
        NotImplementedError: when `node` is neither a dict with
            "data"/"shape" keys nor an object exposing `.name` and `.dims`.
    """
    if isinstance(node, dict):
        return node["data"], tuple(node["shape"])

    # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt and
    # SystemExit; only attribute/iteration failures mean "unknown structure".
    try:
        return node.name, tuple(node.dims)
    except (AttributeError, TypeError) as err:
        raise NotImplementedError("Node structure not recognized") from err
15 | */ 16 | 17 | #include "pmsis.h" 18 | #include "net.h" 19 | 20 | /* 21 | * DUMMY MAIN 22 | * Configures cluster, then calls net_step() 23 | */ 24 | int main (void) { 25 | 26 | 27 | printf("\nHello there.\nConfiguring cluster..\n"); 28 | // Configure cluster 29 | struct pi_device cluster_dev; 30 | struct pi_cluster_conf cl_conf; 31 | struct pi_cluster_task cl_task; 32 | 33 | pi_cluster_conf_init(&cl_conf); 34 | pi_open_from_conf(&cluster_dev, &cl_conf); 35 | if (pi_cluster_open(&cluster_dev)) 36 | { 37 | return -1; 38 | } 39 | 40 | printf("\nLaunching training procedure...\n"); 41 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL)); 42 | 43 | printf("Net training successful!\n"); 44 | pi_cluster_close(&cluster_dev); 45 | 46 | pmsis_exit(0); 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_transp_conv2d_fp16/net.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/*
 * Test configuration header for the FP16 transposed Conv2D test.
 * Layer output sizes, check tolerances and PULP runtime constants.
 */

#include "pulp_train_defines.h"
#include "step-check.h"

// User profiling flags
// Profiling is enabled only when not compiling with DEBUG, so debug prints
// do not pollute the performance counters.

#if defined(FORWARD) && !defined(DEBUG)
#define PROF_FWD
#endif

#if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG)
#define PROF_BKWD
#endif

// Net sizes

// TRANSPOSED CONV2D
// Output size of a transposed convolution:
//   (in - 1) * stride - total_padding + (kernel - 1) + 1
// Tin_*/Tker_*/STRIDE_*/PAD_* come from the generated step-check / init headers.
#define Tout_H_l1 ((Tin_H_l1-1)*STRIDE_H-(PAD_U+PAD_D)+(Tker_H_l1-1)+1)
#define Tout_W_l1 ((Tin_W_l1-1)*STRIDE_W-(PAD_L+PAD_R)+(Tker_W_l1-1)+1)

// Tensor checksum definition
// Looser than the FP32 test (1e-6 there) because of half-precision rounding.
#define CHECK_TOLERANCE 1e-3
#define ERROR_TOLERANCE 1e-3

// PULP DEFINES
#define STACK_SIZE 4096
#define MOUNT 1
#define UNMOUNT 0
#define CID 0

// Support functions (defined in net.c)
static inline void forward();                                       // single forward step of the layer under test
static inline void compare_tensors(fp16 *A, fp16 *B, int length);   // compare result against golden reference
int check_tensor(fp16 * tensor_out, fp16 * tensor_ref, int size);   // elementwise check against the reference
static inline void train();                                         // training step (fwd and/or bkwd per step-check.h)
// Main function, offloaded to the cluster by main.c
void net_step ();
/*
 * DUMMY MAIN
 * Configures cluster, then calls net_step()
 */
int main (void) {

  printf("\nHello there.\nConfiguring cluster..\n");
  // Configure cluster: device handle, configuration and task descriptors
  struct pi_device cluster_dev;
  struct pi_cluster_conf cl_conf;
  struct pi_cluster_task cl_task;

  pi_cluster_conf_init(&cl_conf);
  pi_open_from_conf(&cluster_dev, &cl_conf);
  // Abort if the cluster cannot be opened
  if (pi_cluster_open(&cluster_dev))
  {
    return -1;
  }

  // Offload net_step() to the cluster as a blocking task
  printf("\nLaunching training procedure...\n");
  pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL));

  // Task returned: shut the cluster down and exit
  printf("Net training successful!\n");
  pi_cluster_close(&cluster_dev);

  pmsis_exit(0);
}
/*
 * Test configuration header for the FP32 transposed Conv2D test.
 * Layer output sizes, check tolerances and PULP runtime constants.
 */

#include "step-check.h"

// User profiling flags
// Profiling is enabled only when not compiling with DEBUG, so debug prints
// do not pollute the performance counters.

#if defined(FORWARD) && !defined(DEBUG)
#define PROF_FWD
#endif

#if (defined(BACKWARD_ERROR) || defined(BACKWARD_GRAD)) && !defined(DEBUG)
#define PROF_BKWD
#endif

// Net sizes

// TRANSPOSED CONV2D
// Output size of a transposed convolution:
//   (in - 1) * stride - total_padding + (kernel - 1) + 1
// Tin_*/Tker_*/STRIDE_*/PAD_* come from the generated step-check / init headers.
#define Tout_H_l1 ((Tin_H_l1-1)*STRIDE_H-(PAD_U+PAD_D)+(Tker_H_l1-1)+1)
#define Tout_W_l1 ((Tin_W_l1-1)*STRIDE_W-(PAD_L+PAD_R)+(Tker_W_l1-1)+1)

// Tensor checksum definition
// Tighter than the FP16 test (1e-3 there): full single precision.
#define CHECK_TOLERANCE 1e-6
#define ERROR_TOLERANCE 1e-6

// PULP DEFINES
#define STACK_SIZE 4096
#define MOUNT 1
#define UNMOUNT 0
#define CID 0

// Support functions (defined in net.c)
static inline void forward();                                        // single forward step of the layer under test
static inline void compare_tensors(float *A, float *B, int length);  // compare result against golden reference
int check_tensor(float * tensor_out, float * tensor_ref, int size);  // elementwise check against the reference
static inline void train();                                          // training step (fwd and/or bkwd per step-check.h)
// Main function, offloaded to the cluster by main.c
void net_step ();
NUM_CORES = 8 8 | DATA_TYPE = 32 9 | # End of user code 10 | 11 | 12 | TRAIN_LIB=../../lib 13 | TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources 14 | APP_SRCS = main.c net.c 15 | #APP_CFLAGS += -DDEBUG 16 | APP_CFLAGS += -I. -I$(TRAIN_LIB)/include 17 | APP_CFLAGS += -O3 -g3 -mno-memcpy 18 | APP_CFLAGS += -DFABRIC 19 | APP_CFLAGS += -DCLUSTER 20 | APP_CFLAGS += -DNUM_CORES=$(NUM_CORES) 21 | APP_CFLAGS += -DDATA_TYPE=$(DATA_TYPE) 22 | APP_CFLAGS += -DPROF_NET 23 | APP_CFLAGS += -mhwloopalign 24 | APP_LDFLAGS += -lm 25 | 26 | # STATISTICS 27 | APP_CFLAGS += -DSTATS 28 | 29 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c 30 | APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c 31 | 32 | include $(RULES_DIR)/pmsis_rules.mk 33 | 34 | get_golden: 35 | rm -rf BUILD/ 36 | python3 utils/GM.py --dims $(DIMS) --transposed_axes $(TRANSPOSED_AXES) --dtype $(DATA_TYPE) 37 | -------------------------------------------------------------------------------- /tests/test_transpose/main.c: -------------------------------------------------------------------------------- 1 | #include "pmsis.h" 2 | #include "net.h" 3 | 4 | /* 5 | * DUMMY MAIN 6 | * Configures cluster, then calls a simple net_step() 7 | */ 8 | int main(void) { 9 | printf("\nHello there.\nConfiguring cluster..\n"); 10 | 11 | // Configure cluster 12 | struct pi_device cluster_dev; 13 | struct pi_cluster_conf cl_conf; 14 | struct pi_cluster_task cl_task; 15 | 16 | pi_cluster_conf_init(&cl_conf); 17 | pi_open_from_conf(&cluster_dev, &cl_conf); 18 | 19 | if (pi_cluster_open(&cluster_dev)) { 20 | return -1; 21 | } 22 | 23 | printf("\nLaunching transposition procedure...\n"); 24 | pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, transpose_matrices_fp32, NULL)); 25 | 26 | printf("Transposition successful!\n"); 27 | pi_cluster_close(&cluster_dev); 28 | 29 | pmsis_exit(0); 30 | } 31 | -------------------------------------------------------------------------------- /tests/test_transpose/net.c: 
/*
 * Runs the N-dimensional transpose kernel on the cluster and checks the
 * result against the golden output from test_data.h.
 * DATA_TYPE (32 or 16) selects the FP32 or FP16 kernel at compile time.
 */
void transpose_matrices_fp32() {
#ifdef PROF_NET
    INIT_STATS();
    PRE_START_STATS();
#endif
    printf("Executing on %d cores.\n", NUM_CORES);

    // Argument struct type depends on the selected precision
#if DATA_TYPE == 32
    struct transp_args args;
    printf("WORKING ON FP32\n");
#elif DATA_TYPE == 16
    struct transp_args_fp16 args;
    printf("WORKING ON FP16\n");
#endif

    // Get arguments: input/output buffers, dims and permutation come from
    // the generated test_data.h
    args.in_matrix = IN_M;
    args.out_matrix = OUT_M;
    args.dim = DIMS;
    args.transposed_axes = TRANSPOSED_AXES;
    args.n_dim = N_DIMS;

    // Start counters just before the kernel so only the transpose is profiled
#ifdef PROF_NET
    START_STATS();
#endif

    // Perform transposition in parallel on NUM_CORES cluster cores
#if DATA_TYPE == 32
    pi_cl_team_fork(NUM_CORES, transpose, &args);
#elif DATA_TYPE == 16
    pi_cl_team_fork(NUM_CORES, transpose_fp16, &args);
#endif

    // Stop stats
#ifdef PROF_NET
    STOP_STATS();
#endif

    // Verify the result against the golden reference (mean and elementwise)
    mean_error_checker(OUT_M, TEST_TRANSPOSE_OUT, TOTAL_SIZE);
    elementwise_checker(OUT_M, TEST_TRANSPOSE_OUT, TOTAL_SIZE);

    return;
}
// Main function: runs one profiled ViT forward pass and checks the final
// fully-connected output against the golden OUTPUT tensor.
void net_step() {
    // Initialize performance counters
#ifdef PROF_NET
    INIT_STATS();
    PRE_START_STATS();
#endif

    // Initialize model components (blobs and their connections)
    printf("ViT test:\n");
    printf("Initializing components...\n");
    init_and_connect_blobs();

    // Forward pass, profiled between START_STATS and STOP_STATS so only
    // the inference itself is measured
    printf("Forward pass...\n");
#ifdef PROF_NET
    START_STATS();
#endif
    forward();
#ifdef PROF_NET
    STOP_STATS();
#endif

    // Perform forward check: mean error plus elementwise comparison
    printf("\nChecking forward step results: \n");

    mean_error_checker(fc_output_data, OUTPUT, OUTPUT_SIZE);
    elementwise_checker(fc_output_data, OUTPUT, OUTPUT_SIZE);

    return;
}
#ifndef NET_H
#define NET_H

// PULP DEFINES
#define STACK_SIZE 40960
#define MOUNT 1
#define UNMOUNT 0
#define CID 0
// Buffer size cap used by the DMA transfer helpers below
// NOTE(review): 25104 looks layer-derived — confirm against the generated model
#define MAX_SIZE 25104

#include "pulp_train_defines.h"

// net functions
void forward();     // full ViT forward pass
void net_step();    // test entry point, offloaded to the cluster by main.c

// DMA management functions
// data_diff_both selects what to move: data, gradient, or both
void load_input(void * src_blob, uint8_t data_diff_both);
void load_output(void * src_blob, uint8_t data_diff_both);
void load_coeff(void * src_blob, uint8_t data_diff_both);
void store_output(void * dest_blob, uint8_t data_diff_both);
void store_input(void * dest_blob, uint8_t data_diff_both);
void store_coeff(void * dest_blob, uint8_t data_diff_both);
void copy_struct_param(unsigned int from, unsigned int to, int size);
void get_input_dim(void * b);
void get_output_dim(void * b);
void get_weight_dim(void * b);
void reset_arguments();
void update_blob();
void reset_dim();
#endif
# Writers shared by every transformer block, keyed by the component-name
# suffix. Registered once per block below; tuple order matters, as it fixes
# the insertion order of VIT_COMPONENTS_WRITERS.
_BLOCK_WRITERS = (
    ("norm1", layer_norm_writer),
    ("pre_attn_transpose", transpose_writer),
    ("attn", mhsa_writer),
    ("post_attn_transpose", transpose_writer),
    ("proj", linear_writer),
    ("add_1", vector_sum_writer),
    ("norm2", layer_norm_writer),
    ("pwff_fc1", linear_writer),
    ("pwff_gelu", gelu_writer),
    ("pwff_fc2", linear_writer),
    ("add_2", vector_sum_writer),
)

# Register the writers for each of the 12 transformer blocks.
for block_idx in range(12):
    for suffix, writer in _BLOCK_WRITERS:
        VIT_COMPONENTS_WRITERS[f"transformer_blocks_{block_idx}_{suffix}"] = writer
class PositionalEmbedding1D(nn.Module):
    """Learnable 1-D positional embedding added to a token sequence.

    Holds a single (1, seq_len, dim) parameter, initialized to zeros, that is
    broadcast-added over the batch dimension of the input.
    """

    def __init__(self, seq_len, dim):
        """Create a zero-initialized embedding for `seq_len` tokens of size `dim`."""
        super().__init__()
        self.pos_embedding = nn.Parameter(torch.zeros((1, seq_len, dim)))

    def forward(self, x):
        """Return `x` with the positional embedding added (broadcast over batch)."""
        shifted = x + self.pos_embedding
        return shifted
class SoftmaxFastExp(Function):
    """Custom autograd softmax over the last axis using a fast exponential.

    Uses `fastexp_gist` instead of `torch.exp` — presumably to match the
    fast-exp softmax numerics of the target implementation (TODO confirm).
    """

    @staticmethod
    def forward(ctx, input):
        # Subtract the per-row max before exponentiating for numerical
        # stability; keepdim so it broadcasts back over the last axis.
        maxes = torch.max(input, -1, keepdim=True)[0]
        # maxes = torch.swapaxes(maxes, -2, -1)
        x_exp = fastexp_gist((input - maxes))
        x_exp_sum = torch.sum(x_exp, -1, keepdim=True)
        output = x_exp / x_exp_sum
        # Save the softmax output: backward needs it for the Jacobian product.
        ctx.save_for_backward(output)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        # Softmax gradient: grad_in = (g - sum(g * y, axis=-1)) * y,
        # where y is the saved forward output.
        out_data = ctx.saved_tensors[0]
        # NOTE(review): repeat(1, 1, 1, n) hard-codes a 4-D input; a keepdim
        # broadcast would be rank-agnostic — confirm callers are always 4-D.
        sums = torch.sum(grad_output * out_data, -1, keepdim=True).repeat(
            1, 1, 1, grad_output.shape[-1]
        )
        grad_input = (grad_output - sums) * out_data

        return grad_input
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # priority timeout out_xml proc in_yaml 4 | nice -n10 python sw/bwruntest.py --report-junit -t 1800 --yaml -o ne16_tests.xml -p 32 ./basic.yml 5 | -------------------------------------------------------------------------------- /tools/TrainLib_Deployer/deployer_utils/DNN_Reader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | ''' 18 | Authors: Davide Nadalini 19 | ''' -------------------------------------------------------------------------------- /tools/TrainLib_Deployer/deployer_utils/srcfiles/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021-2022 ETH Zurich and University of Bologna 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Configures cluster, then calls net_step()
 **/

int main (void) {

  printf("\nHello sir.\nConfiguring cluster..\n");
  // Configure cluster: device handle, configuration and task descriptors
  struct pi_device cluster_dev;
  struct pi_cluster_conf cl_conf;
  struct pi_cluster_task cl_task;

  pi_cluster_conf_init(&cl_conf);
  pi_open_from_conf(&cluster_dev, &cl_conf);
  // Abort if the cluster cannot be opened
  if (pi_cluster_open(&cluster_dev))
  {
    return -1;
  }

  // Offload the generated net_step() to the cluster as a blocking task
  printf("\nLaunching training procedure...\n");
  pi_cluster_send_task_to_cl(&cluster_dev, pi_cluster_task(&cl_task, net_step, NULL));

  // Task returned: shut the cluster down and exit
  printf("Exiting DNN Training.\n");
  pi_cluster_close(&cluster_dev);

  pmsis_exit(0);
}