├── .clang-format ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── build-sphinx.yml │ ├── pre-commit.yml │ ├── pypi-publish.yml │ ├── test-sphinx.yml │ └── update-branch-on-pr.yml ├── .gitignore ├── .gitlab-ci.yml ├── .gitmodules ├── .pre-commit-config.yaml ├── CITATION.cff ├── CONTRIBUTING.md ├── Jenkinsfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── advanced │ ├── auto.rst │ ├── bramfactor.rst │ ├── extension.rst │ ├── fifo_depth.rst │ ├── hgq.rst │ ├── model_optimization.rst │ └── profiling.rst ├── api │ ├── command.rst │ ├── concepts.rst │ ├── configuration.rst │ └── serialization.rst ├── attr_doc_gen.py ├── backend │ ├── accelerator.rst │ ├── catapult.rst │ ├── oneapi.rst │ ├── quartus.rst │ ├── sr.rst │ └── vitis.rst ├── conf.py ├── frontend │ ├── keras.rst │ ├── pytorch.rst │ └── qonnx.rst ├── img │ ├── act_hls4ml.png │ ├── act_keras.png │ ├── hls4ml_logo.png │ ├── hls4ml_logo.svg │ ├── hls4ml_logo_lightgrey.png │ ├── hls4ml_logo_lightgrey.svg │ ├── hls4ml_logo_navbar.png │ ├── logo.jpg │ ├── logo.png │ ├── nn_map_paper_fig_2.png │ ├── overview.jpg │ ├── overview.pdf │ ├── pynqframe.png │ ├── reuse_factor_paper_fig_8.png │ ├── weights_hls4ml.png │ ├── weights_keras.png │ └── zynq_interfaces.png ├── index.rst ├── intro │ ├── faq.rst │ ├── introduction.rst │ ├── reference.rst │ ├── release_notes.rst │ ├── setup.rst │ └── status.rst ├── ir │ ├── attributes.rst │ ├── flows.rst │ ├── ir.rst │ └── modelgraph.rst └── requirements.txt ├── hls4ml ├── __init__.py ├── backends │ ├── __init__.py │ ├── backend.py │ ├── catapult │ │ ├── __init__.py │ │ ├── catapult_backend.py │ │ ├── catapult_types.py │ │ └── passes │ │ │ ├── __init__.py │ │ │ ├── bn_quant.py │ │ │ ├── broadcast_stream.py │ │ │ ├── conv_same_pad.py │ │ │ ├── conv_stream.py │ │ │ ├── convolution_templates.py │ │ │ ├── convolution_winograd.py │ │ │ ├── core_templates.py │ │ │ ├── fifo_depth_optimization.py │ │ │ ├── garnet_templates.py │ │ │ ├── merge_templates.py │ │ │ ├── pointwise.py │ │ │ ├── pooling_templates.py │ │ │ ├── quantization_templates.py │ │ │ ├── recurrent_templates.py │ │ │ ├── reshaping_templates.py │ │ │ ├── resource_strategy.py │ │ │ └── transform_types.py │ ├── fpga │ │ ├── __init__.py │ │ ├── fpga_backend.py │ │ ├── fpga_layers.py │ │ ├── fpga_types.py │ │ └── passes │ │ │ ├── __init__.py │ │ │ ├── bram_weights.py │ │ │ ├── clone.py │ │ │ ├── embedding.py │ │ │ ├── final_reshape.py │ │ │ ├── fix_softmax_table_size.py │ │ │ ├── hgq_proxy_model.py │ │ │ ├── im2col_codegen.py │ │ │ ├── inplace_parallel_reshape.py │ │ │ ├── inplace_stream_flatten.py │ │ │ ├── remove_softmax.py │ │ │ ├── repack_stream.py │ │ │ └── xnor_pooling.py │ ├── oneapi │ │ ├── __init__.py │ │ ├── oneapi_backend.py │ │ ├── oneapi_template.py │ │ ├── oneapi_types.py │ │ └── passes │ │ │ ├── __init__.py │ │ │ ├── bn_quant.py │ │ │ ├── clone_templates.py │ │ │ ├── convolution_templates.py │ │ │ ├── convolution_winograd.py │ │ │ ├── core_templates.py │ │ │ ├── embedding_templates.py │ │ │ ├── merge_templates.py │ │ │ ├── pointwise.py │ │ │ ├── pooling_templates.py │ │ │ ├── quantization_templates.py │ │ │ ├── recurrent_templates.py │ │ │ ├── reshaping_templates.py │ │ │ ├── resource_strategy.py │ │ │ └── transform_types.py │ ├── quartus │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── bn_quant.py │ │ │ ├── convolution_templates.py │ │ │ ├── convolution_winograd.py │ │ │ ├── core_templates.py │ 
│ │ ├── merge_templates.py │ │ │ ├── pointwise.py │ │ │ ├── pooling_templates.py │ │ │ ├── quantization_templates.py │ │ │ ├── recurrent_templates.py │ │ │ ├── reshaping_templates.py │ │ │ ├── resource_strategy.py │ │ │ └── transform_types.py │ │ ├── quartus_backend.py │ │ └── quartus_types.py │ ├── symbolic │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── expr_templates.py │ │ │ └── validate_lut.py │ │ └── symbolic_backend.py │ ├── template.py │ ├── vitis │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── feature_check.py │ │ │ └── fifo_depth_optimization.py │ │ └── vitis_backend.py │ ├── vivado │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── bn_quant.py │ │ │ ├── broadcast_stream.py │ │ │ ├── conv_same_pad.py │ │ │ ├── conv_stream.py │ │ │ ├── convolution_templates.py │ │ │ ├── core_templates.py │ │ │ ├── einsum.py │ │ │ ├── einsum_dense.py │ │ │ ├── fifo_depth_optimization.py │ │ │ ├── garnet_templates.py │ │ │ ├── merge_templates.py │ │ │ ├── pipeline_style.py │ │ │ ├── pointwise.py │ │ │ ├── pointwise_codegen.py │ │ │ ├── pooling_templates.py │ │ │ ├── quantization_templates.py │ │ │ ├── recurrent_templates.py │ │ │ ├── reshaping_templates.py │ │ │ ├── resource_strategy.py │ │ │ ├── transform_types.py │ │ │ └── unrolled_codegen.py │ │ ├── vivado_backend.py │ │ └── vivado_types.py │ └── vivado_accelerator │ │ ├── __init__.py │ │ ├── passes │ │ ├── __init__.py │ │ └── fifo_depth_optimization.py │ │ ├── supported_boards.json │ │ ├── vivado_accelerator_backend.py │ │ └── vivado_accelerator_config.py ├── cli │ ├── __init__.py │ └── __main__.py ├── contrib │ ├── README.md │ ├── __init__.py │ ├── garnet.py │ └── kl_layer │ │ ├── README.md │ │ ├── kl_layer.h │ │ └── kl_layer.py ├── converters │ ├── __init__.py │ ├── keras │ │ ├── __init__.py │ │ ├── convolution.py │ │ ├── core.py │ │ ├── graph.py │ │ ├── hgq_proxy_model.py │ │ ├── merge.py │ │ ├── model.py │ │ ├── pooling.py │ │ ├── qkeras.py │ │ ├── recurrent.py │ │ ├── reshape.py │ │ └── reshaping.py │ ├── keras_v2_to_hls.py │ ├── keras_v3 │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── conv.py │ │ ├── core.py │ │ ├── einsum_dense.py │ │ ├── merge.py │ │ └── pooling.py │ ├── keras_v3_to_hls.py │ ├── onnx │ │ ├── __init__.py │ │ ├── convolution.py │ │ ├── core.py │ │ ├── merge.py │ │ ├── pooling.py │ │ └── reshape.py │ ├── onnx_to_hls.py │ ├── pytorch │ │ ├── __init__.py │ │ ├── convolution.py │ │ ├── core.py │ │ ├── merge.py │ │ ├── pooling.py │ │ ├── recurrent.py │ │ └── reshape.py │ ├── pytorch_to_hls.py │ └── utils.py ├── model │ ├── __init__.py │ ├── attributes.py │ ├── flow │ │ ├── __init__.py │ │ └── flow.py │ ├── graph.py │ ├── layers.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── optimizer.py │ │ └── passes │ │ │ ├── __init__.py │ │ │ ├── batchnorm_opt.py │ │ │ ├── bn_fuse.py │ │ │ ├── conv_to_convxd.py │ │ │ ├── conv_to_depthwiseconvxd.py │ │ │ ├── convert_to_channels_last.py │ │ │ ├── expand_layer_group.py │ │ │ ├── expand_time_distributed.py │ │ │ ├── fuse_biasadd.py │ │ │ ├── hgq_proxy_model.py │ │ │ ├── infer_precision.py │ │ │ ├── linear.py │ │ │ ├── matmul_const_to_dense.py │ │ │ ├── merge_const.py │ │ │ ├── move_scales.py │ │ │ ├── multi_dense.py │ │ │ ├── qkeras.py │ │ │ ├── quant_opt.py │ │ │ ├── reshape_const.py │ │ │ ├── resize_remove_constants.py │ │ │ ├── seperable_to_dw_conv.py │ │ │ ├── stamp.py │ │ │ └── transpose_opt.py │ ├── profiling.py │ ├── quantizers.py │ └── types.py ├── optimization │ ├── __init__.py │ └── dsp_aware_pruning │ │ ├── __init__.py │ │ ├── 
attributes.py │ │ ├── config.py │ │ ├── keras │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── config.py │ │ ├── masking.py │ │ ├── reduction.py │ │ ├── regularizers.py │ │ └── utils.py │ │ ├── knapsack.py │ │ ├── objectives │ │ ├── __init__.py │ │ ├── gpu_objectives.py │ │ └── vivado_objectives.py │ │ └── scheduler.py ├── report │ ├── __init__.py │ ├── catapult_report.py │ ├── oneapi_report.py │ ├── quartus_report.py │ └── vivado_report.py ├── templates │ ├── catapult │ │ ├── build_lib.sh │ │ ├── build_prj.tcl │ │ ├── catapult_synth.tcl │ │ ├── firmware │ │ │ ├── defines.h │ │ │ ├── myproject.cpp │ │ │ ├── myproject.h │ │ │ └── parameters.h │ │ ├── myproject_bridge.cpp │ │ ├── myproject_test.cpp │ │ └── nnet_utils │ │ │ ├── ap_shift_reg.h │ │ │ ├── hls_math.h │ │ │ ├── nnet_activation.h │ │ │ ├── nnet_activation_stream.h │ │ │ ├── nnet_array.h │ │ │ ├── nnet_batchnorm.h │ │ │ ├── nnet_batchnorm_stream.h │ │ │ ├── nnet_code_gen.h │ │ │ ├── nnet_common.h │ │ │ ├── nnet_conv1d.h │ │ │ ├── nnet_conv1d_latency.h │ │ │ ├── nnet_conv1d_resource.h │ │ │ ├── nnet_conv1d_stream.h │ │ │ ├── nnet_conv2d.h │ │ │ ├── nnet_conv2d_latency.h │ │ │ ├── nnet_conv2d_resource.h │ │ │ ├── nnet_conv2d_stream.h │ │ │ ├── nnet_conv_stream.h │ │ │ ├── nnet_dense.h │ │ │ ├── nnet_dense_compressed.h │ │ │ ├── nnet_dense_latency.h │ │ │ ├── nnet_dense_resource.h │ │ │ ├── nnet_dense_stream.h │ │ │ ├── nnet_embed.h │ │ │ ├── nnet_embed_stream.h │ │ │ ├── nnet_garnet.h │ │ │ ├── nnet_helpers.h │ │ │ ├── nnet_image.h │ │ │ ├── nnet_image_stream.h │ │ │ ├── nnet_math.h │ │ │ ├── nnet_merge.h │ │ │ ├── nnet_merge_stream.h │ │ │ ├── nnet_mult.h │ │ │ ├── nnet_padding.h │ │ │ ├── nnet_padding_stream.h │ │ │ ├── nnet_pooling.h │ │ │ ├── nnet_pooling_stream.h │ │ │ ├── nnet_recr_activations.h │ │ │ ├── nnet_recurrent.h │ │ │ ├── nnet_sepconv1d_stream.h │ │ │ ├── nnet_sepconv2d.h │ │ │ ├── nnet_sepconv2d_stream.h │ │ │ ├── nnet_sepconv_stream.h │ │ │ ├── nnet_stream.h │ │ │ └── nnet_types.h │ ├── oneapi │ │ ├── CMakeLists.txt │ │ ├── exception_handler.hpp │ │ ├── firmware │ │ │ ├── defines.h │ │ │ ├── myproject.cpp │ │ │ ├── myproject.h │ │ │ ├── nnet_utils │ │ │ │ ├── nnet_activation.h │ │ │ │ ├── nnet_activation_stream.h │ │ │ │ ├── nnet_batchnorm.h │ │ │ │ ├── nnet_batchnorm_stream.h │ │ │ │ ├── nnet_common.h │ │ │ │ ├── nnet_conv1d.h │ │ │ │ ├── nnet_conv1d_resource.h │ │ │ │ ├── nnet_conv1d_stream.h │ │ │ │ ├── nnet_conv2d.h │ │ │ │ ├── nnet_conv2d_resource.h │ │ │ │ ├── nnet_conv2d_stream.h │ │ │ │ ├── nnet_dense.h │ │ │ │ ├── nnet_dense_stream.h │ │ │ │ ├── nnet_depthconv1d.h │ │ │ │ ├── nnet_depthconv1d_resource.h │ │ │ │ ├── nnet_depthconv2d.h │ │ │ │ ├── nnet_depthconv2d_resource.h │ │ │ │ ├── nnet_embed.h │ │ │ │ ├── nnet_embed_stream.h │ │ │ │ ├── nnet_helpers.h │ │ │ │ ├── nnet_merge.h │ │ │ │ ├── nnet_merge_stream.h │ │ │ │ ├── nnet_mult.h │ │ │ │ ├── nnet_padding.h │ │ │ │ ├── nnet_padding_stream.h │ │ │ │ ├── nnet_pooling.h │ │ │ │ ├── nnet_pooling_stream.h │ │ │ │ ├── nnet_printf.h │ │ │ │ ├── nnet_recurrent.h │ │ │ │ ├── nnet_recurrent_activation.h │ │ │ │ ├── nnet_recurrent_stream.h │ │ │ │ ├── nnet_resize.h │ │ │ │ ├── nnet_resize_stream.h │ │ │ │ ├── nnet_stream.h │ │ │ │ ├── nnet_transpose.h │ │ │ │ ├── nnet_transpose_stream.h │ │ │ │ └── nnet_types.h │ │ │ └── parameters.h │ │ ├── myproject_bridge.cpp │ │ └── myproject_test.cpp │ ├── quartus │ │ ├── Makefile │ │ ├── ac_types │ │ │ ├── ac_channel.h │ │ │ ├── ac_complex.h │ │ │ ├── ac_fixed.h │ │ │ ├── ac_float.h │ │ │ ├── ac_int.h │ │ │ ├── ac_sc.h │ │ │ 
├── ac_std_float.h │ │ │ └── stream.h │ │ ├── build_lib.sh │ │ ├── firmware │ │ │ ├── defines.h │ │ │ ├── myproject.cpp │ │ │ ├── myproject.h │ │ │ ├── nnet_utils │ │ │ │ ├── nnet_activation.h │ │ │ │ ├── nnet_activation_stream.h │ │ │ │ ├── nnet_batchnorm.h │ │ │ │ ├── nnet_batchnorm_stream.h │ │ │ │ ├── nnet_common.h │ │ │ │ ├── nnet_conv1d.h │ │ │ │ ├── nnet_conv1d_resource.h │ │ │ │ ├── nnet_conv1d_stream.h │ │ │ │ ├── nnet_conv2d.h │ │ │ │ ├── nnet_conv2d_resource.h │ │ │ │ ├── nnet_conv2d_stream.h │ │ │ │ ├── nnet_dense.h │ │ │ │ ├── nnet_dense_compressed.h │ │ │ │ ├── nnet_dense_stream.h │ │ │ │ ├── nnet_embed.h │ │ │ │ ├── nnet_embed_stream.h │ │ │ │ ├── nnet_helpers.h │ │ │ │ ├── nnet_merge.h │ │ │ │ ├── nnet_merge_stream.h │ │ │ │ ├── nnet_mult.h │ │ │ │ ├── nnet_padding.h │ │ │ │ ├── nnet_padding_stream.h │ │ │ │ ├── nnet_pooling.h │ │ │ │ ├── nnet_pooling_stream.h │ │ │ │ ├── nnet_recurrent.h │ │ │ │ ├── nnet_recurrent_activation.h │ │ │ │ ├── nnet_recurrent_stream.h │ │ │ │ ├── nnet_resize.h │ │ │ │ ├── nnet_resize_stream.h │ │ │ │ ├── nnet_stream.h │ │ │ │ ├── nnet_transpose.h │ │ │ │ ├── nnet_transpose_stream.h │ │ │ │ └── nnet_types.h │ │ │ └── parameters.h │ │ ├── myproject_bridge.cpp │ │ ├── myproject_test_parallel.cpp │ │ └── myproject_test_stream.cpp │ ├── symbolic │ │ └── build_lib.sh │ ├── vitis │ │ └── nnet_utils │ │ │ ├── nnet_conv1d.h │ │ │ ├── nnet_conv1d_latency.h │ │ │ ├── nnet_conv1d_resource.h │ │ │ ├── nnet_conv1d_stream.h │ │ │ ├── nnet_conv2d.h │ │ │ ├── nnet_conv2d_latency.h │ │ │ ├── nnet_conv2d_resource.h │ │ │ ├── nnet_conv2d_stream.h │ │ │ ├── nnet_dense_stream.h │ │ │ ├── nnet_pooling.h │ │ │ ├── nnet_pooling_stream.h │ │ │ ├── nnet_sepconv1d_stream.h │ │ │ └── nnet_sepconv2d_stream.h │ ├── vivado │ │ ├── ap_types │ │ │ ├── ap_common.h │ │ │ ├── ap_decl.h │ │ │ ├── ap_fixed.h │ │ │ ├── ap_fixed_base.h │ │ │ ├── ap_fixed_ref.h │ │ │ ├── ap_fixed_special.h │ │ │ ├── ap_int.h │ │ │ ├── ap_int_base.h │ │ │ ├── ap_int_ref.h │ │ │ ├── ap_int_special.h │ │ │ ├── ap_shift_reg.h │ │ │ ├── etc │ │ │ │ └── ap_private.h │ │ │ ├── hls_math.h │ │ │ ├── hls_stream.h │ │ │ └── utils │ │ │ │ └── x_hls_utils.h │ │ ├── build_lib.sh │ │ ├── build_prj.tcl │ │ ├── firmware │ │ │ ├── defines.h │ │ │ ├── myproject.cpp │ │ │ ├── myproject.h │ │ │ └── parameters.h │ │ ├── myproject_bridge.cpp │ │ ├── myproject_test.cpp │ │ ├── nnet_utils │ │ │ ├── nnet_activation.h │ │ │ ├── nnet_activation_stream.h │ │ │ ├── nnet_batchnorm.h │ │ │ ├── nnet_batchnorm_stream.h │ │ │ ├── nnet_code_gen.h │ │ │ ├── nnet_common.h │ │ │ ├── nnet_conv1d.h │ │ │ ├── nnet_conv1d_latency.h │ │ │ ├── nnet_conv1d_resource.h │ │ │ ├── nnet_conv1d_stream.h │ │ │ ├── nnet_conv2d.h │ │ │ ├── nnet_conv2d_latency.h │ │ │ ├── nnet_conv2d_resource.h │ │ │ ├── nnet_conv2d_stream.h │ │ │ ├── nnet_conv_stream.h │ │ │ ├── nnet_dense.h │ │ │ ├── nnet_dense_compressed.h │ │ │ ├── nnet_dense_latency.h │ │ │ ├── nnet_dense_resource.h │ │ │ ├── nnet_dense_stream.h │ │ │ ├── nnet_depthwise_product.h │ │ │ ├── nnet_einsum.h │ │ │ ├── nnet_einsum_dense.h │ │ │ ├── nnet_embed.h │ │ │ ├── nnet_embed_stream.h │ │ │ ├── nnet_function_stubs.h │ │ │ ├── nnet_garnet.h │ │ │ ├── nnet_helpers.h │ │ │ ├── nnet_image.h │ │ │ ├── nnet_image_stream.h │ │ │ ├── nnet_math.h │ │ │ ├── nnet_merge.h │ │ │ ├── nnet_merge_stream.h │ │ │ ├── nnet_mult.h │ │ │ ├── nnet_padding.h │ │ │ ├── nnet_padding_stream.h │ │ │ ├── nnet_pooling.h │ │ │ ├── nnet_pooling_stream.h │ │ │ ├── nnet_recr_activations.h │ │ │ ├── nnet_recurrent.h │ │ │ ├── 
nnet_sepconv1d.h │ │ │ ├── nnet_sepconv1d_latency.h │ │ │ ├── nnet_sepconv1d_stream.h │ │ │ ├── nnet_sepconv2d.h │ │ │ ├── nnet_sepconv2d_latency.h │ │ │ ├── nnet_sepconv2d_stream.h │ │ │ ├── nnet_sepconv_stream.h │ │ │ ├── nnet_stream.h │ │ │ ├── nnet_time_distributed.h │ │ │ ├── nnet_transpose.h │ │ │ ├── nnet_transpose_stream.h │ │ │ └── nnet_types.h │ │ └── vivado_synth.tcl │ └── vivado_accelerator │ │ ├── alveo │ │ ├── krnl_rtl_src │ │ │ ├── krnl_rtl_axi_read_master.sv │ │ │ ├── krnl_rtl_axi_write_master.sv │ │ │ ├── krnl_rtl_control_s_axi.v │ │ │ ├── krnl_rtl_counter.sv │ │ │ ├── krnl_rtl_int.sv │ │ │ └── myproject_kernel.v │ │ ├── python_drivers │ │ │ └── axi_stream_driver.py │ │ └── tcl_scripts │ │ │ └── axi_stream_design.tcl │ │ ├── build_lib.sh │ │ ├── myproject_axi.cpp │ │ ├── myproject_axi.h │ │ ├── pynq-z2 │ │ ├── python_drivers │ │ │ └── axi_stream_driver.py │ │ └── tcl_scripts │ │ │ ├── axi_lite_design.tcl │ │ │ └── axi_stream_design.tcl │ │ └── zcu102 │ │ ├── python_drivers │ │ └── axi_stream_driver.py │ │ └── tcl_scripts │ │ └── axi_stream_design.tcl ├── utils │ ├── __init__.py │ ├── attribute_descriptions.py │ ├── config.py │ ├── dependency.py │ ├── einsum_utils.py │ ├── example_models.py │ ├── fixed_point_utils.py │ ├── link.py │ ├── plot.py │ ├── profiling_utils.py │ ├── serialization.py │ ├── string_utils.py │ ├── symbolic_utils.py │ ├── torch.py │ └── transpose_utils.py └── writer │ ├── __init__.py │ ├── catapult_writer.py │ ├── oneapi_writer.py │ ├── quartus_writer.py │ ├── symbolic_writer.py │ ├── vitis_writer.py │ ├── vivado_accelerator_writer.py │ ├── vivado_writer.py │ └── writers.py ├── pyproject.toml └── test ├── build-prj.sh ├── cleanup.sh ├── compare-reports.sh ├── convert-keras-models.sh ├── convert-onnx-models.sh ├── convert-pytorch-models.sh ├── gather-reports.sh ├── hls4ml-keras-test.sh ├── hls4ml-onnx-test.sh ├── hls4ml-pytorch-test.sh ├── keras-models.txt ├── keras-to-hls.sh ├── onnx-models.txt ├── onnx-to-hls.sh ├── pytest ├── ci-template.yml ├── generate_ci_yaml.py ├── test_activations.py ├── test_auto_precision.py ├── test_backend_config.py ├── test_batchnorm.py ├── test_batchnorm_pytorch.py ├── test_binary_cnn.py ├── test_boxplot.py ├── test_bram_factor.py ├── test_causalpadding.py ├── test_clone_flatten.py ├── test_cnn_mnist_qkeras.py ├── test_conv1d.py ├── test_conv1d_narrow.py ├── test_conv2d_narrow.py ├── test_dense_unrolled.py ├── test_depthconv1d.py ├── test_depthconv2d.py ├── test_einsum_dense.py ├── test_embed.py ├── test_extensions.py ├── test_extensions_pytorch.py ├── test_fetch_example.py ├── test_fifo_depth.py ├── test_flows.py ├── test_garnet.py ├── test_globalpooling.py ├── test_graph.py ├── test_hgq_layers.py ├── test_hgq_players.py ├── test_keras_api.py ├── test_keras_h5_loader.py ├── test_keras_nested_model.py ├── test_keras_v3_api.py ├── test_merge.py ├── test_merge_pytorch.py ├── test_multi_dense.py ├── test_multiout_network.py ├── test_multiout_onnx.py ├── test_optimization │ ├── test_attributes.py │ ├── test_keras │ │ ├── test_masking.py │ │ ├── test_reduction.py │ │ ├── test_regularizers.py │ │ └── test_weight_sharing.py │ ├── test_knapsack.py │ ├── test_objectives.py │ └── test_scheduler.py ├── test_pipeline_style.py ├── test_plot_model.py ├── test_pointwiseconv.py ├── test_pooling.py ├── test_pytorch_api.py ├── test_pytorch_profiler.py ├── test_qkeras.py ├── test_qonnx.py ├── test_recurrent_pytorch.py ├── test_repack_stream.py ├── test_report.py ├── test_report │ ├── Vivado │ │ ├── myproject_csynth.rpt │ │ ├── 
myproject_csynth.xml │ │ ├── vivado_hls.app │ │ └── vivado_synth.rpt │ └── oneAPI │ │ ├── loop_attr.ndjson │ │ ├── quartus.ndjson │ │ └── summary.ndjson ├── test_reshape.py ├── test_rnn.py ├── test_sepconv1d.py ├── test_sepconv2d.py ├── test_sequential_parsing_pytorch.py ├── test_serialization.py ├── test_softmax.py ├── test_softsign.py ├── test_sr.py ├── test_stream_clone.py ├── test_time_distributed.py ├── test_trace.py ├── test_transpose_concat.py ├── test_types.py ├── test_upsampling.py ├── test_upsampling_pytorch.py ├── test_weight_writer.py ├── test_writer_config.py └── test_zeropadding.py ├── pytorch-models.txt └── pytorch-to-hls.sh /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Something isn't working as expected 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | ## Prerequisites 12 | Please make sure to check off these prerequisites before submitting a bug report. 13 | - [ ] Test that the bug appears on the current version of the master branch. Make sure to include the commit hash of the commit you checked out. 14 | - [ ] Check that the issue hasn't already been reported by looking through the currently open issues. 15 | - [ ] If there are steps to reproduce the problem, make sure to write them down below. 16 | - [ ] If relevant, please include the hls4ml project files that were created directly before and/or after the bug. 17 | 18 | ## Quick summary 19 | Please give a brief and concise description of the bug. 20 | 21 | ## Details 22 | Please add to the following sections to describe the bug as accurately as possible. 23 | 24 | ### Steps to Reproduce 25 | Add what needs to be done to reproduce the bug. Add *commented* code examples and make sure to include the original model files / code, and the commit hash you are working on. 26 | 27 | 1. Clone the hls4ml repository 28 | 2. Check out the master branch, with commit hash: [...] 29 | 3. Run conversion [...] on model file with code [...] 30 | 4. [Further steps ...] 31 | 32 | ### Expected behavior 33 | Please add a brief description of what you expected to happen. 34 | 35 | ### Actual behavior 36 | Describe what actually happens instead. 37 | 38 | ## Optional 39 | 40 | ### Possible fix 41 | If you already know where the issue stems from, or you have a hint, please let us know. 42 | 43 | ### Additional context 44 | Add any other context about the problem here. 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Talk and engage with the community 4 | url: https://github.com/fastmachinelearning/hls4ml/discussions/categories/general 5 | about: Check out the GitHub discussions page for hls4ml. This is the best way to get in touch with us, in particular if you have a question about hls4ml or a general problem that is likely not a bug. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for hls4ml 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Prerequisites 11 | Please talk to us before creating a new feature request, 
so that you can check that the idea is not already in active development. 12 | 13 | You can present your idea at the GitHub discussions page for hls4ml: https://github.com/fastmachinelearning/hls4ml/discussions/categories/ideas 14 | 15 | Even if an idea is already being worked on, you can still create a feature request 16 | if you would like to open a discussion about the feature or want to contribute to it. 17 | 18 | ## Details 19 | Please add to the following sections to describe the feature as accurately as possible. 20 | 21 | ### New behavior 22 | Please add a brief and concise description of what you would like to happen in hls4ml in the future. 23 | 24 | ### Motivation 25 | Please tell us why this feature is important to the community. 26 | 27 | ### Parts of hls4ml being affected 28 | Please describe which parts of hls4ml would be affected by this feature. 29 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | > :memo: Please include a summary of the change. 4 | > 5 | > * Please also include relevant motivation and context. 6 | > * List any dependencies that are required for this change. 7 | 8 | ## Type of change 9 | 10 | For a new feature or function, please create an issue first to discuss it 11 | with us before submitting a pull request. 12 | 13 | Note: Please delete options that are not relevant. 14 | 15 | - [ ] Bug fix (non-breaking change that fixes an issue) 16 | - [ ] Documentation update 17 | - [ ] New feature (non-breaking change which adds functionality) 18 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 19 | - [ ] A new research paper code implementation 20 | - [ ] Other (Specify) 21 | 22 | ## Tests 23 | 24 | > :memo: Please describe the tests that you ran to verify your changes. 25 | > 26 | > * Provide instructions so we can reproduce. 27 | > * Please also list any relevant details for your test configuration. 28 | 29 | **Test Configuration**: 30 | 31 | ## Checklist 32 | 33 | - [ ] I have read the [guidelines for contributing](https://github.com/fastmachinelearning/hls4ml/blob/main/CONTRIBUTING.md). 34 | - [ ] I have commented my code, particularly in hard-to-understand areas. 35 | - [ ] I have made corresponding changes to the documentation. 36 | - [ ] My changes generate no new warnings. 37 | - [ ] I have installed and run `pre-commit` on the files I edited or added. 38 | - [ ] I have added tests that prove my fix is effective or that my feature works. 
39 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | -------------------------------------------------------------------------------- /.github/workflows/build-sphinx.yml: -------------------------------------------------------------------------------- 1 | name: build-sphinx 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 16 | ref: ${{ github.event.pull_request.head.sha }} 17 | - name: Allow for file ownership conflicts with Docker and GitHub Actions 18 | run: git config --global --add safe.directory '*' 19 | - uses: jmduarte/sphinx-action@main 20 | env: 21 | SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | with: 23 | pre-build-command: "git config --system --add safe.directory '*'" 24 | docs-folder: "docs/" 25 | - name: Commit Documentation Changes 26 | run: | 27 | git clone https://github.com/fastmachinelearning/hls4ml.git --branch gh-pages --single-branch gh-pages 28 | cp -r docs/_build/html/* gh-pages/ 29 | cd gh-pages 30 | touch .nojekyll 31 | git config --local user.email "action@github.com" 32 | git config --local user.name "GitHub Action" 33 | git add . 34 | git commit -m "Update Sphinx Documentation" -a || true 35 | - name: Push Documentation Changes 36 | uses: ad-m/github-push-action@master 37 | with: 38 | branch: gh-pages 39 | directory: gh-pages 40 | github_token: ${{ secrets.PERSONAL_TOKEN }} 41 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: Run pre-commit 2 | 3 | on: 4 | pull_request: 5 | branches: [ main ] 6 | push: 7 | branches: [ main ] 8 | 9 | jobs: 10 | pre-commit: 11 | name: Format 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: [3.8] 16 | 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | with: 21 | submodules: recursive 22 | 23 | - name: Pre-commit 24 | uses: pre-commit/action@v3.0.1 25 | with: 26 | extra_args: --hook-stage manual --all-files 27 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yml: -------------------------------------------------------------------------------- 1 | name: 📦 Packaging release to PyPI 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | branches: [main] 6 | release: 7 | types: [published] 8 | 9 | jobs: 10 | release: 11 | name: Upload new release to PyPI 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout source 15 | uses: actions/checkout@v4 16 | with: 17 | submodules: recursive 18 | fetch-depth: 0 19 | 20 | - name: Build SDist and Wheel 21 | run: pipx run build --sdist --wheel 22 | 23 | - uses: actions/upload-artifact@v4 24 | with: 25 | path: dist/*.* 26 | 27 | - name: Publish 📦 to PyPI 28 | if: startsWith(github.ref, 'refs/tags') 29 | uses: pypa/gh-action-pypi-publish@release/v1 30 | with: 31 | password: ${{ secrets.PYPI_PASSWORD }} 32 | -------------------------------------------------------------------------------- /.github/workflows/test-sphinx.yml: 
-------------------------------------------------------------------------------- 1 | name: test-sphinx 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 16 | ref: ${{ github.event.pull_request.head.sha }} 17 | - name: Allow for file ownership conflicts with Docker and GitHub Actions 18 | run: git config --global --add safe.directory '*' 19 | - uses: jmduarte/sphinx-action@main 20 | env: 21 | SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | with: 23 | pre-build-command: "git config --system --add safe.directory '*'" 24 | docs-folder: "docs/" 25 | - uses: actions/upload-artifact@v4 26 | with: 27 | path: docs/_build/html 28 | -------------------------------------------------------------------------------- /.github/workflows/update-branch-on-pr.yml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request_target: 3 | types: [labeled] 4 | branches: [main] 5 | 6 | name: Update branch on PR from fork 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout repo 12 | uses: actions/checkout@v4 13 | if: ${{ github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name && github.event.label.name == 'please test' }} 14 | with: 15 | ref: ${{ github.event.pull_request.head.sha }} 16 | - name: Push changes 17 | if: ${{ github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name && github.event.label.name == 'please test' }} 18 | run: | 19 | git checkout -b pr/${{ github.event.pull_request.number }} 20 | git push --force origin pr/${{ github.event.pull_request.number }} 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | _version.py 3 | __pycache__ 4 | build/ 5 | dist/ 6 | sdist/ 7 | *.egg-info/ 8 | vivado_prj 9 | .vscode 10 | my-hls-test 11 | *.tar.gz 12 | docs/_build 13 | docs/autodoc/* 14 | hls4mlprj_* 15 | *~ 16 | *.ipynb_checkpoints/ 17 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - generate 3 | - trigger 4 | - test 5 | 6 | generator: 7 | stage: generate 8 | image: python:3.8-alpine 9 | variables: 10 | N_TESTS_PER_YAML: 4 11 | tags: 12 | - k8s-default 13 | before_script: 14 | - pip install pyyaml 15 | script: 16 | - cd test/pytest 17 | - python generate_ci_yaml.py 18 | artifacts: 19 | paths: 20 | - test/pytest/pytests.yml 21 | 22 | pytests: 23 | stage: trigger 24 | trigger: 25 | include: 26 | - local: test/pytest/ci-template.yml 27 | - artifact: test/pytest/pytests.yml 28 | job: generator 29 | strategy: depend 30 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "example-models"] 2 | path = example-models 3 | url = https://github.com/hls-fpga-machine-learning/example-models.git 4 | [submodule "hls4ml/templates/catapult/ac_types"] 5 | path = hls4ml/templates/catapult/ac_types 6 | url = https://github.com/hlslibs/ac_types.git 7 | [submodule "hls4ml/templates/catapult/ac_simutils"] 8 | path = hls4ml/templates/catapult/ac_simutils 9 | url = https://github.com/hlslibs/ac_simutils.git 10 
| [submodule "hls4ml/templates/catapult/ac_math"] 11 | path = hls4ml/templates/catapult/ac_math 12 | url = https://github.com/hlslibs/ac_math.git 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: (^hls4ml\/templates\/(vivado|quartus)\/(ap_types|ac_types)\/|^test/pytest/test_report/) 2 | 3 | repos: 4 | - repo: https://github.com/psf/black 5 | rev: 25.1.0 6 | hooks: 7 | - id: black 8 | language_version: python3 9 | args: ['--line-length=125', 10 | '--skip-string-normalization'] 11 | 12 | - repo: https://github.com/tox-dev/pyproject-fmt 13 | rev: v2.6.0 14 | hooks: 15 | - id: pyproject-fmt 16 | 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v5.0.0 19 | hooks: 20 | - id: check-added-large-files 21 | - id: check-case-conflict 22 | - id: check-merge-conflict 23 | - id: check-symlinks 24 | - id: check-toml 25 | - id: check-yaml 26 | - id: debug-statements 27 | - id: end-of-file-fixer 28 | - id: mixed-line-ending 29 | - id: requirements-txt-fixer 30 | - id: trailing-whitespace 31 | 32 | - repo: https://github.com/PyCQA/isort 33 | rev: 6.0.1 34 | hooks: 35 | - id: isort 36 | 37 | - repo: https://github.com/asottile/pyupgrade 38 | rev: v3.20.0 39 | hooks: 40 | - id: pyupgrade 41 | args: ["--py310-plus"] 42 | 43 | - repo: https://github.com/pycqa/flake8 44 | rev: 7.2.0 45 | hooks: 46 | - id: flake8 47 | exclude: docs/conf.py 48 | additional_dependencies: [flake8-bugbear, flake8-print] 49 | args: ['--max-line-length=125', # github viewer width 50 | '--extend-ignore=E203,T201'] # E203 is not PEP8 compliant 51 | 52 | - repo: https://github.com/mgedmin/check-manifest 53 | rev: "0.50" 54 | hooks: 55 | - id: check-manifest 56 | stages: [manual] 57 | 58 | - repo: https://github.com/jmduarte/p-clang-format 59 | rev: "v1.0.4" 60 | hooks: 61 | - id: p-clang-format 62 | types_or: [c++, c, cuda] 63 | ci: 64 | autofix_commit_msg: '[pre-commit.ci] auto fixes from pre-commit hooks' 65 | autofix_prs: false # default is true 66 | autoupdate_branch: 'main' 67 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 68 | autoupdate_schedule: weekly 69 | skip: [] 70 | submodules: true 71 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "Please cite the following works when using this software." 3 | type: software 4 | authors: 5 | - given-names: "FastML Team" 6 | title: "hls4ml" 7 | version: "v1.1.0" 8 | date-released: "2025-03-17" 9 | doi: 10.5281/zenodo.1201549 10 | repository-code: "https://github.com/fastmachinelearning/hls4ml" 11 | url: "https://fastmachinelearning.org/hls4ml" 12 | keywords: 13 | - python 14 | - machine-learning 15 | - FPGA 16 | - physics 17 | - tensorflow 18 | - pytorch 19 | - onnx 20 | - qonnx 21 | license: "Apache-2.0" 22 | abstract: | 23 | hls4ml is an open-source software-hardware codesign workflow 24 | to interpret and translate machine learning algorithms for 25 | implementations in hardware, including FPGAs and ASICs. 
26 | references: 27 | - type: article 28 | title: "Fast inference of deep neural networks on FPGAs with hls4ml" 29 | authors: 30 | - family-names: "Duarte" 31 | given-names: "Javier" 32 | - family-names: "Han" 33 | given-names: "Song" 34 | - family-names: "Harris" 35 | given-names: "Philip" 36 | - family-names: "Jindariani" 37 | given-names: "Sergo" 38 | - family-names: "Kreinar" 39 | given-names: "Edward" 40 | - family-names: "Kreis" 41 | given-names: "Benjamin" 42 | - family-names: "Ngadiuba" 43 | given-names: "Jennifer" 44 | - family-names: "Pierini" 45 | given-names: "Maurizio" 46 | - family-names: "Rivera" 47 | given-names: "Ryan" 48 | - family-names: "Tran" 49 | given-names: "Nhan" 50 | - family-names: "Wu" 51 | given-names: "Zhenbin" 52 | journal: "JINST" 53 | volume: "13" 54 | start: "P07027" 55 | doi: "10.1088/1748-0221/13/07/P07027" 56 | year: "2018" 57 | number: "07" 58 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.md CONTRIBUTING.md CITATION.cff pyproject.toml .clang-format 2 | graft example-models 3 | graft test 4 | graft contrib 5 | recursive-include hls4ml/templates * 6 | recursive-include hls4ml *.py 7 | recursive-include hls4ml/contrib * 8 | global-exclude .git .gitmodules .gitlab-ci.yml *.pyc 9 | include hls4ml/backends/vivado_accelerator/supported_boards.json 10 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @sphinx-apidoc -f -T -o autodoc/ ../hls4ml 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /docs/advanced/bramfactor.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | Loading weights from external BRAM 3 | ================================== 4 | 5 | .. note:: 6 | This feature is being evaluated for re-implementation. We welcome feedback from users on how to make the implementation more flexible. 7 | 8 | ``hls4ml`` can optionally store weights in BRAMs external to the design. This is supported in the Vivado/Vitis and Catapult backends. It is the responsibility of the user to ensure the weights are properly loaded during the operation of the design. 9 | 10 | The feature works as a threshold, exposed through a ``BramFactor`` config parameter. Layers with more weights than the threshold will be exposed through a BRAM interface. Consider the following code: 11 | 12 | .. 
code-block:: Python 13 | 14 | model = tf.keras.models.Sequential() 15 | model.add(Dense(10, activation="relu", input_shape=(12,), name="dense_1")) 16 | model.add(Dense(20, activation="relu", name="dense_2")) 17 | model.add(Dense(5, activation="softmax", name="dense_3")) 18 | model.compile(optimizer='adam', loss='mse') 19 | 20 | config = hls4ml.utils.config_from_keras_model(model) 21 | config["Model"]["Strategy"] = "Resource" 22 | config["Model"]["BramFactor"] = 100 23 | 24 | hls_model = hls4ml.converters.convert_from_keras_model( 25 | model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend 26 | ) 27 | 28 | Having set ``BramFactor=100``, only layers with more than 100 weights will be exposed as external BRAM, in this case layers ``dense_1`` and ``dense_2``. ``BramFactor`` can currently only be set at the model level. The generated code will now have the weights as part of the interface. 29 | 30 | .. code-block:: C++ 31 | 32 | void myproject( 33 | hls::stream<input_t> &dense_1_input, 34 | hls::stream<result_t> &layer7_out, 35 | model_default_t w2[120], 36 | model_default_t w4[200] 37 | ) { 38 | #pragma HLS INTERFACE axis port=dense_1_input,layer7_out 39 | #pragma HLS INTERFACE bram port=w2,w4 40 | ... 41 | 42 | When integrating the design, users can use the exposed interface to implement a weight reloading scheme. 43 | -------------------------------------------------------------------------------- /docs/api/serialization.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | Saving/Loading hls4ml models 3 | ============================ 4 | 5 | ``hls4ml`` model objects (instances of the ``ModelGraph`` class) can be saved to disk and loaded at a later stage. The saved model doesn't require the original Keras/PyTorch/ONNX model for loading. 6 | 7 | To save/load a model, use the following API: 8 | 9 | .. code-block:: python 10 | 11 | from hls4ml.converters import convert_from_keras_model, load_saved_model 12 | 13 | model = convert_from_keras_model(keras_model, ...) 14 | 15 | # Save a model to some path 16 | model.save('some/path/my_hls4ml_model.fml') 17 | 18 | # Load a model from a file 19 | loaded_model = load_saved_model('some/path/my_hls4ml_model.fml') 20 | 21 | 22 | The saved model will have a ``.fml`` extension, but is in fact a gzipped tar archive. The loaded model can be used in the same way as the original one. This includes modification of certain config parameters, for example the output directory, layer reuse factor, etc. 23 | 24 | Linking with existing project 25 | ============================= 26 | 27 | Once the project has been written to disk with ``ModelGraph.write()``, it can also be linked with at a later stage. Similarly to loading a saved model, this feature allows skipping the conversion step. Additionally, it may be used to test manual changes to the generated project. 28 | 29 | The linking function will create a special instance of ``ModelGraph`` that only allows calls to ``compile()``, ``predict()`` and ``build()``. Other calls to the ``ModelGraph`` instance are disabled. 30 | 31 | To link a model, use the following API: 32 | 33 | .. code-block:: python 34 | 35 | from hls4ml.converters import convert_from_keras_model, link_existing_project 36 | 37 | model = convert_from_keras_model(keras_model, output_dir='/some/path/', ...) 
38 | 39 | # Generate the project files and write them to some path 40 | model.write() 41 | 42 | # Later on, link this path to the Python runtime 43 | linked_model = link_existing_project('/some/path/') 44 | linked_model.compile() 45 | linked_model.predict(...) 46 | linked_model.build(...) 47 | -------------------------------------------------------------------------------- /docs/backend/catapult.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Catapult 3 | ======== 4 | 5 | Support for the Siemens Catapult HLS compiler was added in ``hls4ml`` version 1.0.0. 6 | 7 | *TODO expand this section* 8 | -------------------------------------------------------------------------------- /docs/backend/quartus.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Quartus 3 | ======= 4 | 5 | .. warning:: 6 | The **Quartus** backend is deprecated and will be removed in a future version. Users should migrate to the **oneAPI** backend. 7 | 8 | The **Quartus** backend of hls4ml is designed for deploying NNs on Intel/Altera FPGAs. It uses the discontinued Intel HLS compiler. The **oneAPI** backend should be preferred for new projects. 9 | The **oneAPI** backend contains the HLS code migrated from this backend, with significantly better io_stream support, though the **oneAPI** backend does not yet support profiling, tracing, 10 | or the BramFactor option supported by the **Quartus** backend. Nevertheless, little or no further development is expected for the **Quartus** backend. 11 | 12 | The **Quartus** backend only implements the ``Resource`` strategy for the layers. There is no ``Latency`` implementation of any of the layers. 13 | -------------------------------------------------------------------------------- /docs/backend/sr.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | SymbolicExpression 3 | ================== 4 | 5 | This backend can be used to implement expressions obtained through symbolic regression tools such as `PySR `_ or `SymbolNet `_. The backend targets Vivado/Vitis HLS and relies on HLS math libraries provided with a licensed installation of these tools. 6 | 7 | *TODO expand this section* 8 | -------------------------------------------------------------------------------- /docs/backend/vitis.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Vivado/Vitis 3 | ============ 4 | 5 | The **Vivado** and **Vitis** backends are intended for use with AMD/Xilinx FPGAs. The **Vivado** backend targets the discontinued ``Vivado HLS`` compiler, while 6 | the **Vitis** backend targets the ``Vitis HLS`` compiler. Both are designed to produce IP for incorporation in ``Vivado`` designs. (See :doc:`VivadoAccelerator ` 7 | for generating easily-deployable models with ``Vivado HLS``.) The ``Vitis`` accelerator flow is not directly supported, though HLS produced with the **Vitis** 8 | backend can be easily incorporated into a Vitis kernel. 9 | 10 | Users should generally use the **Vitis** backend for new designs that target AMD/Xilinx FPGAs; new ``hls4ml`` developments will not necessarily be backported to 11 | the **Vivado** backend. 
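For orientation, the backend is selected at conversion time. The following is a minimal sketch; the part number, clock period and output directory are placeholders, not recommendations:

.. code-block:: python

    import hls4ml

    # 'model' is any supported Keras model; granularity='model' gives one global precision/reuse setting
    config = hls4ml.utils.config_from_keras_model(model, granularity='model')

    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        backend='Vitis',               # or 'Vivado' for the legacy compiler
        part='xcvu13p-flga2577-2-e',   # placeholder FPGA part
        clock_period=5,                # target clock period in ns (placeholder)
        output_dir='my_vitis_prj',
    )
    hls_model.compile()                # builds the C++ bridge for bit-accurate emulation
    # hls_model.build(csim=False, synth=True)  # runs HLS synthesis on the generated project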
12 | -------------------------------------------------------------------------------- /docs/frontend/keras.rst: -------------------------------------------------------------------------------- 1 | ================================ 2 | Keras and its quantized variants 3 | ================================ 4 | 5 | Keras and the quantization library QKeras are well supported in ``hls4ml``. Both Keras v2 (``tf.keras``) and the new Keras v3 are supported. While the Keras v2 support is based on parsing the serialized JSON representation of the model, the Keras v3 support uses direct model inspection. 6 | 7 | Currently, ``hls4ml`` can parse most Keras layers, including core layers, convolutional layers, pooling layers, recurrent layers, merging/reshaping layers and activation layers, implemented via either the sequential or functional API. Notably missing are the attention and normalization layers. The ``Lambda`` layers don't save their state in the serialized format and are thus impossible to parse. In this case, the ``Lambda`` layers can be implemented as custom layers and parsed via the :ref:`Extension API`. 8 | 9 | The ``data_format='channels_first'`` parameter of Keras layers is supported, but not extensively tested. All HLS implementations in ``hls4ml`` are based on the ``channels_last`` data format, so models need to be converted to that format before the HLS code can be emitted. We encourage users of ``channels_first`` to report their experiences to the developers on GitHub. 10 | 11 | 12 | * `QKeras `_ 13 | The equivalent QKeras API and its quantizers are also supported by ``hls4ml``. QKeras is not compatible with Keras v3. Currently, only HGQ2 is compatible with Keras v3 (see below). 14 | * `HGQ `_ 15 | The equivalent HGQ API is also supported. HGQ is not compatible with Keras v3. See `advanced/HGQ <../advanced/hgq.html>`__ for more information. 16 | * `HGQ2 `_ 17 | HGQ2 is based on Keras v3. Its support in hls4ml is currently under development. 18 | 19 | The development team of ``hls4ml`` is currently exploring options for a QKeras alternative and will provide a drop-in replacement API compatible with Keras v3.
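To make the parsing flow above concrete, here is a minimal sketch; the toy model, layer names, reuse factor and output directory are arbitrary assumptions used only for illustration:

.. code-block:: python

    import numpy as np
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.models import Sequential

    import hls4ml

    # Toy Keras v2 model; any supported layer combination is parsed the same way
    model = Sequential([
        Dense(16, activation='relu', input_shape=(8,), name='dense_1'),
        Dense(4, activation='softmax', name='dense_2'),
    ])

    # 'name' granularity exposes per-layer Precision/ReuseFactor settings
    config = hls4ml.utils.config_from_keras_model(model, granularity='name')
    config['LayerName']['dense_1']['ReuseFactor'] = 4

    hls_model = hls4ml.converters.convert_from_keras_model(
        model, hls_config=config, output_dir='hls_prj', io_type='io_parallel'
    )
    hls_model.compile()
    y_hls = hls_model.predict(np.random.rand(10, 8).astype(np.float32))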
20 | -------------------------------------------------------------------------------- /docs/img/act_hls4ml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/act_hls4ml.png -------------------------------------------------------------------------------- /docs/img/act_keras.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/act_keras.png -------------------------------------------------------------------------------- /docs/img/hls4ml_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/hls4ml_logo.png -------------------------------------------------------------------------------- /docs/img/hls4ml_logo_lightgrey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/hls4ml_logo_lightgrey.png -------------------------------------------------------------------------------- /docs/img/hls4ml_logo_navbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/hls4ml_logo_navbar.png -------------------------------------------------------------------------------- /docs/img/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/logo.jpg -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/logo.png -------------------------------------------------------------------------------- /docs/img/nn_map_paper_fig_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/nn_map_paper_fig_2.png -------------------------------------------------------------------------------- /docs/img/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/overview.jpg -------------------------------------------------------------------------------- /docs/img/overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/overview.pdf -------------------------------------------------------------------------------- /docs/img/pynqframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/pynqframe.png 
-------------------------------------------------------------------------------- /docs/img/reuse_factor_paper_fig_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/reuse_factor_paper_fig_8.png -------------------------------------------------------------------------------- /docs/img/weights_hls4ml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/weights_hls4ml.png -------------------------------------------------------------------------------- /docs/img/weights_keras.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/weights_keras.png -------------------------------------------------------------------------------- /docs/img/zynq_interfaces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/zynq_interfaces.png -------------------------------------------------------------------------------- /docs/intro/release_notes.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | Release Notes 3 | ======================== 4 | 5 | .. changelog:: 6 | :changelog-url: https://fastmachinelearning.org/hls4ml/release_notes.html 7 | :github: https://github.com/fastmachinelearning/hls4ml/releases/ 8 | :pypi: https://pypi.org/project/hls4ml/ 9 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | . 
2 | setuptools_scm[toml]>=5 3 | sphinx>=3.2.1 4 | sphinx_contributors 5 | sphinx_github_changelog 6 | sphinx_rtd_theme 7 | toposort>=1.5.0 8 | -------------------------------------------------------------------------------- /hls4ml/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml import converters, report, utils # noqa: F401, E402 2 | 3 | try: 4 | from ._version import version as __version__ 5 | from ._version import version_tuple 6 | except ImportError: 7 | __version__ = "unknown version" 8 | version_tuple = (0, 0, "unknown version") 9 | 10 | 11 | def reseed(newseed): 12 | print(f'\npytest-randomly: reseed with {newseed}') 13 | try: 14 | import tensorflow 15 | 16 | tensorflow.random.set_seed(newseed) 17 | except ImportError: 18 | print('\nTensorFlow seed not set') 19 | try: 20 | import torch 21 | 22 | torch.manual_seed(newseed) 23 | except ImportError: 24 | print('\nPyTorch seed not set') 25 | -------------------------------------------------------------------------------- /hls4ml/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.backend import Backend, get_available_backends, get_backend, register_backend # noqa: F401 2 | from hls4ml.backends.fpga.fpga_backend import FPGABackend # noqa: F401 3 | from hls4ml.backends.oneapi.oneapi_backend import OneAPIBackend 4 | from hls4ml.backends.quartus.quartus_backend import QuartusBackend 5 | from hls4ml.backends.symbolic.symbolic_backend import SymbolicExpressionBackend 6 | from hls4ml.backends.vivado.vivado_backend import VivadoBackend 7 | from hls4ml.backends.vivado_accelerator.vivado_accelerator_backend import VivadoAcceleratorBackend 8 | from hls4ml.backends.vivado_accelerator.vivado_accelerator_config import VivadoAcceleratorConfig # noqa: F401 9 | 10 | from hls4ml.backends.catapult.catapult_backend import CatapultBackend # isort: skip 11 | 12 | from hls4ml.backends.vitis.vitis_backend import VitisBackend # isort: skip 13 | 14 | register_backend('Vivado', VivadoBackend) 15 | register_backend('VivadoAccelerator', VivadoAcceleratorBackend) 16 | register_backend('Vitis', VitisBackend) 17 | register_backend('Quartus', QuartusBackend) 18 | register_backend('Catapult', CatapultBackend) 19 | register_backend('SymbolicExpression', SymbolicExpressionBackend) 20 | register_backend('oneAPI', OneAPIBackend) 21 | -------------------------------------------------------------------------------- /hls4ml/backends/catapult/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/catapult/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/catapult/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/catapult/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/catapult/passes/quantization_templates.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.backend import get_backend 2 | from hls4ml.backends.catapult.passes.core_templates import ( 3 | batchnorm_config_template, 4 | batchnorm_function_template, 5 | batchnorm_include_list, 6 
| ) 7 | from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate 8 | from hls4ml.model.optimizer.passes.qkeras import ApplyAlpha 9 | 10 | 11 | class ApplyAlphaConfigTemplate(LayerConfigTemplate): 12 | def __init__(self): 13 | super().__init__(ApplyAlpha) 14 | self.template = batchnorm_config_template 15 | 16 | def format(self, node): 17 | params = self._default_config_params(node) 18 | params['n_in'] = node.get_input_variable().size_cpp() 19 | params['product_type'] = get_backend('catapult').product_type( 20 | node.get_input_variable().type.precision, node.get_weights('scale').type.precision 21 | ) 22 | 23 | return self.template.format(**params) 24 | 25 | 26 | class ApplyAlphaFunctionTemplate(FunctionCallTemplate): 27 | def __init__(self): 28 | super().__init__(ApplyAlpha, include_header=batchnorm_include_list) 29 | self.template = batchnorm_function_template 30 | 31 | def format(self, node): 32 | params = self._default_function_params(node) 33 | params['scale'] = node.get_weights('scale').name 34 | params['bias'] = node.get_weights('bias').name 35 | 36 | return self.template.format(**params) 37 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/fpga/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/fpga/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/bram_weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from hls4ml.backends.fpga.fpga_types import BramWeightVariableConverter 4 | from hls4ml.model.optimizer import OptimizerPass 5 | 6 | 7 | class RegisterBramWeights(OptimizerPass): 8 | def match(self, node): 9 | return len(node.weights) > 0 10 | 11 | def transform(self, model, node): 12 | bramport_size = model.config.get_bram_size(node) 13 | for w_name, w_var in node.weights.items(): 14 | if ('storage' in w_var.__dict__ and w_var.storage != 'bram') and np.prod(w_var.shape) > bramport_size: 15 | new_weight = BramWeightVariableConverter.convert(w_var) 16 | node.set_attr(w_name, new_weight) 17 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/embedding.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate 2 | from hls4ml.model.layers import Embedding 3 | 4 | embed_config_template = """struct config{index} : nnet::embed_config {{ 5 | static const unsigned n_in = {n_in}; 6 | static const unsigned n_out = {n_out}; 7 | static const unsigned vocab_size = {vocab_size}; 8 | static const unsigned io_type = nnet::{iotype}; 9 | static const unsigned reuse_factor = {reuse}; 10 | typedef {embeddings_t.name} embeddings_t; 11 | }};\n""" 12 | 13 | embed_function_template = 'nnet::embedding<{input_t}, {output_t}, {config}>({input}, {output}, {e});' 14 | 15 | embed_include_list = ['nnet_utils/nnet_embed.h', 
'nnet_utils/nnet_embed_stream.h'] 16 | 17 | 18 | class EmbeddingConfigTemplate(LayerConfigTemplate): 19 | def __init__(self): 20 | super().__init__(Embedding) 21 | self.template = embed_config_template 22 | 23 | def format(self, node): 24 | params = self._default_config_params(node) 25 | return self.template.format(**params) 26 | 27 | 28 | class EmbeddingFunctionTemplate(FunctionCallTemplate): 29 | def __init__(self): 30 | super().__init__(Embedding, include_header=embed_include_list) 31 | self.template = embed_function_template 32 | 33 | def format(self, node): 34 | params = self._default_function_params(node) 35 | params['e'] = node.get_weights('embeddings').name 36 | 37 | return self.template.format(**params) 38 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/final_reshape.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Reshape 2 | from hls4ml.model.optimizer import OptimizerPass 3 | 4 | 5 | class RemoveFinalReshape(OptimizerPass): 6 | '''Remove reshape if final layer''' 7 | 8 | def match(self, node): 9 | # match if reshape is final node 10 | return isinstance(node, Reshape) and not node.get_output_nodes() 11 | 12 | def transform(self, model, node): 13 | if model.config.get_config_value('IOType') == 'io_parallel': 14 | print('WARNING: Final layer is a Reshape, which does not affect the output for io_parallel; removing it') 15 | model.remove_node(node) 16 | return True 17 | elif model.config.get_config_value('IOType') == 'io_stream': 18 | print( 19 | 'WARNING: Final layer is a Reshape, which may incur a large resource cost for io_stream; ' 20 | 'consider removing it' 21 | ) 22 | return False 23 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/inplace_parallel_reshape.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Reshape 2 | from hls4ml.model.optimizer import OptimizerPass 3 | from hls4ml.model.types import InplaceTensorVariable 4 | 5 | 6 | class InplaceParallelReshape(OptimizerPass): 7 | """ 8 | Replaces the output variable of Reshape layer with an inplace variable when using io_parallel. 9 | 10 | This is done because in io_parallel tensors are stored as flat arrays, requiring no reshaping. 11 | """ 12 | 13 | def match(self, node): 14 | if not isinstance(node, Reshape): 15 | return False 16 | return node.model.config.get_config_value('IOType') == 'io_parallel' 17 | 18 | def transform(self, model, node): 19 | outvar = node.get_output_variable() 20 | invar = node.get_input_variable() 21 | newoutvar = InplaceTensorVariable(outvar, invar) 22 | node.set_attr(node.outputs[0], newoutvar) 23 | if node.name in model.outputs: 24 | prev_node = node.get_input_node() 25 | assert ( 26 | prev_node.name not in model.outputs 27 | ), f"Cannot output node {prev_node.name}: reshape is a no-op in io_parallel.\ 28 | As a result, the previous node {prev_node.name}'s output will be used as the\ 29 | output. However, this node is already an output." 
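The reshape passes above (and most files in this directory) follow the same two-method OptimizerPass contract: `match` selects a node, and `transform` edits the graph and returns `True` only when the graph actually changed. As a minimal sketch of that contract, a hypothetical pass that drops a Reshape whose output shape equals its input shape might look like the following; the class and its rule are illustrative only, and the only APIs assumed are the ones already used by the passes above.

```python
from hls4ml.model.layers import Reshape
from hls4ml.model.optimizer import OptimizerPass


class RemoveIdentityReshape(OptimizerPass):
    """Hypothetical pass: drop a Reshape that leaves the tensor shape unchanged."""

    def match(self, node):
        if not isinstance(node, Reshape):
            return False
        return node.get_input_variable().shape == node.get_output_variable().shape

    def transform(self, model, node):
        model.remove_node(node)  # same graph-editing call used by the passes above
        return True              # graph changed, so the optimizer loop re-runs
```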
30 | model.outputs = [name if name != node.name else prev_node.name for name in model.outputs] 31 | return False 32 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/inplace_stream_flatten.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Reshape 2 | from hls4ml.model.optimizer import OptimizerPass 3 | from hls4ml.model.types import InplaceTensorVariable 4 | 5 | 6 | class InplaceStreamFlatten(OptimizerPass): 7 | """ 8 | Replaces the output variable of Reshape (flatten) layer with an inplace variable when using io_stream. 9 | 10 | This optimizer avoids the expensive repacking of the stream when Reshape layer flattens the tensor to 1d. 11 | """ 12 | 13 | def match(self, node): 14 | # Layers require flatten data can gather it from the stream, no need for repacking. 15 | # Reshape acts as a Flatten layer when the result has 1 dimension. Make it a inplace tensor if it happens. 16 | 17 | if node.model.config.get_config_value('IOType') != 'io_stream': 18 | return False 19 | if not (isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1): 20 | # If is not flatten 21 | return False 22 | if node.name in node.model.outputs: 23 | # If used as model output. Output shape shall be preserved in this case. 24 | return False 25 | return True 26 | 27 | def transform(self, model, node): 28 | outvar = node.get_output_variable() 29 | invar = node.get_input_variable() 30 | newoutvar = InplaceTensorVariable(outvar, invar) 31 | node.set_attr(node.outputs[0], newoutvar) 32 | return False 33 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/remove_softmax.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Softmax 2 | from hls4ml.model.optimizer.optimizer import OptimizerPass 3 | 4 | 5 | class SkipSoftmax(OptimizerPass): 6 | def match(self, node): 7 | is_softmax = isinstance(node, Softmax) 8 | remove_softmax = node.get_attr('skip', False) 9 | return is_softmax and remove_softmax 10 | 11 | def transform(self, model, node): 12 | model.remove_node(node) 13 | return True 14 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/xnor_pooling.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import GlobalPooling1D, GlobalPooling2D, Pooling1D, Pooling2D 2 | from hls4ml.model.optimizer import OptimizerPass 3 | from hls4ml.model.types import XnorPrecisionType 4 | 5 | 6 | class XnorPooling(OptimizerPass): 7 | ''' 8 | For correct behavior, for MaxPooling and similar, for XnorPrecisionType, have to propagate 9 | the type to the output. 
10 | ''' 11 | 12 | def match(self, node): 13 | if isinstance(node, (Pooling1D, Pooling2D, GlobalPooling1D, GlobalPooling2D)) and node.get_attr('pool_op') == 'Max': 14 | return isinstance(node.get_input_variable().type.precision, XnorPrecisionType) and not isinstance( 15 | node.get_output_variable().type.precision, XnorPrecisionType 16 | ) 17 | return False 18 | 19 | def transform(self, model, node): 20 | outvar = node.get_output_variable() 21 | outvar.type.precision = XnorPrecisionType() 22 | return True 23 | -------------------------------------------------------------------------------- /hls4ml/backends/oneapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/oneapi/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/oneapi/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/oneapi/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/oneapi/passes/clone_templates.py: -------------------------------------------------------------------------------- 1 | """The clone templates in the fpga backend are not enough for oneAPI, so this adds the missing parts""" 2 | 3 | from hls4ml.backends.fpga.passes.clone import Clone 4 | from hls4ml.backends.oneapi.oneapi_template import StreamFunctionCallTemplate, TaskSequenceTemplate 5 | 6 | clone_stream_function_template = '{name}.async();' 7 | 8 | 9 | class CloneTaskSequenceTemplate(TaskSequenceTemplate): 10 | def __init__(self): 11 | super().__init__(Clone) 12 | 13 | def format(self, node): 14 | params = self._default_function_params(node) 15 | for i in range(len(node.outputs)): 16 | params[f'output{i + 1}_pipe'] = node.variables[node.outputs[i]].pipe_name 17 | 18 | output_pipes = ', '.join([f'{{output{i + 1}_pipe}}' for i in range(len(node.outputs))]) 19 | 20 | template = f'task_sequence> {{name}};' 21 | return template.format(**params) 22 | 23 | 24 | class CloneStreamFunctionTemplate(StreamFunctionCallTemplate): 25 | def __init__(self): 26 | super().__init__(Clone) 27 | self.template = clone_stream_function_template 28 | 29 | def format(self, node): 30 | params = self._default_function_params(node) 31 | return self.template.format(**params) 32 | -------------------------------------------------------------------------------- /hls4ml/backends/oneapi/passes/embedding_templates.py: -------------------------------------------------------------------------------- 1 | """ 2 | These are the stream oneAPI templates for embedding layers. The io_parallel ones are in backends/fpga/passes/embedding.py. 
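The string constants in these template passes are ordinary Python format strings; each `format(node)` method simply substitutes node-specific names into a C++ snippet via `self.template.format(**params)`. A toy illustration of that substitution, using the clone template defined above with a made-up layer name:

```python
# Toy illustration only: in hls4ml the params dict comes from _default_function_params(node).
clone_stream_function_template = '{name}.async();'

params = {'name': 'clone_layer2'}
print(clone_stream_function_template.format(**params))  # -> clone_layer2.async();
```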
3 | """ 4 | 5 | from hls4ml.backends.oneapi.oneapi_template import StreamFunctionCallTemplate, TaskSequenceTemplate 6 | from hls4ml.model.layers import Embedding 7 | 8 | embed_task_sequence_template = 'task_sequence> {name};' 9 | embed_stream_function_template = '{name}.async({e});' 10 | 11 | 12 | class EmbeddingTaskSequenceTemplate(TaskSequenceTemplate): 13 | def __init__(self): 14 | super().__init__(Embedding) 15 | self.template = embed_task_sequence_template 16 | 17 | def format(self, node): 18 | params = self._default_function_params(node) 19 | 20 | return self.template.format(**params) 21 | 22 | 23 | class EmbeddingStreamFunctionTemplate(StreamFunctionCallTemplate): 24 | def __init__(self): 25 | super().__init__(Embedding) 26 | self.template = embed_stream_function_template 27 | 28 | def format(self, node): 29 | params = self._default_function_params(node) 30 | params['e'] = node.get_weights('embeddings').name 31 | 32 | return self.template.format(**params) 33 | -------------------------------------------------------------------------------- /hls4ml/backends/quartus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/quartus/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/quartus/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/quartus/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/quartus/passes/quantization_templates.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.backend import get_backend 2 | from hls4ml.backends.quartus.passes.core_templates import ( 3 | batchnorm_config_template, 4 | batchnorm_function_template, 5 | batchnorm_include_list, 6 | ) 7 | from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate 8 | from hls4ml.model.optimizer.passes.qkeras import ApplyAlpha 9 | 10 | 11 | class ApplyAlphaConfigTemplate(LayerConfigTemplate): 12 | def __init__(self): 13 | super().__init__(ApplyAlpha) 14 | self.template = batchnorm_config_template 15 | 16 | def format(self, node): 17 | params = self._default_config_params(node) 18 | params['n_in'] = node.get_input_variable().size_cpp() 19 | params['product_type'] = get_backend('quartus').product_type( 20 | node.get_input_variable().type.precision, node.get_weights('scale').type.precision 21 | ) 22 | 23 | return self.template.format(**params) 24 | 25 | 26 | class ApplyAlphaFunctionTemplate(FunctionCallTemplate): 27 | def __init__(self): 28 | super().__init__(ApplyAlpha, include_header=batchnorm_include_list) 29 | self.template = batchnorm_function_template 30 | 31 | def format(self, node): 32 | params = self._default_function_params(node) 33 | params['scale'] = node.get_weights('scale').name 34 | params['bias'] = node.get_weights('bias').name 35 | 36 | return self.template.format(**params) 37 | -------------------------------------------------------------------------------- /hls4ml/backends/symbolic/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/symbolic/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/symbolic/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/symbolic/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/symbolic/passes/validate_lut.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import SymbolicExpression 2 | from hls4ml.model.optimizer import ConfigurableOptimizerPass 3 | 4 | 5 | class ValidateUserLookupTable(ConfigurableOptimizerPass): 6 | '''Validates the precision of user-defined LUTs is adequate''' 7 | 8 | def __init__(self): 9 | self.raise_exception = False 10 | 11 | def match(self, node): 12 | return isinstance(node, SymbolicExpression) and len(node.get_attr('lut_functions', [])) > 0 13 | 14 | def transform(self, model, node): 15 | precision = node.get_output_variable().type.precision 16 | range = 2 ** (precision.integer - precision.signed) 17 | frac_step = 1 / 2**precision.fractional 18 | 19 | for lut_fn in node.get_attr('lut_functions'): 20 | lut_range = lut_fn.range_end - lut_fn.range_start 21 | lut_step = lut_range / lut_fn.table_size 22 | 23 | if lut_step < frac_step: 24 | msg = f'LUT function {lut_fn.name} requires more fractional bits.' 25 | if self.raise_exception: 26 | raise Exception(msg) 27 | else: 28 | print('WARNING:', msg) 29 | 30 | if lut_range > range: 31 | msg = f'LUT function {lut_fn.name} requires more integer bits.' 
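To make the two checks above concrete, here is the same arithmetic with made-up numbers in plain Python (no hls4ml precision types): a signed output precision with 6 integer bits (sign included) and 10 fractional bits, checked against a user LUT spanning [0, 64) with 2048 entries.

```python
# Worked example of the ValidateUserLookupTable checks; all values are made up.
signed, integer_bits, fractional_bits = 1, 6, 10   # stand-in for the output precision
covered_range = 2 ** (integer_bits - signed)       # 32
frac_step = 1 / 2 ** fractional_bits               # ~0.000977

range_start, range_end, table_size = 0.0, 64.0, 2048  # stand-in for one lut_fn
lut_range = range_end - range_start                # 64.0
lut_step = lut_range / table_size                  # 0.03125

print(lut_step < frac_step)       # False -> enough fractional bits for the LUT step
print(lut_range > covered_range)  # True  -> triggers 'requires more integer bits'
```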
32 | if self.raise_exception: 33 | raise Exception(msg) 34 | else: 35 | print('WARNING:', msg) 36 | 37 | return False 38 | -------------------------------------------------------------------------------- /hls4ml/backends/vitis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vitis/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vitis/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vitis/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vivado/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vivado/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado/passes/quantization_templates.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.backend import get_backend 2 | from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate 3 | from hls4ml.backends.vivado.passes.core_templates import ( 4 | batchnorm_config_template, 5 | batchnorm_function_template, 6 | batchnorm_include_list, 7 | ) 8 | from hls4ml.model.optimizer.passes.qkeras import ApplyAlpha 9 | 10 | 11 | class ApplyAlphaConfigTemplate(LayerConfigTemplate): 12 | def __init__(self): 13 | super().__init__(ApplyAlpha) 14 | self.template = batchnorm_config_template 15 | 16 | def format(self, node): 17 | params = self._default_config_params(node) 18 | params['n_in'] = node.get_input_variable().size_cpp() 19 | params['product_type'] = get_backend('vivado').product_type( 20 | node.get_input_variable().type.precision, node.get_weights('scale').type.precision 21 | ) 22 | 23 | return self.template.format(**params) 24 | 25 | 26 | class ApplyAlphaFunctionTemplate(FunctionCallTemplate): 27 | def __init__(self): 28 | super().__init__(ApplyAlpha, include_header=batchnorm_include_list) 29 | self.template = batchnorm_function_template 30 | 31 | def format(self, node): 32 | params = self._default_function_params(node) 33 | params['scale'] = node.get_weights('scale').name 34 | params['bias'] = node.get_weights('bias').name 35 | 36 | return self.template.format(**params) 37 | -------------------------------------------------------------------------------- /hls4ml/backends/vivado_accelerator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vivado_accelerator/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado_accelerator/passes/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vivado_accelerator/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado_accelerator/supported_boards.json: -------------------------------------------------------------------------------- 1 | { 2 | "pynq-z2": { 3 | "part": "xc7z020clg400-1", 4 | "tcl_scripts": {"axi_lite": "axi_lite_design.tcl", "axi_stream": "axi_stream_design.tcl"}, 5 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 6 | "c_drivers": {} 7 | }, 8 | "zcu102": { 9 | "part": "xczu9eg-ffvb1156-2-e", 10 | "tcl_scripts": { "axi_stream": "axi_stream_design.tcl"}, 11 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 12 | "c_drivers": {} 13 | }, 14 | "alveo-u50": { 15 | "part": "xcu50-fsvh2104-2-e", 16 | "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, 17 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 18 | "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, 19 | "c_drivers": {} 20 | }, 21 | "alveo-u250": { 22 | "part": "xcu250-figd2104-2L-e", 23 | "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, 24 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 25 | "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, 26 | "c_drivers": {} 27 | }, 28 | "alveo-u200": { 29 | "part": "xcu200-fsgd2104-2-e", 30 | "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, 31 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 32 | "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, 33 | "c_drivers": {} 34 | }, 35 | "alveo-u280": { 36 | "part": "xcu280-fsvh2892-2L-e", 37 | "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, 38 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 39 | "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, 40 | "c_drivers": {} 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /hls4ml/cli/__main__.py: -------------------------------------------------------------------------------- 1 | from . import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /hls4ml/contrib/README.md: -------------------------------------------------------------------------------- 1 | # Contributions 2 | 3 | This section is for contributed work that can be used with hls4ml that is potentially useful to a wider audience. Examples include implementations for custom layer types for use with the Extensions API. 4 | 5 | ## How to structure contributions 6 | 7 | The best way to structure a contribution is to make a directory for the contribution, with a README inside to explain what it is and how to use it. If possible there should be an example script demonstrating how to use it. We should be able to validate that the code works. 8 | -------------------------------------------------------------------------------- /hls4ml/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/contrib/__init__.py -------------------------------------------------------------------------------- /hls4ml/contrib/kl_layer/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the implementation of custom KL divergence layer. 
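For reference, the quantity the layer is named after is the Kullback-Leibler divergence, KL(P || Q) = sum p*log(p/q) for discrete distributions. A plain NumPy version of that textbook formula is sketched below; it is only a reference point, not the HLS implementation in `kl_layer.h`.

```python
import numpy as np


def kl_divergence(p, q, eps=1e-12):
    """Textbook KL(P || Q) for discrete distributions; not the hls4ml layer."""
    p = np.asarray(p, dtype=float) + eps
    q = np.asarray(q, dtype=float) + eps
    p, q = p / p.sum(), q / q.sum()
    return float(np.sum(p * np.log(p / q)))


print(kl_divergence([0.7, 0.2, 0.1], [0.5, 0.3, 0.2]))  # ~0.085
```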
2 | This is a custom implementation and not a built-in layer in any deep learning framework. 3 | It was developed specifically for [AD@L1 CMS paper](https://www.nature.com/articles/s42256-022-00441-3). 4 | 5 | # Files 6 | 7 | * `kl_layer.py`: contains the standalone implementation of the custom KL divergence layer 8 | * `kl_layer.h`: contains the HLS implementation of KL layer 9 | 10 | 11 | # Usage 12 | 13 | `kl_layer.py` contains the example of how to use the KL layer. 14 | To run do 15 | 16 | ``` 17 | python kl_layer.py 18 | ``` 19 | -------------------------------------------------------------------------------- /hls4ml/converters/keras/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/converters/keras/__init__.py -------------------------------------------------------------------------------- /hls4ml/converters/keras/hgq_proxy_model.py: -------------------------------------------------------------------------------- 1 | from hls4ml.converters.keras_v2_to_hls import KerasReader, keras_handler, parse_default_keras_layer 2 | 3 | 4 | @keras_handler('FixedPointQuantizer', 'HGQ>FixedPointQuantizer') 5 | def fixedpoint_quantizer_handler(keras_layer, input_names, input_shapes, data_reader: KerasReader): 6 | config = parse_default_keras_layer(keras_layer, input_names) 7 | 8 | name = config['name'] 9 | fusible = keras_layer['config']['fusible'] 10 | config['RND'] = keras_layer['config']['RND'] 11 | config['SAT'] = keras_layer['config']['SAT'] 12 | config['fusible'] = fusible 13 | if not fusible: 14 | k = data_reader.get_weights_data(name, 'keep_negative') 15 | b = data_reader.get_weights_data(name, 'bits') 16 | i = data_reader.get_weights_data(name, 'integers') 17 | config['mask_kbi'] = k, b, i 18 | config['overrides'] = keras_layer['config']['overrides'] 19 | 20 | layer = config 21 | return layer, input_shapes[0] 22 | 23 | 24 | @keras_handler('UnaryLUT', 'HGQ>UnaryLUT') 25 | def unary_lut_keras_handler(keras_layer, input_names, input_shapes, data_reader: KerasReader): 26 | config = parse_default_keras_layer(keras_layer, input_names) 27 | 28 | table = data_reader.get_weights_data(config['name'], 'table') 29 | k, i, f = keras_layer['config']['kif_out'] 30 | k, b, i = k, k + i + f, k + i 31 | config['table_t'] = f'{"" if k else "u"}fixed<{b},{i}>' 32 | config['table'] = table 33 | config['table_size'] = len(table) 34 | config['activation'] = 'unary_lut' 35 | 36 | layer = config 37 | return layer, input_shapes[0] 38 | -------------------------------------------------------------------------------- /hls4ml/converters/keras/merge.py: -------------------------------------------------------------------------------- 1 | from hls4ml.converters.keras_v2_to_hls import keras_handler, parse_default_keras_layer 2 | 3 | merge_layers = ['Add', 'Subtract', 'Multiply', 'Average', 'Maximum', 'Minimum', 'Concatenate', 'Dot'] 4 | 5 | 6 | @keras_handler(*merge_layers) 7 | def parse_merge_layer(keras_layer, input_names, input_shapes, data_reader): 8 | assert keras_layer['class_name'] in merge_layers 9 | 10 | layer = parse_default_keras_layer(keras_layer, input_names) 11 | 12 | layer['op'] = layer['class_name'].lower() 13 | 14 | output_shape = input_shapes[0][:] 15 | if layer['class_name'] == 'Concatenate': 16 | rank = len(input_shapes[0][1:]) 17 | if rank > 3: 18 | raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.') 19 | layer['op'] 
= layer['class_name'].lower() + f'{rank}d' 20 | layer['axis'] = keras_layer['config']['axis'] 21 | output_shape[layer['axis']] += input_shapes[1][layer['axis']] 22 | elif layer['class_name'] == 'Dot': 23 | rank = len(input_shapes[0][1:]) 24 | if rank > 1: 25 | raise Exception('ERROR: Dot of tensors with rank > 1 is not yet supported.') 26 | layer['op'] = layer['class_name'].lower() + f'{rank}d' 27 | else: 28 | layer['class_name'] = 'Merge' 29 | if len(layer['inputs']) > 2: 30 | raise Exception('ERROR: Merging more than two tensors is not yet supported.') 31 | 32 | return layer, output_shape 33 | -------------------------------------------------------------------------------- /hls4ml/converters/keras/model.py: -------------------------------------------------------------------------------- 1 | from hls4ml.converters.keras_v2_to_hls import ( 2 | KerasFileReader, 3 | KerasModelReader, 4 | KerasNestedFileReader, 5 | keras_handler, 6 | parse_default_keras_layer, 7 | parse_keras_model, 8 | ) 9 | 10 | model_layers = ['Sequential', 'Functional'] 11 | 12 | 13 | @keras_handler(*model_layers) 14 | def parse_model_layer(keras_layer, input_names, input_shapes, data_reader): 15 | assert keras_layer['class_name'] in model_layers 16 | 17 | layer = parse_default_keras_layer(keras_layer, input_names) 18 | layer['class_name'] = 'LayerGroup' 19 | 20 | if isinstance(data_reader, KerasNestedFileReader): 21 | # In the .h5 file, the paths don't go more than one level deep 22 | nested_path = data_reader.nested_path 23 | else: 24 | nested_path = layer['name'] 25 | 26 | if isinstance(data_reader, KerasFileReader): 27 | nested_reader = KerasNestedFileReader(data_reader, nested_path) 28 | else: 29 | nested_reader = KerasModelReader(data_reader.model.get_layer(layer['name'])) 30 | 31 | layer_list, input_layers, output_layers, output_shapes = parse_keras_model(keras_layer, nested_reader) 32 | 33 | if output_layers is None: 34 | last_layer = layer_list[-1]['name'] 35 | else: 36 | last_layer = output_layers[0] 37 | output_shape = output_shapes[last_layer] 38 | 39 | layer['layer_list'] = layer_list 40 | layer['input_layers'] = input_layers if input_layers is not None else [] 41 | layer['output_layers'] = output_layers if output_layers is not None else [] 42 | layer['data_reader'] = nested_reader 43 | layer['output_shape'] = output_shape 44 | 45 | return layer, output_shape 46 | -------------------------------------------------------------------------------- /hls4ml/converters/keras_v3/__init__.py: -------------------------------------------------------------------------------- 1 | from . import conv # noqa: F401 2 | from . import core # noqa: F401 3 | from . import einsum_dense # noqa: F401 4 | from . import merge # noqa: F401 5 | from . 
import pooling # noqa: F401 6 | from ._base import registry as layer_handlers 7 | 8 | __all__ = ['layer_handlers'] 9 | -------------------------------------------------------------------------------- /hls4ml/converters/onnx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/converters/onnx/__init__.py -------------------------------------------------------------------------------- /hls4ml/converters/onnx/merge.py: -------------------------------------------------------------------------------- 1 | from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler 2 | 3 | merge_layers = ['Add', 'Sub', 'Mul', 'Div', 'Average', 'Max', 'Min', 'Concat', 'Sum'] 4 | 5 | op_map = { 6 | 'Add': 'add', 7 | 'Sub': 'subtract', 8 | 'Mul': 'multiply', 9 | 'Div': 'divide', 10 | 'Average': 'average', 11 | 'Max': 'maximum', 12 | 'Min': 'minimum', 13 | 'Sum': 'add', 14 | 'Concat': 'concat', 15 | } 16 | 17 | 18 | @onnx_handler(*merge_layers) 19 | def parse_merge_layer(node, input_names, input_shapes, graph): 20 | layer = {} 21 | layer['class_name'] = node.op_type 22 | layer['name'] = node.name 23 | layer['op'] = op_map[node.op_type] 24 | layer['inputs'] = input_names 25 | layer['outputs'] = list(node.output) 26 | 27 | if layer['class_name'] == 'Concat': 28 | rank = len(input_shapes[0][1:]) 29 | if rank > 3: 30 | raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.') 31 | 32 | layer['class_name'] = 'Concatenate' 33 | layer['op'] = layer['class_name'].lower() + f'{rank}d' 34 | layer['axis'] = get_onnx_attribute(node, 'axis') 35 | 36 | else: 37 | layer['class_name'] = 'Merge' 38 | 39 | if len(layer['inputs']) > 2: 40 | raise Exception('ERROR: Merging more than two tensors is not yet supported.') 41 | 42 | return layer 43 | -------------------------------------------------------------------------------- /hls4ml/converters/pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/converters/pytorch/__init__.py -------------------------------------------------------------------------------- /hls4ml/model/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.graph import HLSConfig, ModelGraph # noqa: F401 2 | -------------------------------------------------------------------------------- /hls4ml/model/flow/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.flow.flow import ( # noqa: F401 2 | Flow, 3 | get_available_flows, 4 | get_backend_flows, 5 | get_flow, 6 | register_flow, 7 | update_flow, 8 | ) 9 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/model/optimizer/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/expand_layer_group.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Input, LayerGroup 2 | from hls4ml.model.optimizer import 
OptimizerPass 3 | 4 | 5 | class ExpandLayerGroup(OptimizerPass): 6 | '''Expands LayerGroup (a nested model) into the parent model.''' 7 | 8 | def match(self, node): 9 | return isinstance(node, LayerGroup) 10 | 11 | def transform(self, model, node): 12 | layer_list = node.get_attr('layer_list') 13 | 14 | # We'll keep track of inserted Input nodes to remove later 15 | inserted_input_nodes = [] 16 | 17 | for i, layer in enumerate(layer_list): 18 | kind = layer['class_name'] 19 | name = layer['name'] 20 | inputs = layer.get('inputs', []) 21 | outputs = layer.get('outputs', []) 22 | 23 | if name in model.graph.keys(): 24 | raise Exception(f'Layer names must be unique: "{name}" already found in the model graph.') 25 | 26 | if len(inputs) == 0: 27 | if kind in ['InputLayer', 'Input']: 28 | inputs = node.inputs.copy() 29 | else: 30 | inputs = model.graph[layer_list[i - 1]['name']].outputs.copy() 31 | if len(outputs) == 0: 32 | outputs = [name] 33 | 34 | new_node = model.make_node(kind, name, layer, inputs, outputs) 35 | model.insert_node(new_node) 36 | if isinstance(new_node, Input): 37 | inserted_input_nodes.append(new_node) 38 | 39 | model.remove_node(node) 40 | 41 | for input_node in inserted_input_nodes: 42 | model.remove_node(input_node) 43 | 44 | return True 45 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/fuse_biasadd.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import BiasAdd, Conv1D, Conv2D, Dense 2 | from hls4ml.model.optimizer import OptimizerPass 3 | 4 | 5 | class FuseBiasAdd(OptimizerPass): 6 | '''Fuses BiasAdd into Dense/Conv2D layer (common in TF models).''' 7 | 8 | def match(self, node): 9 | return isinstance(node, BiasAdd) and isinstance(node.get_input_node(), (Dense, Conv1D, Conv2D)) 10 | 11 | def transform(self, model, node): 12 | # Fuse BiasAdd into Dense layer 13 | dense_layer = node.get_input_node() 14 | dense_layer.get_weights('bias').data = node.get_weights('bias').data 15 | 16 | model.remove_node(node) 17 | 18 | return True 19 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/linear.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Activation, BatchNormalization, Conv1D, Conv2D, Dense 2 | from hls4ml.model.optimizer import OptimizerPass 3 | from hls4ml.model.types import UnspecifiedPrecisionType 4 | 5 | 6 | class EliminateLinearActivation(OptimizerPass): 7 | def match(self, node): 8 | cast = False 9 | if isinstance(node, Activation): 10 | cast = node.get_input_variable().type.precision != node.get_output_variable().type.precision 11 | return isinstance(node, Activation) and node.get_attr('activation') == 'linear' and not cast 12 | 13 | def transform(self, model, node): 14 | model.remove_node(node) 15 | return True 16 | 17 | 18 | _safe_parents = (Dense, Conv1D, Conv2D, BatchNormalization, Activation) 19 | 20 | 21 | class MergeLinearActivation(OptimizerPass): 22 | ''' 23 | For many objects it's safe to change the output precision independently of the calculation. 24 | ''' 25 | 26 | def match(self, node): 27 | ''' 28 | Only match if the parent is safe and the precision is not explicitly set. 
29 | ''' 30 | if isinstance(node, Activation) and node.get_attr('activation') == 'linear': 31 | parent = node.get_input_node(node.inputs[0]) 32 | safe_parent = isinstance(parent, _safe_parents) 33 | return safe_parent and isinstance(parent.get_output_variable().type.precision, UnspecifiedPrecisionType) 34 | else: 35 | return False 36 | 37 | def transform(self, model, node): 38 | prev_node = node.get_input_node(node.inputs[0]) 39 | quantizer = node.get_attr("quantizer") 40 | # if the activation has a quantizer (usually from a QONNX Quant node), set the previous node's output precision 41 | if quantizer is not None: 42 | prev_node.set_attr("quantizer", quantizer) 43 | prev_node.get_output_variable().type.precision = quantizer.hls_type 44 | model.remove_node(node) 45 | return True 46 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/reshape_const.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Constant, Reshape 2 | from hls4ml.model.optimizer import OptimizerPass 3 | 4 | 5 | class ReshapeConstant(OptimizerPass): 6 | """ 7 | ONNX has the target shape come as an input, not a parameter. This removes 8 | the Constant input from the new shape input. (Non-constant inputs are not supported.) 9 | The constant value was already used; this is just a cleanup optimization. 10 | """ 11 | 12 | def match(self, node): 13 | is_match = isinstance(node, Reshape) and len(node.inputs) > 1 and node.get_input_node(node.inputs[1]) 14 | 15 | return is_match 16 | 17 | def transform(self, model, node): 18 | """ 19 | Remove Constant from the new shape input. Note: the input shape node is already used at initialization. 20 | """ 21 | shape_node = node.get_input_node(node.inputs[1]) 22 | node.inputs[1] = '' 23 | if not isinstance(shape_node, Constant): 24 | raise RuntimeError('Nonconstant shape inputs are not currently supported') 25 | model.remove_node(shape_node) 26 | 27 | return True 28 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/resize_remove_constants.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | from hls4ml.model.layers import Constant, Resize 4 | from hls4ml.model.optimizer import OptimizerPass 5 | 6 | 7 | class ResizeRemoveConstants(OptimizerPass): 8 | """ 9 | This optimizer removes the RoI and Scales constant inputs of the Resize node, which, if left in place, cause issues in hls4ml. 10 | """ 11 | 12 | def match(self, node): 13 | is_match = isinstance(node, Resize) and len(node.inputs) > 1 14 | return is_match 15 | 16 | def transform(self, model, node): 17 | """ 18 | Remove RoI and Scale Constant from new shape input. 19 | """ 20 | # see doc here: https://onnx.ai/onnx/operators/onnx__Resize.html 21 | roi_index = 1 22 | scales_idx = 2 23 | scales_node = node.get_input_node(node.inputs[scales_idx]) 24 | node.inputs[scales_idx] = '' 25 | if not isinstance(scales_node, Constant): 26 | raise RuntimeError('Non-constant shape inputs are not supported') 27 | model.remove_node(scales_node) 28 | # RoI position is always 1 when present 29 | roi_node = node.get_input_node(node.inputs[roi_index]) 30 | if roi_node.get_attr('value'): 31 | warn('RoI value vector is not empty.
Consider that RoI is not supported in hls4ml', stacklevel=2) 32 | node.inputs[roi_index] = '' 33 | if not isinstance(roi_node, Constant): 34 | raise RuntimeError('Non-constant RoI inputs are not supported') 35 | model.remove_node(roi_node) 36 | # Clean all the '' inputs 37 | node.inputs = list(filter(None, node.inputs)) 38 | return True 39 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/stamp.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | from hls4ml.model.optimizer import ModelOptimizerPass 4 | 5 | 6 | class MakeStamp(ModelOptimizerPass): 7 | def __init__(self): 8 | self.name = 'make_stamp' 9 | 10 | def transform(self, model): 11 | def _make_stamp(): 12 | """Create a unique identifier for the generated code. This identifier is used to 13 | compile a unique library and link it with python.""" 14 | 15 | length = 8 16 | 17 | stamp = uuid.uuid4() 18 | return str(stamp)[-length:] 19 | 20 | model.config.config['Stamp'] = _make_stamp() 21 | 22 | return False # No model graph changes made 23 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/transpose_opt.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Input, Transpose 2 | from hls4ml.model.optimizer import OptimizerPass 3 | 4 | 5 | class RemoveNopTranspose(OptimizerPass): 6 | """ 7 | Remove a transpose layer if it doesn't do anything to a 1D array. i.e, 1D input and perm = [0] 8 | """ 9 | 10 | def match(self, node): 11 | is_match = isinstance(node, Transpose) and node.get_attr('perm') == [0] # Useless transpose 12 | return is_match 13 | 14 | def transform(self, model, node): 15 | print(f'Unnecessary transpose node ({node.name}) detected, optimizing ...') 16 | model.remove_node(node) 17 | 18 | return True 19 | 20 | 21 | class RemoveSingleChannelTranspose(OptimizerPass): 22 | """ 23 | Remove transpose of inputs if the number of channels is 1 as for io_parallel this doesn't affect the array 24 | representation used 25 | """ 26 | 27 | def match(self, node): 28 | if node.model.config.get_config_value('IOType') != 'io_parallel': 29 | return False 30 | 31 | return ( 32 | isinstance(node, Transpose) 33 | and isinstance(node.get_input_node(), Input) 34 | and node.get_input_variable().shape[0] == 1 35 | ) 36 | 37 | def transform(self, model, node): 38 | # Adjust the input shape and remove the Transpose node 39 | input_var = node.get_input_variable() 40 | input_var.shape.append(input_var.shape.pop(0)) 41 | model.remove_node(node) 42 | 43 | return True 44 | -------------------------------------------------------------------------------- /hls4ml/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | # No imports as each of the optimization modules may contain different dependencies. 2 | -------------------------------------------------------------------------------- /hls4ml/optimization/dsp_aware_pruning/config.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | ''' 4 | A list of currently supported structures when optimizing (pruning, weight sharing) 5 | For more information, see attributes.py 6 | 7 | 1. 
Unstructured: 8 | - Pruning: Y 9 | - Weight sharing: N 10 | - Description: Removes (zeroes out) individual weights 11 | - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) 12 | 13 | 2. Structured: 14 | - Pruning: Y 15 | - Weight sharing: Y 16 | - Description: Zeroes out or quantizes all the weights in a structure: 17 | - Dense: Neurons, determined by their outgoing connections (columns in Keras weight tensors) 18 | - Conv2D: Filters (structures of size filt_width x filt_height x n_chan) 19 | - Notes: 20 | - For Dense, it was also possible optimize by incoming connections (rows); 21 | However, removing zero neurons becomes harder because of Keras Surgeon 22 | - For Conv2D, significant literature explored pruning channels; currently not supported 23 | - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) 24 | 25 | 3. Pattern: 26 | - Pruning: Y 27 | - Weight sharing: Y 28 | - Description: Zeroes out or quantizes all the weights in a group 29 | Groups are determined by a variable, n, and every n-th weight in the flattened, 30 | Transposed (Resource) weight tensor is collected and stored in the same group 31 | Equivalent to pruning/quantizing weight processed by the same DSP in hls4ml 32 | - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) 33 | 34 | 4. Block: 35 | - Pruning: Y 36 | - Weight sharing: Y 37 | - Description: Zeroes out or quantizes all the weights in a block of size (w, h) 38 | - Supports: All rank-2 (e.g. Dense, but not Conv2D) layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) 39 | 40 | ''' 41 | 42 | 43 | class SUPPORTED_STRUCTURES(Enum): 44 | UNSTRUCTURED = 'unstructured' 45 | STRUCTURED = 'structured' 46 | PATTERN = 'pattern' 47 | BLOCK = 'block' 48 | -------------------------------------------------------------------------------- /hls4ml/optimization/dsp_aware_pruning/keras/config.py: -------------------------------------------------------------------------------- 1 | from qkeras import QConv2D, QDense 2 | from tensorflow.keras.layers import Conv2D, Dense 3 | 4 | ''' 5 | Optimizable layers in Keras / QKeras 6 | Any new layers need to be registered here first 7 | Additional logic in the source files may need to be written (e.g. recurrent layers should also optimize recurrent kernels) 8 | ''' 9 | SUPPORTED_LAYERS = (Dense, Conv2D, QDense, QConv2D) 10 | 11 | 12 | ''' 13 | Supported ranking metrics, for classifying redundant (groups of) weights 14 | 15 | 1. l1 - groups of weights are ranked by their l1 norm 16 | 2. l2 - groups of weights are ranked by their l2 norm 17 | 3. oracle - abs(dL / dw * w), introduced by Molchanov et al. (2016) 18 | Pruning Convolutional Neural Networks for Resource Efficient Inference 19 | 4. saliency - (d^2L / dw^2 * w)^2, introduced by Lecun et al. (1989) Optimal Brain Damage 20 | ''' 21 | SUPPORTED_METRICS = ('l1', 'l2', 'oracle', 'saliency') 22 | 23 | ''' 24 | Temporary directory for storing best models, tuning results etc. 
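As a toy illustration of the first two ranking metrics listed above: groups of weights are scored by a norm, and the lowest-scoring group becomes the first pruning candidate. The numbers below are made up, and the real optimizer works on Keras/QKeras layer tensors rather than dictionaries.

```python
import numpy as np

groups = {
    'neuron_0': np.array([0.8, -0.5, 0.1]),
    'neuron_1': np.array([0.05, 0.02, -0.01]),
    'neuron_2': np.array([-0.4, 0.3, 0.2]),
}
l1 = {k: float(np.abs(v).sum()) for k, v in groups.items()}
l2 = {k: float(np.sqrt((v**2).sum())) for k, v in groups.items()}

print(min(l1, key=l1.get))  # 'neuron_1' -> weakest group under the l1 metric
print(min(l2, key=l2.get))  # 'neuron_1' -> weakest group under the l2 metric as well
```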
25 | ''' 26 | TMP_DIRECTORY = 'hls4ml-optimization-keras' 27 | -------------------------------------------------------------------------------- /hls4ml/report/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.report.catapult_report import parse_catapult_report # noqa: F401 2 | from hls4ml.report.catapult_report import qofr # noqa: F401 3 | from hls4ml.report.catapult_report import read_catapult_report # noqa: F401 4 | from hls4ml.report.oneapi_report import parse_oneapi_report # noqa: F401 5 | from hls4ml.report.oneapi_report import print_oneapi_report # noqa: F401 6 | from hls4ml.report.quartus_report import parse_quartus_report # noqa: F401 7 | from hls4ml.report.quartus_report import read_quartus_report # noqa: F401 8 | from hls4ml.report.vivado_report import parse_vivado_report # noqa: F401 9 | from hls4ml.report.vivado_report import print_vivado_report # noqa: F401 10 | from hls4ml.report.vivado_report import read_vivado_report # noqa: F401 11 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CC=g++ 4 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 5 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 6 | elif [[ "$OSTYPE" == "linux"* ]]; then 7 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique -Wno-pragmas" 8 | elif [[ "$OSTYPE" == "darwin"* ]]; then 9 | CFLAGS="-O3 -fPIC -std=c++11" 10 | fi 11 | LDFLAGS= 12 | 13 | # Pick up AC libraries from Catapult install first 14 | INCFLAGS="-I$MGC_HOME/shared/include -I$MGC_HOME/shared/include/nnet_utils -Ifirmware/ac_types/include -Ifirmware/ac_math/include -Ifirmware/ac_simutils/include -Ifirmware/nnet_utils" 15 | PROJECT=myproject 16 | LIB_STAMP=mystamp 17 | 18 | ${CC} ${CFLAGS} ${INCFLAGS} -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 19 | ${CC} ${CFLAGS} ${INCFLAGS} -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 20 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so 21 | rm -f *.o 22 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/catapult_synth.tcl: -------------------------------------------------------------------------------- 1 | add_files myproject_prj/solution1/syn/vhdl 2 | synth_design -top myproject -part xcku115-flvb2104-2-i 3 | report_utilization -file vivado_synth.rpt 4 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/firmware/defines.h: -------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H_ 2 | #define DEFINES_H_ 3 | 4 | #include "nnet_utils/nnet_types.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // hls-fpga-machine-learning insert numbers 12 | 13 | // hls-fpga-machine-learning insert layer-precision 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/firmware/myproject.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "myproject.h" 4 | #include "parameters.h" 5 | 6 | #include 7 | 8 | #pragma hls_design top 9 | // hls-fpga-machine-learning insert IFSynPragmas 10 | void CCS_BLOCK(myproject)( 11 | // hls-fpga-machine-learning insert header 12 | ) { 13 | 14 | // hls-fpga-machine-learning insert IO 15 | 16 | 
#ifndef __SYNTHESIS__ 17 | static bool loaded_weights = false; 18 | if (!loaded_weights) { 19 | // hls-fpga-machine-learning insert load weights 20 | loaded_weights = true; 21 | } 22 | #endif 23 | 24 | // **************************************** 25 | // NETWORK INSTANTIATION 26 | // **************************************** 27 | 28 | // hls-fpga-machine-learning insert layers 29 | } 30 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/firmware/myproject.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_H_ 2 | #define MYPROJECT_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "defines.h" 9 | 10 | // Prototype of top level function for C-synthesis 11 | void myproject( 12 | // hls-fpga-machine-learning insert header 13 | ); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/firmware/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETERS_H_ 2 | #define PARAMETERS_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nnet_utils/nnet_code_gen.h" 8 | #include "nnet_utils/nnet_helpers.h" 9 | // hls-fpga-machine-learning insert includes 10 | 11 | // hls-fpga-machine-learning insert weights 12 | 13 | // hls-fpga-machine-learning insert layer-config 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/myproject_bridge.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_BRIDGE_H_ 2 | #define MYPROJECT_BRIDGE_H_ 3 | 4 | #include "firmware/myproject.h" 5 | #include "nnet_helpers.h" 6 | #include 7 | #include 8 | 9 | // hls-fpga-machine-learning insert weights dir 10 | 11 | const char *get_weights_dir() { return s_weights_dir.c_str(); } 12 | 13 | // hls-fpga-machine-learning insert bram 14 | 15 | // hls-fpga-machine-learning insert declare weights 16 | 17 | namespace nnet { 18 | bool trace_enabled = false; 19 | std::map *trace_outputs = NULL; 20 | size_t trace_type_size = sizeof(double); 21 | } // namespace nnet 22 | 23 | extern "C" { 24 | 25 | struct trace_data { 26 | const char *name; 27 | void *data; 28 | }; 29 | 30 | void allocate_trace_storage(size_t element_size) { 31 | nnet::trace_enabled = true; 32 | nnet::trace_outputs = new std::map; 33 | nnet::trace_type_size = element_size; 34 | // hls-fpga-machine-learning insert trace_outputs 35 | } 36 | 37 | void free_trace_storage() { 38 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 39 | void *ptr = i->second; 40 | free(ptr); 41 | } 42 | nnet::trace_outputs->clear(); 43 | delete nnet::trace_outputs; 44 | nnet::trace_outputs = NULL; 45 | nnet::trace_enabled = false; 46 | } 47 | 48 | void collect_trace_output(struct trace_data *c_trace_outputs) { 49 | int ii = 0; 50 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 51 | c_trace_outputs[ii].name = i->first.c_str(); 52 | c_trace_outputs[ii].data = i->second; 53 | ii++; 54 | } 55 | } 56 | 57 | // Wrapper of top level function for Python bridge 58 | void myproject_float( 59 | // hls-fpga-machine-learning insert header #float 60 | ) { 61 | 62 | // hls-fpga-machine-learning insert wrapper #float 63 | } 64 | 65 | void myproject_double( 66 | // hls-fpga-machine-learning insert header #double 67 | ) { 68 | // 
hls-fpga-machine-learning insert wrapper #double 69 | } 70 | } 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/hls_math.h: -------------------------------------------------------------------------------- 1 | #ifndef X_HLS_MATH_H 2 | #define X_HLS_MATH_H 3 | 4 | #include "ac_fixed.h" 5 | #include 6 | 7 | namespace hls { 8 | 9 | template <class T> static T exp(const T x) { return (T)std::exp(x.to_double()); } 10 | 11 | template <class T> T sin(T x) { return (T)std::sin(x.to_double()); }; 12 | 13 | template <class T> T cos(T x) { return (T)std::cos(x.to_double()); }; 14 | 15 | template <class T> T asin(T x) { return (T)std::asin(x.to_double()); }; 16 | 17 | template <class T> T acos(T x) { return (T)std::acos(x.to_double()); }; 18 | 19 | template <class T> T atan(T x) { return (T)std::atan(x.to_double()); }; 20 | 21 | template <class T> T atan2(T x, T y) { return (T)hls::atan2(x.to_double(), y.to_double()); }; 22 | 23 | } // namespace hls 24 | #endif 25 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_array.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_ARRAY_H_ 2 | #define NNET_ARRAY_H_ 3 | 4 | #include 5 | 6 | namespace nnet { 7 | 8 | struct transpose_config { 9 | static const unsigned height = 10; 10 | static const unsigned width = 10; 11 | static const unsigned depth = 10; 12 | static constexpr unsigned perm[3] = {2, 0, 1}; 13 | }; 14 | 15 | template <class data_T, class res_T, typename CONFIG_T> 16 | void transpose_2d(data_T data[CONFIG_T::height * CONFIG_T::width], res_T data_t[CONFIG_T::height * CONFIG_T::width]) { 17 | //#pragma HLS PIPELINE 18 | 19 | for (int i = 0; i < CONFIG_T::height; i++) { 20 | for (int j = 0; j < CONFIG_T::width; j++) { 21 | data_t[j * CONFIG_T::height + i] = data[i * CONFIG_T::width + j]; 22 | } 23 | } 24 | } 25 | 26 | template <class data_T, class res_T, typename CONFIG_T> 27 | void transpose_3d(data_T data[CONFIG_T::depth * CONFIG_T::height * CONFIG_T::width], 28 | res_T data_t[CONFIG_T::depth * CONFIG_T::height * CONFIG_T::width]) { 29 | unsigned dims[3] = {CONFIG_T::depth, CONFIG_T::height, CONFIG_T::width}; 30 | unsigned dims_t[3]; 31 | dims_t[0] = dims[CONFIG_T::perm[0]]; 32 | dims_t[1] = dims[CONFIG_T::perm[1]]; 33 | dims_t[2] = dims[CONFIG_T::perm[2]]; 34 | 35 | int idx[3] = {0}, idx_t[3] = {0}; 36 | for (idx[0] = 0; idx[0] < dims[0]; idx[0]++) { 37 | for (idx[1] = 0; idx[1] < dims[1]; idx[1]++) { 38 | for (idx[2] = 0; idx[2] < dims[2]; idx[2]++) { 39 | idx_t[0] = idx[CONFIG_T::perm[0]]; 40 | idx_t[1] = idx[CONFIG_T::perm[1]]; 41 | idx_t[2] = idx[CONFIG_T::perm[2]]; 42 | 43 | data_t[idx_t[0] * dims_t[1] * dims_t[2] + idx_t[1] * dims_t[2] + idx_t[2]] = 44 | data[idx[0] * dims[1] * dims[2] + idx[1] * dims[2] + idx[2]]; 45 | } 46 | } 47 | } 48 | } 49 | 50 | } // namespace nnet 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_code_gen.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_INSTR_GEN_H_ 2 | #define NNET_INSTR_GEN_H_ 3 | 4 | #include "nnet_helpers.h" 5 | #include 6 | 7 | namespace nnet { 8 | 9 | template <class data_T, typename CONFIG_T> class FillConv1DBuffer { 10 | public: 11 | static void fill_buffer(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], 12 | data_T buffer[CONFIG_T::n_pixels][CONFIG_T::filt_width * CONFIG_T::n_chan], 13 | const unsigned partition) { 14 | // To be implemented in subclasses 15 | } 16 | }; 17 | 18 | template <class data_T, typename CONFIG_T> class FillConv2DBuffer { 19 | public:
20 | static void 21 | fill_buffer(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan], 22 | data_T buffer[CONFIG_T::n_pixels][CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan], 23 | const unsigned partition) { 24 | // To be implemented in subclasses 25 | } 26 | }; 27 | 28 | // hls4ml insert code 29 | 30 | } // namespace nnet 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_common.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef NNET_COMMON_H_ 3 | #define NNET_COMMON_H_ 4 | 5 | #include "ac_fixed.h" 6 | 7 | // This is a substitute for "ceil(n/(float)d)". 8 | #define DIV_ROUNDUP(n, d) ((n + d - 1) / d) 9 | #define MIN(n, d) (n > d ? d : n) 10 | #define MAX(n, d) (n > d ? n : d) 11 | 12 | namespace nnet { 13 | 14 | // Common type definitions 15 | enum io_type { io_parallel = 0, io_stream }; 16 | enum strategy { latency, resource }; 17 | 18 | /* --- 19 | * Balanced tree reduce implementation. 20 | * For use in scenarios where Vivado cannot expression balance 21 | * Reduces an array of inputs to a single value using the template binary operator 'Op', 22 | * for example summing all elements with Op_add, or finding the maximum with Op_max 23 | * Use only when the input array is fully unrolled. Or, slice out a fully unrolled section 24 | * before applying and accumulate the result over the rolled dimension. 25 | * --- */ 26 | template <class T, int N, class Op> T reduce(const T *x, Op op) { 27 | static constexpr int leftN = pow2(floorlog2(N - 1)) > 0 ? pow2(floorlog2(N - 1)) : 0; 28 | static constexpr int rightN = N - leftN > 0 ? N - leftN : 0; 29 | 30 | if (N == 1) { 31 | return x[0]; 32 | } else if (N == 2) { 33 | return op(x[0], x[1]); 34 | } else { 35 | return op(reduce<T, leftN, Op>(x, op), reduce<T, rightN, Op>(x + leftN, op)); 36 | } 37 | } 38 | 39 | template <class T> class Op_add { 40 | public: 41 | T operator()(T a, T b) { return a + b; } 42 | }; 43 | 44 | template <class T> class Op_and { 45 | public: 46 | T operator()(T a, T b) { return a && b; } 47 | }; 48 | 49 | template <class T> class Op_or { 50 | public: 51 | T operator()(T a, T b) { return a || b; } 52 | }; 53 | 54 | template <class T> class Op_max { 55 | public: 56 | T operator()(T a, T b) { return a >= b ? a : b; } 57 | }; 58 | 59 | template <class T> class Op_min { 60 | public: 61 | T operator()(T a, T b) { return a <= b ?
a : b; } 62 | }; 63 | 64 | } // namespace nnet 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_dense.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DENSE_H_ 2 | #define NNET_DENSE_H_ 3 | 4 | #include "ac_channel.h" 5 | #include "nnet_common.h" 6 | #include "nnet_dense_latency.h" 7 | #include "nnet_dense_resource.h" 8 | #include "nnet_helpers.h" 9 | #include "nnet_mult.h" 10 | #include 11 | 12 | namespace nnet { 13 | 14 | struct dense_config { 15 | // Internal data type definitions 16 | typedef float bias_t; 17 | typedef float weight_t; 18 | typedef float accum_t; 19 | 20 | // Layer Sizes 21 | static const unsigned n_in = 10; 22 | static const unsigned n_out = 10; 23 | 24 | // Resource reuse info 25 | static const unsigned io_type = io_parallel; 26 | static const unsigned strategy = latency; 27 | static const unsigned reuse_factor = 1; 28 | static const bool store_weights_in_bram = false; 29 | static const unsigned n_zeros = 0; 30 | // partitioning arrays cyclically to go with roll factors? 31 | // Product function to use 32 | template using product = nnet::product::mult; 33 | }; 34 | 35 | template 36 | void dense(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_out], 37 | typename CONFIG_T::weight_t weights[CONFIG_T::n_in * CONFIG_T::n_out], 38 | typename CONFIG_T::bias_t biases[CONFIG_T::n_out]) { 39 | //#pragma HLS inline 40 | if (CONFIG_T::strategy == nnet::latency) { 41 | dense_latency(data, res, weights, biases); 42 | } else { 43 | dense_resource(data, res, weights, biases); 44 | } 45 | } 46 | 47 | } // namespace nnet 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_embed.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_H_ 2 | #define NNET_EMBED_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_helpers.h" 6 | 7 | namespace nnet { 8 | 9 | struct embed_config { 10 | // Internal data type definitions 11 | typedef float embeddings_t; 12 | 13 | // Layer Sizes 14 | static const unsigned n_in = 10; 15 | static const unsigned n_out = 16; 16 | static const unsigned vocab_size = 50; 17 | 18 | // Resource reuse info 19 | static const unsigned io_type = io_parallel; 20 | static const unsigned reuse_factor = 1; 21 | }; 22 | 23 | template 24 | void embedding(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in * CONFIG_T::n_out], 25 | typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 26 | 27 | //#pragma HLS PIPELINE II=CONFIG_T::reuse_factor 28 | // This can save a few cycles, but it will create a large multiplexer due to 29 | // non-constant access pattern, so let's leave it out 30 | ////#pragma HLS ARRAY_PARTITION variable=embeddings complete 31 | 32 | constexpr int ce_reuse_factor = CONFIG_T::reuse_factor; 33 | (void)ce_reuse_factor; 34 | InputSequence: 35 | for (int j = 0; j < CONFIG_T::n_in; j++) { 36 | // #pragma HLS UNROLL 37 | DenseEmbedding: 38 | for (int i = 0; i < CONFIG_T::n_out; i++) { 39 | // #pragma HLS UNROLL 40 | res[j * CONFIG_T::n_out + i] = embeddings[data[j] * CONFIG_T::n_out + i]; 41 | } 42 | } 43 | } 44 | 45 | } // namespace nnet 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_embed_stream.h: 
-------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_STREAM_H_ 2 | #define NNET_EMBED_STREAM_H_ 3 | 4 | #include "ac_channel.h" 5 | #include "nnet_common.h" 6 | #include "nnet_helpers.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void embedding(ac_channel &data, ac_channel &res, 12 | typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 13 | data_T in_data = data.read(); 14 | constexpr int ce_reuse_factor = CONFIG_T::reuse_factor; 15 | (void)ce_reuse_factor; 16 | InputSequence: 17 | for (int j = 0; j < data_T::size; j++) { 18 | //#pragma HLS PIPELINE II=CONFIG_T::reuse_factor 19 | 20 | res_T res_pack; 21 | //#pragma HLS DATA_PACK variable=res_pack 22 | 23 | DenseEmbedding: 24 | for (int i = 0; i < CONFIG_T::n_out; i++) { 25 | // #pragma HLS UNROLL 26 | res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; 27 | } 28 | res.write(res_pack); 29 | } 30 | } 31 | 32 | } // namespace nnet 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_image.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_H_ 2 | #define NNET_IMAGE_H_ 3 | 4 | #include "ac_channel.h" 5 | #include "nnet_common.h" 6 | #include 7 | 8 | namespace nnet { 9 | 10 | struct resize_config { 11 | static const unsigned height = 10; 12 | static const unsigned width = 10; 13 | static const unsigned n_chan = 10; 14 | static const unsigned new_height = 10; 15 | static const unsigned new_width = 10; 16 | }; 17 | 18 | template 19 | void resize_nearest(data_T image[CONFIG_T::height * CONFIG_T::width * CONFIG_T::n_chan], 20 | data_T resized[CONFIG_T::new_height * CONFIG_T::new_width * CONFIG_T::n_chan]) { 21 | int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; 22 | int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; 23 | int x2, y2; 24 | 25 | //#pragma HLS PIPELINE 26 | 27 | for (int i = 0; i < CONFIG_T::new_height; i++) { 28 | for (int j = 0; j < CONFIG_T::new_width; j++) { 29 | x2 = ((j * x_ratio) >> 16); 30 | y2 = ((i * y_ratio) >> 16); 31 | for (int k = 0; k < CONFIG_T::n_chan; k++) { 32 | resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = 33 | image[(y2 * CONFIG_T::width * CONFIG_T::n_chan) + x2 * CONFIG_T::n_chan + k]; 34 | } 35 | } 36 | } 37 | } 38 | 39 | } // namespace nnet 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_recr_activations.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_RECR_ACTIVATION_H_ 2 | #define NNET_RECR_ACTIVATION_H_ 3 | 4 | #include "ac_channel.h" 5 | #include "nnet_activation.h" 6 | #include "nnet_common.h" 7 | #include "nnet_helpers.h" 8 | #include 9 | 10 | namespace nnet { 11 | 12 | namespace activation { 13 | 14 | template class Activation { 15 | public: 16 | // ************************************************* 17 | // Blank Activation 18 | // ************************************************* 19 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {} // Nothing to do here 20 | }; 21 | 22 | template class relu : public Activation { 23 | public: 24 | // ************************************************* 25 | // Relu Activation 26 | // ************************************************* 27 | static void activation(data_T 
data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 28 | nnet::relu(data, res); 29 | } 30 | }; 31 | 32 | template class sigmoid : public Activation { 33 | public: 34 | // ************************************************* 35 | // Sigmoid Activation 36 | // ************************************************* 37 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 38 | nnet::sigmoid(data, res); 39 | } 40 | }; 41 | 42 | template class tanh : public Activation { 43 | public: 44 | // ************************************************* 45 | // TanH Activation 46 | // ************************************************* 47 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 48 | nnet::tanh(data, res); 49 | } 50 | }; 51 | 52 | } // namespace activation 53 | 54 | } // namespace nnet 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_types.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TYPES_H_ 2 | #define NNET_TYPES_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nnet { 9 | 10 | // Fixed-size array 11 | template struct array { 12 | typedef T value_type; 13 | static const unsigned size = N; 14 | 15 | T data[N]; 16 | 17 | T &operator[](size_t pos) { return data[pos]; } 18 | 19 | const T &operator[](size_t pos) const { return data[pos]; } 20 | 21 | array &operator=(const array &other) { 22 | if (&other == this) 23 | return *this; 24 | 25 | assert(N == other.size && "Array sizes must match."); 26 | 27 | for (unsigned i = 0; i < N; i++) { 28 | //#pragma HLS UNROLL 29 | data[i] = other[i]; 30 | } 31 | return *this; 32 | } 33 | }; 34 | 35 | // Generic lookup-table implementation, for use in approximations of math functions 36 | template class lookup_table { 37 | public: 38 | lookup_table(T from, T to) : range_start(from), range_end(to), base_div(ac_int<16, false>(N) / T(to - from)) { 39 | T step = (range_end - range_start) / ac_int<16, false>(N); 40 | for (size_t i = 0; i < N; i++) { 41 | T num = range_start + ac_int<16, false>(i) * step; 42 | T sample = func(num); 43 | samples[i] = sample; 44 | } 45 | } 46 | 47 | T operator()(T n) const { 48 | int index = (n - range_start) * base_div; 49 | if (index < 0) 50 | index = 0; 51 | else if (index > N - 1) 52 | index = N - 1; 53 | return samples[index]; 54 | } 55 | 56 | private: 57 | T samples[N]; 58 | const T range_start, range_end; 59 | ac_fixed<20, 16, true> base_div; 60 | }; 61 | 62 | } // namespace nnet 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/exception_handler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __EXCEPTIONHANDLER_HPP__ 2 | #define __EXCEPTIONHANDLER_HPP__ 3 | #include 4 | #include 5 | #include 6 | 7 | namespace fpga_tools { 8 | 9 | void exception_handler(sycl::exception_list exceptions) { 10 | for (std::exception_ptr const &e : exceptions) { 11 | try { 12 | std::rethrow_exception(e); 13 | } catch (sycl::exception const &e) { 14 | std::cout << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl; 15 | } 16 | } 17 | } 18 | 19 | } // namespace fpga_tools 20 | 21 | #endif //__EXCEPTIONHANDLER_HPP__ 22 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/defines.h: 
-------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H_ 2 | #define DEFINES_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // Include nnet::array - a custom array-like struct, mainly used with io_stream 10 | #include "nnet_utils/nnet_types.h" 11 | 12 | // hls-fpga-machine-learning insert numbers 13 | 14 | // hls-fpga-machine-learning insert layer-precision 15 | 16 | #define DIV_ROUNDUP(n, d) ((n + d - 1) / d) 17 | #define MIN(n, d) (n > d ? d : n) 18 | #define MAX(n, d) (n < d ? d : n) 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/myproject.cpp: -------------------------------------------------------------------------------- 1 | #include "myproject.h" 2 | #include "parameters.h" 3 | #include 4 | 5 | // hls-fpga-machine-learning insert weights 6 | 7 | // The inter-task pipes need to be declared in the global scope 8 | // hls-fpga-machine-learning insert inter-task pipes 9 | 10 | using sycl::ext::intel::experimental::task_sequence; 11 | 12 | void MyProject::operator()() const { 13 | // **************************************** 14 | // NETWORK INSTANTIATION 15 | // **************************************** 16 | 17 | // hls-fpga-machine-learning read in 18 | 19 | // hls-fpga-machine-learning declare task sequences 20 | 21 | // hls-fpga-machine-learning insert layers 22 | 23 | // hls-fpga-machine-learning return 24 | } 25 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/myproject.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_H_ 2 | #define MYPROJECT_H_ 3 | 4 | #include "defines.h" 5 | 6 | // This file defines the interface to the kernel 7 | 8 | // currently this is fixed 9 | using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(sycl::ext::intel::experimental::ready_latency<0>)); 10 | 11 | // Need to declare the input and output pipes 12 | 13 | // hls-fpga-machine-learning insert inputs 14 | // hls-fpga-machine-learning insert outputs 15 | 16 | class MyProjectID; 17 | 18 | struct MyProject { 19 | 20 | // kernel property method to config invocation interface 21 | auto get(sycl::ext::oneapi::experimental::properties_tag) { 22 | return sycl::ext::oneapi::experimental::properties{sycl::ext::intel::experimental::streaming_interface<>, 23 | sycl::ext::intel::experimental::pipelined<>}; 24 | } 25 | 26 | SYCL_EXTERNAL void operator()() const; 27 | }; 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DENSE_STREAM_H_ 2 | #define NNET_DENSE_STREAM_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_dense.h" 6 | #include "nnet_types.h" 7 | 8 | namespace nnet { 9 | 10 | // Note: DataPack logic removed, at least in the initial version 11 | template 12 | void dense_resource_stream(typename CONFIG_T::weight_t weights, typename CONFIG_T::bias_t biases) { 13 | 14 | [[intel::fpga_register]] typename ExtractPipeType::value_type res; 15 | [[intel::fpga_register]] auto data = data_pipe::read(); 16 | dense_resource::value_type, typename ExtractPipeType::value_type, 17 | CONFIG_T>(data, res, weights, biases); 18 | res_pipe::write(res); 19 | } 20 | 21 | } // namespace nnet 22 | 23 | #endif 24 | 
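For orientation, a minimal sketch of how the streaming dense wrapper above is typically invoked from the generated oneAPI kernel. The pipe names Fc1InPipe/Fc1OutPipe, the config struct config2, and the weight/bias arrays w2/b2 are illustrative placeholders, not names defined in these templates; the actual call is emitted by the backend at the "// hls-fpga-machine-learning insert layers" marker in myproject.cpp.

    // inside MyProject::operator()(), after the inputs have been read in
    nnet::dense_resource_stream<Fc1InPipe, Fc1OutPipe, config2>(w2, b2);
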
-------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv1d.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DEPTH_CONV1D_H_ 2 | #define NNET_DEPTH_CONV1D_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_conv1d.h" 6 | #include "nnet_depthconv1d_resource.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void depthwise_conv_1d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, 12 | const typename CONFIG_T::bias_t &biases) { 13 | 14 | depthwise_conv_1d_resource_cl(data, res, weights, biases); 15 | } 16 | 17 | } // namespace nnet 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv2d.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DEPTH_CONV2D_H_ 2 | #define NNET_DEPTH_CONV2D_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_conv2d.h" 6 | #include "nnet_depthconv2d_resource.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void depthwise_conv_2d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, 12 | const typename CONFIG_T::bias_t &biases) { 13 | 14 | depthwise_conv_2d_resource_cl(data, res, weights, biases); 15 | } 16 | 17 | } // namespace nnet 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_embed.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_H_ 2 | #define NNET_EMBED_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_helpers.h" 6 | 7 | namespace nnet { 8 | 9 | struct embed_config { 10 | // Internal data type definitions 11 | typedef float embeddings_t; 12 | 13 | // (Default layer sizes, overwritten form the backend 14 | static const unsigned n_in = 10; 15 | static const unsigned n_out = 16; 16 | static const unsigned vocab_size = 50; 17 | 18 | // Resource reuse info 19 | static const unsigned io_type = io_parallel; 20 | static const unsigned reuse_factor = 1; 21 | }; 22 | 23 | template 24 | void embedding(const data_T &data, res_T &res, const typename CONFIG_T::embeddings_t &embeddings) { 25 | 26 | /* 27 | * Can store embeddings[] in a register, but a large multiiplexer 28 | * is created due to a non-constant access pattern 29 | */ 30 | 31 | InputSequence: 32 | #pragma unroll 33 | [[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int j = 0; j < CONFIG_T::n_in; j++) { 34 | DenseEmbedding: 35 | #pragma unroll 36 | for (int i = 0; i < CONFIG_T::n_out; i++) { 37 | res[j * CONFIG_T::n_out + i] = embeddings[data[j].to_uint() * CONFIG_T::n_out + i]; 38 | } 39 | } 40 | } 41 | 42 | } // namespace nnet 43 | #endif 44 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_embed_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_STREAM_H_ 2 | #define NNET_EMBED_STREAM_H_ 3 | 4 | namespace nnet { 5 | 6 | template 7 | void embedding_stream(typename CONFIG_T::embeddings_t embeddings) { 8 | 9 | using res_T = typename ExtractPipeType::value_type; 10 | constexpr auto datasize = std::tuple_size::value_type>{}; 11 | 12 | auto in_data = data_pipe::read(); 13 | 14 | InputSequence: 15 | 
[[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int j = 0; j < datasize; j++) { 16 | 17 | res_T res_pack; 18 | 19 | DenseEmbedding: 20 | #pragma unroll 21 | for (int i = 0; i < CONFIG_T::n_out; i++) { 22 | res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; 23 | } 24 | 25 | res_pipe::write(res_pack); 26 | } 27 | } 28 | 29 | } // namespace nnet 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_printf.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_PRINTF_H_ 2 | #define NNET_PRINTF_H_ 3 | 4 | #ifdef __SYCL_DEVICE_ONLY__ 5 | #define CL_CONSTANT __attribute__((opencl_constant)) 6 | #else 7 | #define CL_CONSTANT 8 | #endif 9 | 10 | using namespace sycl; 11 | 12 | #define PRINTF(format, ...) \ 13 | { \ 14 | static const CL_CONSTANT char _format[] = format; \ 15 | ext::oneapi::experimental::printf(_format, ##__VA_ARGS__); \ 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_recurrent_activation.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_RECR_ACTIVATION_H_ 2 | #define NNET_RECR_ACTIVATION_H_ 3 | 4 | #include "nnet_activation.h" 5 | #include "nnet_common.h" 6 | 7 | namespace nnet { 8 | 9 | namespace activation { 10 | 11 | template class Activation { 12 | public: 13 | // ************************************************* 14 | // Blank Activation 15 | // ************************************************* 16 | static void activation(const data_T &data, res_T &res) {} 17 | }; 18 | 19 | template class relu : public Activation { 20 | public: 21 | // ************************************************* 22 | // Relu Activation 23 | // ************************************************* 24 | static void activation(const data_T &data, res_T &res) { nnet::relu(data, res); } 25 | }; 26 | 27 | template class sigmoid : public Activation { 28 | public: 29 | // ************************************************* 30 | // Sigmoid Activation 31 | // ************************************************* 32 | static void activation(const data_T &data, res_T &res) { nnet::sigmoid(data, res); } 33 | }; 34 | 35 | template class tanh : public Activation { 36 | public: 37 | // ************************************************* 38 | // TanH Activation 39 | // ************************************************* 40 | static void activation(const data_T &data, res_T &res) { nnet::dense_tanh(data, res); } 41 | }; 42 | 43 | } // namespace activation 44 | 45 | } // namespace nnet 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_resize.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_H_ 2 | #define NNET_IMAGE_H_ 3 | 4 | namespace nnet { 5 | 6 | struct resize_config { 7 | static const unsigned in_height = 10; 8 | static const unsigned in_width = 10; 9 | 10 | static const unsigned out_height = 10; 11 | static const unsigned out_width = 10; 12 | 13 | static const unsigned n_chan = 10; 14 | }; 15 | 16 | template void resize_nearest(const data_T &image, res_T &resized) { 17 | int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; 18 | int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; 19 | 20 | for (int i = 0; i < 
CONFIG_T::new_height; i++) { 21 | for (int j = 0; j < CONFIG_T::new_width; j++) { 22 | int x = ((j * x_ratio) >> 16); 23 | int y = ((i * y_ratio) >> 16); 24 | 25 | #pragma unroll 26 | for (int k = 0; k < CONFIG_T::n_chan; k++) { 27 | resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = 28 | image[(y * CONFIG_T::width * CONFIG_T::n_chan) + x * CONFIG_T::n_chan + k]; 29 | } 30 | } 31 | } 32 | } 33 | 34 | } // namespace nnet 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_resize_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_STREAM_H_ 2 | #define NNET_IMAGE_STREAM_H_ 3 | 4 | #include "nnet_common.h" 5 | 6 | namespace nnet { 7 | 8 | template void resize_nearest_stream() { 9 | assert(CONFIG_T::new_height % CONFIG_T::height == 0); 10 | assert(CONFIG_T::new_width % CONFIG_T::width == 0); 11 | 12 | using data_T = typename ExtractPipeType::value_type; 13 | 14 | constexpr unsigned ratio_height = CONFIG_T::new_height / CONFIG_T::height; 15 | constexpr unsigned ratio_width = CONFIG_T::new_width / CONFIG_T::width; 16 | 17 | ImageHeight: 18 | for (unsigned h = 0; h < CONFIG_T::height; h++) { 19 | [[intel::fpga_register]] data_T data_in_row[CONFIG_T::width]; 20 | 21 | ImageWidth: 22 | for (unsigned i = 0; i < CONFIG_T::width; i++) { 23 | [[intel::fpga_register]] auto in_data = data_pipe::read(); 24 | 25 | ImageChan: 26 | #pragma unroll 27 | for (unsigned j = 0; j < CONFIG_T::n_chan; j++) { 28 | data_in_row[i][j] = in_data[j]; 29 | } 30 | } 31 | 32 | ResizeHeight: 33 | for (unsigned i = 0; i < ratio_height; i++) { 34 | 35 | ImageWidth2: 36 | for (unsigned l = 0; l < CONFIG_T::width; l++) { 37 | 38 | ResizeWidth: 39 | for (unsigned j = 0; j < ratio_width; j++) { 40 | 41 | [[intel::fpga_register]] data_T out_data; 42 | 43 | ResizeChan: 44 | #pragma unroll 45 | for (unsigned k = 0; k < CONFIG_T::n_chan; k++) { 46 | out_data[k] = data_in_row[l][k]; 47 | } 48 | 49 | res_pipe::write(out_data); 50 | } 51 | } 52 | } 53 | } 54 | } 55 | 56 | } // namespace nnet 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_transpose.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TRANSPOSE_H_ 2 | #define NNET_TRANSPOSE_H_ 3 | 4 | namespace nnet { 5 | 6 | struct transpose_config { 7 | static constexpr unsigned dims = 0; 8 | static constexpr unsigned N = 0; 9 | 10 | // Inherited struct should define these 11 | // static constexpr std::array from_shape; 12 | // static constexpr std::array to_shape; 13 | // static constexpr std::array perm; 14 | // static constexpr std::array perm_strides; 15 | }; 16 | 17 | template unsigned transfer_idx(int index) { 18 | // Given output idx in c-order flat array, return input idx 19 | int idx = 0; 20 | for (int i = CONFIG_T::dims - 1; i >= 0; i--) { 21 | idx += (index % CONFIG_T::to_shape[i]) * CONFIG_T::perm_strides[i]; 22 | index /= CONFIG_T::to_shape[i]; 23 | } 24 | return idx; 25 | } 26 | 27 | template void transpose(const data_T &data, res_T &res) { 28 | #pragma unroll 29 | for (int i = 0; i < CONFIG_T::N; i++) { 30 | int idx = transfer_idx(i); 31 | res[i] = data[idx]; 32 | } 33 | } 34 | 35 | } // namespace nnet 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- 
/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_transpose_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TRANSPOSE_STREAM_H_ 2 | #define NNET_TRANSPOSE_STREAM_H_ 3 | 4 | namespace nnet { 5 | 6 | template void transpose_stream() { 7 | 8 | using data_T = typename ExtractPipeType::value_type; 9 | using res_T = typename ExtractPipeType::value_type; 10 | 11 | constexpr auto data_size = std::tuple_size::value_type>{}; 12 | constexpr auto res_size = std::tuple_size::value_type>{}; 13 | 14 | [[intel::fpga_register]] typename data_T::value_type data_array[CONFIG_T::N]; 15 | 16 | for (int i = 0; i < CONFIG_T::N / data_size; i++) { 17 | [[intel::fpga_register]] data_T in_data = data_pipe::read(); 18 | 19 | #pragma unroll 20 | for (int j = 0; j < data_size; j++) { 21 | data_array[i * data_size + j] = typename data_T::value_type(in_data[j]); 22 | } 23 | } 24 | 25 | for (int i = 0; i < CONFIG_T::N / res_size; i++) { 26 | [[intel::fpga_register]] res_T out_data; 27 | 28 | #pragma unroll 29 | for (int j = 0; j < res_size; j++) { 30 | out_data[j] = typename res_T::value_type(data_array[transfer_idx(i * res_size + j)]); 31 | } 32 | 33 | res_pipe::write(out_data); 34 | } 35 | } 36 | 37 | } // namespace nnet 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETERS_H_ 2 | #define PARAMETERS_H_ 3 | 4 | #include "defines.h" 5 | 6 | #include "nnet_utils/nnet_helpers.h" 7 | // hls-fpga-machine-learning insert includes 8 | 9 | // hls-fpga-machine-learning insert layer-config 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/Makefile: -------------------------------------------------------------------------------- 1 | DEVICE := Arria10 2 | TARGETS := myproject-fpga 3 | 4 | CXX := i++ 5 | CXXFLAGS := -march=$(DEVICE) 6 | RM := rm -rf 7 | DEBUG_FLAGS := --time quartus-hlssynt.log 8 | SOURCE_FILES := myproject_test.cpp firmware/myproject.cpp 9 | HEADER_FILES := firmware/myproject.h 10 | LOGGING_1 := 11 | LOGGING_2 := 12 | QUARTUS_COMPILE := 13 | CONT_IF_LARGE_AREA := 14 | 15 | .PHONY: test 16 | test: $(TARGETS) 17 | @$(foreach t,$(TARGETS),echo ./$(t); ./$(t) | tee $(t)_run.log; echo "";) 18 | 19 | .PHONY: all 20 | all: $(TARGETS) 21 | 22 | .PHONY: clean 23 | clean: 24 | -$(RM) $(foreach t,$(TARGETS),$(t).prj $(t) $(t)_time.log) 25 | 26 | .PHONY: myproject-fpga 27 | myproject-fpga: CXXFLAGS := $(CXXFLAGS) 28 | 29 | $(TARGETS) : $(SOURCE_FILES) $(HEADER_FILES) 30 | $(CXX) $(LOGGING_1) $(LOGGING_2) $(CXXFLAGS) $(DEBUG_FLAGS) $(SOURCE_FILES) $(CONT_IF_LARGE_AREA) $(QUARTUS_COMPILE) -o $@ 31 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/ac_types/stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_STREAM_H 2 | #define NNET_STREAM_H 3 | 4 | #include 5 | 6 | namespace nnet { 7 | 8 | /* 9 | * A struct with the same high-level functionality as Intel's HLS ihc::stream 10 | * This struct is used during GCC compilation / hls4ml model.predict(...) 
11 | * This is because GCC does not have access to HLS source files (ihc::stream) 12 | * Software-wise, this struct behaves like a first-in, first-out (FIFO) buffer 13 | * However, it cannot be used for HLS synthesis, since it uses dynamic memory allocation (deque) 14 | */ 15 | template 16 | struct stream { 17 | private: 18 | std::deque _data; 19 | 20 | public: 21 | stream() {} 22 | 23 | T read() { 24 | T element = _data.front(); 25 | _data.pop_front(); 26 | return element; 27 | } 28 | 29 | void write(const T& element) { 30 | _data.push_back(element); 31 | } 32 | }; 33 | 34 | } 35 | 36 | #endif -------------------------------------------------------------------------------- /hls4ml/templates/quartus/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | CC=g++ 5 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 6 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 7 | elif [[ "$OSTYPE" == "darwin"* ]]; then 8 | CFLAGS="-O3 -fPIC -std=c++11" 9 | fi 10 | LDFLAGS= 11 | INCFLAGS="-Ifirmware/ac_types/ -Ifirmware/ap_types/" 12 | PROJECT=myproject 13 | LIB_STAMP=mystamp 14 | 15 | ${CC} ${CFLAGS} ${INCFLAGS} -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 16 | ${CC} ${CFLAGS} ${INCFLAGS} -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 17 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so 18 | rm -f *.o 19 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/defines.h: -------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H_ 2 | #define DEFINES_H_ 3 | 4 | /* 5 | * Intel HLS makes use of three streaming interfaces: 6 | * (1) stream_in - used as the main input to a component 7 | * (2) stream_out - used as the main output of a component 8 | * (3) stream - allows both reading and writing; used for inter-component connections 9 | * ihc::stream has a implicitly deleted constructor and therefore, cannot be used as the output of a function/component 10 | * Therefore, variables of type 'stream' are always passed by reference 11 | */ 12 | 13 | #ifndef __INTELFPGA_COMPILER__ 14 | 15 | #include "ac_fixed.h" 16 | #include "ac_int.h" 17 | #define hls_register 18 | 19 | #include "stream.h" 20 | template using stream = nnet::stream; 21 | template using stream_in = nnet::stream; 22 | template using stream_out = nnet::stream; 23 | 24 | #else 25 | 26 | #include "HLS/ac_fixed.h" 27 | #include "HLS/ac_int.h" 28 | #include "HLS/hls.h" 29 | 30 | template using stream = ihc::stream; 31 | template using stream_in = ihc::stream_in; 32 | template using stream_out = ihc::stream_out; 33 | 34 | #endif 35 | 36 | // Include nnet::array - a custom array-like struct, mainly used with io_stream 37 | #include "nnet_utils/nnet_types.h" 38 | 39 | // hls-fpga-machine-learning insert numbers 40 | 41 | // hls-fpga-machine-learning insert layer-precision 42 | 43 | #define DIV_ROUNDUP(n, d) ((n + d - 1) / d) 44 | #define MIN(n, d) (n > d ? d : n) 45 | #define MAX(n, d) (n < d ? 
d : n) 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/myproject.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_H_ 2 | #define MYPROJECT_H_ 3 | 4 | #ifndef __INTELFPGA_COMPILER__ 5 | #include "ac_fixed.h" 6 | #include "ac_int.h" 7 | #define hls_register 8 | #else 9 | #include "HLS/ac_fixed.h" 10 | #include "HLS/ac_int.h" 11 | #include "HLS/hls.h" 12 | #endif 13 | 14 | // Streams are explicitly defined in defines.h, which are included for parameters.h 15 | // Defining them again in this file will cause compile-time errors 16 | #include "defines.h" 17 | 18 | // If using io_parallel, inputs and output need to be initialised before calling the top-level function 19 | // If using io_stream, no inputs/outputs are initialised, as they are passed by reference to the top-level function 20 | // hls-fpga-machine-learning insert inputs 21 | // hls-fpga-machine-learning insert outputs 22 | 23 | #ifndef __INTELFPGA_COMPILER__ 24 | /* 25 | * The top-level function used during GCC compilation / hls4ml.predic(...) goes here 26 | * An important distinction is made between io_stream and io_parallel: 27 | * (1) io_parallel: 28 | - Top-level function takes a struct containing an array as function argument 29 | - Returns a struct containing an array - the prediction 30 | (2) io_stream: 31 | - Top-level function is 'void' - no return value 32 | - Instead, both the input and output are passed by reference 33 | - This is due the HLS Streaming Interfaces; stream cannot be copied (implicitly deleted copy constructor) 34 | * This distinction is handled in quartus_writer.py 35 | */ 36 | // hls-fpga-machine-learning instantiate GCC top-level 37 | #else 38 | // Maximum initiation interval, concurrency and frequency for HLS syntheis are defined here 39 | // hls-fpga-machine-learning insert cpragmas 40 | 41 | /* 42 | * The top-level function used during HLS Synthesis goes here 43 | * In a similar manner to GCC, there is a distinction between io_stream & io_parallel 44 | */ 45 | // hls-fpga-machine-learning instantiate HLS top-level 46 | #endif 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_dense_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DENSE_STREAM_H_ 2 | #define NNET_DENSE_STREAM_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_dense.h" 6 | #include "nnet_types.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void dense_resource(stream &data_stream, stream &res_stream, 12 | const typename CONFIG_T::weight_t weights[CONFIG_T::n_in * CONFIG_T::n_out], 13 | const typename CONFIG_T::bias_t biases[CONFIG_T::n_out]) { 14 | hls_register typename data_T::value_type data[CONFIG_T::n_in]; 15 | hls_register typename res_T::value_type res[CONFIG_T::n_out]; 16 | 17 | DataPrepare: 18 | #pragma ii 1 19 | for (int i_in = 0; i_in < CONFIG_T::n_in / data_T::size; i_in++) { 20 | data_T data_pack = data_stream.read(); 21 | DataPack: 22 | #pragma unroll 23 | for (int i_pack = 0; i_pack < data_T::size; i_pack++) { 24 | data[i_in * data_T::size + i_pack] = data_pack[i_pack]; 25 | } 26 | } 27 | 28 | dense_resource(data, res, weights, biases); 29 | 30 | ResWrite: 31 | #pragma ii 1 32 | for (unsigned i_out = 0; i_out < CONFIG_T::n_out / res_T::size; i_out++) { 33 | res_T res_pack; 34 | ResPack: 35 | #pragma unroll 36 | 
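            // Added commentary (not from the upstream template): the stream carries
            // nnet::array packets of res_T::size elements, so one inference writes
            // n_out / res_T::size packets to res_stream. For example, with n_out = 32
            // and a packet size of 8, the ResWrite loop runs four times and each
            // ResPack pass fills one 8-element packet before it is written out.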
for (int i_pack = 0; i_pack < res_T::size; i_pack++) { 37 | res_pack[i_pack] = res[i_out * res_T::size + i_pack]; 38 | } 39 | 40 | res_stream.write(res_pack); 41 | } 42 | } 43 | 44 | } // namespace nnet 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_embed.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_H_ 2 | #define NNET_EMBED_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_helpers.h" 6 | 7 | namespace nnet { 8 | 9 | struct embed_config { 10 | // Internal data type definitions 11 | typedef float embeddings_t; 12 | 13 | // (Default layer sizes, overwritten form the backend 14 | static const unsigned n_in = 10; 15 | static const unsigned n_out = 16; 16 | static const unsigned vocab_size = 50; 17 | 18 | // Resource reuse info 19 | static const unsigned io_type = io_parallel; 20 | static const unsigned reuse_factor = 1; 21 | }; 22 | 23 | template 24 | void embedding(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in * CONFIG_T::n_out], 25 | const typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 26 | 27 | /* 28 | * Can store embeddings[] in a register, but a large multiiplexer 29 | * is created due to a non-constant access pattern 30 | */ 31 | 32 | InputSequence: 33 | #pragma ii CONFIG_T::reuse_factor 34 | #pragma unroll 35 | for (int j = 0; j < CONFIG_T::n_in; j++) { 36 | DenseEmbedding: 37 | #pragma unroll 38 | for (int i = 0; i < CONFIG_T::n_out; i++) { 39 | res[j * CONFIG_T::n_out + i] = embeddings[data[j].to_uint() * CONFIG_T::n_out + i]; 40 | } 41 | } 42 | } 43 | 44 | } // namespace nnet 45 | #endif 46 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_embed_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_STREAM_H_ 2 | #define NNET_EMBED_STREAM_H_ 3 | 4 | namespace nnet { 5 | 6 | template 7 | void embedding(stream &data, stream &res, 8 | const typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 9 | data_T in_data = data.read(); 10 | 11 | InputSequence: 12 | #pragma ii CONFIG_T::reuse_factor 13 | for (int j = 0; j < data_T::size; j++) { 14 | 15 | res_T res_pack; 16 | 17 | DenseEmbedding: 18 | #pragma unroll 19 | for (int i = 0; i < CONFIG_T::n_out; i++) { 20 | res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; 21 | } 22 | 23 | res.write(res_pack); 24 | } 25 | } 26 | 27 | } // namespace nnet 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_recurrent_activation.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_RECR_ACTIVATION_H_ 2 | #define NNET_RECR_ACTIVATION_H_ 3 | 4 | #include "nnet_activation.h" 5 | #include "nnet_common.h" 6 | 7 | namespace nnet { 8 | 9 | namespace activation { 10 | 11 | template class Activation { 12 | public: 13 | // ************************************************* 14 | // Blank Activation 15 | // ************************************************* 16 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {} 17 | }; 18 | 19 | template class relu : public Activation { 20 | public: 21 | // ************************************************* 22 | // Relu Activation 23 | // 
************************************************* 24 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 25 | nnet::relu(data, res); 26 | } 27 | }; 28 | 29 | template class sigmoid : public Activation { 30 | public: 31 | // ************************************************* 32 | // Sigmoid Activation 33 | // ************************************************* 34 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 35 | nnet::sigmoid(data, res); 36 | } 37 | }; 38 | 39 | template class tanh : public Activation { 40 | public: 41 | // ************************************************* 42 | // TanH Activation 43 | // ************************************************* 44 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 45 | nnet::dense_tanh(data, res); 46 | } 47 | }; 48 | 49 | } // namespace activation 50 | 51 | } // namespace nnet 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_resize.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_H_ 2 | #define NNET_IMAGE_H_ 3 | 4 | namespace nnet { 5 | 6 | struct resize_config { 7 | static const unsigned in_height = 10; 8 | static const unsigned in_width = 10; 9 | 10 | static const unsigned out_height = 10; 11 | static const unsigned out_width = 10; 12 | 13 | static const unsigned n_chan = 10; 14 | }; 15 | 16 | template 17 | void resize_nearest(data_T image[CONFIG_T::height * CONFIG_T::width * CONFIG_T::n_chan], 18 | data_T resized[CONFIG_T::new_height * CONFIG_T::new_width * CONFIG_T::n_chan]) { 19 | int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; 20 | int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; 21 | 22 | for (int i = 0; i < CONFIG_T::new_height; i++) { 23 | for (int j = 0; j < CONFIG_T::new_width; j++) { 24 | int x = ((j * x_ratio) >> 16); 25 | int y = ((i * y_ratio) >> 16); 26 | 27 | #pragma unroll 28 | for (int k = 0; k < CONFIG_T::n_chan; k++) { 29 | resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = 30 | image[(y * CONFIG_T::width * CONFIG_T::n_chan) + x * CONFIG_T::n_chan + k]; 31 | } 32 | } 33 | } 34 | } 35 | 36 | } // namespace nnet 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_resize_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_STREAM_H_ 2 | #define NNET_IMAGE_STREAM_H_ 3 | 4 | #include "nnet_common.h" 5 | 6 | namespace nnet { 7 | 8 | template void resize_nearest(stream &image, stream &resized) { 9 | assert(CONFIG_T::new_height % CONFIG_T::height == 0); 10 | assert(CONFIG_T::new_width % CONFIG_T::width == 0); 11 | 12 | constexpr unsigned ratio_height = CONFIG_T::new_height / CONFIG_T::height; 13 | constexpr unsigned ratio_width = CONFIG_T::new_width / CONFIG_T::width; 14 | 15 | ImageHeight: 16 | for (unsigned h = 0; h < CONFIG_T::height; h++) { 17 | hls_register data_T data_in_row[CONFIG_T::width]; 18 | 19 | ImageWidth: 20 | for (unsigned i = 0; i < CONFIG_T::width; i++) { 21 | hls_register data_T in_data = image.read(); 22 | 23 | ImageChan: 24 | #pragma unroll 25 | for (unsigned j = 0; j < CONFIG_T::n_chan; j++) { 26 | data_in_row[i][j] = in_data[j]; 27 | } 28 | } 29 | 30 | ResizeHeight: 31 | for (unsigned i = 0; i < ratio_height; i++) { 
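                // Added commentary (not from the upstream template): this is plain
                // nearest-neighbour upsampling by replication. The row buffered above is
                // re-emitted ratio_height times, and within each pass every pixel is
                // written ratio_width times, so an H x W input stream becomes an
                // (H*ratio_height) x (W*ratio_width) output without any index arithmetic.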
32 | 33 | ImageWidth2: 34 | for (unsigned l = 0; l < CONFIG_T::width; l++) { 35 | 36 | ResizeWidth: 37 | for (unsigned j = 0; j < ratio_width; j++) { 38 | 39 | hls_register data_T out_data; 40 | 41 | ResizeChan: 42 | #pragma unroll 43 | for (unsigned k = 0; k < CONFIG_T::n_chan; k++) { 44 | out_data[k] = data_in_row[l][k]; 45 | } 46 | 47 | resized.write(out_data); 48 | } 49 | } 50 | } 51 | } 52 | } 53 | 54 | } // namespace nnet 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_transpose_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TRANSPOSE_STREAM_H_ 2 | #define NNET_TRANSPOSE_STREAM_H_ 3 | 4 | namespace nnet { 5 | 6 | template void transpose_2d(stream &data, stream &res) { 7 | hls_register typename data_T::value_type data_array[CONFIG_T::height * CONFIG_T::width]; 8 | 9 | for (int i = 0; i < CONFIG_T::height * CONFIG_T::width / data_T::size; i++) { 10 | hls_register data_T in_data = data.read(); 11 | 12 | #pragma unroll 13 | for (int j = 0; j < data_T::size; j++) { 14 | data_array[i * data_T::size + j] = typename data_T::value_type(in_data[j]); 15 | } 16 | } 17 | 18 | for (int i = 0; i < CONFIG_T::height * CONFIG_T::width / res_T::size; i++) { 19 | hls_register res_T out_data; 20 | 21 | #pragma unroll 22 | for (int j = 0; j < res_T::size; j++) { 23 | out_data[j] = typename res_T::value_type(data_array[j * data_T::size + i]); 24 | } 25 | 26 | res.write(out_data); 27 | } 28 | } 29 | 30 | } // namespace nnet 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_types.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TYPES_H_ 2 | #define NNET_TYPES_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nnet { 9 | 10 | // Fixed-size array 11 | template struct array { 12 | typedef T value_type; 13 | static const unsigned size = N; 14 | 15 | T data[N]; 16 | 17 | array() {} 18 | 19 | array(T x) { 20 | #pragma unroll 21 | for (int i = 0; i < N; i++) { 22 | data[i] = x; 23 | } 24 | } 25 | 26 | T &operator[](size_t pos) { return data[pos]; } 27 | 28 | const T &operator[](size_t pos) const { return data[pos]; } 29 | 30 | array &operator=(const array &other) { 31 | if (&other == this) 32 | return *this; 33 | 34 | assert(N == other.size && "Array sizes must match."); 35 | 36 | #pragma unroll 37 | for (unsigned i = 0; i < N; i++) { 38 | data[i] = other[i]; 39 | } 40 | return *this; 41 | } 42 | }; 43 | 44 | /* 45 | * HLS Shift Register Implementation 46 | * To verify a shift register is used in hardware, go to report.html > Area Analysis of System 47 | * Unrolling the shift loop minimizes resource usage and latency at the same time 48 | * The shift loop should be either fully unrolled or not unrolled at all 49 | * Unrolling with a specific unroll factor or pipelining with certain ii's, can cause an irregular access pattern, which 50 | * wouldn't allow shift register usage in RTL 51 | */ 52 | template struct shift_reg { 53 | private: 54 | T data[N]; 55 | 56 | public: 57 | // Default constructor 58 | shift_reg() {} 59 | 60 | // Shift queue, insert new element and return element from the front 61 | T shift(T inp) { 62 | T out = data[N - 1]; 63 | 64 | #pragma unroll 65 | for (int i = N - 1; i > 0; i--) { 66 | data[i] = data[i - 1]; 67 | } 68 | data[0] = inp; 69 | 70 | return out; 71 | } 
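    // Added usage illustration (not part of the upstream header; "line_buffer", the
    // incoming/evicted names and the depth CONFIG_T::filt_width are placeholders):
    // a streaming layer can keep the most recent samples of a channel like this,
    // reusing the value that falls off the end of the register:
    //
    //   static nnet::shift_reg<data_T, CONFIG_T::filt_width> line_buffer;
    //   data_T evicted = line_buffer.shift(incoming);   // push newest, pop oldest
    //   data_T newest  = line_buffer.read(0);           // most recently inserted element
    //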
72 | 73 | T read(int pos) { return data[pos]; } 74 | }; 75 | 76 | } // namespace nnet 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETERS_H_ 2 | #define PARAMETERS_H_ 3 | 4 | #include "defines.h" 5 | 6 | #include "nnet_utils/nnet_helpers.h" 7 | // hls-fpga-machine-learning insert includes 8 | 9 | // hls-fpga-machine-learning insert layer-config 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/myproject_bridge.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_BRIDGE_H_ 2 | #define MYPROJECT_BRIDGE_H_ 3 | 4 | #include "firmware/myproject.h" 5 | #include "firmware/nnet_utils/nnet_helpers.h" 6 | #include 7 | #include 8 | 9 | // hls-fpga-machine-learning insert bram 10 | 11 | namespace nnet { 12 | bool trace_enabled = false; 13 | std::map *trace_outputs = NULL; 14 | size_t trace_type_size = sizeof(double); 15 | } // namespace nnet 16 | 17 | extern "C" { 18 | 19 | struct trace_data { 20 | const char *name; 21 | void *data; 22 | }; 23 | 24 | void allocate_trace_storage(size_t element_size) { 25 | nnet::trace_enabled = true; 26 | nnet::trace_outputs = new std::map; 27 | nnet::trace_type_size = element_size; 28 | // hls-fpga-machine-learning insert trace_outputs 29 | } 30 | 31 | void free_trace_storage() { 32 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 33 | void *ptr = i->second; 34 | free(ptr); 35 | } 36 | nnet::trace_outputs->clear(); 37 | delete nnet::trace_outputs; 38 | nnet::trace_outputs = NULL; 39 | nnet::trace_enabled = false; 40 | } 41 | 42 | void collect_trace_output(struct trace_data *c_trace_outputs) { 43 | int ii = 0; 44 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 45 | c_trace_outputs[ii].name = i->first.c_str(); 46 | c_trace_outputs[ii].data = i->second; 47 | ii++; 48 | } 49 | } 50 | 51 | // Wrapper of top level function for Python bridge 52 | void myproject_float( 53 | // hls-fpga-machine-learning insert header #float 54 | ) { 55 | 56 | // hls-fpga-machine-learning insert wrapper #float 57 | } 58 | 59 | void myproject_double( 60 | // hls-fpga-machine-learning insert header #double 61 | ) { 62 | // hls-fpga-machine-learning insert wrapper #double 63 | } 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /hls4ml/templates/symbolic/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CC=g++ 4 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 5 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 6 | elif [[ "$OSTYPE" == "darwin"* ]]; then 7 | CFLAGS="-O3 -fPIC -std=c++11" 8 | fi 9 | HLS_LIBS_PATH=mylibspath 10 | LDFLAGS="-Wl,--no-undefined -Wl,--no-allow-shlib-undefined -Wl,--no-as-needed -Wl,-rpath,${HLS_LIBS_PATH}/lib/csim -L ${HLS_LIBS_PATH}/lib/csim -lhlsmc++-GCC46 -lhlsm-GCC46 -fno-builtin -fno-inline -Wl,-rpath,${HLS_LIBS_PATH}/tools/fpo_v7_0 -L ${HLS_LIBS_PATH}/tools/fpo_v7_0 -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel" 11 | INCFLAGS="-Ifirmware/ap_types/" 12 | PROJECT=myproject 13 | LIB_STAMP=mystamp 14 | 15 | ${CC} ${CFLAGS} ${INCFLAGS} -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 16 | ${CC} ${CFLAGS} ${INCFLAGS} -c 
${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 17 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so ${LDFLAGS} 18 | rm -f *.o 19 | -------------------------------------------------------------------------------- /hls4ml/templates/vitis/nnet_utils/nnet_conv1d_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_CONV1D_STREAM_H_ 2 | #define NNET_CONV1D_STREAM_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_common.h" 6 | #include "nnet_conv_stream.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void conv_1d_cl(hls::stream &data, hls::stream &res, 12 | typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt], 13 | typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) { 14 | assert(CONFIG_T::implementation == conv_implementation::linebuffer && 15 | "Only \"linebuffer\" implementation is supported in Vitis HLS."); 16 | 17 | assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0); 18 | 19 | if (CONFIG_T::strategy == nnet::latency) { 20 | ReadInputWidth: 21 | for (unsigned i_iw = 0; i_iw < CONFIG_T::in_width; i_iw++) { 22 | #pragma HLS PIPELINE II=CONFIG_T::reuse_factor 23 | compute_output_buffer_1d(data.read(), res, weights, biases); 24 | } 25 | } else { 26 | ReadInputWidthSerial: 27 | for (unsigned i_iw = 0; i_iw < CONFIG_T::in_width; i_iw++) { 28 | compute_output_buffer_1d(data.read(), res, weights, biases); 29 | } 30 | } 31 | } 32 | 33 | } // namespace nnet 34 | #endif 35 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/ap_types/hls_math.h: -------------------------------------------------------------------------------- 1 | #ifndef X_HLS_MATH_H 2 | #define X_HLS_MATH_H 3 | 4 | #include 5 | #include "ap_fixed.h" 6 | 7 | namespace hls { 8 | 9 | template 10 | static T exp(const T x) { 11 | return (T) std::exp(x.to_double()); 12 | } 13 | 14 | template T sin(T x) { return (T) std::sin(x.to_double()); }; 15 | 16 | template T cos(T x) { return (T) std::cos(x.to_double()); }; 17 | 18 | template T asin(T x) { return (T) std::asin(x.to_double()); }; 19 | 20 | template T acos(T x) { return (T) std::acos(x.to_double()); }; 21 | 22 | template T atan(T x) { return (T) std::atan(x.to_double()); }; 23 | 24 | template T atan2(T x, T y) { return (T) hls::atan2(x.to_double(), y.to_double()); }; 25 | 26 | } 27 | #endif 28 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | CC=g++ 5 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 6 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 7 | elif [[ "$OSTYPE" == "darwin"* ]]; then 8 | CFLAGS="-O3 -fPIC -std=c++11" 9 | fi 10 | LDFLAGS= 11 | INCFLAGS="-Ifirmware/ap_types/" 12 | PROJECT=myproject 13 | LIB_STAMP=mystamp 14 | BASEDIR="$(cd "$(dirname "$0")" && pwd)" 15 | WEIGHTS_DIR="\"${BASEDIR}/firmware/weights\"" 16 | 17 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 18 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 19 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so 20 | rm -f *.o 21 | -------------------------------------------------------------------------------- 
/hls4ml/templates/vivado/firmware/defines.h: -------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H_ 2 | #define DEFINES_H_ 3 | 4 | #include "ap_fixed.h" 5 | #include "ap_int.h" 6 | #include "nnet_utils/nnet_types.h" 7 | #include 8 | #include 9 | 10 | // hls-fpga-machine-learning insert numbers 11 | 12 | // hls-fpga-machine-learning insert namespace-start 13 | 14 | // hls-fpga-machine-learning insert layer-precision 15 | 16 | // hls-fpga-machine-learning insert namespace-end 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/firmware/myproject.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "myproject.h" 4 | #include "parameters.h" 5 | 6 | // hls-fpga-machine-learning insert namespace-start 7 | 8 | void myproject( 9 | // hls-fpga-machine-learning insert header 10 | ) { 11 | 12 | // hls-fpga-machine-learning insert IO 13 | 14 | // hls-fpga-machine-learning insert load weights 15 | 16 | // **************************************** 17 | // NETWORK INSTANTIATION 18 | // **************************************** 19 | 20 | // hls-fpga-machine-learning insert layers 21 | } 22 | 23 | // hls-fpga-machine-learning insert namespace-end 24 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/firmware/myproject.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_H_ 2 | #define MYPROJECT_H_ 3 | 4 | #include "ap_fixed.h" 5 | #include "ap_int.h" 6 | #include "hls_stream.h" 7 | 8 | #include "defines.h" 9 | 10 | // hls-fpga-machine-learning insert namespace-start 11 | 12 | // Prototype of top level function for C-synthesis 13 | void myproject( 14 | // hls-fpga-machine-learning insert header 15 | ); 16 | 17 | // hls-fpga-machine-learning insert namespace-end 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/firmware/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETERS_H_ 2 | #define PARAMETERS_H_ 3 | 4 | #include "ap_fixed.h" 5 | #include "ap_int.h" 6 | 7 | #include "nnet_utils/nnet_code_gen.h" 8 | #include "nnet_utils/nnet_helpers.h" 9 | // hls-fpga-machine-learning insert includes 10 | 11 | // hls-fpga-machine-learning insert weights 12 | 13 | // hls-fpga-machine-learning insert namespace-start 14 | 15 | // hls-fpga-machine-learning insert layer-config 16 | 17 | // hls-fpga-machine-learning insert namespace-end 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/myproject_bridge.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_BRIDGE_H_ 2 | #define MYPROJECT_BRIDGE_H_ 3 | 4 | #include "firmware/myproject.h" 5 | #include "firmware/nnet_utils/nnet_helpers.h" 6 | #include 7 | #include 8 | 9 | // hls-fpga-machine-learning insert bram 10 | 11 | namespace nnet { 12 | bool trace_enabled = false; 13 | std::map *trace_outputs = NULL; 14 | size_t trace_type_size = sizeof(double); 15 | } // namespace nnet 16 | 17 | extern "C" { 18 | 19 | struct trace_data { 20 | const char *name; 21 | void *data; 22 | }; 23 | 24 | void allocate_trace_storage(size_t element_size) { 25 | nnet::trace_enabled = true; 26 | nnet::trace_outputs = new std::map; 27 | 
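    // Added commentary (not from the upstream template): per-layer trace buffers are
    // registered into this map by generated code at the "insert trace_outputs" marker
    // below, keyed by layer name. collect_trace_output() later hands the raw pointers
    // back through the C interface so the Python side can read intermediate layer outputs.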
nnet::trace_type_size = element_size; 28 | // hls-fpga-machine-learning insert trace_outputs 29 | } 30 | 31 | void free_trace_storage() { 32 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 33 | void *ptr = i->second; 34 | free(ptr); 35 | } 36 | nnet::trace_outputs->clear(); 37 | delete nnet::trace_outputs; 38 | nnet::trace_outputs = NULL; 39 | nnet::trace_enabled = false; 40 | } 41 | 42 | void collect_trace_output(struct trace_data *c_trace_outputs) { 43 | int ii = 0; 44 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 45 | c_trace_outputs[ii].name = i->first.c_str(); 46 | c_trace_outputs[ii].data = i->second; 47 | ii++; 48 | } 49 | } 50 | 51 | // Wrapper of top level function for Python bridge 52 | void myproject_float( 53 | // hls-fpga-machine-learning insert header #float 54 | ) { 55 | // hls-fpga-machine-learning insert namespace 56 | 57 | // hls-fpga-machine-learning insert wrapper #float 58 | } 59 | 60 | void myproject_double( 61 | // hls-fpga-machine-learning insert header #double 62 | ) { 63 | // hls-fpga-machine-learning insert namespace 64 | 65 | // hls-fpga-machine-learning insert wrapper #double 66 | } 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_code_gen.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_INSTR_GEN_H_ 2 | #define NNET_INSTR_GEN_H_ 3 | 4 | #include "nnet_conv1d_latency.h" 5 | #include "nnet_helpers.h" 6 | 7 | #include "hls_stream.h" 8 | #include "nnet_common.h" 9 | #include "nnet_function_stubs.h" 10 | #include "nnet_mult.h" 11 | 12 | namespace nnet { 13 | 14 | template class PointwiseConv1D { 15 | public: 16 | static void pointwise_conv(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], 17 | res_T res[CONFIG_T::out_width * CONFIG_T::n_filt], 18 | typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt], 19 | typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) { 20 | // To be implemented in subclasses 21 | } 22 | }; 23 | 24 | // hls4ml insert code 25 | 26 | } // namespace nnet 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_embed.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_H_ 2 | #define NNET_EMBED_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_helpers.h" 6 | 7 | namespace nnet { 8 | 9 | struct embed_config { 10 | // Internal data type definitions 11 | typedef float embeddings_t; 12 | 13 | // Layer Sizes 14 | static const unsigned n_in = 10; 15 | static const unsigned n_out = 16; 16 | static const unsigned vocab_size = 50; 17 | 18 | // Resource reuse info 19 | static const unsigned io_type = io_parallel; 20 | static const unsigned reuse_factor = 1; 21 | }; 22 | 23 | template 24 | void embedding(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in * CONFIG_T::n_out], 25 | typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 26 | 27 | #pragma HLS PIPELINE II=CONFIG_T::reuse_factor 28 | // This can save a few cycles, but it will create a large multiplexer due to 29 | // non-constant access pattern, so let's leave it out 30 | //#pragma HLS ARRAY_PARTITION variable=embeddings complete 31 | 32 | InputSequence: 33 | for (int j = 0; j < CONFIG_T::n_in; j++) { 34 | #pragma HLS UNROLL 35 | 
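        // Added worked example (not from the upstream template): the lookup is a plain
        // row read from a flattened [vocab_size x n_out] table. With the default
        // n_out = 16 above, an input token data[j] = 3 reads embeddings[48]..embeddings[63]
        // and writes them to res[j*16]..res[j*16 + 15].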
DenseEmbedding: 36 | for (int i = 0; i < CONFIG_T::n_out; i++) { 37 | #pragma HLS UNROLL 38 | res[j * CONFIG_T::n_out + i] = embeddings[data[j] * CONFIG_T::n_out + i]; 39 | } 40 | } 41 | } 42 | 43 | } // namespace nnet 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_embed_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_STREAM_H_ 2 | #define NNET_EMBED_STREAM_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_common.h" 6 | #include "nnet_helpers.h" 7 | 8 | namespace nnet { 9 | 10 | template <class data_T, class res_T, typename CONFIG_T> 11 | void embedding(hls::stream<data_T> &data, hls::stream<res_T> &res, 12 | typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 13 | data_T in_data = data.read(); 14 | 15 | InputSequence: 16 | for (int j = 0; j < data_T::size; j++) { 17 | #pragma HLS PIPELINE II=CONFIG_T::reuse_factor 18 | 19 | res_T res_pack; 20 | PRAGMA_DATA_PACK(res_pack) 21 | 22 | DenseEmbedding: 23 | for (int i = 0; i < CONFIG_T::n_out; i++) { 24 | #pragma HLS UNROLL 25 | res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; 26 | } 27 | res.write(res_pack); 28 | } 29 | } 30 | 31 | } // namespace nnet 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_image.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_H_ 2 | #define NNET_IMAGE_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_common.h" 6 | #include <math.h> 7 | 8 | namespace nnet { 9 | 10 | struct resize_config { 11 | static const unsigned height = 10; 12 | static const unsigned width = 10; 13 | static const unsigned n_chan = 10; 14 | static const unsigned new_height = 10; 15 | static const unsigned new_width = 10; 16 | }; 17 | 18 | template <class data_T, typename CONFIG_T> 19 | void resize_nearest(data_T image[CONFIG_T::height * CONFIG_T::width * CONFIG_T::n_chan], 20 | data_T resized[CONFIG_T::new_height * CONFIG_T::new_width * CONFIG_T::n_chan]) { 21 | int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; 22 | int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; 23 | int x2, y2; 24 | 25 | #pragma HLS PIPELINE 26 | 27 | for (int i = 0; i < CONFIG_T::new_height; i++) { 28 | for (int j = 0; j < CONFIG_T::new_width; j++) { 29 | x2 = ((j * x_ratio) >> 16); 30 | y2 = ((i * y_ratio) >> 16); 31 | for (int k = 0; k < CONFIG_T::n_chan; k++) { 32 | resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = 33 | image[(y2 * CONFIG_T::width * CONFIG_T::n_chan) + x2 * CONFIG_T::n_chan + k]; 34 | } 35 | } 36 | } 37 | } 38 | 39 | } // namespace nnet 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_image_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_STREAM_H_ 2 | #define NNET_IMAGE_STREAM_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_common.h" 6 | 7 | namespace nnet { 8 | 9 | template <class data_T, typename CONFIG_T> void resize_nearest(hls::stream<data_T> &image, hls::stream<data_T> &resized) { 10 | assert(CONFIG_T::new_height % CONFIG_T::height == 0); 11 | assert(CONFIG_T::new_width % CONFIG_T::width == 0); 12 | constexpr unsigned ratio_height = CONFIG_T::new_height / CONFIG_T::height; 13 | constexpr unsigned ratio_width = CONFIG_T::new_width / CONFIG_T::width; 14 | 15 | ImageHeight: 16 | for (unsigned h = 0; h
< CONFIG_T::height; h++) { 17 | #pragma HLS PIPELINE 18 | 19 | data_T data_in_row[CONFIG_T::width]; 20 | 21 | ImageWidth: 22 | for (unsigned i = 0; i < CONFIG_T::width; i++) { 23 | #pragma HLS UNROLL 24 | 25 | data_T in_data = image.read(); 26 | 27 | ImageChan: 28 | for (unsigned j = 0; j < CONFIG_T::n_chan; j++) { 29 | #pragma HLS UNROLL 30 | 31 | data_in_row[i][j] = in_data[j]; 32 | } 33 | } 34 | 35 | ResizeHeight: 36 | for (unsigned i = 0; i < ratio_height; i++) { 37 | #pragma HLS UNROLL 38 | 39 | ImageWidth2: 40 | for (unsigned l = 0; l < CONFIG_T::width; l++) { 41 | #pragma HLS UNROLL 42 | 43 | ResizeWidth: 44 | for (unsigned j = 0; j < ratio_width; j++) { 45 | #pragma HLS UNROLL 46 | 47 | data_T out_data; 48 | PRAGMA_DATA_PACK(out_data) 49 | 50 | ResizeChan: 51 | for (unsigned k = 0; k < CONFIG_T::n_chan; k++) { 52 | #pragma HLS UNROLL 53 | 54 | out_data[k] = data_in_row[l][k]; 55 | } 56 | 57 | resized.write(out_data); 58 | } 59 | } 60 | } 61 | } 62 | } 63 | 64 | } // namespace nnet 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_recr_activations.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_RECR_ACTIVATION_H_ 2 | #define NNET_RECR_ACTIVATION_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_activation.h" 6 | #include "nnet_common.h" 7 | #include "nnet_helpers.h" 8 | #include <math.h> 9 | 10 | namespace nnet { 11 | 12 | namespace activation { 13 | 14 | template <class data_T, class res_T, typename CONFIG_T> class Activation { 15 | public: 16 | // ************************************************* 17 | // Blank Activation 18 | // ************************************************* 19 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {} // Nothing to do here 20 | }; 21 | 22 | template <class data_T, class res_T, typename CONFIG_T> class relu : public Activation<data_T, res_T, CONFIG_T> { 23 | public: 24 | // ************************************************* 25 | // Relu Activation 26 | // ************************************************* 27 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 28 | nnet::relu<data_T, res_T, typename CONFIG_T::ACT_CONFIG_T>(data, res); 29 | } 30 | }; 31 | 32 | template <class data_T, class res_T, typename CONFIG_T> class sigmoid : public Activation<data_T, res_T, CONFIG_T> { 33 | public: 34 | // ************************************************* 35 | // Sigmoid Activation 36 | // ************************************************* 37 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 38 | nnet::sigmoid<data_T, res_T, typename CONFIG_T::ACT_CONFIG_T>(data, res); 39 | } 40 | }; 41 | 42 | template <class data_T, class res_T, typename CONFIG_T> class tanh : public Activation<data_T, res_T, CONFIG_T> { 43 | public: 44 | // ************************************************* 45 | // TanH Activation 46 | // ************************************************* 47 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 48 | nnet::tanh<data_T, res_T, typename CONFIG_T::ACT_CONFIG_T>(data, res); 49 | } 50 | }; 51 | 52 | } // namespace activation 53 | 54 | } // namespace nnet 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_transpose.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_PERMUTE_H_ 2 | #define NNET_PERMUTE_H_ 3 | 4 | namespace nnet { 5 | 6 | struct transpose_config { 7 | static const unsigned dims; 8 | static const unsigned N; 9 | // vivado/vitis hls can't index constexpr array for some reason 10 | // and vivado hls don't like template recursion either (vitis is fine) 11 | // thus this appears to be the only workaround (or overkill it with codegen) 12 | static
const unsigned *const from_shape; 13 | static const unsigned *const to_shape; 14 | static const unsigned *const perm; 15 | static const unsigned *const perm_strides; 16 | }; 17 | 18 | template <typename CONFIG_T> unsigned transfer_idx(int index) { 19 | // Given output idx in c-order flat array, return input idx 20 | int idx = 0; 21 | for (int i = CONFIG_T::dims - 1; i >= 0; i--) { 22 | idx += (index % CONFIG_T::to_shape[i]) * CONFIG_T::perm_strides[i]; 23 | index /= CONFIG_T::to_shape[i]; 24 | } 25 | return idx; 26 | } 27 | 28 | template <typename data_T, typename res_T, typename CONFIG_T> 29 | void transpose(const data_T data[CONFIG_T::N], res_T res[CONFIG_T::N]) { 30 | for (int i = 0; i < CONFIG_T::N; i++) { 31 | #pragma HLS UNROLL 32 | int idx = transfer_idx<CONFIG_T>(i); 33 | res[i] = data[idx]; 34 | } 35 | } 36 | 37 | } // namespace nnet 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_types.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TYPES_H_ 2 | #define NNET_TYPES_H_ 3 | 4 | #include <assert.h> 5 | #include <cstdio> 6 | #include <cstring> 7 | 8 | namespace nnet { 9 | 10 | // Fixed-size array 11 | template <class T, unsigned N> struct array { 12 | typedef T value_type; 13 | static const unsigned size = N; 14 | 15 | T data[N]; 16 | 17 | T &operator[](size_t pos) { return data[pos]; } 18 | 19 | const T &operator[](size_t pos) const { return data[pos]; } 20 | 21 | array &operator=(const array &other) { 22 | if (&other == this) 23 | return *this; 24 | 25 | assert(N == other.size && "Array sizes must match."); 26 | 27 | for (unsigned i = 0; i < N; i++) { 28 | #pragma HLS UNROLL 29 | data[i] = other[i]; 30 | } 31 | return *this; 32 | } 33 | }; 34 | 35 | // Generic lookup-table implementation, for use in approximations of math functions 36 | template <class T, unsigned N, T (*func)(T)> class lookup_table { 37 | public: 38 | lookup_table(T from, T to) : range_start(from), range_end(to), base_div(ap_uint<16>(N) / T(to - from)) { 39 | T step = (range_end - range_start) / ap_uint<16>(N); 40 | for (size_t i = 0; i < N; i++) { 41 | T num = range_start + ap_uint<16>(i) * step; 42 | T sample = func(num); 43 | samples[i] = sample; 44 | } 45 | } 46 | 47 | T operator()(T n) const { 48 | int index = (n - range_start) * base_div; 49 | if (index < 0) 50 | index = 0; 51 | else if (index > N - 1) 52 | index = N - 1; 53 | return samples[index]; 54 | } 55 | 56 | private: 57 | T samples[N]; 58 | const T range_start, range_end; 59 | ap_fixed<20, 16> base_div; 60 | }; 61 | 62 | } // namespace nnet 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/vivado_synth.tcl: -------------------------------------------------------------------------------- 1 | set tcldir [file dirname [info script]] 2 | source [file join $tcldir project.tcl] 3 | 4 | add_files ${project_name}_prj/solution1/syn/verilog 5 | synth_design -top ${project_name} -part $part 6 | opt_design -retarget -propconst -sweep -bram_power_opt -shift_register_opt 7 | report_utilization -file vivado_synth.rpt 8 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado_accelerator/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CC=g++ 4 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 5 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 6 | elif [[ "$OSTYPE" == "darwin"* ]]; then 7 | CFLAGS="-O3 -fPIC -std=c++11" 8 | fi 9 | INCFLAGS="-Ifirmware/ap_types/" 10 | PROJECT=myproject 11 |
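# Note: "myproject" and "mystamp" above/below are placeholders; when hls4ml copies this script into a
# generated project it substitutes the actual project name and a per-build library stamp.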
LIB_STAMP=mystamp 12 | BASEDIR="$(cd "$(dirname "$0")" && pwd)" 13 | WEIGHTS_DIR="\"${BASEDIR}/firmware/weights\"" 14 | 15 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 16 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c firmware/${PROJECT}_axi.cpp -o ${PROJECT}_axi.o 17 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 18 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_axi.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so 19 | rm -f *.o 20 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado_accelerator/myproject_axi.cpp: -------------------------------------------------------------------------------- 1 | // hls-fpga-machine-learning insert include 2 | 3 | void myproject_axi(input_axi_t in[N_IN], output_axi_t out[N_OUT]) { 4 | 5 | // hls-fpga-machine-learning insert interface 6 | 7 | // hls-fpga-machine-learning insert local vars 8 | 9 | // hls-fpga-machine-learning insert enqueue 10 | 11 | // hls-fpga-machine-learning insert call 12 | 13 | // hls-fpga-machine-learning insert dequeue 14 | } 15 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado_accelerator/myproject_axi.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_AXI_H_ 2 | #define MYPROJECT_AXI_H_ 3 | 4 | #include 5 | // hls-fpga-machine-learning insert include 6 | 7 | // hls-fpga-machine-learning insert definitions 8 | 9 | void myproject_axi(input_axi_t in[N_IN], output_axi_t out[N_OUT]); 10 | #endif 11 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado_accelerator/pynq-z2/tcl_scripts/axi_lite_design.tcl: -------------------------------------------------------------------------------- 1 | set tcldir [file dirname [info script]] 2 | source [file join $tcldir project.tcl] 3 | 4 | create_project project_1 ${project_name}_vivado_accelerator -part xc7z020clg400-1 -force 5 | 6 | set_property board_part tul.com.tw:pynq-z2:part0:1.0 [current_project] 7 | set_property ip_repo_paths ${project_name}_prj [current_project] 8 | update_ip_catalog 9 | 10 | # Create Block Designer design 11 | create_bd_design "design_1" 12 | create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 processing_system7_0 13 | apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config {make_external "FIXED_IO, DDR" apply_board_preset "1" Master "Disable" Slave "Disable" } [get_bd_cells processing_system7_0] 14 | create_bd_cell -type ip -vlnv xilinx.com:hls:${project_name}_axi:1.0 ${project_name}_axi_0 15 | apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/processing_system7_0/M_AXI_GP0} Slave {/${project_name}_axi_0/s_axi_AXILiteS} ddr_seg {Auto} intc_ip {New AXI Interconnect} master_apm {0}} [get_bd_intf_pins ${project_name}_axi_0/s_axi_AXILiteS] 16 | 17 | make_wrapper -files [get_files ./${project_name}_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/design_1.bd] -top 18 | add_files -norecurse ./${project_name}_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/hdl/design_1_wrapper.v 19 | 20 | reset_run impl_1 21 | reset_run synth_1 22 | launch_runs impl_1 -to_step write_bitstream -jobs 6 23 | wait_on_run -timeout 360 impl_1 24 | 25 | open_run impl_1 26 | report_utilization -file 
util.rpt -hierarchical -hierarchical_percentages 27 | -------------------------------------------------------------------------------- /hls4ml/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.utils.config import config_from_keras_model, config_from_onnx_model, config_from_pytorch_model # noqa: F401 2 | from hls4ml.utils.example_models import fetch_example_list, fetch_example_model # noqa: F401 3 | from hls4ml.utils.plot import plot_model # noqa: F401 4 | -------------------------------------------------------------------------------- /hls4ml/utils/dependency.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from functools import wraps 3 | from importlib.metadata import metadata 4 | from inspect import ismethod 5 | 6 | extra_requires: dict[str, list[str]] = {} 7 | subpackage = None 8 | for k, v in metadata('hls4ml')._headers: # type: ignore 9 | if k != 'Requires-Dist': 10 | continue 11 | if '; extra == ' not in v: 12 | continue 13 | 14 | req, pkg = v.split('; extra == ') 15 | pkg = pkg.strip('"') 16 | 17 | extra_requires.setdefault(pkg, []).append(req) 18 | 19 | 20 | def requires(pkg: str): 21 | """ 22 | Mark a function or method as requiring a package to be installed. 23 | 24 | Args: 25 | pkg (str): The package to require. 'name' requires hls4ml[name] to be installed. 26 | '_name' requires name to be installed. 27 | """ 28 | 29 | def deco(f): 30 | if ismethod(f): 31 | qualifier = f'Method {f.__self__.__class__.__name__}.{f.__name__}' 32 | else: 33 | qualifier = f'Function {f.__name__}' 34 | 35 | if not pkg.startswith('_'): 36 | reqs = ', '.join(extra_requires[pkg]) 37 | msg = f'{qualifier} requires {reqs}, but package {{ename}} is missing' 38 | 'Please consider install it with `pip install hls4ml[{pkg}]` for full functionality with {pkg}.' 39 | else: 40 | msg = f'{qualifier} requires {pkg[1:]}, but package {{ename}} is missing.' 41 | 'Consider install it with `pip install {pkg}`.' 42 | 43 | @wraps(f) 44 | def inner(*args, **kwargs): 45 | try: 46 | return f(*args, **kwargs) 47 | except ImportError as e: 48 | print(msg.format(ename=e.name), file=sys.stderr) 49 | raise e 50 | 51 | return inner 52 | 53 | return deco 54 | -------------------------------------------------------------------------------- /hls4ml/utils/string_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def convert_to_snake_case(pascal_case): 5 | """Convert string in PascalCase to snake_case 6 | 7 | Args: 8 | pascal_case (str): string to convert 9 | 10 | Returns: 11 | str: converted string 12 | """ 13 | camel_case = re.sub(r'(? 
bool: 15 | """ 16 | Custom Tracer class for hls4ml to define Brevitas modules and custom modules as leaf modules so they are not traced 17 | through by torch.FX 18 | """ 19 | import torch 20 | 21 | return ( 22 | isinstance(m, HLS4MLModule) 23 | or m.__module__.startswith('torch.nn') 24 | or m.__module__.startswith('torch.ao.nn') 25 | or m.__module__.startswith('brevitas.nn') 26 | ) and not isinstance(m, torch.nn.Sequential) 27 | -------------------------------------------------------------------------------- /hls4ml/utils/transpose_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | 5 | 6 | def transpose_config_gen(name: str, shape: tuple[int, ...], perm: tuple[int, ...]): 7 | """ 8 | Generate new shape and perm_strides for a permute operation. Operates by mapping the output index 9 | to input input index by: 10 | - unravel the output index 11 | - map each dimension to the corresponding stride in the input tensor, sum 12 | The operation can be expressed as: 13 | 14 | new_shape = tuple(shape[i] for i in perm) 15 | strides = np.cumprod((shapes[1:] + (1,))[::-1])[::-1] 16 | perm_strides = [strides[i] for i in perm] 17 | out[index] = inp[np.dot(np.unravel_index(index, new_shape), perm_strides)] 18 | 19 | Args: 20 | name (str): The name of the configuration. 21 | shape (tuple[int, ...]): The shape of the input tensor. 22 | perm (tuple[int, ...]): The permutation of the dimensions. 23 | 24 | Returns: 25 | dict: Dictionary containing the configuration. 26 | """ 27 | new_shape = tuple(shape[i] for i in perm) 28 | strides = np.cumprod((shape[1:] + (1,))[::-1])[::-1] 29 | perm_strides = tuple(int(strides[i]) for i in perm) 30 | return dict( 31 | dims=len(shape), 32 | N=math.prod(shape), 33 | from_shape=', '.join(str(x) for x in shape), 34 | perm=', '.join(str(x) for x in perm), 35 | perm_strides=', '.join(str(x) for x in perm_strides), 36 | to_shape=', '.join(str(x) for x in new_shape), 37 | config_name=name, 38 | ) 39 | -------------------------------------------------------------------------------- /hls4ml/writer/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.writer.catapult_writer import CatapultWriter 2 | from hls4ml.writer.oneapi_writer import OneAPIWriter 3 | from hls4ml.writer.quartus_writer import QuartusWriter 4 | from hls4ml.writer.symbolic_writer import SymbolicExpressionWriter 5 | from hls4ml.writer.vitis_writer import VitisWriter 6 | from hls4ml.writer.vivado_accelerator_writer import VivadoAcceleratorWriter 7 | from hls4ml.writer.vivado_writer import VivadoWriter 8 | from hls4ml.writer.writers import Writer, get_writer, register_writer # noqa: F401 9 | 10 | register_writer('Vivado', VivadoWriter) 11 | register_writer('VivadoAccelerator', VivadoAcceleratorWriter) 12 | register_writer('Vitis', VitisWriter) 13 | register_writer('Quartus', QuartusWriter) 14 | register_writer('oneAPI', OneAPIWriter) 15 | register_writer('Catapult', CatapultWriter) 16 | register_writer('SymbolicExpression', SymbolicExpressionWriter) 17 | -------------------------------------------------------------------------------- /hls4ml/writer/vitis_writer.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from pathlib import Path 4 | from shutil import copy 5 | 6 | from hls4ml.writer.vivado_writer import VivadoWriter 7 | 8 | 9 | class VitisWriter(VivadoWriter): 10 | def __init__(self): 11 | 
super().__init__() 12 | 13 | def write_nnet_utils_overrides(self, model): 14 | ################### 15 | # nnet_utils 16 | ################### 17 | 18 | filedir = os.path.dirname(os.path.abspath(__file__)) 19 | 20 | srcpath = os.path.join(filedir, '../templates/vitis/nnet_utils/') 21 | dstpath = f'{model.config.get_output_dir()}/firmware/nnet_utils/' 22 | 23 | headers = [os.path.basename(h) for h in glob.glob(srcpath + '*.h')] 24 | 25 | for h in headers: 26 | copy(srcpath + h, dstpath + h) 27 | 28 | def write_board_script_override(self, model): 29 | ''' 30 | Write the tcl scripts and kernel sources to create a Vitis IPI 31 | ''' 32 | 33 | ################### 34 | # project.tcl 35 | ################### 36 | 37 | prj_tcl_file = Path(f'{model.config.get_output_dir()}/project.tcl') 38 | with open(prj_tcl_file) as f: 39 | prj_tcl_contents = f.readlines() 40 | for line_num, line in enumerate(prj_tcl_contents): 41 | if 'set backend' in line: 42 | prj_tcl_contents[line_num] = 'set backend "vitis"\n' 43 | if 'set clock_uncertainty' in line: 44 | prj_tcl_contents[line_num] = 'set clock_uncertainty {}\n'.format( 45 | model.config.get_config_value('ClockUncertainty', '27%') 46 | ) 47 | 48 | with open(prj_tcl_file, 'w') as f: 49 | f.writelines(prj_tcl_contents) 50 | 51 | def write_hls(self, model): 52 | """ 53 | Write the HLS project. Calls the steps from VivadoWriter, adapted for Vitis 54 | """ 55 | super().write_hls(model) 56 | self.write_nnet_utils_overrides(model) 57 | self.write_board_script_override(model) 58 | self.write_tar(model) 59 | -------------------------------------------------------------------------------- /hls4ml/writer/writers.py: -------------------------------------------------------------------------------- 1 | class Writer: 2 | def __init__(self): 3 | pass 4 | 5 | def write_hls(self, model): 6 | raise NotImplementedError 7 | 8 | 9 | writer_map = {} 10 | 11 | 12 | def register_writer(name, writer_cls): 13 | if name in writer_map: 14 | raise Exception(f'Writer {name} already registered') 15 | 16 | writer_map[name] = writer_cls 17 | 18 | 19 | def get_writer(name): 20 | return writer_map[name]() 21 | -------------------------------------------------------------------------------- /test/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | failed=0 4 | basedir=hls_prj 5 | all=0 6 | 7 | function print_usage { 8 | echo "Usage: `basename $0` [OPTION]" 9 | echo "" 10 | echo "Cleans up the projects in provided directory." 11 | echo "" 12 | echo "Options are:" 13 | echo " -d DIR" 14 | echo " Base directory where projects are located." 15 | echo " -a" 16 | echo " Remove all projects, even the failed ones." 17 | echo " -h" 18 | echo " Prints this help message." 19 | } 20 | 21 | while getopts ":d:ah" opt; do 22 | case "$opt" in 23 | d) basedir=$OPTARG 24 | ;; 25 | a) all=1 26 | ;; 27 | h) 28 | print_usage 29 | exit 30 | ;; 31 | esac 32 | done 33 | 34 | if [ ! -d "${basedir}" ]; then 35 | echo "Specified directory '${basedir}' does not exist." 36 | exit 1 37 | fi 38 | 39 | if [ "${all}" -eq 1 ]; then 40 | rm -rf "${basedir}" 41 | exit $? 42 | fi 43 | 44 | #rundir=`pwd` 45 | 46 | cd "${basedir}" 47 | 48 | rm -f *.tar.gz 49 | 50 | # Delete 51 | for dir in */ ; do 52 | if [ ! -f "${dir}BUILD_FAILED" ]; then 53 | rm -rf "${dir}" 54 | if [ $? -eq 0 ]; then 55 | echo "Removed ${dir%/}." 
56 | else 57 | failed=1 58 | fi 59 | fi 60 | done 61 | 62 | #cd "${rundir}" 63 | 64 | exit ${failed} 65 | -------------------------------------------------------------------------------- /test/gather-reports.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | failed=0 4 | basedir=hls_prj 5 | full=0 6 | brief=0 7 | 8 | function print_usage { 9 | echo "Usage: `basename $0` [OPTION]" 10 | echo "" 11 | echo "Prints synthesis reports found in projects in the provided directory." 12 | echo "" 13 | echo "Options are:" 14 | echo " -d DIR" 15 | echo " Base directory where projects are located." 16 | echo " -b" 17 | echo " Print only summary of performance and utilization estimates." 18 | echo " -f" 19 | echo " Print whole report." 20 | echo " -h" 21 | echo " Prints this help message." 22 | } 23 | 24 | while getopts ":d:bfh" opt; do 25 | case "$opt" in 26 | d) basedir=$OPTARG 27 | ;; 28 | b) brief=1 29 | ;; 30 | f) full=1 31 | ;; 32 | h) 33 | print_usage 34 | exit 35 | ;; 36 | esac 37 | done 38 | 39 | if [ "${brief}" -eq "${full}" ]; then 40 | echo "Argument -b or -f must be provided." 41 | exit 1 42 | fi 43 | 44 | if [ ! -d "${basedir}" ]; then 45 | echo "Specified directory '${basedir}' does not exist." 46 | exit 1 47 | fi 48 | 49 | #rundir=`pwd` 50 | 51 | cd "${basedir}" 52 | 53 | for dir in */ ; do 54 | cd ${dir} 55 | prjdir="myproject_prj" 56 | prjname="myproject" 57 | for subdir in *_prj/ ; do 58 | prjdir=${subdir} 59 | prjname="${prjdir%_prj/}" 60 | done 61 | prjdir="${prjdir}solution1/syn/report" 62 | if [ -d "$prjdir" ]; then 63 | echo "Synthesis report for ${dir%/}" 64 | if [ "${brief}" -eq 1 ]; then 65 | sed "/* DSP48/Q" "${prjdir}/${prjname}_csynth.rpt" 66 | else 67 | cat "${prjdir}/${prjname}_csynth.rpt" 68 | fi 69 | else 70 | echo "No report files found in ${dir}." 71 | failed=1 72 | fi 73 | cd .. 74 | done 75 | 76 | #cd "${rundir}" 77 | 78 | exit ${failed} 79 | -------------------------------------------------------------------------------- /test/hls4ml-keras-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIVADO_INSTALL_DIR=/opt/Xilinx 4 | VIVADO_VERSION=2020.1 5 | 6 | # If running in docker image we would first need to activate the proper conda environment 7 | #. activate hls4ml-py36 8 | 9 | # Convert models in keras-models.txt 10 | ./convert-keras-models.sh -x -f keras-models.txt 11 | 12 | # Alternatively, keras-to-hls script can be called, with the model name(s) specified, i.e.: 13 | #./keras-to-hls.sh KERAS_1layer KERAS_conv1d_small 14 | ./keras-to-hls.sh -b alveo-u250 -B VivadoAccelerator -x xcu250-figd2104-2L-e KERAS_3layer 15 | ./keras-to-hls.sh -b pynq-z2 -B VivadoAccelerator -x xc7z020clg400-1 KERAS_3layer 16 | # KERAS_3layer b:pynq-z2 B:VivadoAccelerator x:xc7z020clg400-1 s:Resource 17 | 18 | # Build the projects generated by keras-to-hls script. 19 | # Remove parameter -s to disable synthesis. -p controls the number of parallel tasks 20 | ./build-prj.sh -i ${VIVADO_INSTALL_DIR} -v ${VIVADO_VERSION} -c -s -p 2 21 | 22 | # Go through the generated reports and print out basic information. 23 | # Reports are available if synthesis is enabled. 
24 | ./gather-reports.sh -b 25 | 26 | # Clean-up at the end 27 | #./cleanup.sh 28 | -------------------------------------------------------------------------------- /test/hls4ml-onnx-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIVADO_INSTALL_DIR=/opt/Xilinx 4 | VIVADO_VERSION=2017.2 5 | 6 | # If running in docker image we would first need to activate the proper conda environment 7 | #. activate hls4ml-py36 8 | 9 | # Convert models in onnx-models.txt 10 | ./convert-onnx-models.sh -x -p 3 -f onnx-models.txt 11 | 12 | # Same for Python 2 13 | #. activate hls4ml-py27 14 | ./convert-onnx-models.sh -x -p 2 -f onnx-models.txt 15 | 16 | # Alternatively, onnx-to-hls script can be called, with the model name(s) specified, i.e.: 17 | #./onnx-to-hls.sh -p 3 three_layer_keras conv1d_small_keras 18 | #./onnx-to-hls.sh -p 2 three_layer_keras conv1d_small_keras 19 | 20 | # Check if there is any difference between files generated by Python 2 and Python 3 21 | # Not needed if there were no changes in onnx-to-hls.py or hls-writer.py 22 | ./py-diff.sh -r 2 23 | 24 | # Build the projects generated by onnx-to-hls script. 25 | # Remove parameter -s to disable synthesis. -p controls the number of parallel tasks 26 | ./build-prj.sh -i ${VIVADO_INSTALL_DIR} -v ${VIVADO_VERSION} -c -s -p 2 27 | 28 | # Go through the generated reports and print out basic information. 29 | # Reports are available if synthesis is enabled. 30 | ./gather-reports.sh -b 31 | 32 | # Clean-up at the end 33 | #./cleanup.sh 34 | -------------------------------------------------------------------------------- /test/hls4ml-pytorch-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIVADO_INSTALL_DIR=/opt/Xilinx 4 | VIVADO_VERSION=2017.2 5 | 6 | # If running in docker image we would first need to activate the proper conda environment 7 | #. activate hls4ml-py36 8 | 9 | # Convert models in pytorch-models.txt 10 | ./convert-pytorch-models.sh -x -p 3 -f pytorch-models.txt 11 | 12 | # Same for Python 2 13 | #. activate hls4ml-py27 14 | ./convert-pytorch-models.sh -x -p 2 -f pytorch-models.txt 15 | 16 | # Alternatively, pytorch-to-hls script can be called, with the model name(s) specified, i.e.: 17 | #./pytorch-to-hls.sh -p 3 two_layer_model three_layer_model 18 | #./pytorch-to-hls.sh -p 2 two_layer_model three_layer_model 19 | 20 | # Check if there is any difference between files generated by Python 2 and Python 3 21 | # Not needed if there were no changes in pytorch-to-hls.py or hls-writer.py 22 | ./py-diff.sh -r 2 23 | 24 | # Build the projects generated by pytorch-to-hls script. 25 | # Remove parameter -s to disable synthesis. -p controls the number of parallel tasks 26 | ./build-prj.sh -i ${VIVADO_INSTALL_DIR} -v ${VIVADO_VERSION} -c -s -p 2 27 | 28 | # Go through the generated reports and print out basic information. 29 | # Reports are available if synthesis is enabled. 
30 | ./gather-reports.sh -b 31 | 32 | # Clean-up at the end 33 | #./cleanup.sh 34 | -------------------------------------------------------------------------------- /test/keras-models.txt: -------------------------------------------------------------------------------- 1 | # Keras models from examples directory that will be used for testing 2 | # 3 | # Synthax: 4 | # MODEL_NAME[:WEIGHTS_FILE] [x:PART] [b:BOARD] [B:BACKEND] [c:CLOCK_PERIOD] [io:s] [r:REUSE_FACTOR] [t:AP_TYPE] [s:STRATEGY] [y:CONFIG_FILE] 5 | # where 6 | # MODEL_NAME - Name of the file containing json model (without ".json") 7 | # WEIGHTS_FILE - Name of the HDF5 file containing model weights (without ".h5") 8 | # x:PART - FPGA part number to use 9 | # b:BOARD - name of one board defined in supported_board.json file 10 | # B:BACKEND - name of the backend to be used (Vivado, VivadoAccelerator) 11 | # c:CLOCK_PERIOD - Clock period 12 | # io:s - User streaming I/O, otherwise use parallel I/O 13 | # r:REUSE_FACTOR - Reuse factor 14 | # s:STRATEGY - Latency-optimized or Resource-optimized strategy 15 | # t:AP_TYPE - Default precision 16 | # y:CONFIG_FILE - YAML config file to copy HLSConfig from 17 | # 18 | # Lines starting with "#" are ignored. 19 | # 20 | 21 | KERAS_1layer 22 | KERAS_3layer 23 | #KERAS_3layer:KERAS_3layer_70pruned_retrained_weights 24 | #KERAS_conv1d 25 | #KERAS_conv1d_small 26 | #KERAS_conv2d_model 27 | #KERAS_dense_16x100x100x100x100x100x5 28 | KERAS_3layer_batch_norm 29 | KERAS_3layer_binary_smaller 30 | KERAS_3layer_ternary_small 31 | 32 | # Pynq backend 33 | KERAS_3layer b:pynq-z2 B:VivadoAccelerator x:xc7z020clg400-1 s:Resource 34 | garnet_1layer x:xcku115-flvb2104-2-i y:garnet_1layer_config 35 | 36 | 37 | # Resource strategy 38 | KERAS_3layer r:2 s:Resource 39 | qkeras_mnist_dense r:112 s:Resource 40 | 41 | #Fails synthesis due to a problem with loop unrolling 42 | #jetTagger_Conv2D_Small:jetTagger_Conv2D_Small 43 | 44 | # Streaming IO 45 | #KERAS_1layer io:s 46 | KERAS_3layer io:s 47 | KERAS_conv1d_small io:s 48 | KERAS_conv2d_model io:s 49 | jetTagger_Conv2D_Small io:s 50 | jetTagger_Conv2D_Small_NoBatchNorm io:s 51 | 52 | 53 | #KERAS_1layer x:xcku115-flvf1924-2-i 54 | -------------------------------------------------------------------------------- /test/onnx-models.txt: -------------------------------------------------------------------------------- 1 | # ONNX models from examples directory that will be used for testing 2 | # 3 | # Synthax: 4 | # MODEL_NAME [x:PART] [c:CLOCK_PERIOD] [io:s] [r:REUSE_FACTOR] [t:AP_TYPE] [s:STRATEGY] 5 | # where 6 | # MODEL_NAME - Name of the file containing the model (without ".onnx") 7 | # x:PART - FPGA part number to use 8 | # c:CLOCK_PERIOD - Clock period 9 | # io:s - User streaming I/O, otherwise use parallel I/O 10 | # r:REUSE_FACTOR - Reuse factor 11 | # s:STRATEGY - Latency-optimized or Resource-optimized strategy 12 | # t:AP_TYPE - Default precision 13 | # 14 | # Lines starting with "#" are ignored. 
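# For example, an entry using these options could read: three_layer_keras x:xcku115-flvb2104-2-i c:5 r:2 s:Resource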
15 | # 16 | 17 | conv1d_small_keras 18 | conv2d_small_keras 19 | conv2d_small_keras 20 | conv2d_small_mp_keras 21 | dense_big_keras 22 | three_layer_bn_keras 23 | three_layer_bn_pytorch 24 | three_layer_keras 25 | three_layer_pytorch 26 | two_layer_keras 27 | two_layer_pytorch 28 | -------------------------------------------------------------------------------- /test/pytest/ci-template.yml: -------------------------------------------------------------------------------- 1 | .pytest: 2 | stage: test 3 | image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.6.1.base 4 | tags: 5 | - k8s-default 6 | before_script: 7 | - eval "$(conda shell.bash hook)" 8 | - conda activate hls4ml-testing 9 | - source /opt/intel/oneapi/setvars.sh --force 10 | - git config --global --add safe.directory /builds/fastmachinelearning/hls4ml 11 | - git submodule update --init --recursive hls4ml/templates/catapult/ 12 | - if [ $EXAMPLEMODEL == 1 ]; then git submodule update --init example-models; fi 13 | - pip install .[testing,sr,optimization] 14 | script: 15 | - cd test/pytest 16 | - pytest $PYTESTFILE -rA --cov-report xml --cov-report term --cov=hls4ml --junitxml=report.xml --randomly-seed=42 --randomly-dont-reorganize --randomly-dont-reset-seed 17 | artifacts: 18 | when: always 19 | reports: 20 | junit: 21 | - test/pytest/report.xml 22 | coverage_report: 23 | coverage_format: cobertura 24 | path: test/pytest/coverage.xml 25 | paths: 26 | - test/pytest/hls4mlprj*.tar.gz 27 | -------------------------------------------------------------------------------- /test/pytest/test_batchnorm.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import BatchNormalization 6 | from tensorflow.keras.models import Sequential 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | in_shape = 16 13 | atol = 5e-3 14 | 15 | 16 | @pytest.fixture(scope='module') 17 | def data(): 18 | np.random.seed(0) 19 | X = np.random.rand(100, in_shape) 20 | return X 21 | 22 | 23 | @pytest.fixture(scope='module') 24 | def model(request): 25 | model = Sequential() 26 | model.add(BatchNormalization(input_shape=(in_shape,), center=request.param, scale=request.param)) 27 | model.compile() 28 | return model 29 | 30 | 31 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 32 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult', 'oneAPI']) 33 | @pytest.mark.parametrize('model', [True, False], indirect=True) 34 | def test_batchnorm(model, data, backend, io_type): 35 | default_precision = 'fixed<32, 1>' 36 | 37 | center = model.layers[0].center 38 | scale = model.layers[0].scale 39 | config = hls4ml.utils.config_from_keras_model( 40 | model, default_precision=default_precision, granularity='name', backend=backend 41 | ) 42 | output_dir = str(test_root_path / f'hls4mlprj_batchnorm_{backend}_{io_type}_center{center}_scale{scale}') 43 | hls_model = hls4ml.converters.convert_from_keras_model( 44 | model, backend=backend, hls_config=config, io_type=io_type, output_dir=output_dir 45 | ) 46 | hls_model.compile() 47 | 48 | # Predict 49 | y_keras = np.squeeze(model.predict(data)) 50 | y_hls = hls_model.predict(data) 51 | np.testing.assert_allclose(y_keras, y_hls, rtol=0, atol=atol, verbose=True) 52 | -------------------------------------------------------------------------------- /test/pytest/test_bram_factor.py: 
-------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | import tensorflow as tf 6 | from tensorflow.keras.layers import Activation, Dense 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) 14 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 15 | def test_bram_factor(backend, io_type): 16 | '''A copy of the test_dense from test_keras_api.py with BramFactor set to 0''' 17 | model = tf.keras.models.Sequential() 18 | model.add( 19 | Dense( 20 | 2, 21 | input_shape=(1,), 22 | name='Dense', 23 | use_bias=True, 24 | kernel_initializer=tf.keras.initializers.RandomUniform(minval=1, maxval=10), 25 | bias_initializer='zeros', 26 | kernel_regularizer=None, 27 | bias_regularizer=None, 28 | activity_regularizer=None, 29 | kernel_constraint=None, 30 | bias_constraint=None, 31 | ) 32 | ) 33 | model.add(Activation(activation='elu', name='Activation')) 34 | model.compile(optimizer='adam', loss='mse') 35 | 36 | X_input = np.random.rand(100, 1) 37 | 38 | keras_prediction = model.predict(X_input) 39 | 40 | config = hls4ml.utils.config_from_keras_model(model) 41 | config["Model"]["BramFactor"] = 0 42 | output_dir = str(test_root_path / f'hls4mlprj_bram_factor_{backend}_{io_type}') 43 | 44 | hls_model = hls4ml.converters.convert_from_keras_model( 45 | model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend 46 | ) 47 | 48 | hls_model.compile() 49 | 50 | hls_prediction = hls_model.predict(X_input) 51 | 52 | np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=1e-2, atol=0.01) 53 | 54 | # Check that there weights are actually remote 55 | model_brams = [var for var in hls_model.get_weight_variables() if var.storage.lower() == 'bram'] 56 | assert len(model_brams) == 2 57 | -------------------------------------------------------------------------------- /test/pytest/test_causalpadding.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Conv1D 6 | from tensorflow.keras.models import Sequential 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | atol = 5e-3 13 | 14 | 15 | @pytest.mark.parametrize('io_type', ['io_stream', 'io_parallel']) 16 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) 17 | def test_causalpadding(io_type, backend): 18 | model = Sequential() 19 | model.add(Conv1D(1, 5, padding="causal", input_shape=(100, 1))) 20 | model.compile() 21 | 22 | data = np.random.randint(0, 10, 100).astype(float) 23 | data = np.expand_dims(data, axis=0) 24 | data = np.expand_dims(data, axis=-1) 25 | 26 | config = hls4ml.utils.config_from_keras_model( 27 | model, default_precision='ap_fixed<32,16>', granularity='name', backend=backend 28 | ) 29 | odir = str(test_root_path / f'hls4mlprj_validpadding_{backend}_{io_type}') 30 | hls_model = hls4ml.converters.convert_from_keras_model( 31 | model, hls_config=config, io_type=io_type, output_dir=odir, backend=backend 32 | ) 33 | hls_model.compile() 34 | 35 | # Predict 36 | y_keras = model.predict(data).flatten() 37 | y_hls = hls_model.predict(data).flatten() 38 | np.testing.assert_allclose(y_keras, y_hls, rtol=0, atol=atol, verbose=True) 39 | -------------------------------------------------------------------------------- 
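The pytest modules in this directory stop at hls_model.compile() and a bit-accuracy check against Keras. A minimal sketch of taking one of the generated projects through C synthesis and reading back the report could look like the following (assuming a Vivado/Vitis toolchain is available on the PATH, hls_model is a ModelGraph produced by convert_from_keras_model as in the tests above, and read_vivado_report is hls4ml's report helper):

import hls4ml

# Run C synthesis on the already-written project (skip C simulation and IP export)
hls_model.build(csim=False, synth=True, export=False)

# Parse and print the synthesis report from the project's output directory
hls4ml.report.read_vivado_report(hls_model.config.get_output_dir())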
/test/pytest/test_clone_flatten.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Concatenate, Flatten, Input 6 | from tensorflow.keras.models import Model 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def data(): 15 | X = np.random.randint(-5, 5, (1, 2, 3), dtype='int32') 16 | return X 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def keras_model(): 21 | inp1 = Input(shape=(2, 3), name='input_1') 22 | x = Flatten()(inp1) 23 | y = Flatten()(inp1) 24 | out = Concatenate(axis=1)([x, y]) 25 | model = Model(inputs=inp1, outputs=out) 26 | return model 27 | 28 | 29 | @pytest.fixture 30 | @pytest.mark.parametrize('io_type', ['io_stream']) 31 | @pytest.mark.parametrize('backend', ['Vivado', 'Quartus', 'Catapult']) 32 | def hls_model(keras_model, backend, io_type): 33 | hls_config = hls4ml.utils.config_from_keras_model( 34 | keras_model, default_precision='ap_int<6>', granularity='name', backend=backend 35 | ) 36 | output_dir = str(test_root_path / f'hls4mlprj_clone_flatten_{backend}_{io_type}') 37 | hls_model = hls4ml.converters.convert_from_keras_model( 38 | keras_model, 39 | hls_config=hls_config, 40 | io_type=io_type, 41 | backend=backend, 42 | output_dir=output_dir, 43 | ) 44 | 45 | hls_model.compile() 46 | return hls_model 47 | 48 | 49 | @pytest.mark.parametrize('io_type', ['io_stream']) 50 | @pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) 51 | def test_accuracy(data, keras_model, hls_model): 52 | X = data 53 | model = keras_model 54 | # model under test predictions and accuracy 55 | y_keras = model.predict(X) 56 | y_hls4ml = hls_model.predict(X.astype('float32')).reshape(y_keras.shape) 57 | # "accuracy" of hls4ml predictions vs keras 58 | np.testing.assert_array_equal(y_keras, y_hls4ml, verbose=True) 59 | -------------------------------------------------------------------------------- /test/pytest/test_conv1d_narrow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Conv1D 6 | from tensorflow.keras.models import Sequential 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def data(): 15 | X = np.random.rand(10, 11, 3) 16 | return X 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def model(): 21 | model = Sequential() 22 | model.add(Conv1D(5, 9, input_shape=(11, 3))) 23 | model.compile() 24 | return model 25 | 26 | 27 | @pytest.mark.parametrize( 28 | 'narrowset', 29 | [ 30 | ('io_stream', 'latency', 'Encoded'), 31 | ('io_stream', 'resource', 'Encoded'), 32 | ('io_stream', 'latency', 'LineBuffer'), 33 | ('io_stream', 'resource', 'LineBuffer'), 34 | ('io_parallel', 'resource', 'Encoded'), 35 | ('io_parallel', 'latency', 'Encoded'), 36 | ('io_parallel', 'resource', 'LineBuffer'), 37 | ('io_parallel', 'latency', 'LineBuffer'), 38 | ], 39 | ) 40 | @pytest.mark.filterwarnings("error") 41 | def test_narrow(data, model, narrowset, capfd): 42 | ''' 43 | Check that the implementation does not have leftover data. 
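Here "narrow" means the kernel (width 9) is almost as wide as the input (width 11); the capfd check below asserts that the emulation does not print the "leftover data" warning for this configuration.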
44 | ''' 45 | io_type = narrowset[0] 46 | strategy = narrowset[1] 47 | conv = narrowset[2] 48 | X = data 49 | 50 | output_dir = str(test_root_path / f'hls4mlprj_conv1d_narrow_{io_type}_{strategy}_{conv}') 51 | 52 | config = hls4ml.utils.config_from_keras_model(model) 53 | config['Model']['Strategy'] = strategy 54 | config['Model']['ConvImplementation'] = conv 55 | 56 | hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, io_type=io_type, output_dir=output_dir) 57 | hls_model.compile() 58 | 59 | # model under test predictions and accuracy 60 | y_keras = model.predict(X) 61 | y_hls4ml = hls_model.predict(X) 62 | 63 | out, _ = capfd.readouterr() 64 | assert "leftover data" not in out 65 | np.testing.assert_allclose(y_keras.ravel(), y_hls4ml.ravel(), atol=0.05) 66 | -------------------------------------------------------------------------------- /test/pytest/test_conv2d_narrow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Conv2D 6 | from tensorflow.keras.models import Sequential 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def data(): 15 | X = np.random.rand(10, 5, 5, 3) 16 | return X 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def model(): 21 | model = Sequential() 22 | model.add(Conv2D(5, (4, 4), input_shape=(5, 5, 3))) 23 | model.compile() 24 | return model 25 | 26 | 27 | @pytest.mark.parametrize( 28 | 'narrowset', 29 | [ 30 | ('io_stream', 'latency', 'Encoded'), 31 | ('io_stream', 'resource', 'Encoded'), 32 | ('io_stream', 'latency', 'LineBuffer'), 33 | ('io_stream', 'resource', 'LineBuffer'), 34 | ('io_parallel', 'resource', 'Encoded'), 35 | ('io_parallel', 'latency', 'Encoded'), 36 | ('io_parallel', 'resource', 'LineBuffer'), 37 | ('io_parallel', 'latency', 'LineBuffer'), 38 | ], 39 | ) 40 | @pytest.mark.filterwarnings("error") 41 | def test_narrow(data, model, narrowset, capfd): 42 | ''' 43 | Check that the implementation does not have leftover data. 
44 | ''' 45 | io_type = narrowset[0] 46 | strategy = narrowset[1] 47 | conv = narrowset[2] 48 | X = data 49 | 50 | output_dir = str(test_root_path / f'hls4mlprj_conv2d_narrow_{io_type}_{strategy}_{conv}') 51 | 52 | config = hls4ml.utils.config_from_keras_model(model) 53 | config['Model']['Strategy'] = strategy 54 | config['Model']['ConvImplementation'] = conv 55 | 56 | hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, io_type=io_type, output_dir=output_dir) 57 | hls_model.compile() 58 | 59 | # model under test predictions and accuracy 60 | y_keras = model.predict(X) 61 | y_hls4ml = hls_model.predict(X) 62 | 63 | out, _ = capfd.readouterr() 64 | assert "leftover data" not in out 65 | np.testing.assert_allclose(y_keras.ravel(), y_hls4ml.ravel(), atol=0.05) 66 | -------------------------------------------------------------------------------- /test/pytest/test_embed.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Embedding, Input 6 | from tensorflow.keras.models import Model 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def data(): 15 | X = np.random.randint(10, size=(32, 100)) 16 | return X 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def keras_model(): 21 | inputs = Input(shape=(100,), name='embedding_input') 22 | embedding = Embedding(13, 8, input_length=100, name='embedding')(inputs) 23 | model = Model(inputs=inputs, outputs=embedding) 24 | return model 25 | 26 | 27 | @pytest.fixture 28 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult', 'oneAPI']) 29 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 30 | def hls_model(keras_model, backend, io_type): 31 | hls_config = hls4ml.utils.config_from_keras_model( 32 | keras_model, default_precision='ap_fixed<16,6>', granularity='name', backend=backend 33 | ) 34 | hls_config['LayerName']['embedding_input']['Precision']['result'] = 'ap_uint<4>' 35 | out_dir = str(test_root_path / 'hls4mlprj_embed_{}_{}').format(backend, io_type) 36 | hls_model = hls4ml.converters.convert_from_keras_model( 37 | keras_model, backend=backend, hls_config=hls_config, io_type=io_type, output_dir=out_dir 38 | ) 39 | 40 | hls_model.compile() 41 | return hls_model 42 | 43 | 44 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult', 'oneAPI']) 45 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 46 | def test_embedding_accuracy(data, keras_model, hls_model): 47 | X = data 48 | model = keras_model 49 | # model under test predictions and accuracy 50 | y_keras = model.predict(X) 51 | y_hls4ml = hls_model.predict(X.astype(float)).reshape(y_keras.shape) 52 | # "accuracy" of hls4ml predictions vs keras 53 | np.testing.assert_allclose(y_keras, y_hls4ml, rtol=0, atol=1e-03, verbose=True) 54 | -------------------------------------------------------------------------------- /test/pytest/test_fetch_example.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import io 3 | from contextlib import redirect_stdout 4 | from pathlib import Path 5 | 6 | import pytest 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) 14 | def test_fetch_example_utils(backend): 15 | f = io.StringIO() 16 | with 
redirect_stdout(f): 17 | hls4ml.utils.fetch_example_list() 18 | out = f.getvalue() 19 | 20 | model_list = ast.literal_eval(out) # Check if we indeed got a dictionary back 21 | 22 | assert 'qkeras_mnist_cnn.json' in model_list['keras'] 23 | 24 | # This model has an example config that is also downloaded. Stored configurations don't set "Backend" value. 25 | config = hls4ml.utils.fetch_example_model('qkeras_mnist_cnn.json', backend=backend) 26 | config['KerasJson'] = 'qkeras_mnist_cnn.json' 27 | config['KerasH5'] 28 | config['Backend'] = backend 29 | config['OutputDir'] = str(test_root_path / f'hls4mlprj_fetch_example_{backend}') 30 | 31 | hls_model = hls4ml.converters.keras_v2_to_hls(config) 32 | hls_model.compile() # For now, it is enough if it compiles, we're only testing downloading works as expected 33 | -------------------------------------------------------------------------------- /test/pytest/test_keras_h5_loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | import tensorflow as tf 6 | 7 | import hls4ml 8 | 9 | test_root_path = Path(__file__).parent 10 | 11 | 12 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult']) 13 | def test_keras_h5_loader(backend): 14 | input_shape = (10,) 15 | model = tf.keras.models.Sequential( 16 | [ 17 | tf.keras.layers.InputLayer(input_shape=input_shape), 18 | tf.keras.layers.Activation(activation='relu'), 19 | ] 20 | ) 21 | 22 | hls_config = hls4ml.utils.config_from_keras_model(model, granularity='name') 23 | 24 | config = { 25 | 'OutputDir': str(test_root_path / f'hls4mlprj_KerasH5_loader_test_{backend}'), 26 | 'ProjectName': f'KerasH5_loader_test_{backend}', 27 | 'Backend': backend, 28 | 'ClockPeriod': 25.0, 29 | 'IOType': 'io_parallel', 30 | 'HLSConfig': hls_config, 31 | 'KerasH5': str(test_root_path / f'hls4mlprj_KerasH5_loader_test_{backend}/model.h5'), 32 | } 33 | 34 | model.save(config['KerasH5']) 35 | hls_model = hls4ml.converters.keras_v2_to_hls(config) 36 | hls_model.compile() 37 | data = np.random.rand(1000, 10).astype(np.float32) 38 | pred = hls_model.predict(data) 39 | np.testing.assert_allclose(pred, model.predict(data), rtol=5e-3, atol=5e-3) 40 | -------------------------------------------------------------------------------- /test/pytest/test_multi_dense.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | import tensorflow as tf 6 | from tensorflow.keras.layers import Dense 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.mark.parametrize( 14 | 'backend, strategy', 15 | [ 16 | ('Vivado', 'Latency'), 17 | ('Vivado', 'Resource'), 18 | ('Vitis', 'Latency'), 19 | ('Vitis', 'Resource'), 20 | ('Quartus', 'Resource'), 21 | ('oneAPI', 'Resource'), 22 | ('Catapult', 'Latency'), 23 | ('Catapult', 'Resource'), 24 | ], 25 | ) 26 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 27 | @pytest.mark.parametrize('shape', [(4, 3), (4, 1), (2, 3, 2), (1, 3, 1)]) 28 | def test_multi_dense(backend, strategy, io_type, shape): 29 | model = tf.keras.models.Sequential() 30 | model.add(Dense(7, input_shape=shape, activation='relu')) 31 | model.add(Dense(2, activation='relu')) 32 | model.compile(optimizer='adam', loss='mse') 33 | 34 | X_input = np.random.rand(100, *shape) 35 | X_input = np.round(X_input * 2**10) * 2**-10 # make it an exact 
ap_fixed<16,6> 36 | 37 | keras_prediction = model.predict(X_input) 38 | 39 | config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) 40 | config['Model']['Strategy'] = strategy 41 | shapestr = '_'.join(str(x) for x in shape) 42 | output_dir = str(test_root_path / f'hls4mlprj_multi_dense_{backend}_{strategy}_{io_type}_{shapestr}') 43 | 44 | hls_model = hls4ml.converters.convert_from_keras_model( 45 | model, hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type 46 | ) 47 | 48 | hls_model.compile() 49 | 50 | hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape) 51 | 52 | np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=1e-2, atol=0.01) 53 | -------------------------------------------------------------------------------- /test/pytest/test_optimization/test_knapsack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from hls4ml.optimization.dsp_aware_pruning.knapsack import solve_knapsack 5 | 6 | 7 | # In the simple case below, both implementations give the optimal answer 8 | # In general, the greedy algorithm will not give the optimal solution 9 | @pytest.mark.parametrize('implementation', ['dynamic', 'greedy', 'branch_bound', 'CBC_MIP']) 10 | def test_knapsack_1d(implementation): 11 | values = np.array([4, 5, 6, 8, 3]) 12 | weights = np.array([[2, 5, 3, 2, 5]]) 13 | capacity = np.array([8]) 14 | 15 | optimal, selected = solve_knapsack(values, weights, capacity, implementation=implementation) 16 | assert optimal == 18 17 | assert 0 in selected 18 | assert 2 in selected 19 | assert 3 in selected 20 | 21 | 22 | @pytest.mark.parametrize('implementation', ['greedy', 'branch_bound', 'CBC_MIP']) 23 | def test_multidimensional_knapsack(implementation): 24 | values = np.array([10, 2, 6, 12, 3]) 25 | weights = np.array([[3, 1, 4, 5, 5], [3, 2, 4, 1, 2]]) 26 | capacity = np.array([8, 7]) 27 | 28 | optimal, selected = solve_knapsack(values, weights, capacity, implementation=implementation) 29 | assert optimal == 22 30 | assert 0 in selected 31 | assert 3 in selected 32 | 33 | 34 | def test_knapsack_equal_weights(): 35 | values = np.array([10, 2, 6, 8, 3]) 36 | weights = np.array([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]) 37 | capacity = np.array([7, 7]) 38 | 39 | optimal, selected = solve_knapsack(values, weights, capacity) 40 | assert optimal == 18 41 | assert 0 in selected 42 | assert 3 in selected 43 | 44 | 45 | def test_knapsack_all_elements_fit(): 46 | values = np.array([10, 2, 6, 12, 3]) 47 | weights = np.array([[3, 1, 4, 5, 5], [3, 2, 4, 1, 2]]) 48 | capacity = np.array([19, 12]) 49 | 50 | optimal, selected = solve_knapsack(values, weights, capacity) 51 | assert optimal == 33 52 | assert selected == list(range(0, values.shape[0])) 53 | -------------------------------------------------------------------------------- /test/pytest/test_report/Vivado/vivado_hls.app: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /test/pytest/test_report/oneAPI/loop_attr.ndjson: -------------------------------------------------------------------------------- 1 | {"name":"loop_attributes", "id":312, "nodes":[1, 3, 2]} 2 | {"name":"Myproject", "id":1, "clk":"No", "fmax":"480.00", "type":"kernel", "children":[4, 5]} 3 | {"name":"Myproject.B0", "id":4, "af":"480.00", 
"br":"0", "ci":"0", "fo":"Disabled", "ii":"1", "ll":"1", "lllt":"17.000000", "lt":"17.000000", "mi":"n/a", "pl":"Yes", "tc":"0", "tn":"1", "type":"bb"} 4 | {"name":"Myproject.B1", "id":5, "af":"480.00", "br":"0", "ci":"0", "fo":"Enabled", "ii":"1", "ll":"1", "lllt":"176.000000", "lt":"176.000000", "mi":"1", "pl":"Yes", "tc":"0", "tn":"1", "details":[{"type":"text", "text":"Hyper-Optimized loop structure: enabled."}], "type":"loop"} 5 | {"name":"relu_config3>()", "id":3, "clk":"No", "fmax":"480.00", "debug":[[{"filename":"/home/enlupi/Work/code/hls4mlprj_report_oneAPI/src/firmware/nnet_utils/nnet_activation_stream.h", "line":32}]], "type":"kernel", "children":[8, 9]} 6 | {"name":"relu_config3>().B0", "id":8, "af":"480.00", "br":"0", "ci":"0", "fo":"Disabled", "ii":"1", "ll":"1", "lllt":"17.000000", "lt":"17.000000", "mi":"n/a", "pl":"Yes", "tc":"0", "tn":"1", "type":"bb"} 7 | {"name":"relu_config3>().B1", "id":9, "af":"480.00", "br":"0", "ci":"0", "fo":"Enabled", "ii":"1", "ll":"1", "lllt":"159.000000", "lt":"159.000000", "mi":"1", "pl":"Yes", "tc":"0", "tn":"1", "details":[{"type":"text", "text":"Hyper-Optimized loop structure: enabled."}], "type":"loop"} 8 | {"name":"bias_t)", "id":2, "clk":"No", "fmax":"480.00", "debug":[[{"filename":"/home/enlupi/Work/code/hls4mlprj_report_oneAPI/src/firmware/nnet_utils/nnet_dense_stream.h", "line":12}]], "type":"kernel", "children":[6, 7]} 9 | {"name":"bias_t).B0", "id":6, "af":"480.00", "br":"0", "ci":"0", "fo":"Disabled", "ii":"1", "ll":"1", "lllt":"17.000000", "lt":"17.000000", "mi":"n/a", "pl":"Yes", "tc":"0", "tn":"1", "type":"bb"} 10 | {"name":"bias_t).B1", "id":7, "af":"480.00", "br":"0", "ci":"0", "fo":"Enabled", "ii":"1", "ll":"1", "lllt":"200.000000", "lt":"200.000000", "mi":"1", "pl":"Yes", "tc":"0", "tn":"1", "details":[{"type":"text", "text":"Hyper-Optimized loop structure: enabled."}], "type":"loop"} 11 | -------------------------------------------------------------------------------- /test/pytest/test_report/oneAPI/quartus.ndjson: -------------------------------------------------------------------------------- 1 | {"quartusFitClockSummary":{"nodes":[{"name":"Quartus Fitter: Clock Frequency (MHz)","type":"system","id":1000,"clock1x":"597.73","clock fmax":"597.73","clock":"597.73","details":[{"text":"The actual frequency of the clock is 597.73 MHz after platform PLL adjustment. The maximum frequency for the clock is 597.73 MHz. 
"}]}]},"quartusFitResourceUsageSummary":{"nodes":[{"type":"system","id":1000,"name":"Quartus Fitter: Device Image","alm":"4520.5","alut":"4182","reg":"16419","dsp":"40","ram":"36","mlab":"52"},{"type":"kernel","id":1010,"name":"Myproject","alm":"4520.0","alut":"4181","reg":"16419","dsp":"40","ram":"36","mlab":"52"}]}} -------------------------------------------------------------------------------- /test/pytest/test_reshape.py: -------------------------------------------------------------------------------- 1 | """Test that reshape is properly handled by optimizers.""" 2 | 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import pytest 7 | import tensorflow as tf 8 | 9 | import hls4ml 10 | 11 | test_root_path = Path(__file__).parent 12 | 13 | 14 | def randX(batch_size, N): 15 | return np.random.rand(batch_size, N) 16 | 17 | 18 | @pytest.fixture(scope='module') 19 | def randX_20_10(): 20 | return randX(20, 10) 21 | 22 | 23 | @pytest.mark.parametrize('backend', ['Vivado', 'Quartus', 'Catapult', 'oneAPI']) 24 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 25 | def test_reshape_parallel(randX_20_10, backend, io_type): 26 | model = tf.keras.models.Sequential( 27 | [ 28 | tf.keras.layers.Input(shape=(10,)), 29 | tf.keras.layers.Dense(10 * 3), 30 | tf.keras.layers.Reshape((10, 3)), 31 | tf.keras.layers.ReLU(), 32 | ] 33 | ) 34 | model.compile(optimizer='adam', loss='mse') 35 | config = hls4ml.utils.config_from_keras_model(model, default_precision='fixed<32,16>') 36 | prj_name = f'hls4mlprj_reshape_{backend}_{io_type}' 37 | output_dir = str(test_root_path / prj_name) 38 | hls_model = hls4ml.converters.convert_from_keras_model( 39 | model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend 40 | ) 41 | hls_model.compile() 42 | 43 | X = randX_20_10 44 | y_qkeras = model.predict(X) 45 | y_hls4ml = hls_model.predict(X) 46 | 47 | # check that the values are close 48 | np.testing.assert_allclose(y_qkeras.ravel(), y_hls4ml.ravel(), atol=0.02) 49 | -------------------------------------------------------------------------------- /test/pytest/test_softsign.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | import tensorflow as tf 6 | from sklearn.metrics import accuracy_score 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult']) 14 | @pytest.mark.parametrize('input_shape, io_type', [((8,), 'io_parallel'), ((8,), 'io_stream'), ((8, 8, 3), 'io_stream')]) 15 | def test_softsign(backend, input_shape, io_type): 16 | X = np.random.rand(1000, *input_shape) 17 | X = np.round(X * 2**10) * 2**-10 18 | model = tf.keras.models.Sequential() 19 | model.add(tf.keras.layers.Activation(input_shape=input_shape, activation='softsign', name='softsign')) 20 | model.compile() 21 | 22 | cfg = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='fixed<20,4>', backend=backend) 23 | # Since softsign implementation is lookup-based increasing the precision and size of the table helps with accuracy 24 | cfg['LayerName']['softsign']['table_t'] = 'fixed<20,4>' 25 | cfg['LayerName']['softsign']['table_size'] = 2048 26 | odir = str(test_root_path / f'hls4mlprj_softsign_{backend}_{io_type}_{str(input_shape)}') 27 | hls_model = hls4ml.converters.convert_from_keras_model( 28 | model, hls_config=cfg, io_type=io_type, output_dir=odir, 
--------------------------------------------------------------------------------
/test/pytest/test_weight_writer.py:
--------------------------------------------------------------------------------
1 | from glob import glob
2 | from pathlib import Path
3 | 
4 | import keras
5 | import numpy as np
6 | import pytest
7 | 
8 | import hls4ml
9 | 
10 | test_root_path = Path(__file__).parent
11 | 
12 | 
13 | @pytest.mark.parametrize('k', [0, 1])
14 | @pytest.mark.parametrize('i', [4, 8, 10])
15 | @pytest.mark.parametrize('f', [-2, 0, 2, 7, 14])
16 | def test_weight_writer(k, i, f):
17 |     k, b, i = k, k + i + f, k + i
18 |     w = np.array([[np.float32(2.0**-f)]])
19 |     u = '' if k else 'u'
20 |     dtype = f'{u}fixed<{b}, {i}>'
21 |     hls_config = {'LayerName': {'dense': {'Precision': {'weight': dtype}}}}
22 | 
23 |     model = keras.Sequential([keras.layers.Dense(1, input_shape=(1,), name='dense')])
24 |     model.layers[0].kernel.assign(keras.backend.constant(w))
25 |     output_dir = str(test_root_path / f'hls4ml_prj_test_weight_writer_{dtype}')
26 | 
27 |     model_hls = hls4ml.converters.convert_from_keras_model(
28 |         model, hls_config=hls_config, output_dir=output_dir, write_weights_txt=True
29 |     )
30 |     model_hls.write()
31 | 
32 |     w_paths = glob(str(Path(output_dir) / 'firmware/weights/w*.txt'))
33 |     assert len(w_paths) == 1
34 | 
35 |     w_loaded = np.loadtxt(w_paths[0], delimiter=',').reshape(1, 1)
36 |     assert np.all(w == w_loaded)
37 | 
--------------------------------------------------------------------------------
/test/pytorch-models.txt:
--------------------------------------------------------------------------------
1 | # PyTorch models from the examples directory that will be used for testing
2 | #
3 | # Syntax:
4 | # MODEL_NAME [x:XILINXPART] [c:CLOCK_PERIOD] [io:s] [r:REUSE_FACTOR] [t:AP_TYPE] [s:STRATEGY]
5 | # where
6 | # MODEL_NAME - Name of the file containing the model (without ".pt")
7 | # x:XILINXPART - Xilinx part number to use
8 | # c:CLOCK_PERIOD - Clock period
9 | # io:s - Use streaming I/O, otherwise use parallel I/O
10 | # r:REUSE_FACTOR - Reuse factor
11 | # s:STRATEGY - Latency-optimized or Resource-optimized strategy
12 | # t:AP_TYPE - Default precision
13 | #
14 | # Lines starting with "#" are ignored.
15 | #
16 | 
17 | two_layer_model
18 | three_layer_model
--------------------------------------------------------------------------------
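The two entries in pytorch-models.txt do not exercise the option syntax described in its header. A hypothetical entry using every field might look like the line below; the part number, clock period, reuse factor, strategy token, and precision are illustrative assumptions, not values taken from the repository.

```
three_layer_model x:xcku115-flvb2104-2-i c:5 io:s r:2 s:Resource t:ap_fixed<16,6>
```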