├── .clang-format ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── build-sphinx.yml │ ├── pre-commit.yml │ ├── pypi-publish.yml │ ├── test-sphinx.yml │ └── update-branch-on-pr.yml ├── .gitignore ├── .gitlab-ci.yml ├── .gitmodules ├── .pre-commit-config.yaml ├── CITATION.cff ├── CONTRIBUTING.md ├── Jenkinsfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── advanced │ ├── auto.rst │ ├── bramfactor.rst │ ├── extension.rst │ ├── fifo_depth.rst │ ├── hgq.rst │ ├── model_optimization.rst │ └── profiling.rst ├── api │ ├── command.rst │ ├── concepts.rst │ ├── configuration.rst │ └── serialization.rst ├── attr_doc_gen.py ├── backend │ ├── accelerator.rst │ ├── catapult.rst │ ├── oneapi.rst │ ├── quartus.rst │ ├── sr.rst │ └── vitis.rst ├── conf.py ├── frontend │ ├── keras.rst │ ├── pytorch.rst │ └── qonnx.rst ├── img │ ├── act_hls4ml.png │ ├── act_keras.png │ ├── hls4ml_logo.png │ ├── hls4ml_logo.svg │ ├── hls4ml_logo_lightgrey.png │ ├── hls4ml_logo_lightgrey.svg │ ├── hls4ml_logo_navbar.png │ ├── logo.jpg │ ├── logo.png │ ├── nn_map_paper_fig_2.png │ ├── overview.jpg │ ├── overview.pdf │ ├── pynqframe.png │ ├── reuse_factor_paper_fig_8.png │ ├── weights_hls4ml.png │ ├── weights_keras.png │ └── zynq_interfaces.png ├── index.rst ├── intro │ ├── faq.rst │ ├── introduction.rst │ ├── reference.rst │ ├── release_notes.rst │ ├── setup.rst │ └── status.rst ├── ir │ ├── attributes.rst │ ├── flows.rst │ ├── ir.rst │ └── modelgraph.rst └── requirements.txt ├── hls4ml ├── __init__.py ├── backends │ ├── __init__.py │ ├── backend.py │ ├── catapult │ │ ├── __init__.py │ │ ├── catapult_backend.py │ │ ├── catapult_types.py │ │ └── passes │ │ │ ├── __init__.py │ │ │ ├── bn_quant.py │ │ │ ├── broadcast_stream.py │ │ │ ├── conv_same_pad.py │ │ │ ├── conv_stream.py │ │ │ ├── convolution_templates.py │ │ │ ├── convolution_winograd.py │ │ │ ├── core_templates.py │ │ │ ├── fifo_depth_optimization.py │ │ │ ├── garnet_templates.py │ │ │ ├── merge_templates.py │ │ │ ├── pointwise.py │ │ │ ├── pooling_templates.py │ │ │ ├── quantization_templates.py │ │ │ ├── recurrent_templates.py │ │ │ ├── reshaping_templates.py │ │ │ ├── resource_strategy.py │ │ │ └── transform_types.py │ ├── fpga │ │ ├── __init__.py │ │ ├── fpga_backend.py │ │ ├── fpga_layers.py │ │ ├── fpga_types.py │ │ └── passes │ │ │ ├── __init__.py │ │ │ ├── bram_weights.py │ │ │ ├── clone.py │ │ │ ├── embedding.py │ │ │ ├── final_reshape.py │ │ │ ├── fix_softmax_table_size.py │ │ │ ├── hgq_proxy_model.py │ │ │ ├── im2col_codegen.py │ │ │ ├── inplace_parallel_reshape.py │ │ │ ├── inplace_stream_flatten.py │ │ │ ├── remove_softmax.py │ │ │ ├── repack_stream.py │ │ │ └── xnor_pooling.py │ ├── oneapi │ │ ├── __init__.py │ │ ├── oneapi_backend.py │ │ ├── oneapi_template.py │ │ ├── oneapi_types.py │ │ └── passes │ │ │ ├── __init__.py │ │ │ ├── bn_quant.py │ │ │ ├── clone_templates.py │ │ │ ├── convolution_templates.py │ │ │ ├── convolution_winograd.py │ │ │ ├── core_templates.py │ │ │ ├── embedding_templates.py │ │ │ ├── merge_templates.py │ │ │ ├── pointwise.py │ │ │ ├── pooling_templates.py │ │ │ ├── quantization_templates.py │ │ │ ├── recurrent_templates.py │ │ │ ├── reshaping_templates.py │ │ │ ├── resource_strategy.py │ │ │ └── transform_types.py │ ├── quartus │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── bn_quant.py │ │ │ ├── convolution_templates.py │ │ │ ├── convolution_winograd.py │ │ │ ├── core_templates.py │ 
│ │ ├── merge_templates.py │ │ │ ├── pointwise.py │ │ │ ├── pooling_templates.py │ │ │ ├── quantization_templates.py │ │ │ ├── recurrent_templates.py │ │ │ ├── reshaping_templates.py │ │ │ ├── resource_strategy.py │ │ │ └── transform_types.py │ │ ├── quartus_backend.py │ │ └── quartus_types.py │ ├── symbolic │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── expr_templates.py │ │ │ └── validate_lut.py │ │ └── symbolic_backend.py │ ├── template.py │ ├── vitis │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── feature_check.py │ │ │ └── fifo_depth_optimization.py │ │ └── vitis_backend.py │ ├── vivado │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── bn_quant.py │ │ │ ├── broadcast_stream.py │ │ │ ├── conv_same_pad.py │ │ │ ├── conv_stream.py │ │ │ ├── convolution_templates.py │ │ │ ├── core_templates.py │ │ │ ├── einsum.py │ │ │ ├── einsum_dense.py │ │ │ ├── fifo_depth_optimization.py │ │ │ ├── garnet_templates.py │ │ │ ├── merge_templates.py │ │ │ ├── pipeline_style.py │ │ │ ├── pointwise.py │ │ │ ├── pointwise_codegen.py │ │ │ ├── pooling_templates.py │ │ │ ├── quantization_templates.py │ │ │ ├── recurrent_templates.py │ │ │ ├── reshaping_templates.py │ │ │ ├── resource_strategy.py │ │ │ ├── transform_types.py │ │ │ └── unrolled_codegen.py │ │ ├── vivado_backend.py │ │ └── vivado_types.py │ └── vivado_accelerator │ │ ├── __init__.py │ │ ├── passes │ │ ├── __init__.py │ │ └── fifo_depth_optimization.py │ │ ├── supported_boards.json │ │ ├── vivado_accelerator_backend.py │ │ └── vivado_accelerator_config.py ├── cli │ ├── __init__.py │ └── __main__.py ├── contrib │ ├── README.md │ ├── __init__.py │ ├── garnet.py │ └── kl_layer │ │ ├── README.md │ │ ├── kl_layer.h │ │ └── kl_layer.py ├── converters │ ├── __init__.py │ ├── keras │ │ ├── __init__.py │ │ ├── convolution.py │ │ ├── core.py │ │ ├── graph.py │ │ ├── hgq_proxy_model.py │ │ ├── merge.py │ │ ├── model.py │ │ ├── pooling.py │ │ ├── qkeras.py │ │ ├── recurrent.py │ │ ├── reshape.py │ │ └── reshaping.py │ ├── keras_v2_to_hls.py │ ├── keras_v3 │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── conv.py │ │ ├── core.py │ │ ├── einsum_dense.py │ │ ├── merge.py │ │ └── pooling.py │ ├── keras_v3_to_hls.py │ ├── onnx │ │ ├── __init__.py │ │ ├── convolution.py │ │ ├── core.py │ │ ├── merge.py │ │ ├── pooling.py │ │ └── reshape.py │ ├── onnx_to_hls.py │ ├── pytorch │ │ ├── __init__.py │ │ ├── convolution.py │ │ ├── core.py │ │ ├── merge.py │ │ ├── pooling.py │ │ ├── recurrent.py │ │ └── reshape.py │ ├── pytorch_to_hls.py │ └── utils.py ├── model │ ├── __init__.py │ ├── attributes.py │ ├── flow │ │ ├── __init__.py │ │ └── flow.py │ ├── graph.py │ ├── layers.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── optimizer.py │ │ └── passes │ │ │ ├── __init__.py │ │ │ ├── batchnorm_opt.py │ │ │ ├── bn_fuse.py │ │ │ ├── conv_to_convxd.py │ │ │ ├── conv_to_depthwiseconvxd.py │ │ │ ├── convert_to_channels_last.py │ │ │ ├── expand_layer_group.py │ │ │ ├── expand_time_distributed.py │ │ │ ├── fuse_biasadd.py │ │ │ ├── hgq_proxy_model.py │ │ │ ├── infer_precision.py │ │ │ ├── linear.py │ │ │ ├── matmul_const_to_dense.py │ │ │ ├── merge_const.py │ │ │ ├── move_scales.py │ │ │ ├── multi_dense.py │ │ │ ├── qkeras.py │ │ │ ├── quant_opt.py │ │ │ ├── reshape_const.py │ │ │ ├── resize_remove_constants.py │ │ │ ├── seperable_to_dw_conv.py │ │ │ ├── stamp.py │ │ │ └── transpose_opt.py │ ├── profiling.py │ ├── quantizers.py │ └── types.py ├── optimization │ ├── __init__.py │ └── dsp_aware_pruning │ │ ├── __init__.py │ │ ├── 
attributes.py │ │ ├── config.py │ │ ├── keras │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── config.py │ │ ├── masking.py │ │ ├── reduction.py │ │ ├── regularizers.py │ │ └── utils.py │ │ ├── knapsack.py │ │ ├── objectives │ │ ├── __init__.py │ │ ├── gpu_objectives.py │ │ └── vivado_objectives.py │ │ └── scheduler.py ├── report │ ├── __init__.py │ ├── catapult_report.py │ ├── oneapi_report.py │ ├── quartus_report.py │ └── vivado_report.py ├── templates │ ├── catapult │ │ ├── build_lib.sh │ │ ├── build_prj.tcl │ │ ├── catapult_synth.tcl │ │ ├── firmware │ │ │ ├── defines.h │ │ │ ├── myproject.cpp │ │ │ ├── myproject.h │ │ │ └── parameters.h │ │ ├── myproject_bridge.cpp │ │ ├── myproject_test.cpp │ │ └── nnet_utils │ │ │ ├── ap_shift_reg.h │ │ │ ├── hls_math.h │ │ │ ├── nnet_activation.h │ │ │ ├── nnet_activation_stream.h │ │ │ ├── nnet_array.h │ │ │ ├── nnet_batchnorm.h │ │ │ ├── nnet_batchnorm_stream.h │ │ │ ├── nnet_code_gen.h │ │ │ ├── nnet_common.h │ │ │ ├── nnet_conv1d.h │ │ │ ├── nnet_conv1d_latency.h │ │ │ ├── nnet_conv1d_resource.h │ │ │ ├── nnet_conv1d_stream.h │ │ │ ├── nnet_conv2d.h │ │ │ ├── nnet_conv2d_latency.h │ │ │ ├── nnet_conv2d_resource.h │ │ │ ├── nnet_conv2d_stream.h │ │ │ ├── nnet_conv_stream.h │ │ │ ├── nnet_dense.h │ │ │ ├── nnet_dense_compressed.h │ │ │ ├── nnet_dense_latency.h │ │ │ ├── nnet_dense_resource.h │ │ │ ├── nnet_dense_stream.h │ │ │ ├── nnet_embed.h │ │ │ ├── nnet_embed_stream.h │ │ │ ├── nnet_garnet.h │ │ │ ├── nnet_helpers.h │ │ │ ├── nnet_image.h │ │ │ ├── nnet_image_stream.h │ │ │ ├── nnet_math.h │ │ │ ├── nnet_merge.h │ │ │ ├── nnet_merge_stream.h │ │ │ ├── nnet_mult.h │ │ │ ├── nnet_padding.h │ │ │ ├── nnet_padding_stream.h │ │ │ ├── nnet_pooling.h │ │ │ ├── nnet_pooling_stream.h │ │ │ ├── nnet_recr_activations.h │ │ │ ├── nnet_recurrent.h │ │ │ ├── nnet_sepconv1d_stream.h │ │ │ ├── nnet_sepconv2d.h │ │ │ ├── nnet_sepconv2d_stream.h │ │ │ ├── nnet_sepconv_stream.h │ │ │ ├── nnet_stream.h │ │ │ └── nnet_types.h │ ├── oneapi │ │ ├── CMakeLists.txt │ │ ├── exception_handler.hpp │ │ ├── firmware │ │ │ ├── defines.h │ │ │ ├── myproject.cpp │ │ │ ├── myproject.h │ │ │ ├── nnet_utils │ │ │ │ ├── nnet_activation.h │ │ │ │ ├── nnet_activation_stream.h │ │ │ │ ├── nnet_batchnorm.h │ │ │ │ ├── nnet_batchnorm_stream.h │ │ │ │ ├── nnet_common.h │ │ │ │ ├── nnet_conv1d.h │ │ │ │ ├── nnet_conv1d_resource.h │ │ │ │ ├── nnet_conv1d_stream.h │ │ │ │ ├── nnet_conv2d.h │ │ │ │ ├── nnet_conv2d_resource.h │ │ │ │ ├── nnet_conv2d_stream.h │ │ │ │ ├── nnet_dense.h │ │ │ │ ├── nnet_dense_stream.h │ │ │ │ ├── nnet_depthconv1d.h │ │ │ │ ├── nnet_depthconv1d_resource.h │ │ │ │ ├── nnet_depthconv2d.h │ │ │ │ ├── nnet_depthconv2d_resource.h │ │ │ │ ├── nnet_embed.h │ │ │ │ ├── nnet_embed_stream.h │ │ │ │ ├── nnet_helpers.h │ │ │ │ ├── nnet_merge.h │ │ │ │ ├── nnet_merge_stream.h │ │ │ │ ├── nnet_mult.h │ │ │ │ ├── nnet_padding.h │ │ │ │ ├── nnet_padding_stream.h │ │ │ │ ├── nnet_pooling.h │ │ │ │ ├── nnet_pooling_stream.h │ │ │ │ ├── nnet_printf.h │ │ │ │ ├── nnet_recurrent.h │ │ │ │ ├── nnet_recurrent_activation.h │ │ │ │ ├── nnet_recurrent_stream.h │ │ │ │ ├── nnet_resize.h │ │ │ │ ├── nnet_resize_stream.h │ │ │ │ ├── nnet_stream.h │ │ │ │ ├── nnet_transpose.h │ │ │ │ ├── nnet_transpose_stream.h │ │ │ │ └── nnet_types.h │ │ │ └── parameters.h │ │ ├── myproject_bridge.cpp │ │ └── myproject_test.cpp │ ├── quartus │ │ ├── Makefile │ │ ├── ac_types │ │ │ ├── ac_channel.h │ │ │ ├── ac_complex.h │ │ │ ├── ac_fixed.h │ │ │ ├── ac_float.h │ │ │ ├── ac_int.h │ │ │ ├── ac_sc.h │ │ │ 
├── ac_std_float.h │ │ │ └── stream.h │ │ ├── build_lib.sh │ │ ├── firmware │ │ │ ├── defines.h │ │ │ ├── myproject.cpp │ │ │ ├── myproject.h │ │ │ ├── nnet_utils │ │ │ │ ├── nnet_activation.h │ │ │ │ ├── nnet_activation_stream.h │ │ │ │ ├── nnet_batchnorm.h │ │ │ │ ├── nnet_batchnorm_stream.h │ │ │ │ ├── nnet_common.h │ │ │ │ ├── nnet_conv1d.h │ │ │ │ ├── nnet_conv1d_resource.h │ │ │ │ ├── nnet_conv1d_stream.h │ │ │ │ ├── nnet_conv2d.h │ │ │ │ ├── nnet_conv2d_resource.h │ │ │ │ ├── nnet_conv2d_stream.h │ │ │ │ ├── nnet_dense.h │ │ │ │ ├── nnet_dense_compressed.h │ │ │ │ ├── nnet_dense_stream.h │ │ │ │ ├── nnet_embed.h │ │ │ │ ├── nnet_embed_stream.h │ │ │ │ ├── nnet_helpers.h │ │ │ │ ├── nnet_merge.h │ │ │ │ ├── nnet_merge_stream.h │ │ │ │ ├── nnet_mult.h │ │ │ │ ├── nnet_padding.h │ │ │ │ ├── nnet_padding_stream.h │ │ │ │ ├── nnet_pooling.h │ │ │ │ ├── nnet_pooling_stream.h │ │ │ │ ├── nnet_recurrent.h │ │ │ │ ├── nnet_recurrent_activation.h │ │ │ │ ├── nnet_recurrent_stream.h │ │ │ │ ├── nnet_resize.h │ │ │ │ ├── nnet_resize_stream.h │ │ │ │ ├── nnet_stream.h │ │ │ │ ├── nnet_transpose.h │ │ │ │ ├── nnet_transpose_stream.h │ │ │ │ └── nnet_types.h │ │ │ └── parameters.h │ │ ├── myproject_bridge.cpp │ │ ├── myproject_test_parallel.cpp │ │ └── myproject_test_stream.cpp │ ├── symbolic │ │ └── build_lib.sh │ ├── vitis │ │ └── nnet_utils │ │ │ ├── nnet_conv1d.h │ │ │ ├── nnet_conv1d_latency.h │ │ │ ├── nnet_conv1d_resource.h │ │ │ ├── nnet_conv1d_stream.h │ │ │ ├── nnet_conv2d.h │ │ │ ├── nnet_conv2d_latency.h │ │ │ ├── nnet_conv2d_resource.h │ │ │ ├── nnet_conv2d_stream.h │ │ │ ├── nnet_dense_stream.h │ │ │ ├── nnet_pooling.h │ │ │ ├── nnet_pooling_stream.h │ │ │ ├── nnet_sepconv1d_stream.h │ │ │ └── nnet_sepconv2d_stream.h │ ├── vivado │ │ ├── ap_types │ │ │ ├── ap_common.h │ │ │ ├── ap_decl.h │ │ │ ├── ap_fixed.h │ │ │ ├── ap_fixed_base.h │ │ │ ├── ap_fixed_ref.h │ │ │ ├── ap_fixed_special.h │ │ │ ├── ap_int.h │ │ │ ├── ap_int_base.h │ │ │ ├── ap_int_ref.h │ │ │ ├── ap_int_special.h │ │ │ ├── ap_shift_reg.h │ │ │ ├── etc │ │ │ │ └── ap_private.h │ │ │ ├── hls_math.h │ │ │ ├── hls_stream.h │ │ │ └── utils │ │ │ │ └── x_hls_utils.h │ │ ├── build_lib.sh │ │ ├── build_prj.tcl │ │ ├── firmware │ │ │ ├── defines.h │ │ │ ├── myproject.cpp │ │ │ ├── myproject.h │ │ │ └── parameters.h │ │ ├── myproject_bridge.cpp │ │ ├── myproject_test.cpp │ │ ├── nnet_utils │ │ │ ├── nnet_activation.h │ │ │ ├── nnet_activation_stream.h │ │ │ ├── nnet_batchnorm.h │ │ │ ├── nnet_batchnorm_stream.h │ │ │ ├── nnet_code_gen.h │ │ │ ├── nnet_common.h │ │ │ ├── nnet_conv1d.h │ │ │ ├── nnet_conv1d_latency.h │ │ │ ├── nnet_conv1d_resource.h │ │ │ ├── nnet_conv1d_stream.h │ │ │ ├── nnet_conv2d.h │ │ │ ├── nnet_conv2d_latency.h │ │ │ ├── nnet_conv2d_resource.h │ │ │ ├── nnet_conv2d_stream.h │ │ │ ├── nnet_conv_stream.h │ │ │ ├── nnet_dense.h │ │ │ ├── nnet_dense_compressed.h │ │ │ ├── nnet_dense_latency.h │ │ │ ├── nnet_dense_resource.h │ │ │ ├── nnet_dense_stream.h │ │ │ ├── nnet_depthwise_product.h │ │ │ ├── nnet_einsum.h │ │ │ ├── nnet_einsum_dense.h │ │ │ ├── nnet_embed.h │ │ │ ├── nnet_embed_stream.h │ │ │ ├── nnet_function_stubs.h │ │ │ ├── nnet_garnet.h │ │ │ ├── nnet_helpers.h │ │ │ ├── nnet_image.h │ │ │ ├── nnet_image_stream.h │ │ │ ├── nnet_math.h │ │ │ ├── nnet_merge.h │ │ │ ├── nnet_merge_stream.h │ │ │ ├── nnet_mult.h │ │ │ ├── nnet_padding.h │ │ │ ├── nnet_padding_stream.h │ │ │ ├── nnet_pooling.h │ │ │ ├── nnet_pooling_stream.h │ │ │ ├── nnet_recr_activations.h │ │ │ ├── nnet_recurrent.h │ │ │ ├── 
nnet_sepconv1d.h │ │ │ ├── nnet_sepconv1d_latency.h │ │ │ ├── nnet_sepconv1d_stream.h │ │ │ ├── nnet_sepconv2d.h │ │ │ ├── nnet_sepconv2d_latency.h │ │ │ ├── nnet_sepconv2d_stream.h │ │ │ ├── nnet_sepconv_stream.h │ │ │ ├── nnet_stream.h │ │ │ ├── nnet_time_distributed.h │ │ │ ├── nnet_transpose.h │ │ │ ├── nnet_transpose_stream.h │ │ │ └── nnet_types.h │ │ └── vivado_synth.tcl │ └── vivado_accelerator │ │ ├── alveo │ │ ├── krnl_rtl_src │ │ │ ├── krnl_rtl_axi_read_master.sv │ │ │ ├── krnl_rtl_axi_write_master.sv │ │ │ ├── krnl_rtl_control_s_axi.v │ │ │ ├── krnl_rtl_counter.sv │ │ │ ├── krnl_rtl_int.sv │ │ │ └── myproject_kernel.v │ │ ├── python_drivers │ │ │ └── axi_stream_driver.py │ │ └── tcl_scripts │ │ │ └── axi_stream_design.tcl │ │ ├── build_lib.sh │ │ ├── myproject_axi.cpp │ │ ├── myproject_axi.h │ │ ├── pynq-z2 │ │ ├── python_drivers │ │ │ └── axi_stream_driver.py │ │ └── tcl_scripts │ │ │ ├── axi_lite_design.tcl │ │ │ └── axi_stream_design.tcl │ │ └── zcu102 │ │ ├── python_drivers │ │ └── axi_stream_driver.py │ │ └── tcl_scripts │ │ └── axi_stream_design.tcl ├── utils │ ├── __init__.py │ ├── attribute_descriptions.py │ ├── config.py │ ├── dependency.py │ ├── einsum_utils.py │ ├── example_models.py │ ├── fixed_point_utils.py │ ├── link.py │ ├── plot.py │ ├── profiling_utils.py │ ├── serialization.py │ ├── string_utils.py │ ├── symbolic_utils.py │ ├── torch.py │ └── transpose_utils.py └── writer │ ├── __init__.py │ ├── catapult_writer.py │ ├── oneapi_writer.py │ ├── quartus_writer.py │ ├── symbolic_writer.py │ ├── vitis_writer.py │ ├── vivado_accelerator_writer.py │ ├── vivado_writer.py │ └── writers.py ├── pyproject.toml └── test ├── build-prj.sh ├── cleanup.sh ├── compare-reports.sh ├── convert-keras-models.sh ├── convert-onnx-models.sh ├── convert-pytorch-models.sh ├── gather-reports.sh ├── hls4ml-keras-test.sh ├── hls4ml-onnx-test.sh ├── hls4ml-pytorch-test.sh ├── keras-models.txt ├── keras-to-hls.sh ├── onnx-models.txt ├── onnx-to-hls.sh ├── pytest ├── ci-template.yml ├── generate_ci_yaml.py ├── test_activations.py ├── test_auto_precision.py ├── test_backend_config.py ├── test_batchnorm.py ├── test_batchnorm_pytorch.py ├── test_binary_cnn.py ├── test_boxplot.py ├── test_bram_factor.py ├── test_causalpadding.py ├── test_clone_flatten.py ├── test_cnn_mnist_qkeras.py ├── test_conv1d.py ├── test_conv1d_narrow.py ├── test_conv2d_narrow.py ├── test_dense_unrolled.py ├── test_depthconv1d.py ├── test_depthconv2d.py ├── test_einsum_dense.py ├── test_embed.py ├── test_extensions.py ├── test_extensions_pytorch.py ├── test_fetch_example.py ├── test_fifo_depth.py ├── test_flows.py ├── test_garnet.py ├── test_globalpooling.py ├── test_graph.py ├── test_hgq_layers.py ├── test_hgq_players.py ├── test_keras_api.py ├── test_keras_h5_loader.py ├── test_keras_nested_model.py ├── test_keras_v3_api.py ├── test_merge.py ├── test_merge_pytorch.py ├── test_multi_dense.py ├── test_multiout_network.py ├── test_multiout_onnx.py ├── test_optimization │ ├── test_attributes.py │ ├── test_keras │ │ ├── test_masking.py │ │ ├── test_reduction.py │ │ ├── test_regularizers.py │ │ └── test_weight_sharing.py │ ├── test_knapsack.py │ ├── test_objectives.py │ └── test_scheduler.py ├── test_pipeline_style.py ├── test_plot_model.py ├── test_pointwiseconv.py ├── test_pooling.py ├── test_pytorch_api.py ├── test_pytorch_profiler.py ├── test_qkeras.py ├── test_qonnx.py ├── test_recurrent_pytorch.py ├── test_repack_stream.py ├── test_report.py ├── test_report │ ├── Vivado │ │ ├── myproject_csynth.rpt │ │ ├── 
myproject_csynth.xml │ │ ├── vivado_hls.app │ │ └── vivado_synth.rpt │ └── oneAPI │ │ ├── loop_attr.ndjson │ │ ├── quartus.ndjson │ │ └── summary.ndjson ├── test_reshape.py ├── test_rnn.py ├── test_sepconv1d.py ├── test_sepconv2d.py ├── test_sequential_parsing_pytorch.py ├── test_serialization.py ├── test_softmax.py ├── test_softsign.py ├── test_sr.py ├── test_stream_clone.py ├── test_time_distributed.py ├── test_trace.py ├── test_transpose_concat.py ├── test_types.py ├── test_upsampling.py ├── test_upsampling_pytorch.py ├── test_weight_writer.py ├── test_writer_config.py └── test_zeropadding.py ├── pytorch-models.txt └── pytorch-to-hls.sh /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Something isn't working as expected 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | ## Prerequisites 12 | Please make sure to check off these prerequisites before submitting a bug report. 13 | - [ ] Test that the bug appears on the current version of the master branch. Make sure to include the commit hash of the commit you checked out. 14 | - [ ] Check that the issue hasn't already been reported by looking through the currently open issues. 15 | - [ ] If there are steps to reproduce the problem, make sure to write them down below. 16 | - [ ] If relevant, please include the hls4ml project files that were created directly before and/or after the bug. 17 | 18 | ## Quick summary 19 | Please give a brief and concise description of the bug. 20 | 21 | ## Details 22 | Please add to the following sections to describe the bug as accurately as possible. 23 | 24 | ### Steps to Reproduce 25 | Add what needs to be done to reproduce the bug. Add *commented* code examples and make sure to include the original model files / code, and the commit hash you are working on. 26 | 27 | 1. Clone the hls4ml repository 28 | 2. Check out the master branch, with commit hash: [...] 29 | 3. Run conversion [...] on model file with code [...] 30 | 4. [Further steps ...] 31 | 32 | ### Expected behavior 33 | Please add a brief description of what you expected to happen. 34 | 35 | ### Actual behavior 36 | Describe what actually happens instead. 37 | 38 | ## Optional 39 | 40 | ### Possible fix 41 | If you already know where the issue stems from, or you have a hint, please let us know. 42 | 43 | ### Additional context 44 | Add any other context about the problem here. 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Talk and engage with the community 4 | url: https://github.com/fastmachinelearning/hls4ml/discussions/categories/general 5 | about: Check out the GitHub discussions page for hls4ml. This is the best way to get in touch with us, in particular if you have a question about hls4ml or a general problem that is likely not a bug. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for hls4ml 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Prerequisites 11 | Please talk to us before creating a new feature request, 
so that you can check that the idea is not already in active development. 12 | 13 | You can present your idea at the GitHub discussions page for hls4ml: https://github.com/fastmachinelearning/hls4ml/discussions/categories/ideas 14 | 15 | Even if an idea is already being worked on, you can still create a feature request 16 | if you would like to open a discussion about the feature or want to contribute to it. 17 | 18 | ## Details 19 | Please add to the following sections to describe the feature as accurately as possible. 20 | 21 | ### New behavior 22 | Please add a brief and concise description of what you would like to happen in hls4ml in the future. 23 | 24 | ### Motivation 25 | Please tell us why this feature is important to the community. 26 | 27 | ### Parts of hls4ml being affected 28 | Please describe which parts of hls4ml would be affected by this feature. 29 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | > :memo: Please include a summary of the change. 4 | > 5 | > * Please also include relevant motivation and context. 6 | > * List any dependencies that are required for this change. 7 | 8 | ## Type of change 9 | 10 | For a new feature or function, please create an issue first to discuss it 11 | with us before submitting a pull request. 12 | 13 | Note: Please delete options that are not relevant. 14 | 15 | - [ ] Bug fix (non-breaking change that fixes an issue) 16 | - [ ] Documentation update 17 | - [ ] New feature (non-breaking change which adds functionality) 18 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 19 | - [ ] A new research paper code implementation 20 | - [ ] Other (Specify) 21 | 22 | ## Tests 23 | 24 | > :memo: Please describe the tests that you ran to verify your changes. 25 | > 26 | > * Provide instructions so we can reproduce. 27 | > * Please also list any relevant details for your test configuration. 28 | 29 | **Test Configuration**: 30 | 31 | ## Checklist 32 | 33 | - [ ] I have read the [guidelines for contributing](https://github.com/fastmachinelearning/hls4ml/blob/main/CONTRIBUTING.md). 34 | - [ ] I have commented my code, particularly in hard-to-understand areas. 35 | - [ ] I have made corresponding changes to the documentation. 36 | - [ ] My changes generate no new warnings. 37 | - [ ] I have installed and run `pre-commit` on the files I edited or added. 38 | - [ ] I have added tests that prove my fix is effective or that my feature works. 
39 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | -------------------------------------------------------------------------------- /.github/workflows/build-sphinx.yml: -------------------------------------------------------------------------------- 1 | name: build-sphinx 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 16 | ref: ${{ github.event.pull_request.head.sha }} 17 | - name: Allow for file ownership conflicts with Docker and GitHub Actions 18 | run: git config --global --add safe.directory '*' 19 | - uses: jmduarte/sphinx-action@main 20 | env: 21 | SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | with: 23 | pre-build-command: "git config --system --add safe.directory '*'" 24 | docs-folder: "docs/" 25 | - name: Commit Documentation Changes 26 | run: | 27 | git clone https://github.com/fastmachinelearning/hls4ml.git --branch gh-pages --single-branch gh-pages 28 | cp -r docs/_build/html/* gh-pages/ 29 | cd gh-pages 30 | touch .nojekyll 31 | git config --local user.email "action@github.com" 32 | git config --local user.name "GitHub Action" 33 | git add . 34 | git commit -m "Update Sphinx Documentation" -a || true 35 | - name: Push Documentation Changes 36 | uses: ad-m/github-push-action@master 37 | with: 38 | branch: gh-pages 39 | directory: gh-pages 40 | github_token: ${{ secrets.PERSONAL_TOKEN }} 41 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: Run pre-commit 2 | 3 | on: 4 | pull_request: 5 | branches: [ main ] 6 | push: 7 | branches: [ main ] 8 | 9 | jobs: 10 | pre-commit: 11 | name: Format 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: [3.8] 16 | 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | with: 21 | submodules: recursive 22 | 23 | - name: Pre-commit 24 | uses: pre-commit/action@v3.0.1 25 | with: 26 | extra_args: --hook-stage manual --all-files 27 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yml: -------------------------------------------------------------------------------- 1 | name: 📦 Packaging release to PyPI 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | branches: [main] 6 | release: 7 | types: [published] 8 | 9 | jobs: 10 | release: 11 | name: Upload new release to PyPI 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout source 15 | uses: actions/checkout@v4 16 | with: 17 | submodules: recursive 18 | fetch-depth: 0 19 | 20 | - name: Build SDist and Wheel 21 | run: pipx run build --sdist --wheel 22 | 23 | - uses: actions/upload-artifact@v4 24 | with: 25 | path: dist/*.* 26 | 27 | - name: Publish 📦 to PyPI 28 | if: startsWith(github.ref, 'refs/tags') 29 | uses: pypa/gh-action-pypi-publish@release/v1 30 | with: 31 | password: ${{ secrets.PYPI_PASSWORD }} 32 | -------------------------------------------------------------------------------- /.github/workflows/test-sphinx.yml: 
-------------------------------------------------------------------------------- 1 | name: test-sphinx 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | fetch-depth: 0 16 | ref: ${{ github.event.pull_request.head.sha }} 17 | - name: Allow for file ownership conflicts with Docker and GitHub Actions 18 | run: git config --global --add safe.directory '*' 19 | - uses: jmduarte/sphinx-action@main 20 | env: 21 | SPHINX_GITHUB_CHANGELOG_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | with: 23 | pre-build-command: "git config --system --add safe.directory '*'" 24 | docs-folder: "docs/" 25 | - uses: actions/upload-artifact@v4 26 | with: 27 | path: docs/_build/html 28 | -------------------------------------------------------------------------------- /.github/workflows/update-branch-on-pr.yml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request_target: 3 | types: [labeled] 4 | branches: [main] 5 | 6 | name: Update branch on PR from fork 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout repo 12 | uses: actions/checkout@v4 13 | if: ${{ github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name && github.event.label.name == 'please test' }} 14 | with: 15 | ref: ${{ github.event.pull_request.head.sha }} 16 | - name: Push changes 17 | if: ${{ github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name && github.event.label.name == 'please test' }} 18 | run: | 19 | git checkout -b pr/${{ github.event.pull_request.number }} 20 | git push --force origin pr/${{ github.event.pull_request.number }} 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | _version.py 3 | __pycache__ 4 | build/ 5 | dist/ 6 | sdist/ 7 | *.egg-info/ 8 | vivado_prj 9 | .vscode 10 | my-hls-test 11 | *.tar.gz 12 | docs/_build 13 | docs/autodoc/* 14 | hls4mlprj_* 15 | *~ 16 | *.ipynb_checkpoints/ 17 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - generate 3 | - trigger 4 | - test 5 | 6 | generator: 7 | stage: generate 8 | image: python:3.8-alpine 9 | variables: 10 | N_TESTS_PER_YAML: 4 11 | tags: 12 | - k8s-default 13 | before_script: 14 | - pip install pyyaml 15 | script: 16 | - cd test/pytest 17 | - python generate_ci_yaml.py 18 | artifacts: 19 | paths: 20 | - test/pytest/pytests.yml 21 | 22 | pytests: 23 | stage: trigger 24 | trigger: 25 | include: 26 | - local: test/pytest/ci-template.yml 27 | - artifact: test/pytest/pytests.yml 28 | job: generator 29 | strategy: depend 30 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "example-models"] 2 | path = example-models 3 | url = https://github.com/hls-fpga-machine-learning/example-models.git 4 | [submodule "hls4ml/templates/catapult/ac_types"] 5 | path = hls4ml/templates/catapult/ac_types 6 | url = https://github.com/hlslibs/ac_types.git 7 | [submodule "hls4ml/templates/catapult/ac_simutils"] 8 | path = hls4ml/templates/catapult/ac_simutils 9 | url = https://github.com/hlslibs/ac_simutils.git 10 
| [submodule "hls4ml/templates/catapult/ac_math"] 11 | path = hls4ml/templates/catapult/ac_math 12 | url = https://github.com/hlslibs/ac_math.git 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: (^hls4ml\/templates\/(vivado|quartus)\/(ap_types|ac_types)\/|^test/pytest/test_report/) 2 | 3 | repos: 4 | - repo: https://github.com/psf/black 5 | rev: 25.1.0 6 | hooks: 7 | - id: black 8 | language_version: python3 9 | args: ['--line-length=125', 10 | '--skip-string-normalization'] 11 | 12 | - repo: https://github.com/tox-dev/pyproject-fmt 13 | rev: v2.6.0 14 | hooks: 15 | - id: pyproject-fmt 16 | 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v5.0.0 19 | hooks: 20 | - id: check-added-large-files 21 | - id: check-case-conflict 22 | - id: check-merge-conflict 23 | - id: check-symlinks 24 | - id: check-toml 25 | - id: check-yaml 26 | - id: debug-statements 27 | - id: end-of-file-fixer 28 | - id: mixed-line-ending 29 | - id: requirements-txt-fixer 30 | - id: trailing-whitespace 31 | 32 | - repo: https://github.com/PyCQA/isort 33 | rev: 6.0.1 34 | hooks: 35 | - id: isort 36 | 37 | - repo: https://github.com/asottile/pyupgrade 38 | rev: v3.20.0 39 | hooks: 40 | - id: pyupgrade 41 | args: ["--py310-plus"] 42 | 43 | - repo: https://github.com/pycqa/flake8 44 | rev: 7.2.0 45 | hooks: 46 | - id: flake8 47 | exclude: docs/conf.py 48 | additional_dependencies: [flake8-bugbear, flake8-print] 49 | args: ['--max-line-length=125', # github viewer width 50 | '--extend-ignore=E203,T201'] # E203 is not PEP8 compliant 51 | 52 | - repo: https://github.com/mgedmin/check-manifest 53 | rev: "0.50" 54 | hooks: 55 | - id: check-manifest 56 | stages: [manual] 57 | 58 | - repo: https://github.com/jmduarte/p-clang-format 59 | rev: "v1.0.4" 60 | hooks: 61 | - id: p-clang-format 62 | types_or: [c++, c, cuda] 63 | ci: 64 | autofix_commit_msg: '[pre-commit.ci] auto fixes from pre-commit hooks' 65 | autofix_prs: false # default is true 66 | autoupdate_branch: 'main' 67 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' 68 | autoupdate_schedule: weekly 69 | skip: [] 70 | submodules: true 71 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "Please cite the following works when using this software." 3 | type: software 4 | authors: 5 | - given-names: "FastML Team" 6 | title: "hls4ml" 7 | version: "v1.1.0" 8 | date-released: "2025-03-17" 9 | doi: 10.5281/zenodo.1201549 10 | repository-code: "https://github.com/fastmachinelearning/hls4ml" 11 | url: "https://fastmachinelearning.org/hls4ml" 12 | keywords: 13 | - python 14 | - machine-learning 15 | - FPGA 16 | - physics 17 | - tensorflow 18 | - pytorch 19 | - onnx 20 | - qonnx 21 | license: "Apache-2.0" 22 | abstract: | 23 | hls4ml is an open-source software-hardware codesign workflow 24 | to interpret and translate machine learning algorithms for 25 | implementations in hardware, including FPGAs and ASICs. 
26 | references: 27 | - type: article 28 | title: "Fast inference of deep neural networks on FPGAs with hls4ml" 29 | authors: 30 | - family-names: "Duarte" 31 | given-names: "Javier" 32 | - family-names: "Han" 33 | given-names: "Song" 34 | - family-names: "Harris" 35 | given-names: "Philip" 36 | - family-names: "Jindariani" 37 | given-names: "Sergo" 38 | - family-names: "Kreinar" 39 | given-names: "Edward" 40 | - family-names: "Kreis" 41 | given-names: "Benjamin" 42 | - family-names: "Ngadiuba" 43 | given-names: "Jennifer" 44 | - family-names: "Pierini" 45 | given-names: "Maurizio" 46 | - family-names: "Rivera" 47 | given-names: "Ryan" 48 | - family-names: "Tran" 49 | given-names: "Nhan" 50 | - family-names: "Wu" 51 | given-names: "Zhenbin" 52 | journal: "JINST" 53 | volume: "13" 54 | start: "P07027" 55 | doi: "10.1088/1748-0221/13/07/P07027" 56 | year: "2018" 57 | number: "07" 58 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.md CONTRIBUTING.md CITATION.cff pyproject.toml .clang-format 2 | graft example-models 3 | graft test 4 | graft contrib 5 | recursive-include hls4ml/templates * 6 | recursive-include hls4ml *.py 7 | recursive-include hls4ml/contrib * 8 | global-exclude .git .gitmodules .gitlab-ci.yml *.pyc 9 | include hls4ml/backends/vivado_accelerator/supported_boards.json 10 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @sphinx-apidoc -f -T -o autodoc/ ../hls4ml 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /docs/advanced/bramfactor.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | Loading weights from external BRAM 3 | ================================== 4 | 5 | .. note:: 6 | This feature is being evaluated for re-implementation. We welcome feedback from users on how to make the implementation more flexible. 7 | 8 | ``hls4ml`` can optionally store weights in BRAMs external to the design. This is supported in the Vivado/Vitis and Catapult backends. It is the responsibility of the user to ensure the weights are properly loaded during the operation of the design. 9 | 10 | The feature works as a threshold, exposed through a ``BramFactor`` config parameter. Layers with more weights than the threshold will be exposed through a BRAM interface. Consider the following code: 11 | 12 | .. 
code-block:: Python 13 | 14 | model = tf.keras.models.Sequential() 15 | model.add(Dense(10, activation="relu", input_shape=(12,), name="dense_1")) 16 | model.add(Dense(20, activation="relu", name="dense_2")) 17 | model.add(Dense(5, activation="softmax", name="dense_3")) 18 | model.compile(optimizer='adam', loss='mse') 19 | 20 | config = hls4ml.utils.config_from_keras_model(model) 21 | config["Model"]["Strategy"] = "Resource" 22 | config["Model"]["BramFactor"] = 100 23 | 24 | hls_model = hls4ml.converters.convert_from_keras_model( 25 | model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend 26 | ) 27 | 28 | Having set ``BramFactor=100``, only layers with more than 100 weights will be exposed as external BRAM, in this case layers ``dense_1`` and ``dense_2``. ``BramFactor`` can currently only be set at the model level. The generated code will now have the weights as part of the interface. 29 | 30 | .. code-block:: C++ 31 | 32 | void myproject( 33 | hls::stream<input_t> &dense_1_input, 34 | hls::stream<result_t> &layer7_out, 35 | model_default_t w2[120], 36 | model_default_t w4[200] 37 | ) { 38 | #pragma HLS INTERFACE axis port=dense_1_input,layer7_out 39 | #pragma HLS INTERFACE bram port=w2,w4 40 | ... 41 | 42 | When integrating the design, users can use the exposed interface to implement a weight reloading scheme. 43 | -------------------------------------------------------------------------------- /docs/api/serialization.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | Saving/Loading hls4ml models 3 | ============================ 4 | 5 | ``hls4ml`` model objects (instances of the ``ModelGraph`` class) can be saved to disk and loaded at a later stage. The saved model doesn't require the original Keras/PyTorch/ONNX model for loading. 6 | 7 | To save/load a model, use the following API: 8 | 9 | .. code-block:: python 10 | 11 | from hls4ml.converters import convert_from_keras_model, load_saved_model 12 | 13 | model = convert_from_keras_model(keras_model, ...) 14 | 15 | # Save a model to some path 16 | model.save('some/path/my_hls4ml_model.fml') 17 | 18 | # Load a model from a file 19 | loaded_model = load_saved_model('some/path/my_hls4ml_model.fml') 20 | 21 | 22 | The saved model will have a ``.fml`` extension, but is in fact a gzipped tar archive. The loaded model can be used in the same way as the original one. This includes modification of certain config parameters, for example the output directory, layer reuse factor, etc. 23 | 24 | Linking with existing project 25 | ============================= 26 | 27 | Once the project has been written to disk with ``ModelGraph.write()``, it can also be linked with at a later stage. Similarly to loading a saved model, this feature allows skipping the conversion step. Additionally, it may be used to test manual changes to the generated project. 28 | 29 | The linking function will create a special instance of ``ModelGraph`` that only allows calls to ``compile()``, ``predict()`` and ``build()``. Other calls to the ``ModelGraph`` instance are disabled. 30 | 31 | To link a model, use the following API: 32 | 33 | .. code-block:: python 34 | 35 | from hls4ml.converters import convert_from_keras_model, link_existing_project 36 | 37 | model = convert_from_keras_model(keras_model, output_dir='/some/path/', ...) 
38 | 39 | # Generate the project files and write them to some path 40 | model.write() 41 | 42 | # Later on, link this path to the Python runtime 43 | linked_model = link_existing_project('/some/path/') 44 | linked_model.compile() 45 | linked_model.predict(...) 46 | linked_model.build(...) 47 | -------------------------------------------------------------------------------- /docs/backend/catapult.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Catapult 3 | ======== 4 | 5 | Support for the Siemens Catapult HLS compiler was added in ``hls4ml`` version 1.0.0. 6 | 7 | *TODO expand this section* 8 | -------------------------------------------------------------------------------- /docs/backend/quartus.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Quartus 3 | ======= 4 | 5 | .. warning:: 6 | The **Quartus** backend is deprecated and will be removed in a future version. Users should migrate to the **oneAPI** backend. 7 | 8 | The **Quartus** backend of hls4ml is designed for deploying NNs on Intel/Altera FPGAs. It uses the discontinued Intel HLS compiler. The **oneAPI** backend should be preferred for new projects. 9 | The **oneAPI** backend contains the HLS code migrated from this backend, with significantly better io_stream support, though the **oneAPI** backend does not yet support profiling, tracing, 10 | or the BramFactor option supported by the **Quartus** backend. Nevertheless, little or no further development is expected for the **Quartus** backend. 11 | 12 | The **Quartus** backend only implements the ``Resource`` strategy for the layers. There is no ``Latency`` implementation of any of the layers. 13 | -------------------------------------------------------------------------------- /docs/backend/sr.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | SymbolicExpression 3 | ================== 4 | 5 | This backend can be used to implement expressions obtained through symbolic regression tools such as `PySR `_ or `SymbolNet `_. The backend targets Vivado/Vitis HLS and relies on HLS math libraries provided with a licensed installation of these tools. 6 | 7 | *TODO expand this section* 8 | -------------------------------------------------------------------------------- /docs/backend/vitis.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Vivado/Vitis 3 | ============ 4 | 5 | The **Vivado** and **Vitis** backends are intended for use with AMD/Xilinx FPGAs. The **Vivado** backend targets the discontinued ``Vivado HLS`` compiler, while 6 | the **Vitis** backend targets the ``Vitis HLS`` compiler. Both are designed to produce IP for incorporation in ``Vivado`` designs. (See :doc:`VivadoAccelerator ` 7 | for generating easily-deployable models with ``Vivado HLS``.) The ``Vitis`` accelerator flow is not directly supported, though HLS produced with the **Vitis** 8 | backend can be easily incorporated into a Vitis kernel. 9 | 10 | Users should generally use the **Vitis** backend for new designs that target AMD/Xilinx FPGAs; new ``hls4ml`` developments will not necessarily be backported to 11 | the **Vivado** backend. 
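For orientation, the backend is selected at conversion time. The following is a minimal sketch; the part number, clock period and output directory are placeholders, not recommendations:

.. code-block:: python

    import hls4ml

    # 'model' is any supported Keras model; granularity='model' gives one global precision/reuse setting
    config = hls4ml.utils.config_from_keras_model(model, granularity='model')

    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        backend='Vitis',               # or 'Vivado' for the legacy compiler
        part='xcvu13p-flga2577-2-e',   # placeholder FPGA part
        clock_period=5,                # target clock period in ns (placeholder)
        output_dir='my_vitis_prj',
    )
    hls_model.compile()                # builds the C++ bridge for bit-accurate emulation
    # hls_model.build(csim=False, synth=True)  # runs HLS synthesis on the generated project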
12 | -------------------------------------------------------------------------------- /docs/frontend/keras.rst: -------------------------------------------------------------------------------- 1 | ================================ 2 | Keras and its quantized variants 3 | ================================ 4 | 5 | Keras and the quantization library QKeras are well supported in ``hls4ml``. Both Keras v2 (``tf.keras``) and the new Keras v3 are supported. While the Keras v2 support is based on parsing the serialized JSON representation of the model, the Keras v3 support uses direct model inspection. 6 | 7 | Currently, ``hls4ml`` can parse most Keras layers, including core layers, convolutional layers, pooling layers, recurrent layers, merging/reshaping layers and activation layers, implemented via either the sequential or functional API. Notably missing are the attention and normalization layers. The ``Lambda`` layers don't save their state in the serialized format and are thus impossible to parse. In this case, the ``Lambda`` layers can be implemented as custom layers and parsed via the :ref:`Extension API`. 8 | 9 | The ``data_format='channels_first'`` parameter of Keras layers is supported, but not extensively tested. All HLS implementations in ``hls4ml`` are based on the ``channels_last`` data format, so models need to be converted to that format before the HLS code can be emitted. We encourage users of ``channels_first`` to report their experiences to the developers on GitHub. 10 | 11 | 12 | * `QKeras `_ 13 | The equivalent QKeras API and its quantizers are also supported by ``hls4ml``. QKeras is not compatible with Keras v3. Currently, only HGQ2 is compatible with Keras v3 (see below). 14 | * `HGQ `_ 15 | The equivalent HGQ API is also supported. HGQ is not compatible with Keras v3. See `advanced/HGQ <../advanced/hgq.html>`__ for more information. 16 | * `HGQ2 `_ 17 | HGQ2 is based on Keras v3. Its support in hls4ml is currently under development. 18 | 19 | The development team of ``hls4ml`` is currently exploring options for a QKeras alternative and will provide a drop-in replacement API compatible with Keras v3.
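To make the parsing flow above concrete, here is a minimal sketch; the toy model, layer names, reuse factor and output directory are arbitrary assumptions used only for illustration:

.. code-block:: python

    import numpy as np
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.models import Sequential

    import hls4ml

    # Toy Keras v2 model; any supported layer combination is parsed the same way
    model = Sequential([
        Dense(16, activation='relu', input_shape=(8,), name='dense_1'),
        Dense(4, activation='softmax', name='dense_2'),
    ])

    # 'name' granularity exposes per-layer Precision/ReuseFactor settings
    config = hls4ml.utils.config_from_keras_model(model, granularity='name')
    config['LayerName']['dense_1']['ReuseFactor'] = 4

    hls_model = hls4ml.converters.convert_from_keras_model(
        model, hls_config=config, output_dir='hls_prj', io_type='io_parallel'
    )
    hls_model.compile()
    y_hls = hls_model.predict(np.random.rand(10, 8).astype(np.float32))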
20 | -------------------------------------------------------------------------------- /docs/img/act_hls4ml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/act_hls4ml.png -------------------------------------------------------------------------------- /docs/img/act_keras.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/act_keras.png -------------------------------------------------------------------------------- /docs/img/hls4ml_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/hls4ml_logo.png -------------------------------------------------------------------------------- /docs/img/hls4ml_logo_lightgrey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/hls4ml_logo_lightgrey.png -------------------------------------------------------------------------------- /docs/img/hls4ml_logo_navbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/hls4ml_logo_navbar.png -------------------------------------------------------------------------------- /docs/img/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/logo.jpg -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/logo.png -------------------------------------------------------------------------------- /docs/img/nn_map_paper_fig_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/nn_map_paper_fig_2.png -------------------------------------------------------------------------------- /docs/img/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/overview.jpg -------------------------------------------------------------------------------- /docs/img/overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/overview.pdf -------------------------------------------------------------------------------- /docs/img/pynqframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/pynqframe.png 
-------------------------------------------------------------------------------- /docs/img/reuse_factor_paper_fig_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/reuse_factor_paper_fig_8.png -------------------------------------------------------------------------------- /docs/img/weights_hls4ml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/weights_hls4ml.png -------------------------------------------------------------------------------- /docs/img/weights_keras.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/weights_keras.png -------------------------------------------------------------------------------- /docs/img/zynq_interfaces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/docs/img/zynq_interfaces.png -------------------------------------------------------------------------------- /docs/intro/release_notes.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | Release Notes 3 | ======================== 4 | 5 | .. changelog:: 6 | :changelog-url: https://fastmachinelearning.org/hls4ml/release_notes.html 7 | :github: https://github.com/fastmachinelearning/hls4ml/releases/ 8 | :pypi: https://pypi.org/project/hls4ml/ 9 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | . 
2 | setuptools_scm[toml]>=5 3 | sphinx>=3.2.1 4 | sphinx_contributors 5 | sphinx_github_changelog 6 | sphinx_rtd_theme 7 | toposort>=1.5.0 8 | -------------------------------------------------------------------------------- /hls4ml/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml import converters, report, utils # noqa: F401, E402 2 | 3 | try: 4 | from ._version import version as __version__ 5 | from ._version import version_tuple 6 | except ImportError: 7 | __version__ = "unknown version" 8 | version_tuple = (0, 0, "unknown version") 9 | 10 | 11 | def reseed(newseed): 12 | print(f'\npytest-randomly: reseed with {newseed}') 13 | try: 14 | import tensorflow 15 | 16 | tensorflow.random.set_seed(newseed) 17 | except ImportError: 18 | print('\nTensorFlow seed not set') 19 | try: 20 | import torch 21 | 22 | torch.manual_seed(newseed) 23 | except ImportError: 24 | print('\nPyTorch seed not set') 25 | -------------------------------------------------------------------------------- /hls4ml/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.backend import Backend, get_available_backends, get_backend, register_backend # noqa: F401 2 | from hls4ml.backends.fpga.fpga_backend import FPGABackend # noqa: F401 3 | from hls4ml.backends.oneapi.oneapi_backend import OneAPIBackend 4 | from hls4ml.backends.quartus.quartus_backend import QuartusBackend 5 | from hls4ml.backends.symbolic.symbolic_backend import SymbolicExpressionBackend 6 | from hls4ml.backends.vivado.vivado_backend import VivadoBackend 7 | from hls4ml.backends.vivado_accelerator.vivado_accelerator_backend import VivadoAcceleratorBackend 8 | from hls4ml.backends.vivado_accelerator.vivado_accelerator_config import VivadoAcceleratorConfig # noqa: F401 9 | 10 | from hls4ml.backends.catapult.catapult_backend import CatapultBackend # isort: skip 11 | 12 | from hls4ml.backends.vitis.vitis_backend import VitisBackend # isort: skip 13 | 14 | register_backend('Vivado', VivadoBackend) 15 | register_backend('VivadoAccelerator', VivadoAcceleratorBackend) 16 | register_backend('Vitis', VitisBackend) 17 | register_backend('Quartus', QuartusBackend) 18 | register_backend('Catapult', CatapultBackend) 19 | register_backend('SymbolicExpression', SymbolicExpressionBackend) 20 | register_backend('oneAPI', OneAPIBackend) 21 | -------------------------------------------------------------------------------- /hls4ml/backends/catapult/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/catapult/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/catapult/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/catapult/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/catapult/passes/quantization_templates.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.backend import get_backend 2 | from hls4ml.backends.catapult.passes.core_templates import ( 3 | batchnorm_config_template, 4 | batchnorm_function_template, 5 | batchnorm_include_list, 6 
| ) 7 | from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate 8 | from hls4ml.model.optimizer.passes.qkeras import ApplyAlpha 9 | 10 | 11 | class ApplyAlphaConfigTemplate(LayerConfigTemplate): 12 | def __init__(self): 13 | super().__init__(ApplyAlpha) 14 | self.template = batchnorm_config_template 15 | 16 | def format(self, node): 17 | params = self._default_config_params(node) 18 | params['n_in'] = node.get_input_variable().size_cpp() 19 | params['product_type'] = get_backend('catapult').product_type( 20 | node.get_input_variable().type.precision, node.get_weights('scale').type.precision 21 | ) 22 | 23 | return self.template.format(**params) 24 | 25 | 26 | class ApplyAlphaFunctionTemplate(FunctionCallTemplate): 27 | def __init__(self): 28 | super().__init__(ApplyAlpha, include_header=batchnorm_include_list) 29 | self.template = batchnorm_function_template 30 | 31 | def format(self, node): 32 | params = self._default_function_params(node) 33 | params['scale'] = node.get_weights('scale').name 34 | params['bias'] = node.get_weights('bias').name 35 | 36 | return self.template.format(**params) 37 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/fpga/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/fpga/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/bram_weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from hls4ml.backends.fpga.fpga_types import BramWeightVariableConverter 4 | from hls4ml.model.optimizer import OptimizerPass 5 | 6 | 7 | class RegisterBramWeights(OptimizerPass): 8 | def match(self, node): 9 | return len(node.weights) > 0 10 | 11 | def transform(self, model, node): 12 | bramport_size = model.config.get_bram_size(node) 13 | for w_name, w_var in node.weights.items(): 14 | if ('storage' in w_var.__dict__ and w_var.storage != 'bram') and np.prod(w_var.shape) > bramport_size: 15 | new_weight = BramWeightVariableConverter.convert(w_var) 16 | node.set_attr(w_name, new_weight) 17 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/embedding.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate 2 | from hls4ml.model.layers import Embedding 3 | 4 | embed_config_template = """struct config{index} : nnet::embed_config {{ 5 | static const unsigned n_in = {n_in}; 6 | static const unsigned n_out = {n_out}; 7 | static const unsigned vocab_size = {vocab_size}; 8 | static const unsigned io_type = nnet::{iotype}; 9 | static const unsigned reuse_factor = {reuse}; 10 | typedef {embeddings_t.name} embeddings_t; 11 | }};\n""" 12 | 13 | embed_function_template = 'nnet::embedding<{input_t}, {output_t}, {config}>({input}, {output}, {e});' 14 | 15 | embed_include_list = ['nnet_utils/nnet_embed.h', 
'nnet_utils/nnet_embed_stream.h'] 16 | 17 | 18 | class EmbeddingConfigTemplate(LayerConfigTemplate): 19 | def __init__(self): 20 | super().__init__(Embedding) 21 | self.template = embed_config_template 22 | 23 | def format(self, node): 24 | params = self._default_config_params(node) 25 | return self.template.format(**params) 26 | 27 | 28 | class EmbeddingFunctionTemplate(FunctionCallTemplate): 29 | def __init__(self): 30 | super().__init__(Embedding, include_header=embed_include_list) 31 | self.template = embed_function_template 32 | 33 | def format(self, node): 34 | params = self._default_function_params(node) 35 | params['e'] = node.get_weights('embeddings').name 36 | 37 | return self.template.format(**params) 38 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/final_reshape.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Reshape 2 | from hls4ml.model.optimizer import OptimizerPass 3 | 4 | 5 | class RemoveFinalReshape(OptimizerPass): 6 | '''Remove reshape if final layer''' 7 | 8 | def match(self, node): 9 | # match if reshape is final node 10 | return isinstance(node, Reshape) and not node.get_output_nodes() 11 | 12 | def transform(self, model, node): 13 | if model.config.get_config_value('IOType') == 'io_parallel': 14 | print('WARNING: Final layer is a Reshape, which does not affect the output for io_parallel; removing it') 15 | model.remove_node(node) 16 | return True 17 | elif model.config.get_config_value('IOType') == 'io_stream': 18 | print( 19 | 'WARNING: Final layer is a Reshape, which may incur a large resource cost for io_stream; ' 20 | 'consider removing it' 21 | ) 22 | return False 23 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/inplace_parallel_reshape.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Reshape 2 | from hls4ml.model.optimizer import OptimizerPass 3 | from hls4ml.model.types import InplaceTensorVariable 4 | 5 | 6 | class InplaceParallelReshape(OptimizerPass): 7 | """ 8 | Replaces the output variable of Reshape layer with an inplace variable when using io_parallel. 9 | 10 | This is done because in io_parallel tensors are stored as flat arrays, requiring no reshaping. 11 | """ 12 | 13 | def match(self, node): 14 | if not isinstance(node, Reshape): 15 | return False 16 | return node.model.config.get_config_value('IOType') == 'io_parallel' 17 | 18 | def transform(self, model, node): 19 | outvar = node.get_output_variable() 20 | invar = node.get_input_variable() 21 | newoutvar = InplaceTensorVariable(outvar, invar) 22 | node.set_attr(node.outputs[0], newoutvar) 23 | if node.name in model.outputs: 24 | prev_node = node.get_input_node() 25 | assert ( 26 | prev_node.name not in model.outputs 27 | ), f"Cannot output node {prev_node.name}: reshape is a no-op in io_parallel.\ 28 | As a result, the previous node {prev_node.name}'s output will be used as the\ 29 | output. However, this node is already an output." 
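The reshape passes above (and most files in this directory) follow the same two-method OptimizerPass contract: `match` selects a node, and `transform` edits the graph and returns `True` only when the graph actually changed. As a minimal sketch of that contract, a hypothetical pass that drops a Reshape whose output shape equals its input shape might look like the following; the class and its rule are illustrative only, and the only APIs assumed are the ones already used by the passes above.

```python
from hls4ml.model.layers import Reshape
from hls4ml.model.optimizer import OptimizerPass


class RemoveIdentityReshape(OptimizerPass):
    """Hypothetical pass: drop a Reshape that leaves the tensor shape unchanged."""

    def match(self, node):
        if not isinstance(node, Reshape):
            return False
        return node.get_input_variable().shape == node.get_output_variable().shape

    def transform(self, model, node):
        model.remove_node(node)  # same graph-editing call used by the passes above
        return True              # graph changed, so the optimizer loop re-runs
```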
30 | model.outputs = [name if name != node.name else prev_node.name for name in model.outputs] 31 | return False 32 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/inplace_stream_flatten.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Reshape 2 | from hls4ml.model.optimizer import OptimizerPass 3 | from hls4ml.model.types import InplaceTensorVariable 4 | 5 | 6 | class InplaceStreamFlatten(OptimizerPass): 7 | """ 8 | Replaces the output variable of Reshape (flatten) layer with an inplace variable when using io_stream. 9 | 10 | This optimizer avoids the expensive repacking of the stream when Reshape layer flattens the tensor to 1d. 11 | """ 12 | 13 | def match(self, node): 14 | # Layers require flatten data can gather it from the stream, no need for repacking. 15 | # Reshape acts as a Flatten layer when the result has 1 dimension. Make it a inplace tensor if it happens. 16 | 17 | if node.model.config.get_config_value('IOType') != 'io_stream': 18 | return False 19 | if not (isinstance(node, Reshape) and len(node.get_output_variable().shape) == 1): 20 | # If is not flatten 21 | return False 22 | if node.name in node.model.outputs: 23 | # If used as model output. Output shape shall be preserved in this case. 24 | return False 25 | return True 26 | 27 | def transform(self, model, node): 28 | outvar = node.get_output_variable() 29 | invar = node.get_input_variable() 30 | newoutvar = InplaceTensorVariable(outvar, invar) 31 | node.set_attr(node.outputs[0], newoutvar) 32 | return False 33 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/remove_softmax.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Softmax 2 | from hls4ml.model.optimizer.optimizer import OptimizerPass 3 | 4 | 5 | class SkipSoftmax(OptimizerPass): 6 | def match(self, node): 7 | is_softmax = isinstance(node, Softmax) 8 | remove_softmax = node.get_attr('skip', False) 9 | return is_softmax and remove_softmax 10 | 11 | def transform(self, model, node): 12 | model.remove_node(node) 13 | return True 14 | -------------------------------------------------------------------------------- /hls4ml/backends/fpga/passes/xnor_pooling.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import GlobalPooling1D, GlobalPooling2D, Pooling1D, Pooling2D 2 | from hls4ml.model.optimizer import OptimizerPass 3 | from hls4ml.model.types import XnorPrecisionType 4 | 5 | 6 | class XnorPooling(OptimizerPass): 7 | ''' 8 | For correct behavior, for MaxPooling and similar, for XnorPrecisionType, have to propagate 9 | the type to the output. 
10 | ''' 11 | 12 | def match(self, node): 13 | if isinstance(node, (Pooling1D, Pooling2D, GlobalPooling1D, GlobalPooling2D)) and node.get_attr('pool_op') == 'Max': 14 | return isinstance(node.get_input_variable().type.precision, XnorPrecisionType) and not isinstance( 15 | node.get_output_variable().type.precision, XnorPrecisionType 16 | ) 17 | return False 18 | 19 | def transform(self, model, node): 20 | outvar = node.get_output_variable() 21 | outvar.type.precision = XnorPrecisionType() 22 | return True 23 | -------------------------------------------------------------------------------- /hls4ml/backends/oneapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/oneapi/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/oneapi/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/oneapi/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/oneapi/passes/clone_templates.py: -------------------------------------------------------------------------------- 1 | """The clone templates in the fpga backend are not enough for oneAPI, so this adds the missing parts""" 2 | 3 | from hls4ml.backends.fpga.passes.clone import Clone 4 | from hls4ml.backends.oneapi.oneapi_template import StreamFunctionCallTemplate, TaskSequenceTemplate 5 | 6 | clone_stream_function_template = '{name}.async();' 7 | 8 | 9 | class CloneTaskSequenceTemplate(TaskSequenceTemplate): 10 | def __init__(self): 11 | super().__init__(Clone) 12 | 13 | def format(self, node): 14 | params = self._default_function_params(node) 15 | for i in range(len(node.outputs)): 16 | params[f'output{i + 1}_pipe'] = node.variables[node.outputs[i]].pipe_name 17 | 18 | output_pipes = ', '.join([f'{{output{i + 1}_pipe}}' for i in range(len(node.outputs))]) 19 | 20 | template = f'task_sequence> {{name}};' 21 | return template.format(**params) 22 | 23 | 24 | class CloneStreamFunctionTemplate(StreamFunctionCallTemplate): 25 | def __init__(self): 26 | super().__init__(Clone) 27 | self.template = clone_stream_function_template 28 | 29 | def format(self, node): 30 | params = self._default_function_params(node) 31 | return self.template.format(**params) 32 | -------------------------------------------------------------------------------- /hls4ml/backends/oneapi/passes/embedding_templates.py: -------------------------------------------------------------------------------- 1 | """ 2 | These are the stream oneAPI templates for embedding layers. The io_parallel ones are in backends/fpga/passes/embedding.py. 
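The string constants in these template passes are ordinary Python format strings; each `format(node)` method simply substitutes node-specific names into a C++ snippet via `self.template.format(**params)`. A toy illustration of that substitution, using the clone template defined above with a made-up layer name:

```python
# Toy illustration only: in hls4ml the params dict comes from _default_function_params(node).
clone_stream_function_template = '{name}.async();'

params = {'name': 'clone_layer2'}
print(clone_stream_function_template.format(**params))  # -> clone_layer2.async();
```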
3 | """ 4 | 5 | from hls4ml.backends.oneapi.oneapi_template import StreamFunctionCallTemplate, TaskSequenceTemplate 6 | from hls4ml.model.layers import Embedding 7 | 8 | embed_task_sequence_template = 'task_sequence> {name};' 9 | embed_stream_function_template = '{name}.async({e});' 10 | 11 | 12 | class EmbeddingTaskSequenceTemplate(TaskSequenceTemplate): 13 | def __init__(self): 14 | super().__init__(Embedding) 15 | self.template = embed_task_sequence_template 16 | 17 | def format(self, node): 18 | params = self._default_function_params(node) 19 | 20 | return self.template.format(**params) 21 | 22 | 23 | class EmbeddingStreamFunctionTemplate(StreamFunctionCallTemplate): 24 | def __init__(self): 25 | super().__init__(Embedding) 26 | self.template = embed_stream_function_template 27 | 28 | def format(self, node): 29 | params = self._default_function_params(node) 30 | params['e'] = node.get_weights('embeddings').name 31 | 32 | return self.template.format(**params) 33 | -------------------------------------------------------------------------------- /hls4ml/backends/quartus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/quartus/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/quartus/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/quartus/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/quartus/passes/quantization_templates.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.backend import get_backend 2 | from hls4ml.backends.quartus.passes.core_templates import ( 3 | batchnorm_config_template, 4 | batchnorm_function_template, 5 | batchnorm_include_list, 6 | ) 7 | from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate 8 | from hls4ml.model.optimizer.passes.qkeras import ApplyAlpha 9 | 10 | 11 | class ApplyAlphaConfigTemplate(LayerConfigTemplate): 12 | def __init__(self): 13 | super().__init__(ApplyAlpha) 14 | self.template = batchnorm_config_template 15 | 16 | def format(self, node): 17 | params = self._default_config_params(node) 18 | params['n_in'] = node.get_input_variable().size_cpp() 19 | params['product_type'] = get_backend('quartus').product_type( 20 | node.get_input_variable().type.precision, node.get_weights('scale').type.precision 21 | ) 22 | 23 | return self.template.format(**params) 24 | 25 | 26 | class ApplyAlphaFunctionTemplate(FunctionCallTemplate): 27 | def __init__(self): 28 | super().__init__(ApplyAlpha, include_header=batchnorm_include_list) 29 | self.template = batchnorm_function_template 30 | 31 | def format(self, node): 32 | params = self._default_function_params(node) 33 | params['scale'] = node.get_weights('scale').name 34 | params['bias'] = node.get_weights('bias').name 35 | 36 | return self.template.format(**params) 37 | -------------------------------------------------------------------------------- /hls4ml/backends/symbolic/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/symbolic/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/symbolic/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/symbolic/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/symbolic/passes/validate_lut.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import SymbolicExpression 2 | from hls4ml.model.optimizer import ConfigurableOptimizerPass 3 | 4 | 5 | class ValidateUserLookupTable(ConfigurableOptimizerPass): 6 | '''Validates the precision of user-defined LUTs is adequate''' 7 | 8 | def __init__(self): 9 | self.raise_exception = False 10 | 11 | def match(self, node): 12 | return isinstance(node, SymbolicExpression) and len(node.get_attr('lut_functions', [])) > 0 13 | 14 | def transform(self, model, node): 15 | precision = node.get_output_variable().type.precision 16 | range = 2 ** (precision.integer - precision.signed) 17 | frac_step = 1 / 2**precision.fractional 18 | 19 | for lut_fn in node.get_attr('lut_functions'): 20 | lut_range = lut_fn.range_end - lut_fn.range_start 21 | lut_step = lut_range / lut_fn.table_size 22 | 23 | if lut_step < frac_step: 24 | msg = f'LUT function {lut_fn.name} requires more fractional bits.' 25 | if self.raise_exception: 26 | raise Exception(msg) 27 | else: 28 | print('WARNING:', msg) 29 | 30 | if lut_range > range: 31 | msg = f'LUT function {lut_fn.name} requires more integer bits.' 
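To make the two checks above concrete, here is the same arithmetic with made-up numbers in plain Python (no hls4ml precision types): a signed output precision with 6 integer bits (sign included) and 10 fractional bits, checked against a user LUT spanning [0, 64) with 2048 entries.

```python
# Worked example of the ValidateUserLookupTable checks; all values are made up.
signed, integer_bits, fractional_bits = 1, 6, 10   # stand-in for the output precision
covered_range = 2 ** (integer_bits - signed)       # 32
frac_step = 1 / 2 ** fractional_bits               # ~0.000977

range_start, range_end, table_size = 0.0, 64.0, 2048  # stand-in for one lut_fn
lut_range = range_end - range_start                # 64.0
lut_step = lut_range / table_size                  # 0.03125

print(lut_step < frac_step)       # False -> enough fractional bits for the LUT step
print(lut_range > covered_range)  # True  -> triggers 'requires more integer bits'
```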
32 | if self.raise_exception: 33 | raise Exception(msg) 34 | else: 35 | print('WARNING:', msg) 36 | 37 | return False 38 | -------------------------------------------------------------------------------- /hls4ml/backends/vitis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vitis/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vitis/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vitis/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vivado/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vivado/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado/passes/quantization_templates.py: -------------------------------------------------------------------------------- 1 | from hls4ml.backends.backend import get_backend 2 | from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate 3 | from hls4ml.backends.vivado.passes.core_templates import ( 4 | batchnorm_config_template, 5 | batchnorm_function_template, 6 | batchnorm_include_list, 7 | ) 8 | from hls4ml.model.optimizer.passes.qkeras import ApplyAlpha 9 | 10 | 11 | class ApplyAlphaConfigTemplate(LayerConfigTemplate): 12 | def __init__(self): 13 | super().__init__(ApplyAlpha) 14 | self.template = batchnorm_config_template 15 | 16 | def format(self, node): 17 | params = self._default_config_params(node) 18 | params['n_in'] = node.get_input_variable().size_cpp() 19 | params['product_type'] = get_backend('vivado').product_type( 20 | node.get_input_variable().type.precision, node.get_weights('scale').type.precision 21 | ) 22 | 23 | return self.template.format(**params) 24 | 25 | 26 | class ApplyAlphaFunctionTemplate(FunctionCallTemplate): 27 | def __init__(self): 28 | super().__init__(ApplyAlpha, include_header=batchnorm_include_list) 29 | self.template = batchnorm_function_template 30 | 31 | def format(self, node): 32 | params = self._default_function_params(node) 33 | params['scale'] = node.get_weights('scale').name 34 | params['bias'] = node.get_weights('bias').name 35 | 36 | return self.template.format(**params) 37 | -------------------------------------------------------------------------------- /hls4ml/backends/vivado_accelerator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vivado_accelerator/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado_accelerator/passes/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/backends/vivado_accelerator/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/backends/vivado_accelerator/supported_boards.json: -------------------------------------------------------------------------------- 1 | { 2 | "pynq-z2": { 3 | "part": "xc7z020clg400-1", 4 | "tcl_scripts": {"axi_lite": "axi_lite_design.tcl", "axi_stream": "axi_stream_design.tcl"}, 5 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 6 | "c_drivers": {} 7 | }, 8 | "zcu102": { 9 | "part": "xczu9eg-ffvb1156-2-e", 10 | "tcl_scripts": { "axi_stream": "axi_stream_design.tcl"}, 11 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 12 | "c_drivers": {} 13 | }, 14 | "alveo-u50": { 15 | "part": "xcu50-fsvh2104-2-e", 16 | "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, 17 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 18 | "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, 19 | "c_drivers": {} 20 | }, 21 | "alveo-u250": { 22 | "part": "xcu250-figd2104-2L-e", 23 | "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, 24 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 25 | "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, 26 | "c_drivers": {} 27 | }, 28 | "alveo-u200": { 29 | "part": "xcu200-fsgd2104-2-e", 30 | "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, 31 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 32 | "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, 33 | "c_drivers": {} 34 | }, 35 | "alveo-u280": { 36 | "part": "xcu280-fsvh2892-2L-e", 37 | "tcl_scripts": {"axi_stream": "axi_stream_design.tcl"}, 38 | "python_drivers": {"axi_stream": "axi_stream_driver.py"}, 39 | "krnl_rtl_srcs": {"axi_stream": "krnl_rtl_src"}, 40 | "c_drivers": {} 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /hls4ml/cli/__main__.py: -------------------------------------------------------------------------------- 1 | from . import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /hls4ml/contrib/README.md: -------------------------------------------------------------------------------- 1 | # Contributions 2 | 3 | This section is for contributed work that can be used with hls4ml that is potentially useful to a wider audience. Examples include implementations for custom layer types for use with the Extensions API. 4 | 5 | ## How to structure contributions 6 | 7 | The best way to structure a contribution is to make a directory for the contribution, with a README inside to explain what it is and how to use it. If possible there should be an example script demonstrating how to use it. We should be able to validate that the code works. 8 | -------------------------------------------------------------------------------- /hls4ml/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/contrib/__init__.py -------------------------------------------------------------------------------- /hls4ml/contrib/kl_layer/README.md: -------------------------------------------------------------------------------- 1 | This folder contains the implementation of custom KL divergence layer. 
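For reference, the quantity the layer is named after is the Kullback-Leibler divergence, KL(P || Q) = sum p*log(p/q) for discrete distributions. A plain NumPy version of that textbook formula is sketched below; it is only a reference point, not the HLS implementation in `kl_layer.h`.

```python
import numpy as np


def kl_divergence(p, q, eps=1e-12):
    """Textbook KL(P || Q) for discrete distributions; not the hls4ml layer."""
    p = np.asarray(p, dtype=float) + eps
    q = np.asarray(q, dtype=float) + eps
    p, q = p / p.sum(), q / q.sum()
    return float(np.sum(p * np.log(p / q)))


print(kl_divergence([0.7, 0.2, 0.1], [0.5, 0.3, 0.2]))  # ~0.085
```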
2 | This is a custom implementation and not a built-in layer in any deep learning framework. 3 | It was developed specifically for [AD@L1 CMS paper](https://www.nature.com/articles/s42256-022-00441-3). 4 | 5 | # Files 6 | 7 | * `kl_layer.py`: contains the standalone implementation of the custom KL divergence layer 8 | * `kl_layer.h`: contains the HLS implementation of KL layer 9 | 10 | 11 | # Usage 12 | 13 | `kl_layer.py` contains the example of how to use the KL layer. 14 | To run do 15 | 16 | ``` 17 | python kl_layer.py 18 | ``` 19 | -------------------------------------------------------------------------------- /hls4ml/converters/keras/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/converters/keras/__init__.py -------------------------------------------------------------------------------- /hls4ml/converters/keras/hgq_proxy_model.py: -------------------------------------------------------------------------------- 1 | from hls4ml.converters.keras_v2_to_hls import KerasReader, keras_handler, parse_default_keras_layer 2 | 3 | 4 | @keras_handler('FixedPointQuantizer', 'HGQ>FixedPointQuantizer') 5 | def fixedpoint_quantizer_handler(keras_layer, input_names, input_shapes, data_reader: KerasReader): 6 | config = parse_default_keras_layer(keras_layer, input_names) 7 | 8 | name = config['name'] 9 | fusible = keras_layer['config']['fusible'] 10 | config['RND'] = keras_layer['config']['RND'] 11 | config['SAT'] = keras_layer['config']['SAT'] 12 | config['fusible'] = fusible 13 | if not fusible: 14 | k = data_reader.get_weights_data(name, 'keep_negative') 15 | b = data_reader.get_weights_data(name, 'bits') 16 | i = data_reader.get_weights_data(name, 'integers') 17 | config['mask_kbi'] = k, b, i 18 | config['overrides'] = keras_layer['config']['overrides'] 19 | 20 | layer = config 21 | return layer, input_shapes[0] 22 | 23 | 24 | @keras_handler('UnaryLUT', 'HGQ>UnaryLUT') 25 | def unary_lut_keras_handler(keras_layer, input_names, input_shapes, data_reader: KerasReader): 26 | config = parse_default_keras_layer(keras_layer, input_names) 27 | 28 | table = data_reader.get_weights_data(config['name'], 'table') 29 | k, i, f = keras_layer['config']['kif_out'] 30 | k, b, i = k, k + i + f, k + i 31 | config['table_t'] = f'{"" if k else "u"}fixed<{b},{i}>' 32 | config['table'] = table 33 | config['table_size'] = len(table) 34 | config['activation'] = 'unary_lut' 35 | 36 | layer = config 37 | return layer, input_shapes[0] 38 | -------------------------------------------------------------------------------- /hls4ml/converters/keras/merge.py: -------------------------------------------------------------------------------- 1 | from hls4ml.converters.keras_v2_to_hls import keras_handler, parse_default_keras_layer 2 | 3 | merge_layers = ['Add', 'Subtract', 'Multiply', 'Average', 'Maximum', 'Minimum', 'Concatenate', 'Dot'] 4 | 5 | 6 | @keras_handler(*merge_layers) 7 | def parse_merge_layer(keras_layer, input_names, input_shapes, data_reader): 8 | assert keras_layer['class_name'] in merge_layers 9 | 10 | layer = parse_default_keras_layer(keras_layer, input_names) 11 | 12 | layer['op'] = layer['class_name'].lower() 13 | 14 | output_shape = input_shapes[0][:] 15 | if layer['class_name'] == 'Concatenate': 16 | rank = len(input_shapes[0][1:]) 17 | if rank > 3: 18 | raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.') 19 | layer['op'] 
= layer['class_name'].lower() + f'{rank}d' 20 | layer['axis'] = keras_layer['config']['axis'] 21 | output_shape[layer['axis']] += input_shapes[1][layer['axis']] 22 | elif layer['class_name'] == 'Dot': 23 | rank = len(input_shapes[0][1:]) 24 | if rank > 1: 25 | raise Exception('ERROR: Dot of tensors with rank > 1 is not yet supported.') 26 | layer['op'] = layer['class_name'].lower() + f'{rank}d' 27 | else: 28 | layer['class_name'] = 'Merge' 29 | if len(layer['inputs']) > 2: 30 | raise Exception('ERROR: Merging more than two tensors is not yet supported.') 31 | 32 | return layer, output_shape 33 | -------------------------------------------------------------------------------- /hls4ml/converters/keras/model.py: -------------------------------------------------------------------------------- 1 | from hls4ml.converters.keras_v2_to_hls import ( 2 | KerasFileReader, 3 | KerasModelReader, 4 | KerasNestedFileReader, 5 | keras_handler, 6 | parse_default_keras_layer, 7 | parse_keras_model, 8 | ) 9 | 10 | model_layers = ['Sequential', 'Functional'] 11 | 12 | 13 | @keras_handler(*model_layers) 14 | def parse_model_layer(keras_layer, input_names, input_shapes, data_reader): 15 | assert keras_layer['class_name'] in model_layers 16 | 17 | layer = parse_default_keras_layer(keras_layer, input_names) 18 | layer['class_name'] = 'LayerGroup' 19 | 20 | if isinstance(data_reader, KerasNestedFileReader): 21 | # In the .h5 file, the paths don't go more than one level deep 22 | nested_path = data_reader.nested_path 23 | else: 24 | nested_path = layer['name'] 25 | 26 | if isinstance(data_reader, KerasFileReader): 27 | nested_reader = KerasNestedFileReader(data_reader, nested_path) 28 | else: 29 | nested_reader = KerasModelReader(data_reader.model.get_layer(layer['name'])) 30 | 31 | layer_list, input_layers, output_layers, output_shapes = parse_keras_model(keras_layer, nested_reader) 32 | 33 | if output_layers is None: 34 | last_layer = layer_list[-1]['name'] 35 | else: 36 | last_layer = output_layers[0] 37 | output_shape = output_shapes[last_layer] 38 | 39 | layer['layer_list'] = layer_list 40 | layer['input_layers'] = input_layers if input_layers is not None else [] 41 | layer['output_layers'] = output_layers if output_layers is not None else [] 42 | layer['data_reader'] = nested_reader 43 | layer['output_shape'] = output_shape 44 | 45 | return layer, output_shape 46 | -------------------------------------------------------------------------------- /hls4ml/converters/keras_v3/__init__.py: -------------------------------------------------------------------------------- 1 | from . import conv # noqa: F401 2 | from . import core # noqa: F401 3 | from . import einsum_dense # noqa: F401 4 | from . import merge # noqa: F401 5 | from . 
import pooling # noqa: F401 6 | from ._base import registry as layer_handlers 7 | 8 | __all__ = ['layer_handlers'] 9 | -------------------------------------------------------------------------------- /hls4ml/converters/onnx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/converters/onnx/__init__.py -------------------------------------------------------------------------------- /hls4ml/converters/onnx/merge.py: -------------------------------------------------------------------------------- 1 | from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler 2 | 3 | merge_layers = ['Add', 'Sub', 'Mul', 'Div', 'Average', 'Max', 'Min', 'Concat', 'Sum'] 4 | 5 | op_map = { 6 | 'Add': 'add', 7 | 'Sub': 'subtract', 8 | 'Mul': 'multiply', 9 | 'Div': 'divide', 10 | 'Average': 'average', 11 | 'Max': 'maximum', 12 | 'Min': 'minimum', 13 | 'Sum': 'add', 14 | 'Concat': 'concat', 15 | } 16 | 17 | 18 | @onnx_handler(*merge_layers) 19 | def parse_merge_layer(node, input_names, input_shapes, graph): 20 | layer = {} 21 | layer['class_name'] = node.op_type 22 | layer['name'] = node.name 23 | layer['op'] = op_map[node.op_type] 24 | layer['inputs'] = input_names 25 | layer['outputs'] = list(node.output) 26 | 27 | if layer['class_name'] == 'Concat': 28 | rank = len(input_shapes[0][1:]) 29 | if rank > 3: 30 | raise Exception('ERROR: Concatenation of tensors with rank > 3 is not yet supported.') 31 | 32 | layer['class_name'] = 'Concatenate' 33 | layer['op'] = layer['class_name'].lower() + f'{rank}d' 34 | layer['axis'] = get_onnx_attribute(node, 'axis') 35 | 36 | else: 37 | layer['class_name'] = 'Merge' 38 | 39 | if len(layer['inputs']) > 2: 40 | raise Exception('ERROR: Merging more than two tensors is not yet supported.') 41 | 42 | return layer 43 | -------------------------------------------------------------------------------- /hls4ml/converters/pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/converters/pytorch/__init__.py -------------------------------------------------------------------------------- /hls4ml/model/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.graph import HLSConfig, ModelGraph # noqa: F401 2 | -------------------------------------------------------------------------------- /hls4ml/model/flow/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.flow.flow import ( # noqa: F401 2 | Flow, 3 | get_available_flows, 4 | get_backend_flows, 5 | get_flow, 6 | register_flow, 7 | update_flow, 8 | ) 9 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fastmachinelearning/hls4ml/6cdf842d3b4bb98025c7dae05766d944f5ec6ced/hls4ml/model/optimizer/passes/__init__.py -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/expand_layer_group.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Input, LayerGroup 2 | from hls4ml.model.optimizer import 
OptimizerPass 3 | 4 | 5 | class ExpandLayerGroup(OptimizerPass): 6 | '''Expands LayerGroup (a nested model) into the parent model.''' 7 | 8 | def match(self, node): 9 | return isinstance(node, LayerGroup) 10 | 11 | def transform(self, model, node): 12 | layer_list = node.get_attr('layer_list') 13 | 14 | # We'll keep track of inserted Input nodes to remove later 15 | inserted_input_nodes = [] 16 | 17 | for i, layer in enumerate(layer_list): 18 | kind = layer['class_name'] 19 | name = layer['name'] 20 | inputs = layer.get('inputs', []) 21 | outputs = layer.get('outputs', []) 22 | 23 | if name in model.graph.keys(): 24 | raise Exception(f'Layer names must be unique: "{name}" already found in the model graph.') 25 | 26 | if len(inputs) == 0: 27 | if kind in ['InputLayer', 'Input']: 28 | inputs = node.inputs.copy() 29 | else: 30 | inputs = model.graph[layer_list[i - 1]['name']].outputs.copy() 31 | if len(outputs) == 0: 32 | outputs = [name] 33 | 34 | new_node = model.make_node(kind, name, layer, inputs, outputs) 35 | model.insert_node(new_node) 36 | if isinstance(new_node, Input): 37 | inserted_input_nodes.append(new_node) 38 | 39 | model.remove_node(node) 40 | 41 | for input_node in inserted_input_nodes: 42 | model.remove_node(input_node) 43 | 44 | return True 45 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/fuse_biasadd.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import BiasAdd, Conv1D, Conv2D, Dense 2 | from hls4ml.model.optimizer import OptimizerPass 3 | 4 | 5 | class FuseBiasAdd(OptimizerPass): 6 | '''Fuses BiasAdd into Dense/Conv2D layer (common in TF models).''' 7 | 8 | def match(self, node): 9 | return isinstance(node, BiasAdd) and isinstance(node.get_input_node(), (Dense, Conv1D, Conv2D)) 10 | 11 | def transform(self, model, node): 12 | # Fuse BiasAdd into Dense layer 13 | dense_layer = node.get_input_node() 14 | dense_layer.get_weights('bias').data = node.get_weights('bias').data 15 | 16 | model.remove_node(node) 17 | 18 | return True 19 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/linear.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Activation, BatchNormalization, Conv1D, Conv2D, Dense 2 | from hls4ml.model.optimizer import OptimizerPass 3 | from hls4ml.model.types import UnspecifiedPrecisionType 4 | 5 | 6 | class EliminateLinearActivation(OptimizerPass): 7 | def match(self, node): 8 | cast = False 9 | if isinstance(node, Activation): 10 | cast = node.get_input_variable().type.precision != node.get_output_variable().type.precision 11 | return isinstance(node, Activation) and node.get_attr('activation') == 'linear' and not cast 12 | 13 | def transform(self, model, node): 14 | model.remove_node(node) 15 | return True 16 | 17 | 18 | _safe_parents = (Dense, Conv1D, Conv2D, BatchNormalization, Activation) 19 | 20 | 21 | class MergeLinearActivation(OptimizerPass): 22 | ''' 23 | For many objects it's safe to change the output precision independently of the calculation. 24 | ''' 25 | 26 | def match(self, node): 27 | ''' 28 | Only match if the parent is safe and the precision is not explicitly set. 
29 | ''' 30 | if isinstance(node, Activation) and node.get_attr('activation') == 'linear': 31 | parent = node.get_input_node(node.inputs[0]) 32 | safe_parent = isinstance(parent, _safe_parents) 33 | return safe_parent and isinstance(parent.get_output_variable().type.precision, UnspecifiedPrecisionType) 34 | else: 35 | return False 36 | 37 | def transform(self, model, node): 38 | prev_node = node.get_input_node(node.inputs[0]) 39 | quantizer = node.get_attr("quantizer") 40 | # if the activation has a quantizer (usually from a QONNX Quant node), set the previous node's output precision 41 | if quantizer is not None: 42 | prev_node.set_attr("quantizer", quantizer) 43 | prev_node.get_output_variable().type.precision = quantizer.hls_type 44 | model.remove_node(node) 45 | return True 46 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/reshape_const.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Constant, Reshape 2 | from hls4ml.model.optimizer import OptimizerPass 3 | 4 | 5 | class ReshapeConstant(OptimizerPass): 6 | """ 7 | ONNX has the target shape come as an input, not a parameter. This removes 8 | the Constant input from the new shape input. (Non-constant inputs are not supported.) 9 | The constant value was already used; this is just a cleanup optimization. 10 | """ 11 | 12 | def match(self, node): 13 | is_match = isinstance(node, Reshape) and len(node.inputs) > 1 and node.get_input_node(node.inputs[1]) 14 | 15 | return is_match 16 | 17 | def transform(self, model, node): 18 | """ 19 | Remove Constant from the new shape input. Note: the input shape node is already used at initialization. 20 | """ 21 | shape_node = node.get_input_node(node.inputs[1]) 22 | node.inputs[1] = '' 23 | if not isinstance(shape_node, Constant): 24 | raise RuntimeError('Nonconstant shape inputs are not currently supported') 25 | model.remove_node(shape_node) 26 | 27 | return True 28 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/resize_remove_constants.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | from hls4ml.model.layers import Constant, Resize 4 | from hls4ml.model.optimizer import OptimizerPass 5 | 6 | 7 | class ResizeRemoveConstants(OptimizerPass): 8 | """ 9 | This optimizer removes the RoI and Scales constant inputs of the Resize node, which, if left in place, cause issues in hls4ml. 10 | """ 11 | 12 | def match(self, node): 13 | is_match = isinstance(node, Resize) and len(node.inputs) > 1 14 | return is_match 15 | 16 | def transform(self, model, node): 17 | """ 18 | Remove RoI and Scale Constant from new shape input. 19 | """ 20 | # see doc here: https://onnx.ai/onnx/operators/onnx__Resize.html 21 | roi_index = 1 22 | scales_idx = 2 23 | scales_node = node.get_input_node(node.inputs[scales_idx]) 24 | node.inputs[scales_idx] = '' 25 | if not isinstance(scales_node, Constant): 26 | raise RuntimeError('Non-constant shape inputs are not supported') 27 | model.remove_node(scales_node) 28 | # RoI position is always 1 when present 29 | roi_node = node.get_input_node(node.inputs[roi_index]) 30 | if roi_node.get_attr('value'): 31 | warn('RoI value vector is not empty.
Consider that RoI is not supported in hls4ml', stacklevel=2) 32 | node.inputs[roi_index] = '' 33 | if not isinstance(roi_node, Constant): 34 | raise RuntimeError('Non-constant RoI inputs are not supported') 35 | model.remove_node(roi_node) 36 | # Clean all the '' inputs 37 | node.inputs = list(filter(None, node.inputs)) 38 | return True 39 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/stamp.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | from hls4ml.model.optimizer import ModelOptimizerPass 4 | 5 | 6 | class MakeStamp(ModelOptimizerPass): 7 | def __init__(self): 8 | self.name = 'make_stamp' 9 | 10 | def transform(self, model): 11 | def _make_stamp(): 12 | """Create a unique identifier for the generated code. This identifier is used to 13 | compile a unique library and link it with python.""" 14 | 15 | length = 8 16 | 17 | stamp = uuid.uuid4() 18 | return str(stamp)[-length:] 19 | 20 | model.config.config['Stamp'] = _make_stamp() 21 | 22 | return False # No model graph changes made 23 | -------------------------------------------------------------------------------- /hls4ml/model/optimizer/passes/transpose_opt.py: -------------------------------------------------------------------------------- 1 | from hls4ml.model.layers import Input, Transpose 2 | from hls4ml.model.optimizer import OptimizerPass 3 | 4 | 5 | class RemoveNopTranspose(OptimizerPass): 6 | """ 7 | Remove a transpose layer if it doesn't do anything to a 1D array. i.e, 1D input and perm = [0] 8 | """ 9 | 10 | def match(self, node): 11 | is_match = isinstance(node, Transpose) and node.get_attr('perm') == [0] # Useless transpose 12 | return is_match 13 | 14 | def transform(self, model, node): 15 | print(f'Unnecessary transpose node ({node.name}) detected, optimizing ...') 16 | model.remove_node(node) 17 | 18 | return True 19 | 20 | 21 | class RemoveSingleChannelTranspose(OptimizerPass): 22 | """ 23 | Remove transpose of inputs if the number of channels is 1 as for io_parallel this doesn't affect the array 24 | representation used 25 | """ 26 | 27 | def match(self, node): 28 | if node.model.config.get_config_value('IOType') != 'io_parallel': 29 | return False 30 | 31 | return ( 32 | isinstance(node, Transpose) 33 | and isinstance(node.get_input_node(), Input) 34 | and node.get_input_variable().shape[0] == 1 35 | ) 36 | 37 | def transform(self, model, node): 38 | # Adjust the input shape and remove the Transpose node 39 | input_var = node.get_input_variable() 40 | input_var.shape.append(input_var.shape.pop(0)) 41 | model.remove_node(node) 42 | 43 | return True 44 | -------------------------------------------------------------------------------- /hls4ml/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | # No imports as each of the optimization modules may contain different dependencies. 2 | -------------------------------------------------------------------------------- /hls4ml/optimization/dsp_aware_pruning/config.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | ''' 4 | A list of currently supported structures when optimizing (pruning, weight sharing) 5 | For more information, see attributes.py 6 | 7 | 1. 
Unstructured: 8 | - Pruning: Y 9 | - Weight sharing: N 10 | - Description: Removes (zeroes out) individual weights 11 | - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) 12 | 13 | 2. Structured: 14 | - Pruning: Y 15 | - Weight sharing: Y 16 | - Description: Zeroes out or quantizes all the weights in a structure: 17 | - Dense: Neurons, determined by their outgoing connections (columns in Keras weight tensors) 18 | - Conv2D: Filters (structures of size filt_width x filt_height x n_chan) 19 | - Notes: 20 | - For Dense, it was also possible optimize by incoming connections (rows); 21 | However, removing zero neurons becomes harder because of Keras Surgeon 22 | - For Conv2D, significant literature explored pruning channels; currently not supported 23 | - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) 24 | 25 | 3. Pattern: 26 | - Pruning: Y 27 | - Weight sharing: Y 28 | - Description: Zeroes out or quantizes all the weights in a group 29 | Groups are determined by a variable, n, and every n-th weight in the flattened, 30 | Transposed (Resource) weight tensor is collected and stored in the same group 31 | Equivalent to pruning/quantizing weight processed by the same DSP in hls4ml 32 | - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) 33 | 34 | 4. Block: 35 | - Pruning: Y 36 | - Weight sharing: Y 37 | - Description: Zeroes out or quantizes all the weights in a block of size (w, h) 38 | - Supports: All rank-2 (e.g. Dense, but not Conv2D) layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) 39 | 40 | ''' 41 | 42 | 43 | class SUPPORTED_STRUCTURES(Enum): 44 | UNSTRUCTURED = 'unstructured' 45 | STRUCTURED = 'structured' 46 | PATTERN = 'pattern' 47 | BLOCK = 'block' 48 | -------------------------------------------------------------------------------- /hls4ml/optimization/dsp_aware_pruning/keras/config.py: -------------------------------------------------------------------------------- 1 | from qkeras import QConv2D, QDense 2 | from tensorflow.keras.layers import Conv2D, Dense 3 | 4 | ''' 5 | Optimizable layers in Keras / QKeras 6 | Any new layers need to be registered here first 7 | Additional logic in the source files may need to be written (e.g. recurrent layers should also optimize recurrent kernels) 8 | ''' 9 | SUPPORTED_LAYERS = (Dense, Conv2D, QDense, QConv2D) 10 | 11 | 12 | ''' 13 | Supported ranking metrics, for classifying redundant (groups of) weights 14 | 15 | 1. l1 - groups of weights are ranked by their l1 norm 16 | 2. l2 - groups of weights are ranked by their l2 norm 17 | 3. oracle - abs(dL / dw * w), introduced by Molchanov et al. (2016) 18 | Pruning Convolutional Neural Networks for Resource Efficient Inference 19 | 4. saliency - (d^2L / dw^2 * w)^2, introduced by Lecun et al. (1989) Optimal Brain Damage 20 | ''' 21 | SUPPORTED_METRICS = ('l1', 'l2', 'oracle', 'saliency') 22 | 23 | ''' 24 | Temporary directory for storing best models, tuning results etc. 
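As a toy illustration of the first two ranking metrics listed above: groups of weights are scored by a norm, and the lowest-scoring group becomes the first pruning candidate. The numbers below are made up, and the real optimizer works on Keras/QKeras layer tensors rather than dictionaries.

```python
import numpy as np

groups = {
    'neuron_0': np.array([0.8, -0.5, 0.1]),
    'neuron_1': np.array([0.05, 0.02, -0.01]),
    'neuron_2': np.array([-0.4, 0.3, 0.2]),
}
l1 = {k: float(np.abs(v).sum()) for k, v in groups.items()}
l2 = {k: float(np.sqrt((v**2).sum())) for k, v in groups.items()}

print(min(l1, key=l1.get))  # 'neuron_1' -> weakest group under the l1 metric
print(min(l2, key=l2.get))  # 'neuron_1' -> weakest group under the l2 metric as well
```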
25 | ''' 26 | TMP_DIRECTORY = 'hls4ml-optimization-keras' 27 | -------------------------------------------------------------------------------- /hls4ml/report/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.report.catapult_report import parse_catapult_report # noqa: F401 2 | from hls4ml.report.catapult_report import qofr # noqa: F401 3 | from hls4ml.report.catapult_report import read_catapult_report # noqa: F401 4 | from hls4ml.report.oneapi_report import parse_oneapi_report # noqa: F401 5 | from hls4ml.report.oneapi_report import print_oneapi_report # noqa: F401 6 | from hls4ml.report.quartus_report import parse_quartus_report # noqa: F401 7 | from hls4ml.report.quartus_report import read_quartus_report # noqa: F401 8 | from hls4ml.report.vivado_report import parse_vivado_report # noqa: F401 9 | from hls4ml.report.vivado_report import print_vivado_report # noqa: F401 10 | from hls4ml.report.vivado_report import read_vivado_report # noqa: F401 11 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CC=g++ 4 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 5 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 6 | elif [[ "$OSTYPE" == "linux"* ]]; then 7 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique -Wno-pragmas" 8 | elif [[ "$OSTYPE" == "darwin"* ]]; then 9 | CFLAGS="-O3 -fPIC -std=c++11" 10 | fi 11 | LDFLAGS= 12 | 13 | # Pick up AC libraries from Catapult install first 14 | INCFLAGS="-I$MGC_HOME/shared/include -I$MGC_HOME/shared/include/nnet_utils -Ifirmware/ac_types/include -Ifirmware/ac_math/include -Ifirmware/ac_simutils/include -Ifirmware/nnet_utils" 15 | PROJECT=myproject 16 | LIB_STAMP=mystamp 17 | 18 | ${CC} ${CFLAGS} ${INCFLAGS} -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 19 | ${CC} ${CFLAGS} ${INCFLAGS} -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 20 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so 21 | rm -f *.o 22 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/catapult_synth.tcl: -------------------------------------------------------------------------------- 1 | add_files myproject_prj/solution1/syn/vhdl 2 | synth_design -top myproject -part xcku115-flvb2104-2-i 3 | report_utilization -file vivado_synth.rpt 4 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/firmware/defines.h: -------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H_ 2 | #define DEFINES_H_ 3 | 4 | #include "nnet_utils/nnet_types.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // hls-fpga-machine-learning insert numbers 12 | 13 | // hls-fpga-machine-learning insert layer-precision 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/firmware/myproject.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "myproject.h" 4 | #include "parameters.h" 5 | 6 | #include 7 | 8 | #pragma hls_design top 9 | // hls-fpga-machine-learning insert IFSynPragmas 10 | void CCS_BLOCK(myproject)( 11 | // hls-fpga-machine-learning insert header 12 | ) { 13 | 14 | // hls-fpga-machine-learning insert IO 15 | 16 | 
#ifndef __SYNTHESIS__ 17 | static bool loaded_weights = false; 18 | if (!loaded_weights) { 19 | // hls-fpga-machine-learning insert load weights 20 | loaded_weights = true; 21 | } 22 | #endif 23 | 24 | // **************************************** 25 | // NETWORK INSTANTIATION 26 | // **************************************** 27 | 28 | // hls-fpga-machine-learning insert layers 29 | } 30 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/firmware/myproject.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_H_ 2 | #define MYPROJECT_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "defines.h" 9 | 10 | // Prototype of top level function for C-synthesis 11 | void myproject( 12 | // hls-fpga-machine-learning insert header 13 | ); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/firmware/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETERS_H_ 2 | #define PARAMETERS_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nnet_utils/nnet_code_gen.h" 8 | #include "nnet_utils/nnet_helpers.h" 9 | // hls-fpga-machine-learning insert includes 10 | 11 | // hls-fpga-machine-learning insert weights 12 | 13 | // hls-fpga-machine-learning insert layer-config 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/myproject_bridge.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_BRIDGE_H_ 2 | #define MYPROJECT_BRIDGE_H_ 3 | 4 | #include "firmware/myproject.h" 5 | #include "nnet_helpers.h" 6 | #include 7 | #include 8 | 9 | // hls-fpga-machine-learning insert weights dir 10 | 11 | const char *get_weights_dir() { return s_weights_dir.c_str(); } 12 | 13 | // hls-fpga-machine-learning insert bram 14 | 15 | // hls-fpga-machine-learning insert declare weights 16 | 17 | namespace nnet { 18 | bool trace_enabled = false; 19 | std::map *trace_outputs = NULL; 20 | size_t trace_type_size = sizeof(double); 21 | } // namespace nnet 22 | 23 | extern "C" { 24 | 25 | struct trace_data { 26 | const char *name; 27 | void *data; 28 | }; 29 | 30 | void allocate_trace_storage(size_t element_size) { 31 | nnet::trace_enabled = true; 32 | nnet::trace_outputs = new std::map; 33 | nnet::trace_type_size = element_size; 34 | // hls-fpga-machine-learning insert trace_outputs 35 | } 36 | 37 | void free_trace_storage() { 38 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 39 | void *ptr = i->second; 40 | free(ptr); 41 | } 42 | nnet::trace_outputs->clear(); 43 | delete nnet::trace_outputs; 44 | nnet::trace_outputs = NULL; 45 | nnet::trace_enabled = false; 46 | } 47 | 48 | void collect_trace_output(struct trace_data *c_trace_outputs) { 49 | int ii = 0; 50 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 51 | c_trace_outputs[ii].name = i->first.c_str(); 52 | c_trace_outputs[ii].data = i->second; 53 | ii++; 54 | } 55 | } 56 | 57 | // Wrapper of top level function for Python bridge 58 | void myproject_float( 59 | // hls-fpga-machine-learning insert header #float 60 | ) { 61 | 62 | // hls-fpga-machine-learning insert wrapper #float 63 | } 64 | 65 | void myproject_double( 66 | // hls-fpga-machine-learning insert header #double 67 | ) { 68 | // 
hls-fpga-machine-learning insert wrapper #double 69 | } 70 | } 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/hls_math.h: -------------------------------------------------------------------------------- 1 | #ifndef X_HLS_MATH_H 2 | #define X_HLS_MATH_H 3 | 4 | #include "ac_fixed.h" 5 | #include 6 | 7 | namespace hls { 8 | 9 | template <class T> static T exp(const T x) { return (T)std::exp(x.to_double()); } 10 | 11 | template <class T> T sin(T x) { return (T)std::sin(x.to_double()); }; 12 | 13 | template <class T> T cos(T x) { return (T)std::cos(x.to_double()); }; 14 | 15 | template <class T> T asin(T x) { return (T)std::asin(x.to_double()); }; 16 | 17 | template <class T> T acos(T x) { return (T)std::acos(x.to_double()); }; 18 | 19 | template <class T> T atan(T x) { return (T)std::atan(x.to_double()); }; 20 | 21 | template <class T> T atan2(T x, T y) { return (T)hls::atan2(x.to_double(), y.to_double()); }; 22 | 23 | } // namespace hls 24 | #endif 25 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_array.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_ARRAY_H_ 2 | #define NNET_ARRAY_H_ 3 | 4 | #include 5 | 6 | namespace nnet { 7 | 8 | struct transpose_config { 9 | static const unsigned height = 10; 10 | static const unsigned width = 10; 11 | static const unsigned depth = 10; 12 | static constexpr unsigned perm[3] = {2, 0, 1}; 13 | }; 14 | 15 | template <class data_T, class res_T, typename CONFIG_T> 16 | void transpose_2d(data_T data[CONFIG_T::height * CONFIG_T::width], res_T data_t[CONFIG_T::height * CONFIG_T::width]) { 17 | //#pragma HLS PIPELINE 18 | 19 | for (int i = 0; i < CONFIG_T::height; i++) { 20 | for (int j = 0; j < CONFIG_T::width; j++) { 21 | data_t[j * CONFIG_T::height + i] = data[i * CONFIG_T::width + j]; 22 | } 23 | } 24 | } 25 | 26 | template <class data_T, class res_T, typename CONFIG_T> 27 | void transpose_3d(data_T data[CONFIG_T::depth * CONFIG_T::height * CONFIG_T::width], 28 | res_T data_t[CONFIG_T::depth * CONFIG_T::height * CONFIG_T::width]) { 29 | unsigned dims[3] = {CONFIG_T::depth, CONFIG_T::height, CONFIG_T::width}; 30 | unsigned dims_t[3]; 31 | dims_t[0] = dims[CONFIG_T::perm[0]]; 32 | dims_t[1] = dims[CONFIG_T::perm[1]]; 33 | dims_t[2] = dims[CONFIG_T::perm[2]]; 34 | 35 | int idx[3] = {0}, idx_t[3] = {0}; 36 | for (idx[0] = 0; idx[0] < dims[0]; idx[0]++) { 37 | for (idx[1] = 0; idx[1] < dims[1]; idx[1]++) { 38 | for (idx[2] = 0; idx[2] < dims[2]; idx[2]++) { 39 | idx_t[0] = idx[CONFIG_T::perm[0]]; 40 | idx_t[1] = idx[CONFIG_T::perm[1]]; 41 | idx_t[2] = idx[CONFIG_T::perm[2]]; 42 | 43 | data_t[idx_t[0] * dims_t[1] * dims_t[2] + idx_t[1] * dims_t[2] + idx_t[2]] = 44 | data[idx[0] * dims[1] * dims[2] + idx[1] * dims[2] + idx[2]]; 45 | } 46 | } 47 | } 48 | } 49 | 50 | } // namespace nnet 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_code_gen.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_INSTR_GEN_H_ 2 | #define NNET_INSTR_GEN_H_ 3 | 4 | #include "nnet_helpers.h" 5 | #include 6 | 7 | namespace nnet { 8 | 9 | template <class data_T, typename CONFIG_T> class FillConv1DBuffer { 10 | public: 11 | static void fill_buffer(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], 12 | data_T buffer[CONFIG_T::n_pixels][CONFIG_T::filt_width * CONFIG_T::n_chan], 13 | const unsigned partition) { 14 | // To be implemented in subclasses 15 | } 16 | }; 17 | 18 | template <class data_T, typename CONFIG_T> class FillConv2DBuffer { 19 | public:
20 | static void 21 | fill_buffer(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan], 22 | data_T buffer[CONFIG_T::n_pixels][CONFIG_T::filt_height * CONFIG_T::filt_width * CONFIG_T::n_chan], 23 | const unsigned partition) { 24 | // To be implemented in subclasses 25 | } 26 | }; 27 | 28 | // hls4ml insert code 29 | 30 | } // namespace nnet 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_common.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef NNET_COMMON_H_ 3 | #define NNET_COMMON_H_ 4 | 5 | #include "ac_fixed.h" 6 | 7 | // This is a substitute for "ceil(n/(float)d)". 8 | #define DIV_ROUNDUP(n, d) ((n + d - 1) / d) 9 | #define MIN(n, d) (n > d ? d : n) 10 | #define MAX(n, d) (n > d ? n : d) 11 | 12 | namespace nnet { 13 | 14 | // Common type definitions 15 | enum io_type { io_parallel = 0, io_stream }; 16 | enum strategy { latency, resource }; 17 | 18 | /* --- 19 | * Balanced tree reduce implementation. 20 | * For use in scenarios where Vivado cannot expression balance 21 | * Reduces an array of inputs to a single value using the template binary operator 'Op', 22 | * for example summing all elements with Op_add, or finding the maximum with Op_max 23 | * Use only when the input array is fully unrolled. Or, slice out a fully unrolled section 24 | * before applying and accumulate the result over the rolled dimension. 25 | * --- */ 26 | template <class T, int N, class Op> T reduce(const T *x, Op op) { 27 | static constexpr int leftN = pow2(floorlog2(N - 1)) > 0 ? pow2(floorlog2(N - 1)) : 0; 28 | static constexpr int rightN = N - leftN > 0 ? N - leftN : 0; 29 | 30 | if (N == 1) { 31 | return x[0]; 32 | } else if (N == 2) { 33 | return op(x[0], x[1]); 34 | } else { 35 | return op(reduce<T, leftN, Op>(x, op), reduce<T, rightN, Op>(x + leftN, op)); 36 | } 37 | } 38 | 39 | template <class T> class Op_add { 40 | public: 41 | T operator()(T a, T b) { return a + b; } 42 | }; 43 | 44 | template <class T> class Op_and { 45 | public: 46 | T operator()(T a, T b) { return a && b; } 47 | }; 48 | 49 | template <class T> class Op_or { 50 | public: 51 | T operator()(T a, T b) { return a || b; } 52 | }; 53 | 54 | template <class T> class Op_max { 55 | public: 56 | T operator()(T a, T b) { return a >= b ? a : b; } 57 | }; 58 | 59 | template <class T> class Op_min { 60 | public: 61 | T operator()(T a, T b) { return a <= b ?
a : b; } 62 | }; 63 | 64 | } // namespace nnet 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_dense.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DENSE_H_ 2 | #define NNET_DENSE_H_ 3 | 4 | #include "ac_channel.h" 5 | #include "nnet_common.h" 6 | #include "nnet_dense_latency.h" 7 | #include "nnet_dense_resource.h" 8 | #include "nnet_helpers.h" 9 | #include "nnet_mult.h" 10 | #include 11 | 12 | namespace nnet { 13 | 14 | struct dense_config { 15 | // Internal data type definitions 16 | typedef float bias_t; 17 | typedef float weight_t; 18 | typedef float accum_t; 19 | 20 | // Layer Sizes 21 | static const unsigned n_in = 10; 22 | static const unsigned n_out = 10; 23 | 24 | // Resource reuse info 25 | static const unsigned io_type = io_parallel; 26 | static const unsigned strategy = latency; 27 | static const unsigned reuse_factor = 1; 28 | static const bool store_weights_in_bram = false; 29 | static const unsigned n_zeros = 0; 30 | // partitioning arrays cyclically to go with roll factors? 31 | // Product function to use 32 | template using product = nnet::product::mult; 33 | }; 34 | 35 | template 36 | void dense(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_out], 37 | typename CONFIG_T::weight_t weights[CONFIG_T::n_in * CONFIG_T::n_out], 38 | typename CONFIG_T::bias_t biases[CONFIG_T::n_out]) { 39 | //#pragma HLS inline 40 | if (CONFIG_T::strategy == nnet::latency) { 41 | dense_latency(data, res, weights, biases); 42 | } else { 43 | dense_resource(data, res, weights, biases); 44 | } 45 | } 46 | 47 | } // namespace nnet 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_embed.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_H_ 2 | #define NNET_EMBED_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_helpers.h" 6 | 7 | namespace nnet { 8 | 9 | struct embed_config { 10 | // Internal data type definitions 11 | typedef float embeddings_t; 12 | 13 | // Layer Sizes 14 | static const unsigned n_in = 10; 15 | static const unsigned n_out = 16; 16 | static const unsigned vocab_size = 50; 17 | 18 | // Resource reuse info 19 | static const unsigned io_type = io_parallel; 20 | static const unsigned reuse_factor = 1; 21 | }; 22 | 23 | template 24 | void embedding(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in * CONFIG_T::n_out], 25 | typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 26 | 27 | //#pragma HLS PIPELINE II=CONFIG_T::reuse_factor 28 | // This can save a few cycles, but it will create a large multiplexer due to 29 | // non-constant access pattern, so let's leave it out 30 | ////#pragma HLS ARRAY_PARTITION variable=embeddings complete 31 | 32 | constexpr int ce_reuse_factor = CONFIG_T::reuse_factor; 33 | (void)ce_reuse_factor; 34 | InputSequence: 35 | for (int j = 0; j < CONFIG_T::n_in; j++) { 36 | // #pragma HLS UNROLL 37 | DenseEmbedding: 38 | for (int i = 0; i < CONFIG_T::n_out; i++) { 39 | // #pragma HLS UNROLL 40 | res[j * CONFIG_T::n_out + i] = embeddings[data[j] * CONFIG_T::n_out + i]; 41 | } 42 | } 43 | } 44 | 45 | } // namespace nnet 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_embed_stream.h: 
-------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_STREAM_H_ 2 | #define NNET_EMBED_STREAM_H_ 3 | 4 | #include "ac_channel.h" 5 | #include "nnet_common.h" 6 | #include "nnet_helpers.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void embedding(ac_channel &data, ac_channel &res, 12 | typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 13 | data_T in_data = data.read(); 14 | constexpr int ce_reuse_factor = CONFIG_T::reuse_factor; 15 | (void)ce_reuse_factor; 16 | InputSequence: 17 | for (int j = 0; j < data_T::size; j++) { 18 | //#pragma HLS PIPELINE II=CONFIG_T::reuse_factor 19 | 20 | res_T res_pack; 21 | //#pragma HLS DATA_PACK variable=res_pack 22 | 23 | DenseEmbedding: 24 | for (int i = 0; i < CONFIG_T::n_out; i++) { 25 | // #pragma HLS UNROLL 26 | res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; 27 | } 28 | res.write(res_pack); 29 | } 30 | } 31 | 32 | } // namespace nnet 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_image.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_H_ 2 | #define NNET_IMAGE_H_ 3 | 4 | #include "ac_channel.h" 5 | #include "nnet_common.h" 6 | #include 7 | 8 | namespace nnet { 9 | 10 | struct resize_config { 11 | static const unsigned height = 10; 12 | static const unsigned width = 10; 13 | static const unsigned n_chan = 10; 14 | static const unsigned new_height = 10; 15 | static const unsigned new_width = 10; 16 | }; 17 | 18 | template 19 | void resize_nearest(data_T image[CONFIG_T::height * CONFIG_T::width * CONFIG_T::n_chan], 20 | data_T resized[CONFIG_T::new_height * CONFIG_T::new_width * CONFIG_T::n_chan]) { 21 | int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; 22 | int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; 23 | int x2, y2; 24 | 25 | //#pragma HLS PIPELINE 26 | 27 | for (int i = 0; i < CONFIG_T::new_height; i++) { 28 | for (int j = 0; j < CONFIG_T::new_width; j++) { 29 | x2 = ((j * x_ratio) >> 16); 30 | y2 = ((i * y_ratio) >> 16); 31 | for (int k = 0; k < CONFIG_T::n_chan; k++) { 32 | resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = 33 | image[(y2 * CONFIG_T::width * CONFIG_T::n_chan) + x2 * CONFIG_T::n_chan + k]; 34 | } 35 | } 36 | } 37 | } 38 | 39 | } // namespace nnet 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_recr_activations.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_RECR_ACTIVATION_H_ 2 | #define NNET_RECR_ACTIVATION_H_ 3 | 4 | #include "ac_channel.h" 5 | #include "nnet_activation.h" 6 | #include "nnet_common.h" 7 | #include "nnet_helpers.h" 8 | #include 9 | 10 | namespace nnet { 11 | 12 | namespace activation { 13 | 14 | template class Activation { 15 | public: 16 | // ************************************************* 17 | // Blank Activation 18 | // ************************************************* 19 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {} // Nothing to do here 20 | }; 21 | 22 | template class relu : public Activation { 23 | public: 24 | // ************************************************* 25 | // Relu Activation 26 | // ************************************************* 27 | static void activation(data_T 
data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 28 | nnet::relu(data, res); 29 | } 30 | }; 31 | 32 | template class sigmoid : public Activation { 33 | public: 34 | // ************************************************* 35 | // Sigmoid Activation 36 | // ************************************************* 37 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 38 | nnet::sigmoid(data, res); 39 | } 40 | }; 41 | 42 | template class tanh : public Activation { 43 | public: 44 | // ************************************************* 45 | // TanH Activation 46 | // ************************************************* 47 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 48 | nnet::tanh(data, res); 49 | } 50 | }; 51 | 52 | } // namespace activation 53 | 54 | } // namespace nnet 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /hls4ml/templates/catapult/nnet_utils/nnet_types.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TYPES_H_ 2 | #define NNET_TYPES_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nnet { 9 | 10 | // Fixed-size array 11 | template struct array { 12 | typedef T value_type; 13 | static const unsigned size = N; 14 | 15 | T data[N]; 16 | 17 | T &operator[](size_t pos) { return data[pos]; } 18 | 19 | const T &operator[](size_t pos) const { return data[pos]; } 20 | 21 | array &operator=(const array &other) { 22 | if (&other == this) 23 | return *this; 24 | 25 | assert(N == other.size && "Array sizes must match."); 26 | 27 | for (unsigned i = 0; i < N; i++) { 28 | //#pragma HLS UNROLL 29 | data[i] = other[i]; 30 | } 31 | return *this; 32 | } 33 | }; 34 | 35 | // Generic lookup-table implementation, for use in approximations of math functions 36 | template class lookup_table { 37 | public: 38 | lookup_table(T from, T to) : range_start(from), range_end(to), base_div(ac_int<16, false>(N) / T(to - from)) { 39 | T step = (range_end - range_start) / ac_int<16, false>(N); 40 | for (size_t i = 0; i < N; i++) { 41 | T num = range_start + ac_int<16, false>(i) * step; 42 | T sample = func(num); 43 | samples[i] = sample; 44 | } 45 | } 46 | 47 | T operator()(T n) const { 48 | int index = (n - range_start) * base_div; 49 | if (index < 0) 50 | index = 0; 51 | else if (index > N - 1) 52 | index = N - 1; 53 | return samples[index]; 54 | } 55 | 56 | private: 57 | T samples[N]; 58 | const T range_start, range_end; 59 | ac_fixed<20, 16, true> base_div; 60 | }; 61 | 62 | } // namespace nnet 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/exception_handler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __EXCEPTIONHANDLER_HPP__ 2 | #define __EXCEPTIONHANDLER_HPP__ 3 | #include 4 | #include 5 | #include 6 | 7 | namespace fpga_tools { 8 | 9 | void exception_handler(sycl::exception_list exceptions) { 10 | for (std::exception_ptr const &e : exceptions) { 11 | try { 12 | std::rethrow_exception(e); 13 | } catch (sycl::exception const &e) { 14 | std::cout << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl; 15 | } 16 | } 17 | } 18 | 19 | } // namespace fpga_tools 20 | 21 | #endif //__EXCEPTIONHANDLER_HPP__ 22 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/defines.h: 
-------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H_ 2 | #define DEFINES_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // Include nnet::array - a custom array-like struct, mainly used with io_stream 10 | #include "nnet_utils/nnet_types.h" 11 | 12 | // hls-fpga-machine-learning insert numbers 13 | 14 | // hls-fpga-machine-learning insert layer-precision 15 | 16 | #define DIV_ROUNDUP(n, d) ((n + d - 1) / d) 17 | #define MIN(n, d) (n > d ? d : n) 18 | #define MAX(n, d) (n < d ? d : n) 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/myproject.cpp: -------------------------------------------------------------------------------- 1 | #include "myproject.h" 2 | #include "parameters.h" 3 | #include 4 | 5 | // hls-fpga-machine-learning insert weights 6 | 7 | // The inter-task pipes need to be declared in the global scope 8 | // hls-fpga-machine-learning insert inter-task pipes 9 | 10 | using sycl::ext::intel::experimental::task_sequence; 11 | 12 | void MyProject::operator()() const { 13 | // **************************************** 14 | // NETWORK INSTANTIATION 15 | // **************************************** 16 | 17 | // hls-fpga-machine-learning read in 18 | 19 | // hls-fpga-machine-learning declare task sequences 20 | 21 | // hls-fpga-machine-learning insert layers 22 | 23 | // hls-fpga-machine-learning return 24 | } 25 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/myproject.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_H_ 2 | #define MYPROJECT_H_ 3 | 4 | #include "defines.h" 5 | 6 | // This file defines the interface to the kernel 7 | 8 | // currently this is fixed 9 | using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(sycl::ext::intel::experimental::ready_latency<0>)); 10 | 11 | // Need to declare the input and output pipes 12 | 13 | // hls-fpga-machine-learning insert inputs 14 | // hls-fpga-machine-learning insert outputs 15 | 16 | class MyProjectID; 17 | 18 | struct MyProject { 19 | 20 | // kernel property method to config invocation interface 21 | auto get(sycl::ext::oneapi::experimental::properties_tag) { 22 | return sycl::ext::oneapi::experimental::properties{sycl::ext::intel::experimental::streaming_interface<>, 23 | sycl::ext::intel::experimental::pipelined<>}; 24 | } 25 | 26 | SYCL_EXTERNAL void operator()() const; 27 | }; 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DENSE_STREAM_H_ 2 | #define NNET_DENSE_STREAM_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_dense.h" 6 | #include "nnet_types.h" 7 | 8 | namespace nnet { 9 | 10 | // Note: DataPack logic removed, at least in the initial version 11 | template 12 | void dense_resource_stream(typename CONFIG_T::weight_t weights, typename CONFIG_T::bias_t biases) { 13 | 14 | [[intel::fpga_register]] typename ExtractPipeType::value_type res; 15 | [[intel::fpga_register]] auto data = data_pipe::read(); 16 | dense_resource::value_type, typename ExtractPipeType::value_type, 17 | CONFIG_T>(data, res, weights, biases); 18 | res_pipe::write(res); 19 | } 20 | 21 | } // namespace nnet 22 | 23 | #endif 24 | 
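For orientation, a minimal sketch of how the streaming dense wrapper above is typically invoked from the generated oneAPI kernel. The pipe names Fc1InPipe/Fc1OutPipe, the config struct config2, and the weight/bias arrays w2/b2 are illustrative placeholders, not names defined in these templates; the actual call is emitted by the backend at the "// hls-fpga-machine-learning insert layers" marker in myproject.cpp.

    // inside MyProject::operator()(), after the inputs have been read in
    nnet::dense_resource_stream<Fc1InPipe, Fc1OutPipe, config2>(w2, b2);
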
-------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv1d.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DEPTH_CONV1D_H_ 2 | #define NNET_DEPTH_CONV1D_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_conv1d.h" 6 | #include "nnet_depthconv1d_resource.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void depthwise_conv_1d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, 12 | const typename CONFIG_T::bias_t &biases) { 13 | 14 | depthwise_conv_1d_resource_cl(data, res, weights, biases); 15 | } 16 | 17 | } // namespace nnet 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_depthconv2d.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DEPTH_CONV2D_H_ 2 | #define NNET_DEPTH_CONV2D_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_conv2d.h" 6 | #include "nnet_depthconv2d_resource.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void depthwise_conv_2d_cl(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, 12 | const typename CONFIG_T::bias_t &biases) { 13 | 14 | depthwise_conv_2d_resource_cl(data, res, weights, biases); 15 | } 16 | 17 | } // namespace nnet 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_embed.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_H_ 2 | #define NNET_EMBED_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_helpers.h" 6 | 7 | namespace nnet { 8 | 9 | struct embed_config { 10 | // Internal data type definitions 11 | typedef float embeddings_t; 12 | 13 | // (Default layer sizes, overwritten form the backend 14 | static const unsigned n_in = 10; 15 | static const unsigned n_out = 16; 16 | static const unsigned vocab_size = 50; 17 | 18 | // Resource reuse info 19 | static const unsigned io_type = io_parallel; 20 | static const unsigned reuse_factor = 1; 21 | }; 22 | 23 | template 24 | void embedding(const data_T &data, res_T &res, const typename CONFIG_T::embeddings_t &embeddings) { 25 | 26 | /* 27 | * Can store embeddings[] in a register, but a large multiiplexer 28 | * is created due to a non-constant access pattern 29 | */ 30 | 31 | InputSequence: 32 | #pragma unroll 33 | [[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int j = 0; j < CONFIG_T::n_in; j++) { 34 | DenseEmbedding: 35 | #pragma unroll 36 | for (int i = 0; i < CONFIG_T::n_out; i++) { 37 | res[j * CONFIG_T::n_out + i] = embeddings[data[j].to_uint() * CONFIG_T::n_out + i]; 38 | } 39 | } 40 | } 41 | 42 | } // namespace nnet 43 | #endif 44 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_embed_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_STREAM_H_ 2 | #define NNET_EMBED_STREAM_H_ 3 | 4 | namespace nnet { 5 | 6 | template 7 | void embedding_stream(typename CONFIG_T::embeddings_t embeddings) { 8 | 9 | using res_T = typename ExtractPipeType::value_type; 10 | constexpr auto datasize = std::tuple_size::value_type>{}; 11 | 12 | auto in_data = data_pipe::read(); 13 | 14 | InputSequence: 15 | 
[[intel::initiation_interval(CONFIG_T::reuse_factor)]] for (int j = 0; j < datasize; j++) { 16 | 17 | res_T res_pack; 18 | 19 | DenseEmbedding: 20 | #pragma unroll 21 | for (int i = 0; i < CONFIG_T::n_out; i++) { 22 | res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; 23 | } 24 | 25 | res_pipe::write(res_pack); 26 | } 27 | } 28 | 29 | } // namespace nnet 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_printf.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_PRINTF_H_ 2 | #define NNET_PRINTF_H_ 3 | 4 | #ifdef __SYCL_DEVICE_ONLY__ 5 | #define CL_CONSTANT __attribute__((opencl_constant)) 6 | #else 7 | #define CL_CONSTANT 8 | #endif 9 | 10 | using namespace sycl; 11 | 12 | #define PRINTF(format, ...) \ 13 | { \ 14 | static const CL_CONSTANT char _format[] = format; \ 15 | ext::oneapi::experimental::printf(_format, ##__VA_ARGS__); \ 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_recurrent_activation.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_RECR_ACTIVATION_H_ 2 | #define NNET_RECR_ACTIVATION_H_ 3 | 4 | #include "nnet_activation.h" 5 | #include "nnet_common.h" 6 | 7 | namespace nnet { 8 | 9 | namespace activation { 10 | 11 | template class Activation { 12 | public: 13 | // ************************************************* 14 | // Blank Activation 15 | // ************************************************* 16 | static void activation(const data_T &data, res_T &res) {} 17 | }; 18 | 19 | template class relu : public Activation { 20 | public: 21 | // ************************************************* 22 | // Relu Activation 23 | // ************************************************* 24 | static void activation(const data_T &data, res_T &res) { nnet::relu(data, res); } 25 | }; 26 | 27 | template class sigmoid : public Activation { 28 | public: 29 | // ************************************************* 30 | // Sigmoid Activation 31 | // ************************************************* 32 | static void activation(const data_T &data, res_T &res) { nnet::sigmoid(data, res); } 33 | }; 34 | 35 | template class tanh : public Activation { 36 | public: 37 | // ************************************************* 38 | // TanH Activation 39 | // ************************************************* 40 | static void activation(const data_T &data, res_T &res) { nnet::dense_tanh(data, res); } 41 | }; 42 | 43 | } // namespace activation 44 | 45 | } // namespace nnet 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_resize.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_H_ 2 | #define NNET_IMAGE_H_ 3 | 4 | namespace nnet { 5 | 6 | struct resize_config { 7 | static const unsigned in_height = 10; 8 | static const unsigned in_width = 10; 9 | 10 | static const unsigned out_height = 10; 11 | static const unsigned out_width = 10; 12 | 13 | static const unsigned n_chan = 10; 14 | }; 15 | 16 | template void resize_nearest(const data_T &image, res_T &resized) { 17 | int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; 18 | int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; 19 | 20 | for (int i = 0; i < 
CONFIG_T::new_height; i++) { 21 | for (int j = 0; j < CONFIG_T::new_width; j++) { 22 | int x = ((j * x_ratio) >> 16); 23 | int y = ((i * y_ratio) >> 16); 24 | 25 | #pragma unroll 26 | for (int k = 0; k < CONFIG_T::n_chan; k++) { 27 | resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = 28 | image[(y * CONFIG_T::width * CONFIG_T::n_chan) + x * CONFIG_T::n_chan + k]; 29 | } 30 | } 31 | } 32 | } 33 | 34 | } // namespace nnet 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_resize_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_STREAM_H_ 2 | #define NNET_IMAGE_STREAM_H_ 3 | 4 | #include "nnet_common.h" 5 | 6 | namespace nnet { 7 | 8 | template void resize_nearest_stream() { 9 | assert(CONFIG_T::new_height % CONFIG_T::height == 0); 10 | assert(CONFIG_T::new_width % CONFIG_T::width == 0); 11 | 12 | using data_T = typename ExtractPipeType::value_type; 13 | 14 | constexpr unsigned ratio_height = CONFIG_T::new_height / CONFIG_T::height; 15 | constexpr unsigned ratio_width = CONFIG_T::new_width / CONFIG_T::width; 16 | 17 | ImageHeight: 18 | for (unsigned h = 0; h < CONFIG_T::height; h++) { 19 | [[intel::fpga_register]] data_T data_in_row[CONFIG_T::width]; 20 | 21 | ImageWidth: 22 | for (unsigned i = 0; i < CONFIG_T::width; i++) { 23 | [[intel::fpga_register]] auto in_data = data_pipe::read(); 24 | 25 | ImageChan: 26 | #pragma unroll 27 | for (unsigned j = 0; j < CONFIG_T::n_chan; j++) { 28 | data_in_row[i][j] = in_data[j]; 29 | } 30 | } 31 | 32 | ResizeHeight: 33 | for (unsigned i = 0; i < ratio_height; i++) { 34 | 35 | ImageWidth2: 36 | for (unsigned l = 0; l < CONFIG_T::width; l++) { 37 | 38 | ResizeWidth: 39 | for (unsigned j = 0; j < ratio_width; j++) { 40 | 41 | [[intel::fpga_register]] data_T out_data; 42 | 43 | ResizeChan: 44 | #pragma unroll 45 | for (unsigned k = 0; k < CONFIG_T::n_chan; k++) { 46 | out_data[k] = data_in_row[l][k]; 47 | } 48 | 49 | res_pipe::write(out_data); 50 | } 51 | } 52 | } 53 | } 54 | } 55 | 56 | } // namespace nnet 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/nnet_utils/nnet_transpose.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TRANSPOSE_H_ 2 | #define NNET_TRANSPOSE_H_ 3 | 4 | namespace nnet { 5 | 6 | struct transpose_config { 7 | static constexpr unsigned dims = 0; 8 | static constexpr unsigned N = 0; 9 | 10 | // Inherited struct should define these 11 | // static constexpr std::array from_shape; 12 | // static constexpr std::array to_shape; 13 | // static constexpr std::array perm; 14 | // static constexpr std::array perm_strides; 15 | }; 16 | 17 | template unsigned transfer_idx(int index) { 18 | // Given output idx in c-order flat array, return input idx 19 | int idx = 0; 20 | for (int i = CONFIG_T::dims - 1; i >= 0; i--) { 21 | idx += (index % CONFIG_T::to_shape[i]) * CONFIG_T::perm_strides[i]; 22 | index /= CONFIG_T::to_shape[i]; 23 | } 24 | return idx; 25 | } 26 | 27 | template void transpose(const data_T &data, res_T &res) { 28 | #pragma unroll 29 | for (int i = 0; i < CONFIG_T::N; i++) { 30 | int idx = transfer_idx(i); 31 | res[i] = data[idx]; 32 | } 33 | } 34 | 35 | } // namespace nnet 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- 
/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_transpose_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TRANSPOSE_STREAM_H_ 2 | #define NNET_TRANSPOSE_STREAM_H_ 3 | 4 | namespace nnet { 5 | 6 | template void transpose_stream() { 7 | 8 | using data_T = typename ExtractPipeType::value_type; 9 | using res_T = typename ExtractPipeType::value_type; 10 | 11 | constexpr auto data_size = std::tuple_size::value_type>{}; 12 | constexpr auto res_size = std::tuple_size::value_type>{}; 13 | 14 | [[intel::fpga_register]] typename data_T::value_type data_array[CONFIG_T::N]; 15 | 16 | for (int i = 0; i < CONFIG_T::N / data_size; i++) { 17 | [[intel::fpga_register]] data_T in_data = data_pipe::read(); 18 | 19 | #pragma unroll 20 | for (int j = 0; j < data_size; j++) { 21 | data_array[i * data_size + j] = typename data_T::value_type(in_data[j]); 22 | } 23 | } 24 | 25 | for (int i = 0; i < CONFIG_T::N / res_size; i++) { 26 | [[intel::fpga_register]] res_T out_data; 27 | 28 | #pragma unroll 29 | for (int j = 0; j < res_size; j++) { 30 | out_data[j] = typename res_T::value_type(data_array[transfer_idx(i * res_size + j)]); 31 | } 32 | 33 | res_pipe::write(out_data); 34 | } 35 | } 36 | 37 | } // namespace nnet 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /hls4ml/templates/oneapi/firmware/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETERS_H_ 2 | #define PARAMETERS_H_ 3 | 4 | #include "defines.h" 5 | 6 | #include "nnet_utils/nnet_helpers.h" 7 | // hls-fpga-machine-learning insert includes 8 | 9 | // hls-fpga-machine-learning insert layer-config 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/Makefile: -------------------------------------------------------------------------------- 1 | DEVICE := Arria10 2 | TARGETS := myproject-fpga 3 | 4 | CXX := i++ 5 | CXXFLAGS := -march=$(DEVICE) 6 | RM := rm -rf 7 | DEBUG_FLAGS := --time quartus-hlssynt.log 8 | SOURCE_FILES := myproject_test.cpp firmware/myproject.cpp 9 | HEADER_FILES := firmware/myproject.h 10 | LOGGING_1 := 11 | LOGGING_2 := 12 | QUARTUS_COMPILE := 13 | CONT_IF_LARGE_AREA := 14 | 15 | .PHONY: test 16 | test: $(TARGETS) 17 | @$(foreach t,$(TARGETS),echo ./$(t); ./$(t) | tee $(t)_run.log; echo "";) 18 | 19 | .PHONY: all 20 | all: $(TARGETS) 21 | 22 | .PHONY: clean 23 | clean: 24 | -$(RM) $(foreach t,$(TARGETS),$(t).prj $(t) $(t)_time.log) 25 | 26 | .PHONY: myproject-fpga 27 | myproject-fpga: CXXFLAGS := $(CXXFLAGS) 28 | 29 | $(TARGETS) : $(SOURCE_FILES) $(HEADER_FILES) 30 | $(CXX) $(LOGGING_1) $(LOGGING_2) $(CXXFLAGS) $(DEBUG_FLAGS) $(SOURCE_FILES) $(CONT_IF_LARGE_AREA) $(QUARTUS_COMPILE) -o $@ 31 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/ac_types/stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_STREAM_H 2 | #define NNET_STREAM_H 3 | 4 | #include 5 | 6 | namespace nnet { 7 | 8 | /* 9 | * A struct with the same high-level functionality as Intel's HLS ihc::stream 10 | * This struct is used during GCC compilation / hls4ml model.predict(...) 
11 | * This is because GCC does not have access to HLS source files (ihc::stream) 12 | * Software-wise, this struct behaves like a first-in, first-out (FIFO) buffer 13 | * However, it cannot be used for HLS synthesis, since it uses dynamic memory allocation (deque) 14 | */ 15 | template 16 | struct stream { 17 | private: 18 | std::deque _data; 19 | 20 | public: 21 | stream() {} 22 | 23 | T read() { 24 | T element = _data.front(); 25 | _data.pop_front(); 26 | return element; 27 | } 28 | 29 | void write(const T& element) { 30 | _data.push_back(element); 31 | } 32 | }; 33 | 34 | } 35 | 36 | #endif -------------------------------------------------------------------------------- /hls4ml/templates/quartus/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | CC=g++ 5 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 6 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 7 | elif [[ "$OSTYPE" == "darwin"* ]]; then 8 | CFLAGS="-O3 -fPIC -std=c++11" 9 | fi 10 | LDFLAGS= 11 | INCFLAGS="-Ifirmware/ac_types/ -Ifirmware/ap_types/" 12 | PROJECT=myproject 13 | LIB_STAMP=mystamp 14 | 15 | ${CC} ${CFLAGS} ${INCFLAGS} -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 16 | ${CC} ${CFLAGS} ${INCFLAGS} -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 17 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so 18 | rm -f *.o 19 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/defines.h: -------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H_ 2 | #define DEFINES_H_ 3 | 4 | /* 5 | * Intel HLS makes use of three streaming interfaces: 6 | * (1) stream_in - used as the main input to a component 7 | * (2) stream_out - used as the main output of a component 8 | * (3) stream - allows both reading and writing; used for inter-component connections 9 | * ihc::stream has a implicitly deleted constructor and therefore, cannot be used as the output of a function/component 10 | * Therefore, variables of type 'stream' are always passed by reference 11 | */ 12 | 13 | #ifndef __INTELFPGA_COMPILER__ 14 | 15 | #include "ac_fixed.h" 16 | #include "ac_int.h" 17 | #define hls_register 18 | 19 | #include "stream.h" 20 | template using stream = nnet::stream; 21 | template using stream_in = nnet::stream; 22 | template using stream_out = nnet::stream; 23 | 24 | #else 25 | 26 | #include "HLS/ac_fixed.h" 27 | #include "HLS/ac_int.h" 28 | #include "HLS/hls.h" 29 | 30 | template using stream = ihc::stream; 31 | template using stream_in = ihc::stream_in; 32 | template using stream_out = ihc::stream_out; 33 | 34 | #endif 35 | 36 | // Include nnet::array - a custom array-like struct, mainly used with io_stream 37 | #include "nnet_utils/nnet_types.h" 38 | 39 | // hls-fpga-machine-learning insert numbers 40 | 41 | // hls-fpga-machine-learning insert layer-precision 42 | 43 | #define DIV_ROUNDUP(n, d) ((n + d - 1) / d) 44 | #define MIN(n, d) (n > d ? d : n) 45 | #define MAX(n, d) (n < d ? 
d : n) 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/myproject.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_H_ 2 | #define MYPROJECT_H_ 3 | 4 | #ifndef __INTELFPGA_COMPILER__ 5 | #include "ac_fixed.h" 6 | #include "ac_int.h" 7 | #define hls_register 8 | #else 9 | #include "HLS/ac_fixed.h" 10 | #include "HLS/ac_int.h" 11 | #include "HLS/hls.h" 12 | #endif 13 | 14 | // Streams are explicitly defined in defines.h, which are included for parameters.h 15 | // Defining them again in this file will cause compile-time errors 16 | #include "defines.h" 17 | 18 | // If using io_parallel, inputs and output need to be initialised before calling the top-level function 19 | // If using io_stream, no inputs/outputs are initialised, as they are passed by reference to the top-level function 20 | // hls-fpga-machine-learning insert inputs 21 | // hls-fpga-machine-learning insert outputs 22 | 23 | #ifndef __INTELFPGA_COMPILER__ 24 | /* 25 | * The top-level function used during GCC compilation / hls4ml.predic(...) goes here 26 | * An important distinction is made between io_stream and io_parallel: 27 | * (1) io_parallel: 28 | - Top-level function takes a struct containing an array as function argument 29 | - Returns a struct containing an array - the prediction 30 | (2) io_stream: 31 | - Top-level function is 'void' - no return value 32 | - Instead, both the input and output are passed by reference 33 | - This is due the HLS Streaming Interfaces; stream cannot be copied (implicitly deleted copy constructor) 34 | * This distinction is handled in quartus_writer.py 35 | */ 36 | // hls-fpga-machine-learning instantiate GCC top-level 37 | #else 38 | // Maximum initiation interval, concurrency and frequency for HLS syntheis are defined here 39 | // hls-fpga-machine-learning insert cpragmas 40 | 41 | /* 42 | * The top-level function used during HLS Synthesis goes here 43 | * In a similar manner to GCC, there is a distinction between io_stream & io_parallel 44 | */ 45 | // hls-fpga-machine-learning instantiate HLS top-level 46 | #endif 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_dense_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_DENSE_STREAM_H_ 2 | #define NNET_DENSE_STREAM_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_dense.h" 6 | #include "nnet_types.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void dense_resource(stream &data_stream, stream &res_stream, 12 | const typename CONFIG_T::weight_t weights[CONFIG_T::n_in * CONFIG_T::n_out], 13 | const typename CONFIG_T::bias_t biases[CONFIG_T::n_out]) { 14 | hls_register typename data_T::value_type data[CONFIG_T::n_in]; 15 | hls_register typename res_T::value_type res[CONFIG_T::n_out]; 16 | 17 | DataPrepare: 18 | #pragma ii 1 19 | for (int i_in = 0; i_in < CONFIG_T::n_in / data_T::size; i_in++) { 20 | data_T data_pack = data_stream.read(); 21 | DataPack: 22 | #pragma unroll 23 | for (int i_pack = 0; i_pack < data_T::size; i_pack++) { 24 | data[i_in * data_T::size + i_pack] = data_pack[i_pack]; 25 | } 26 | } 27 | 28 | dense_resource(data, res, weights, biases); 29 | 30 | ResWrite: 31 | #pragma ii 1 32 | for (unsigned i_out = 0; i_out < CONFIG_T::n_out / res_T::size; i_out++) { 33 | res_T res_pack; 34 | ResPack: 35 | #pragma unroll 36 | 
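            // Added commentary (not from the upstream template): the stream carries
            // nnet::array packets of res_T::size elements, so one inference writes
            // n_out / res_T::size packets to res_stream. For example, with n_out = 32
            // and a packet size of 8, the ResWrite loop runs four times and each
            // ResPack pass fills one 8-element packet before it is written out.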
for (int i_pack = 0; i_pack < res_T::size; i_pack++) { 37 | res_pack[i_pack] = res[i_out * res_T::size + i_pack]; 38 | } 39 | 40 | res_stream.write(res_pack); 41 | } 42 | } 43 | 44 | } // namespace nnet 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_embed.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_H_ 2 | #define NNET_EMBED_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_helpers.h" 6 | 7 | namespace nnet { 8 | 9 | struct embed_config { 10 | // Internal data type definitions 11 | typedef float embeddings_t; 12 | 13 | // (Default layer sizes, overwritten form the backend 14 | static const unsigned n_in = 10; 15 | static const unsigned n_out = 16; 16 | static const unsigned vocab_size = 50; 17 | 18 | // Resource reuse info 19 | static const unsigned io_type = io_parallel; 20 | static const unsigned reuse_factor = 1; 21 | }; 22 | 23 | template 24 | void embedding(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in * CONFIG_T::n_out], 25 | const typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 26 | 27 | /* 28 | * Can store embeddings[] in a register, but a large multiiplexer 29 | * is created due to a non-constant access pattern 30 | */ 31 | 32 | InputSequence: 33 | #pragma ii CONFIG_T::reuse_factor 34 | #pragma unroll 35 | for (int j = 0; j < CONFIG_T::n_in; j++) { 36 | DenseEmbedding: 37 | #pragma unroll 38 | for (int i = 0; i < CONFIG_T::n_out; i++) { 39 | res[j * CONFIG_T::n_out + i] = embeddings[data[j].to_uint() * CONFIG_T::n_out + i]; 40 | } 41 | } 42 | } 43 | 44 | } // namespace nnet 45 | #endif 46 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_embed_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_STREAM_H_ 2 | #define NNET_EMBED_STREAM_H_ 3 | 4 | namespace nnet { 5 | 6 | template 7 | void embedding(stream &data, stream &res, 8 | const typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 9 | data_T in_data = data.read(); 10 | 11 | InputSequence: 12 | #pragma ii CONFIG_T::reuse_factor 13 | for (int j = 0; j < data_T::size; j++) { 14 | 15 | res_T res_pack; 16 | 17 | DenseEmbedding: 18 | #pragma unroll 19 | for (int i = 0; i < CONFIG_T::n_out; i++) { 20 | res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; 21 | } 22 | 23 | res.write(res_pack); 24 | } 25 | } 26 | 27 | } // namespace nnet 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_recurrent_activation.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_RECR_ACTIVATION_H_ 2 | #define NNET_RECR_ACTIVATION_H_ 3 | 4 | #include "nnet_activation.h" 5 | #include "nnet_common.h" 6 | 7 | namespace nnet { 8 | 9 | namespace activation { 10 | 11 | template class Activation { 12 | public: 13 | // ************************************************* 14 | // Blank Activation 15 | // ************************************************* 16 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {} 17 | }; 18 | 19 | template class relu : public Activation { 20 | public: 21 | // ************************************************* 22 | // Relu Activation 23 | // 
************************************************* 24 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 25 | nnet::relu(data, res); 26 | } 27 | }; 28 | 29 | template class sigmoid : public Activation { 30 | public: 31 | // ************************************************* 32 | // Sigmoid Activation 33 | // ************************************************* 34 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 35 | nnet::sigmoid(data, res); 36 | } 37 | }; 38 | 39 | template class tanh : public Activation { 40 | public: 41 | // ************************************************* 42 | // TanH Activation 43 | // ************************************************* 44 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 45 | nnet::dense_tanh(data, res); 46 | } 47 | }; 48 | 49 | } // namespace activation 50 | 51 | } // namespace nnet 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_resize.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_H_ 2 | #define NNET_IMAGE_H_ 3 | 4 | namespace nnet { 5 | 6 | struct resize_config { 7 | static const unsigned in_height = 10; 8 | static const unsigned in_width = 10; 9 | 10 | static const unsigned out_height = 10; 11 | static const unsigned out_width = 10; 12 | 13 | static const unsigned n_chan = 10; 14 | }; 15 | 16 | template 17 | void resize_nearest(data_T image[CONFIG_T::height * CONFIG_T::width * CONFIG_T::n_chan], 18 | data_T resized[CONFIG_T::new_height * CONFIG_T::new_width * CONFIG_T::n_chan]) { 19 | int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; 20 | int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; 21 | 22 | for (int i = 0; i < CONFIG_T::new_height; i++) { 23 | for (int j = 0; j < CONFIG_T::new_width; j++) { 24 | int x = ((j * x_ratio) >> 16); 25 | int y = ((i * y_ratio) >> 16); 26 | 27 | #pragma unroll 28 | for (int k = 0; k < CONFIG_T::n_chan; k++) { 29 | resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = 30 | image[(y * CONFIG_T::width * CONFIG_T::n_chan) + x * CONFIG_T::n_chan + k]; 31 | } 32 | } 33 | } 34 | } 35 | 36 | } // namespace nnet 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_resize_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_STREAM_H_ 2 | #define NNET_IMAGE_STREAM_H_ 3 | 4 | #include "nnet_common.h" 5 | 6 | namespace nnet { 7 | 8 | template void resize_nearest(stream &image, stream &resized) { 9 | assert(CONFIG_T::new_height % CONFIG_T::height == 0); 10 | assert(CONFIG_T::new_width % CONFIG_T::width == 0); 11 | 12 | constexpr unsigned ratio_height = CONFIG_T::new_height / CONFIG_T::height; 13 | constexpr unsigned ratio_width = CONFIG_T::new_width / CONFIG_T::width; 14 | 15 | ImageHeight: 16 | for (unsigned h = 0; h < CONFIG_T::height; h++) { 17 | hls_register data_T data_in_row[CONFIG_T::width]; 18 | 19 | ImageWidth: 20 | for (unsigned i = 0; i < CONFIG_T::width; i++) { 21 | hls_register data_T in_data = image.read(); 22 | 23 | ImageChan: 24 | #pragma unroll 25 | for (unsigned j = 0; j < CONFIG_T::n_chan; j++) { 26 | data_in_row[i][j] = in_data[j]; 27 | } 28 | } 29 | 30 | ResizeHeight: 31 | for (unsigned i = 0; i < ratio_height; i++) { 
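                // Added commentary (not from the upstream template): this is plain
                // nearest-neighbour upsampling by replication. The row buffered above is
                // re-emitted ratio_height times, and within each pass every pixel is
                // written ratio_width times, so an H x W input stream becomes an
                // (H*ratio_height) x (W*ratio_width) output without any index arithmetic.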
32 | 33 | ImageWidth2: 34 | for (unsigned l = 0; l < CONFIG_T::width; l++) { 35 | 36 | ResizeWidth: 37 | for (unsigned j = 0; j < ratio_width; j++) { 38 | 39 | hls_register data_T out_data; 40 | 41 | ResizeChan: 42 | #pragma unroll 43 | for (unsigned k = 0; k < CONFIG_T::n_chan; k++) { 44 | out_data[k] = data_in_row[l][k]; 45 | } 46 | 47 | resized.write(out_data); 48 | } 49 | } 50 | } 51 | } 52 | } 53 | 54 | } // namespace nnet 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_transpose_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TRANSPOSE_STREAM_H_ 2 | #define NNET_TRANSPOSE_STREAM_H_ 3 | 4 | namespace nnet { 5 | 6 | template void transpose_2d(stream &data, stream &res) { 7 | hls_register typename data_T::value_type data_array[CONFIG_T::height * CONFIG_T::width]; 8 | 9 | for (int i = 0; i < CONFIG_T::height * CONFIG_T::width / data_T::size; i++) { 10 | hls_register data_T in_data = data.read(); 11 | 12 | #pragma unroll 13 | for (int j = 0; j < data_T::size; j++) { 14 | data_array[i * data_T::size + j] = typename data_T::value_type(in_data[j]); 15 | } 16 | } 17 | 18 | for (int i = 0; i < CONFIG_T::height * CONFIG_T::width / res_T::size; i++) { 19 | hls_register res_T out_data; 20 | 21 | #pragma unroll 22 | for (int j = 0; j < res_T::size; j++) { 23 | out_data[j] = typename res_T::value_type(data_array[j * data_T::size + i]); 24 | } 25 | 26 | res.write(out_data); 27 | } 28 | } 29 | 30 | } // namespace nnet 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/nnet_utils/nnet_types.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TYPES_H_ 2 | #define NNET_TYPES_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nnet { 9 | 10 | // Fixed-size array 11 | template struct array { 12 | typedef T value_type; 13 | static const unsigned size = N; 14 | 15 | T data[N]; 16 | 17 | array() {} 18 | 19 | array(T x) { 20 | #pragma unroll 21 | for (int i = 0; i < N; i++) { 22 | data[i] = x; 23 | } 24 | } 25 | 26 | T &operator[](size_t pos) { return data[pos]; } 27 | 28 | const T &operator[](size_t pos) const { return data[pos]; } 29 | 30 | array &operator=(const array &other) { 31 | if (&other == this) 32 | return *this; 33 | 34 | assert(N == other.size && "Array sizes must match."); 35 | 36 | #pragma unroll 37 | for (unsigned i = 0; i < N; i++) { 38 | data[i] = other[i]; 39 | } 40 | return *this; 41 | } 42 | }; 43 | 44 | /* 45 | * HLS Shift Register Implementation 46 | * To verify a shift register is used in hardware, go to report.html > Area Analysis of System 47 | * Unrolling the shift loop minimizes resource usage and latency at the same time 48 | * The shift loop should be either fully unrolled or not unrolled at all 49 | * Unrolling with a specific unroll factor or pipelining with certain ii's, can cause an irregular access pattern, which 50 | * wouldn't allow shift register usage in RTL 51 | */ 52 | template struct shift_reg { 53 | private: 54 | T data[N]; 55 | 56 | public: 57 | // Default constructor 58 | shift_reg() {} 59 | 60 | // Shift queue, insert new element and return element from the front 61 | T shift(T inp) { 62 | T out = data[N - 1]; 63 | 64 | #pragma unroll 65 | for (int i = N - 1; i > 0; i--) { 66 | data[i] = data[i - 1]; 67 | } 68 | data[0] = inp; 69 | 70 | return out; 71 | } 
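    // Added usage illustration (not part of the upstream header; "line_buffer", the
    // incoming/evicted names and the depth CONFIG_T::filt_width are placeholders):
    // a streaming layer can keep the most recent samples of a channel like this,
    // reusing the value that falls off the end of the register:
    //
    //   static nnet::shift_reg<data_T, CONFIG_T::filt_width> line_buffer;
    //   data_T evicted = line_buffer.shift(incoming);   // push newest, pop oldest
    //   data_T newest  = line_buffer.read(0);           // most recently inserted element
    //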
72 | 73 | T read(int pos) { return data[pos]; } 74 | }; 75 | 76 | } // namespace nnet 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/firmware/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETERS_H_ 2 | #define PARAMETERS_H_ 3 | 4 | #include "defines.h" 5 | 6 | #include "nnet_utils/nnet_helpers.h" 7 | // hls-fpga-machine-learning insert includes 8 | 9 | // hls-fpga-machine-learning insert layer-config 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /hls4ml/templates/quartus/myproject_bridge.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_BRIDGE_H_ 2 | #define MYPROJECT_BRIDGE_H_ 3 | 4 | #include "firmware/myproject.h" 5 | #include "firmware/nnet_utils/nnet_helpers.h" 6 | #include 7 | #include 8 | 9 | // hls-fpga-machine-learning insert bram 10 | 11 | namespace nnet { 12 | bool trace_enabled = false; 13 | std::map *trace_outputs = NULL; 14 | size_t trace_type_size = sizeof(double); 15 | } // namespace nnet 16 | 17 | extern "C" { 18 | 19 | struct trace_data { 20 | const char *name; 21 | void *data; 22 | }; 23 | 24 | void allocate_trace_storage(size_t element_size) { 25 | nnet::trace_enabled = true; 26 | nnet::trace_outputs = new std::map; 27 | nnet::trace_type_size = element_size; 28 | // hls-fpga-machine-learning insert trace_outputs 29 | } 30 | 31 | void free_trace_storage() { 32 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 33 | void *ptr = i->second; 34 | free(ptr); 35 | } 36 | nnet::trace_outputs->clear(); 37 | delete nnet::trace_outputs; 38 | nnet::trace_outputs = NULL; 39 | nnet::trace_enabled = false; 40 | } 41 | 42 | void collect_trace_output(struct trace_data *c_trace_outputs) { 43 | int ii = 0; 44 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 45 | c_trace_outputs[ii].name = i->first.c_str(); 46 | c_trace_outputs[ii].data = i->second; 47 | ii++; 48 | } 49 | } 50 | 51 | // Wrapper of top level function for Python bridge 52 | void myproject_float( 53 | // hls-fpga-machine-learning insert header #float 54 | ) { 55 | 56 | // hls-fpga-machine-learning insert wrapper #float 57 | } 58 | 59 | void myproject_double( 60 | // hls-fpga-machine-learning insert header #double 61 | ) { 62 | // hls-fpga-machine-learning insert wrapper #double 63 | } 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /hls4ml/templates/symbolic/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CC=g++ 4 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 5 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 6 | elif [[ "$OSTYPE" == "darwin"* ]]; then 7 | CFLAGS="-O3 -fPIC -std=c++11" 8 | fi 9 | HLS_LIBS_PATH=mylibspath 10 | LDFLAGS="-Wl,--no-undefined -Wl,--no-allow-shlib-undefined -Wl,--no-as-needed -Wl,-rpath,${HLS_LIBS_PATH}/lib/csim -L ${HLS_LIBS_PATH}/lib/csim -lhlsmc++-GCC46 -lhlsm-GCC46 -fno-builtin -fno-inline -Wl,-rpath,${HLS_LIBS_PATH}/tools/fpo_v7_0 -L ${HLS_LIBS_PATH}/tools/fpo_v7_0 -lgmp -lmpfr -lIp_floating_point_v7_0_bitacc_cmodel" 11 | INCFLAGS="-Ifirmware/ap_types/" 12 | PROJECT=myproject 13 | LIB_STAMP=mystamp 14 | 15 | ${CC} ${CFLAGS} ${INCFLAGS} -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 16 | ${CC} ${CFLAGS} ${INCFLAGS} -c 
${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 17 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so ${LDFLAGS} 18 | rm -f *.o 19 | -------------------------------------------------------------------------------- /hls4ml/templates/vitis/nnet_utils/nnet_conv1d_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_CONV1D_STREAM_H_ 2 | #define NNET_CONV1D_STREAM_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_common.h" 6 | #include "nnet_conv_stream.h" 7 | 8 | namespace nnet { 9 | 10 | template 11 | void conv_1d_cl(hls::stream &data, hls::stream &res, 12 | typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt], 13 | typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) { 14 | assert(CONFIG_T::implementation == conv_implementation::linebuffer && 15 | "Only \"linebuffer\" implementation is supported in Vitis HLS."); 16 | 17 | assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0); 18 | 19 | if (CONFIG_T::strategy == nnet::latency) { 20 | ReadInputWidth: 21 | for (unsigned i_iw = 0; i_iw < CONFIG_T::in_width; i_iw++) { 22 | #pragma HLS PIPELINE II=CONFIG_T::reuse_factor 23 | compute_output_buffer_1d(data.read(), res, weights, biases); 24 | } 25 | } else { 26 | ReadInputWidthSerial: 27 | for (unsigned i_iw = 0; i_iw < CONFIG_T::in_width; i_iw++) { 28 | compute_output_buffer_1d(data.read(), res, weights, biases); 29 | } 30 | } 31 | } 32 | 33 | } // namespace nnet 34 | #endif 35 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/ap_types/hls_math.h: -------------------------------------------------------------------------------- 1 | #ifndef X_HLS_MATH_H 2 | #define X_HLS_MATH_H 3 | 4 | #include 5 | #include "ap_fixed.h" 6 | 7 | namespace hls { 8 | 9 | template 10 | static T exp(const T x) { 11 | return (T) std::exp(x.to_double()); 12 | } 13 | 14 | template T sin(T x) { return (T) std::sin(x.to_double()); }; 15 | 16 | template T cos(T x) { return (T) std::cos(x.to_double()); }; 17 | 18 | template T asin(T x) { return (T) std::asin(x.to_double()); }; 19 | 20 | template T acos(T x) { return (T) std::acos(x.to_double()); }; 21 | 22 | template T atan(T x) { return (T) std::atan(x.to_double()); }; 23 | 24 | template T atan2(T x, T y) { return (T) hls::atan2(x.to_double(), y.to_double()); }; 25 | 26 | } 27 | #endif 28 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | CC=g++ 5 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 6 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 7 | elif [[ "$OSTYPE" == "darwin"* ]]; then 8 | CFLAGS="-O3 -fPIC -std=c++11" 9 | fi 10 | LDFLAGS= 11 | INCFLAGS="-Ifirmware/ap_types/" 12 | PROJECT=myproject 13 | LIB_STAMP=mystamp 14 | BASEDIR="$(cd "$(dirname "$0")" && pwd)" 15 | WEIGHTS_DIR="\"${BASEDIR}/firmware/weights\"" 16 | 17 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 18 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 19 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so 20 | rm -f *.o 21 | -------------------------------------------------------------------------------- 
/hls4ml/templates/vivado/firmware/defines.h: -------------------------------------------------------------------------------- 1 | #ifndef DEFINES_H_ 2 | #define DEFINES_H_ 3 | 4 | #include "ap_fixed.h" 5 | #include "ap_int.h" 6 | #include "nnet_utils/nnet_types.h" 7 | #include 8 | #include 9 | 10 | // hls-fpga-machine-learning insert numbers 11 | 12 | // hls-fpga-machine-learning insert namespace-start 13 | 14 | // hls-fpga-machine-learning insert layer-precision 15 | 16 | // hls-fpga-machine-learning insert namespace-end 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/firmware/myproject.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "myproject.h" 4 | #include "parameters.h" 5 | 6 | // hls-fpga-machine-learning insert namespace-start 7 | 8 | void myproject( 9 | // hls-fpga-machine-learning insert header 10 | ) { 11 | 12 | // hls-fpga-machine-learning insert IO 13 | 14 | // hls-fpga-machine-learning insert load weights 15 | 16 | // **************************************** 17 | // NETWORK INSTANTIATION 18 | // **************************************** 19 | 20 | // hls-fpga-machine-learning insert layers 21 | } 22 | 23 | // hls-fpga-machine-learning insert namespace-end 24 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/firmware/myproject.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_H_ 2 | #define MYPROJECT_H_ 3 | 4 | #include "ap_fixed.h" 5 | #include "ap_int.h" 6 | #include "hls_stream.h" 7 | 8 | #include "defines.h" 9 | 10 | // hls-fpga-machine-learning insert namespace-start 11 | 12 | // Prototype of top level function for C-synthesis 13 | void myproject( 14 | // hls-fpga-machine-learning insert header 15 | ); 16 | 17 | // hls-fpga-machine-learning insert namespace-end 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/firmware/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETERS_H_ 2 | #define PARAMETERS_H_ 3 | 4 | #include "ap_fixed.h" 5 | #include "ap_int.h" 6 | 7 | #include "nnet_utils/nnet_code_gen.h" 8 | #include "nnet_utils/nnet_helpers.h" 9 | // hls-fpga-machine-learning insert includes 10 | 11 | // hls-fpga-machine-learning insert weights 12 | 13 | // hls-fpga-machine-learning insert namespace-start 14 | 15 | // hls-fpga-machine-learning insert layer-config 16 | 17 | // hls-fpga-machine-learning insert namespace-end 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/myproject_bridge.cpp: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_BRIDGE_H_ 2 | #define MYPROJECT_BRIDGE_H_ 3 | 4 | #include "firmware/myproject.h" 5 | #include "firmware/nnet_utils/nnet_helpers.h" 6 | #include 7 | #include 8 | 9 | // hls-fpga-machine-learning insert bram 10 | 11 | namespace nnet { 12 | bool trace_enabled = false; 13 | std::map *trace_outputs = NULL; 14 | size_t trace_type_size = sizeof(double); 15 | } // namespace nnet 16 | 17 | extern "C" { 18 | 19 | struct trace_data { 20 | const char *name; 21 | void *data; 22 | }; 23 | 24 | void allocate_trace_storage(size_t element_size) { 25 | nnet::trace_enabled = true; 26 | nnet::trace_outputs = new std::map; 27 | 
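    // Added commentary (not from the upstream template): per-layer trace buffers are
    // registered into this map by generated code at the "insert trace_outputs" marker
    // below, keyed by layer name. collect_trace_output() later hands the raw pointers
    // back through the C interface so the Python side can read intermediate layer outputs.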
nnet::trace_type_size = element_size; 28 | // hls-fpga-machine-learning insert trace_outputs 29 | } 30 | 31 | void free_trace_storage() { 32 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 33 | void *ptr = i->second; 34 | free(ptr); 35 | } 36 | nnet::trace_outputs->clear(); 37 | delete nnet::trace_outputs; 38 | nnet::trace_outputs = NULL; 39 | nnet::trace_enabled = false; 40 | } 41 | 42 | void collect_trace_output(struct trace_data *c_trace_outputs) { 43 | int ii = 0; 44 | for (std::map::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) { 45 | c_trace_outputs[ii].name = i->first.c_str(); 46 | c_trace_outputs[ii].data = i->second; 47 | ii++; 48 | } 49 | } 50 | 51 | // Wrapper of top level function for Python bridge 52 | void myproject_float( 53 | // hls-fpga-machine-learning insert header #float 54 | ) { 55 | // hls-fpga-machine-learning insert namespace 56 | 57 | // hls-fpga-machine-learning insert wrapper #float 58 | } 59 | 60 | void myproject_double( 61 | // hls-fpga-machine-learning insert header #double 62 | ) { 63 | // hls-fpga-machine-learning insert namespace 64 | 65 | // hls-fpga-machine-learning insert wrapper #double 66 | } 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_code_gen.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_INSTR_GEN_H_ 2 | #define NNET_INSTR_GEN_H_ 3 | 4 | #include "nnet_conv1d_latency.h" 5 | #include "nnet_helpers.h" 6 | 7 | #include "hls_stream.h" 8 | #include "nnet_common.h" 9 | #include "nnet_function_stubs.h" 10 | #include "nnet_mult.h" 11 | 12 | namespace nnet { 13 | 14 | template class PointwiseConv1D { 15 | public: 16 | static void pointwise_conv(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], 17 | res_T res[CONFIG_T::out_width * CONFIG_T::n_filt], 18 | typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt], 19 | typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) { 20 | // To be implemented in subclasses 21 | } 22 | }; 23 | 24 | // hls4ml insert code 25 | 26 | } // namespace nnet 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_embed.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_H_ 2 | #define NNET_EMBED_H_ 3 | 4 | #include "nnet_common.h" 5 | #include "nnet_helpers.h" 6 | 7 | namespace nnet { 8 | 9 | struct embed_config { 10 | // Internal data type definitions 11 | typedef float embeddings_t; 12 | 13 | // Layer Sizes 14 | static const unsigned n_in = 10; 15 | static const unsigned n_out = 16; 16 | static const unsigned vocab_size = 50; 17 | 18 | // Resource reuse info 19 | static const unsigned io_type = io_parallel; 20 | static const unsigned reuse_factor = 1; 21 | }; 22 | 23 | template 24 | void embedding(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in * CONFIG_T::n_out], 25 | typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 26 | 27 | #pragma HLS PIPELINE II=CONFIG_T::reuse_factor 28 | // This can save a few cycles, but it will create a large multiplexer due to 29 | // non-constant access pattern, so let's leave it out 30 | //#pragma HLS ARRAY_PARTITION variable=embeddings complete 31 | 32 | InputSequence: 33 | for (int j = 0; j < CONFIG_T::n_in; j++) { 34 | #pragma HLS UNROLL 35 | 
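        // Added worked example (not from the upstream template): the lookup is a plain
        // row read from a flattened [vocab_size x n_out] table. With the default
        // n_out = 16 above, an input token data[j] = 3 reads embeddings[48]..embeddings[63]
        // and writes them to res[j*16]..res[j*16 + 15].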
DenseEmbedding: 36 | for (int i = 0; i < CONFIG_T::n_out; i++) { 37 | #pragma HLS UNROLL 38 | res[j * CONFIG_T::n_out + i] = embeddings[data[j] * CONFIG_T::n_out + i]; 39 | } 40 | } 41 | } 42 | 43 | } // namespace nnet 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_embed_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_EMBED_STREAM_H_ 2 | #define NNET_EMBED_STREAM_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_common.h" 6 | #include "nnet_helpers.h" 7 | 8 | namespace nnet { 9 | 10 | template <class data_T, class res_T, typename CONFIG_T> 11 | void embedding(hls::stream<data_T> &data, hls::stream<res_T> &res, 12 | typename CONFIG_T::embeddings_t embeddings[CONFIG_T::vocab_size * CONFIG_T::n_out]) { 13 | data_T in_data = data.read(); 14 | 15 | InputSequence: 16 | for (int j = 0; j < data_T::size; j++) { 17 | #pragma HLS PIPELINE II=CONFIG_T::reuse_factor 18 | 19 | res_T res_pack; 20 | PRAGMA_DATA_PACK(res_pack) 21 | 22 | DenseEmbedding: 23 | for (int i = 0; i < CONFIG_T::n_out; i++) { 24 | #pragma HLS UNROLL 25 | res_pack[i] = embeddings[in_data[j] * CONFIG_T::n_out + i]; 26 | } 27 | res.write(res_pack); 28 | } 29 | } 30 | 31 | } // namespace nnet 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_image.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_H_ 2 | #define NNET_IMAGE_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_common.h" 6 | #include <math.h> 7 | 8 | namespace nnet { 9 | 10 | struct resize_config { 11 | static const unsigned height = 10; 12 | static const unsigned width = 10; 13 | static const unsigned n_chan = 10; 14 | static const unsigned new_height = 10; 15 | static const unsigned new_width = 10; 16 | }; 17 | 18 | template <class data_T, typename CONFIG_T> 19 | void resize_nearest(data_T image[CONFIG_T::height * CONFIG_T::width * CONFIG_T::n_chan], 20 | data_T resized[CONFIG_T::new_height * CONFIG_T::new_width * CONFIG_T::n_chan]) { 21 | int y_ratio = (int)((CONFIG_T::height << 16) / CONFIG_T::new_height) + 1; 22 | int x_ratio = (int)((CONFIG_T::width << 16) / CONFIG_T::new_width) + 1; 23 | int x2, y2; 24 | 25 | #pragma HLS PIPELINE 26 | 27 | for (int i = 0; i < CONFIG_T::new_height; i++) { 28 | for (int j = 0; j < CONFIG_T::new_width; j++) { 29 | x2 = ((j * x_ratio) >> 16); 30 | y2 = ((i * y_ratio) >> 16); 31 | for (int k = 0; k < CONFIG_T::n_chan; k++) { 32 | resized[(i * CONFIG_T::new_width * CONFIG_T::n_chan) + j * CONFIG_T::n_chan + k] = 33 | image[(y2 * CONFIG_T::width * CONFIG_T::n_chan) + x2 * CONFIG_T::n_chan + k]; 34 | } 35 | } 36 | } 37 | } 38 | 39 | } // namespace nnet 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_image_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_IMAGE_STREAM_H_ 2 | #define NNET_IMAGE_STREAM_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_common.h" 6 | 7 | namespace nnet { 8 | 9 | template <class data_T, typename CONFIG_T> void resize_nearest(hls::stream<data_T> &image, hls::stream<data_T> &resized) { 10 | assert(CONFIG_T::new_height % CONFIG_T::height == 0); 11 | assert(CONFIG_T::new_width % CONFIG_T::width == 0); 12 | constexpr unsigned ratio_height = CONFIG_T::new_height / CONFIG_T::height; 13 | constexpr unsigned ratio_width = CONFIG_T::new_width / CONFIG_T::width; 14 | 15 | ImageHeight: 16 | for (unsigned h = 0; h
< CONFIG_T::height; h++) { 17 | #pragma HLS PIPELINE 18 | 19 | data_T data_in_row[CONFIG_T::width]; 20 | 21 | ImageWidth: 22 | for (unsigned i = 0; i < CONFIG_T::width; i++) { 23 | #pragma HLS UNROLL 24 | 25 | data_T in_data = image.read(); 26 | 27 | ImageChan: 28 | for (unsigned j = 0; j < CONFIG_T::n_chan; j++) { 29 | #pragma HLS UNROLL 30 | 31 | data_in_row[i][j] = in_data[j]; 32 | } 33 | } 34 | 35 | ResizeHeight: 36 | for (unsigned i = 0; i < ratio_height; i++) { 37 | #pragma HLS UNROLL 38 | 39 | ImageWidth2: 40 | for (unsigned l = 0; l < CONFIG_T::width; l++) { 41 | #pragma HLS UNROLL 42 | 43 | ResizeWidth: 44 | for (unsigned j = 0; j < ratio_width; j++) { 45 | #pragma HLS UNROLL 46 | 47 | data_T out_data; 48 | PRAGMA_DATA_PACK(out_data) 49 | 50 | ResizeChan: 51 | for (unsigned k = 0; k < CONFIG_T::n_chan; k++) { 52 | #pragma HLS UNROLL 53 | 54 | out_data[k] = data_in_row[l][k]; 55 | } 56 | 57 | resized.write(out_data); 58 | } 59 | } 60 | } 61 | } 62 | } 63 | 64 | } // namespace nnet 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_recr_activations.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_RECR_ACTIVATION_H_ 2 | #define NNET_RECR_ACTIVATION_H_ 3 | 4 | #include "hls_stream.h" 5 | #include "nnet_activation.h" 6 | #include "nnet_common.h" 7 | #include "nnet_helpers.h" 8 | #include <math.h> 9 | 10 | namespace nnet { 11 | 12 | namespace activation { 13 | 14 | template <class data_T, class res_T, typename CONFIG_T> class Activation { 15 | public: 16 | // ************************************************* 17 | // Blank Activation 18 | // ************************************************* 19 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) {} // Nothing to do here 20 | }; 21 | 22 | template <class data_T, class res_T, typename CONFIG_T> class relu : public Activation<data_T, res_T, CONFIG_T> { 23 | public: 24 | // ************************************************* 25 | // Relu Activation 26 | // ************************************************* 27 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 28 | nnet::relu<data_T, res_T, typename CONFIG_T::ACT_CONFIG_T>(data, res); 29 | } 30 | }; 31 | 32 | template <class data_T, class res_T, typename CONFIG_T> class sigmoid : public Activation<data_T, res_T, CONFIG_T> { 33 | public: 34 | // ************************************************* 35 | // Sigmoid Activation 36 | // ************************************************* 37 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 38 | nnet::sigmoid<data_T, res_T, typename CONFIG_T::ACT_CONFIG_T>(data, res); 39 | } 40 | }; 41 | 42 | template <class data_T, class res_T, typename CONFIG_T> class tanh : public Activation<data_T, res_T, CONFIG_T> { 43 | public: 44 | // ************************************************* 45 | // TanH Activation 46 | // ************************************************* 47 | static void activation(data_T data[CONFIG_T::n_in], res_T res[CONFIG_T::n_in]) { 48 | nnet::tanh<data_T, res_T, typename CONFIG_T::ACT_CONFIG_T>(data, res); 49 | } 50 | }; 51 | 52 | } // namespace activation 53 | 54 | } // namespace nnet 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_transpose.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_PERMUTE_H_ 2 | #define NNET_PERMUTE_H_ 3 | 4 | namespace nnet { 5 | 6 | struct transpose_config { 7 | static const unsigned dims; 8 | static const unsigned N; 9 | // vivado/vitis hls can't index constexpr array for some reason 10 | // and vivado hls don't like template recursion either (vitis is fine) 11 | // thus this appears to be the only workaround (or overkill it with codegen) 12 | static
const unsigned *const from_shape; 13 | static const unsigned *const to_shape; 14 | static const unsigned *const perm; 15 | static const unsigned *const perm_strides; 16 | }; 17 | 18 | template <typename CONFIG_T> unsigned transfer_idx(int index) { 19 | // Given output idx in c-order flat array, return input idx 20 | int idx = 0; 21 | for (int i = CONFIG_T::dims - 1; i >= 0; i--) { 22 | idx += (index % CONFIG_T::to_shape[i]) * CONFIG_T::perm_strides[i]; 23 | index /= CONFIG_T::to_shape[i]; 24 | } 25 | return idx; 26 | } 27 | 28 | template <typename data_T, typename res_T, typename CONFIG_T> 29 | void transpose(const data_T data[CONFIG_T::N], res_T res[CONFIG_T::N]) { 30 | for (int i = 0; i < CONFIG_T::N; i++) { 31 | #pragma HLS UNROLL 32 | int idx = transfer_idx<CONFIG_T>(i); 33 | res[i] = data[idx]; 34 | } 35 | } 36 | 37 | } // namespace nnet 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/nnet_utils/nnet_types.h: -------------------------------------------------------------------------------- 1 | #ifndef NNET_TYPES_H_ 2 | #define NNET_TYPES_H_ 3 | 4 | #include <assert.h> 5 | #include <cstdio> 6 | #include <cstring> 7 | 8 | namespace nnet { 9 | 10 | // Fixed-size array 11 | template <class T, unsigned N> struct array { 12 | typedef T value_type; 13 | static const unsigned size = N; 14 | 15 | T data[N]; 16 | 17 | T &operator[](size_t pos) { return data[pos]; } 18 | 19 | const T &operator[](size_t pos) const { return data[pos]; } 20 | 21 | array &operator=(const array &other) { 22 | if (&other == this) 23 | return *this; 24 | 25 | assert(N == other.size && "Array sizes must match."); 26 | 27 | for (unsigned i = 0; i < N; i++) { 28 | #pragma HLS UNROLL 29 | data[i] = other[i]; 30 | } 31 | return *this; 32 | } 33 | }; 34 | 35 | // Generic lookup-table implementation, for use in approximations of math functions 36 | template <class T, unsigned N, T (*func)(T)> class lookup_table { 37 | public: 38 | lookup_table(T from, T to) : range_start(from), range_end(to), base_div(ap_uint<16>(N) / T(to - from)) { 39 | T step = (range_end - range_start) / ap_uint<16>(N); 40 | for (size_t i = 0; i < N; i++) { 41 | T num = range_start + ap_uint<16>(i) * step; 42 | T sample = func(num); 43 | samples[i] = sample; 44 | } 45 | } 46 | 47 | T operator()(T n) const { 48 | int index = (n - range_start) * base_div; 49 | if (index < 0) 50 | index = 0; 51 | else if (index > N - 1) 52 | index = N - 1; 53 | return samples[index]; 54 | } 55 | 56 | private: 57 | T samples[N]; 58 | const T range_start, range_end; 59 | ap_fixed<20, 16> base_div; 60 | }; 61 | 62 | } // namespace nnet 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado/vivado_synth.tcl: -------------------------------------------------------------------------------- 1 | set tcldir [file dirname [info script]] 2 | source [file join $tcldir project.tcl] 3 | 4 | add_files ${project_name}_prj/solution1/syn/verilog 5 | synth_design -top ${project_name} -part $part 6 | opt_design -retarget -propconst -sweep -bram_power_opt -shift_register_opt 7 | report_utilization -file vivado_synth.rpt 8 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado_accelerator/build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CC=g++ 4 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 5 | CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" 6 | elif [[ "$OSTYPE" == "darwin"* ]]; then 7 | CFLAGS="-O3 -fPIC -std=c++11" 8 | fi 9 | INCFLAGS="-Ifirmware/ap_types/" 10 | PROJECT=myproject 11 |
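# Note: "myproject" and "mystamp" above/below are placeholders; when hls4ml copies this script into a
# generated project it substitutes the actual project name and a per-build library stamp.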
LIB_STAMP=mystamp 12 | BASEDIR="$(cd "$(dirname "$0")" && pwd)" 13 | WEIGHTS_DIR="\"${BASEDIR}/firmware/weights\"" 14 | 15 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c firmware/${PROJECT}.cpp -o ${PROJECT}.o 16 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c firmware/${PROJECT}_axi.cpp -o ${PROJECT}_axi.o 17 | ${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o 18 | ${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${PROJECT}_axi.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so 19 | rm -f *.o 20 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado_accelerator/myproject_axi.cpp: -------------------------------------------------------------------------------- 1 | // hls-fpga-machine-learning insert include 2 | 3 | void myproject_axi(input_axi_t in[N_IN], output_axi_t out[N_OUT]) { 4 | 5 | // hls-fpga-machine-learning insert interface 6 | 7 | // hls-fpga-machine-learning insert local vars 8 | 9 | // hls-fpga-machine-learning insert enqueue 10 | 11 | // hls-fpga-machine-learning insert call 12 | 13 | // hls-fpga-machine-learning insert dequeue 14 | } 15 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado_accelerator/myproject_axi.h: -------------------------------------------------------------------------------- 1 | #ifndef MYPROJECT_AXI_H_ 2 | #define MYPROJECT_AXI_H_ 3 | 4 | #include 5 | // hls-fpga-machine-learning insert include 6 | 7 | // hls-fpga-machine-learning insert definitions 8 | 9 | void myproject_axi(input_axi_t in[N_IN], output_axi_t out[N_OUT]); 10 | #endif 11 | -------------------------------------------------------------------------------- /hls4ml/templates/vivado_accelerator/pynq-z2/tcl_scripts/axi_lite_design.tcl: -------------------------------------------------------------------------------- 1 | set tcldir [file dirname [info script]] 2 | source [file join $tcldir project.tcl] 3 | 4 | create_project project_1 ${project_name}_vivado_accelerator -part xc7z020clg400-1 -force 5 | 6 | set_property board_part tul.com.tw:pynq-z2:part0:1.0 [current_project] 7 | set_property ip_repo_paths ${project_name}_prj [current_project] 8 | update_ip_catalog 9 | 10 | # Create Block Designer design 11 | create_bd_design "design_1" 12 | create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 processing_system7_0 13 | apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config {make_external "FIXED_IO, DDR" apply_board_preset "1" Master "Disable" Slave "Disable" } [get_bd_cells processing_system7_0] 14 | create_bd_cell -type ip -vlnv xilinx.com:hls:${project_name}_axi:1.0 ${project_name}_axi_0 15 | apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config { Clk_master {Auto} Clk_slave {Auto} Clk_xbar {Auto} Master {/processing_system7_0/M_AXI_GP0} Slave {/${project_name}_axi_0/s_axi_AXILiteS} ddr_seg {Auto} intc_ip {New AXI Interconnect} master_apm {0}} [get_bd_intf_pins ${project_name}_axi_0/s_axi_AXILiteS] 16 | 17 | make_wrapper -files [get_files ./${project_name}_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/design_1.bd] -top 18 | add_files -norecurse ./${project_name}_vivado_accelerator/project_1.srcs/sources_1/bd/design_1/hdl/design_1_wrapper.v 19 | 20 | reset_run impl_1 21 | reset_run synth_1 22 | launch_runs impl_1 -to_step write_bitstream -jobs 6 23 | wait_on_run -timeout 360 impl_1 24 | 25 | open_run impl_1 26 | report_utilization -file 
util.rpt -hierarchical -hierarchical_percentages 27 | -------------------------------------------------------------------------------- /hls4ml/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.utils.config import config_from_keras_model, config_from_onnx_model, config_from_pytorch_model # noqa: F401 2 | from hls4ml.utils.example_models import fetch_example_list, fetch_example_model # noqa: F401 3 | from hls4ml.utils.plot import plot_model # noqa: F401 4 | -------------------------------------------------------------------------------- /hls4ml/utils/dependency.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from functools import wraps 3 | from importlib.metadata import metadata 4 | from inspect import ismethod 5 | 6 | extra_requires: dict[str, list[str]] = {} 7 | subpackage = None 8 | for k, v in metadata('hls4ml')._headers: # type: ignore 9 | if k != 'Requires-Dist': 10 | continue 11 | if '; extra == ' not in v: 12 | continue 13 | 14 | req, pkg = v.split('; extra == ') 15 | pkg = pkg.strip('"') 16 | 17 | extra_requires.setdefault(pkg, []).append(req) 18 | 19 | 20 | def requires(pkg: str): 21 | """ 22 | Mark a function or method as requiring a package to be installed. 23 | 24 | Args: 25 | pkg (str): The package to require. 'name' requires hls4ml[name] to be installed. 26 | '_name' requires name to be installed. 27 | """ 28 | 29 | def deco(f): 30 | if ismethod(f): 31 | qualifier = f'Method {f.__self__.__class__.__name__}.{f.__name__}' 32 | else: 33 | qualifier = f'Function {f.__name__}' 34 | 35 | if not pkg.startswith('_'): 36 | reqs = ', '.join(extra_requires[pkg]) 37 | msg = f'{qualifier} requires {reqs}, but package {{ename}} is missing' 38 | 'Please consider install it with `pip install hls4ml[{pkg}]` for full functionality with {pkg}.' 39 | else: 40 | msg = f'{qualifier} requires {pkg[1:]}, but package {{ename}} is missing.' 41 | 'Consider install it with `pip install {pkg}`.' 42 | 43 | @wraps(f) 44 | def inner(*args, **kwargs): 45 | try: 46 | return f(*args, **kwargs) 47 | except ImportError as e: 48 | print(msg.format(ename=e.name), file=sys.stderr) 49 | raise e 50 | 51 | return inner 52 | 53 | return deco 54 | -------------------------------------------------------------------------------- /hls4ml/utils/string_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def convert_to_snake_case(pascal_case): 5 | """Convert string in PascalCase to snake_case 6 | 7 | Args: 8 | pascal_case (str): string to convert 9 | 10 | Returns: 11 | str: converted string 12 | """ 13 | camel_case = re.sub(r'(? 
bool: 15 | """ 16 | Custom Tracer class for hls4ml to define Brevitas modules and custom modules as leaf modules so they are not traced 17 | through by torch.FX 18 | """ 19 | import torch 20 | 21 | return ( 22 | isinstance(m, HLS4MLModule) 23 | or m.__module__.startswith('torch.nn') 24 | or m.__module__.startswith('torch.ao.nn') 25 | or m.__module__.startswith('brevitas.nn') 26 | ) and not isinstance(m, torch.nn.Sequential) 27 | -------------------------------------------------------------------------------- /hls4ml/utils/transpose_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | 5 | 6 | def transpose_config_gen(name: str, shape: tuple[int, ...], perm: tuple[int, ...]): 7 | """ 8 | Generate new shape and perm_strides for a permute operation. Operates by mapping the output index 9 | to input input index by: 10 | - unravel the output index 11 | - map each dimension to the corresponding stride in the input tensor, sum 12 | The operation can be expressed as: 13 | 14 | new_shape = tuple(shape[i] for i in perm) 15 | strides = np.cumprod((shapes[1:] + (1,))[::-1])[::-1] 16 | perm_strides = [strides[i] for i in perm] 17 | out[index] = inp[np.dot(np.unravel_index(index, new_shape), perm_strides)] 18 | 19 | Args: 20 | name (str): The name of the configuration. 21 | shape (tuple[int, ...]): The shape of the input tensor. 22 | perm (tuple[int, ...]): The permutation of the dimensions. 23 | 24 | Returns: 25 | dict: Dictionary containing the configuration. 26 | """ 27 | new_shape = tuple(shape[i] for i in perm) 28 | strides = np.cumprod((shape[1:] + (1,))[::-1])[::-1] 29 | perm_strides = tuple(int(strides[i]) for i in perm) 30 | return dict( 31 | dims=len(shape), 32 | N=math.prod(shape), 33 | from_shape=', '.join(str(x) for x in shape), 34 | perm=', '.join(str(x) for x in perm), 35 | perm_strides=', '.join(str(x) for x in perm_strides), 36 | to_shape=', '.join(str(x) for x in new_shape), 37 | config_name=name, 38 | ) 39 | -------------------------------------------------------------------------------- /hls4ml/writer/__init__.py: -------------------------------------------------------------------------------- 1 | from hls4ml.writer.catapult_writer import CatapultWriter 2 | from hls4ml.writer.oneapi_writer import OneAPIWriter 3 | from hls4ml.writer.quartus_writer import QuartusWriter 4 | from hls4ml.writer.symbolic_writer import SymbolicExpressionWriter 5 | from hls4ml.writer.vitis_writer import VitisWriter 6 | from hls4ml.writer.vivado_accelerator_writer import VivadoAcceleratorWriter 7 | from hls4ml.writer.vivado_writer import VivadoWriter 8 | from hls4ml.writer.writers import Writer, get_writer, register_writer # noqa: F401 9 | 10 | register_writer('Vivado', VivadoWriter) 11 | register_writer('VivadoAccelerator', VivadoAcceleratorWriter) 12 | register_writer('Vitis', VitisWriter) 13 | register_writer('Quartus', QuartusWriter) 14 | register_writer('oneAPI', OneAPIWriter) 15 | register_writer('Catapult', CatapultWriter) 16 | register_writer('SymbolicExpression', SymbolicExpressionWriter) 17 | -------------------------------------------------------------------------------- /hls4ml/writer/vitis_writer.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from pathlib import Path 4 | from shutil import copy 5 | 6 | from hls4ml.writer.vivado_writer import VivadoWriter 7 | 8 | 9 | class VitisWriter(VivadoWriter): 10 | def __init__(self): 11 | 
super().__init__() 12 | 13 | def write_nnet_utils_overrides(self, model): 14 | ################### 15 | # nnet_utils 16 | ################### 17 | 18 | filedir = os.path.dirname(os.path.abspath(__file__)) 19 | 20 | srcpath = os.path.join(filedir, '../templates/vitis/nnet_utils/') 21 | dstpath = f'{model.config.get_output_dir()}/firmware/nnet_utils/' 22 | 23 | headers = [os.path.basename(h) for h in glob.glob(srcpath + '*.h')] 24 | 25 | for h in headers: 26 | copy(srcpath + h, dstpath + h) 27 | 28 | def write_board_script_override(self, model): 29 | ''' 30 | Write the tcl scripts and kernel sources to create a Vitis IPI 31 | ''' 32 | 33 | ################### 34 | # project.tcl 35 | ################### 36 | 37 | prj_tcl_file = Path(f'{model.config.get_output_dir()}/project.tcl') 38 | with open(prj_tcl_file) as f: 39 | prj_tcl_contents = f.readlines() 40 | for line_num, line in enumerate(prj_tcl_contents): 41 | if 'set backend' in line: 42 | prj_tcl_contents[line_num] = 'set backend "vitis"\n' 43 | if 'set clock_uncertainty' in line: 44 | prj_tcl_contents[line_num] = 'set clock_uncertainty {}\n'.format( 45 | model.config.get_config_value('ClockUncertainty', '27%') 46 | ) 47 | 48 | with open(prj_tcl_file, 'w') as f: 49 | f.writelines(prj_tcl_contents) 50 | 51 | def write_hls(self, model): 52 | """ 53 | Write the HLS project. Calls the steps from VivadoWriter, adapted for Vitis 54 | """ 55 | super().write_hls(model) 56 | self.write_nnet_utils_overrides(model) 57 | self.write_board_script_override(model) 58 | self.write_tar(model) 59 | -------------------------------------------------------------------------------- /hls4ml/writer/writers.py: -------------------------------------------------------------------------------- 1 | class Writer: 2 | def __init__(self): 3 | pass 4 | 5 | def write_hls(self, model): 6 | raise NotImplementedError 7 | 8 | 9 | writer_map = {} 10 | 11 | 12 | def register_writer(name, writer_cls): 13 | if name in writer_map: 14 | raise Exception(f'Writer {name} already registered') 15 | 16 | writer_map[name] = writer_cls 17 | 18 | 19 | def get_writer(name): 20 | return writer_map[name]() 21 | -------------------------------------------------------------------------------- /test/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | failed=0 4 | basedir=hls_prj 5 | all=0 6 | 7 | function print_usage { 8 | echo "Usage: `basename $0` [OPTION]" 9 | echo "" 10 | echo "Cleans up the projects in provided directory." 11 | echo "" 12 | echo "Options are:" 13 | echo " -d DIR" 14 | echo " Base directory where projects are located." 15 | echo " -a" 16 | echo " Remove all projects, even the failed ones." 17 | echo " -h" 18 | echo " Prints this help message." 19 | } 20 | 21 | while getopts ":d:ah" opt; do 22 | case "$opt" in 23 | d) basedir=$OPTARG 24 | ;; 25 | a) all=1 26 | ;; 27 | h) 28 | print_usage 29 | exit 30 | ;; 31 | esac 32 | done 33 | 34 | if [ ! -d "${basedir}" ]; then 35 | echo "Specified directory '${basedir}' does not exist." 36 | exit 1 37 | fi 38 | 39 | if [ "${all}" -eq 1 ]; then 40 | rm -rf "${basedir}" 41 | exit $? 42 | fi 43 | 44 | #rundir=`pwd` 45 | 46 | cd "${basedir}" 47 | 48 | rm -f *.tar.gz 49 | 50 | # Delete 51 | for dir in */ ; do 52 | if [ ! -f "${dir}BUILD_FAILED" ]; then 53 | rm -rf "${dir}" 54 | if [ $? -eq 0 ]; then 55 | echo "Removed ${dir%/}." 
56 | else 57 | failed=1 58 | fi 59 | fi 60 | done 61 | 62 | #cd "${rundir}" 63 | 64 | exit ${failed} 65 | -------------------------------------------------------------------------------- /test/gather-reports.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | failed=0 4 | basedir=hls_prj 5 | full=0 6 | brief=0 7 | 8 | function print_usage { 9 | echo "Usage: `basename $0` [OPTION]" 10 | echo "" 11 | echo "Prints synthesis reports found in projects in the provided directory." 12 | echo "" 13 | echo "Options are:" 14 | echo " -d DIR" 15 | echo " Base directory where projects are located." 16 | echo " -b" 17 | echo " Print only summary of performance and utilization estimates." 18 | echo " -f" 19 | echo " Print whole report." 20 | echo " -h" 21 | echo " Prints this help message." 22 | } 23 | 24 | while getopts ":d:bfh" opt; do 25 | case "$opt" in 26 | d) basedir=$OPTARG 27 | ;; 28 | b) brief=1 29 | ;; 30 | f) full=1 31 | ;; 32 | h) 33 | print_usage 34 | exit 35 | ;; 36 | esac 37 | done 38 | 39 | if [ "${brief}" -eq "${full}" ]; then 40 | echo "Argument -b or -f must be provided." 41 | exit 1 42 | fi 43 | 44 | if [ ! -d "${basedir}" ]; then 45 | echo "Specified directory '${basedir}' does not exist." 46 | exit 1 47 | fi 48 | 49 | #rundir=`pwd` 50 | 51 | cd "${basedir}" 52 | 53 | for dir in */ ; do 54 | cd ${dir} 55 | prjdir="myproject_prj" 56 | prjname="myproject" 57 | for subdir in *_prj/ ; do 58 | prjdir=${subdir} 59 | prjname="${prjdir%_prj/}" 60 | done 61 | prjdir="${prjdir}solution1/syn/report" 62 | if [ -d "$prjdir" ]; then 63 | echo "Synthesis report for ${dir%/}" 64 | if [ "${brief}" -eq 1 ]; then 65 | sed "/* DSP48/Q" "${prjdir}/${prjname}_csynth.rpt" 66 | else 67 | cat "${prjdir}/${prjname}_csynth.rpt" 68 | fi 69 | else 70 | echo "No report files found in ${dir}." 71 | failed=1 72 | fi 73 | cd .. 74 | done 75 | 76 | #cd "${rundir}" 77 | 78 | exit ${failed} 79 | -------------------------------------------------------------------------------- /test/hls4ml-keras-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIVADO_INSTALL_DIR=/opt/Xilinx 4 | VIVADO_VERSION=2020.1 5 | 6 | # If running in docker image we would first need to activate the proper conda environment 7 | #. activate hls4ml-py36 8 | 9 | # Convert models in keras-models.txt 10 | ./convert-keras-models.sh -x -f keras-models.txt 11 | 12 | # Alternatively, keras-to-hls script can be called, with the model name(s) specified, i.e.: 13 | #./keras-to-hls.sh KERAS_1layer KERAS_conv1d_small 14 | ./keras-to-hls.sh -b alveo-u250 -B VivadoAccelerator -x xcu250-figd2104-2L-e KERAS_3layer 15 | ./keras-to-hls.sh -b pynq-z2 -B VivadoAccelerator -x xc7z020clg400-1 KERAS_3layer 16 | # KERAS_3layer b:pynq-z2 B:VivadoAccelerator x:xc7z020clg400-1 s:Resource 17 | 18 | # Build the projects generated by keras-to-hls script. 19 | # Remove parameter -s to disable synthesis. -p controls the number of parallel tasks 20 | ./build-prj.sh -i ${VIVADO_INSTALL_DIR} -v ${VIVADO_VERSION} -c -s -p 2 21 | 22 | # Go through the generated reports and print out basic information. 23 | # Reports are available if synthesis is enabled. 
24 | ./gather-reports.sh -b 25 | 26 | # Clean-up at the end 27 | #./cleanup.sh 28 | -------------------------------------------------------------------------------- /test/hls4ml-onnx-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIVADO_INSTALL_DIR=/opt/Xilinx 4 | VIVADO_VERSION=2017.2 5 | 6 | # If running in docker image we would first need to activate the proper conda environment 7 | #. activate hls4ml-py36 8 | 9 | # Convert models in onnx-models.txt 10 | ./convert-onnx-models.sh -x -p 3 -f onnx-models.txt 11 | 12 | # Same for Python 2 13 | #. activate hls4ml-py27 14 | ./convert-onnx-models.sh -x -p 2 -f onnx-models.txt 15 | 16 | # Alternatively, onnx-to-hls script can be called, with the model name(s) specified, i.e.: 17 | #./onnx-to-hls.sh -p 3 three_layer_keras conv1d_small_keras 18 | #./onnx-to-hls.sh -p 2 three_layer_keras conv1d_small_keras 19 | 20 | # Check if there is any difference between files generated by Python 2 and Python 3 21 | # Not needed if there were no changes in onnx-to-hls.py or hls-writer.py 22 | ./py-diff.sh -r 2 23 | 24 | # Build the projects generated by onnx-to-hls script. 25 | # Remove parameter -s to disable synthesis. -p controls the number of parallel tasks 26 | ./build-prj.sh -i ${VIVADO_INSTALL_DIR} -v ${VIVADO_VERSION} -c -s -p 2 27 | 28 | # Go through the generated reports and print out basic information. 29 | # Reports are available if synthesis is enabled. 30 | ./gather-reports.sh -b 31 | 32 | # Clean-up at the end 33 | #./cleanup.sh 34 | -------------------------------------------------------------------------------- /test/hls4ml-pytorch-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIVADO_INSTALL_DIR=/opt/Xilinx 4 | VIVADO_VERSION=2017.2 5 | 6 | # If running in docker image we would first need to activate the proper conda environment 7 | #. activate hls4ml-py36 8 | 9 | # Convert models in pytorch-models.txt 10 | ./convert-pytorch-models.sh -x -p 3 -f pytorch-models.txt 11 | 12 | # Same for Python 2 13 | #. activate hls4ml-py27 14 | ./convert-pytorch-models.sh -x -p 2 -f pytorch-models.txt 15 | 16 | # Alternatively, pytorch-to-hls script can be called, with the model name(s) specified, i.e.: 17 | #./pytorch-to-hls.sh -p 3 two_layer_model three_layer_model 18 | #./pytorch-to-hls.sh -p 2 two_layer_model three_layer_model 19 | 20 | # Check if there is any difference between files generated by Python 2 and Python 3 21 | # Not needed if there were no changes in pytorch-to-hls.py or hls-writer.py 22 | ./py-diff.sh -r 2 23 | 24 | # Build the projects generated by pytorch-to-hls script. 25 | # Remove parameter -s to disable synthesis. -p controls the number of parallel tasks 26 | ./build-prj.sh -i ${VIVADO_INSTALL_DIR} -v ${VIVADO_VERSION} -c -s -p 2 27 | 28 | # Go through the generated reports and print out basic information. 29 | # Reports are available if synthesis is enabled. 
30 | ./gather-reports.sh -b 31 | 32 | # Clean-up at the end 33 | #./cleanup.sh 34 | -------------------------------------------------------------------------------- /test/keras-models.txt: -------------------------------------------------------------------------------- 1 | # Keras models from examples directory that will be used for testing 2 | # 3 | # Synthax: 4 | # MODEL_NAME[:WEIGHTS_FILE] [x:PART] [b:BOARD] [B:BACKEND] [c:CLOCK_PERIOD] [io:s] [r:REUSE_FACTOR] [t:AP_TYPE] [s:STRATEGY] [y:CONFIG_FILE] 5 | # where 6 | # MODEL_NAME - Name of the file containing json model (without ".json") 7 | # WEIGHTS_FILE - Name of the HDF5 file containing model weights (without ".h5") 8 | # x:PART - FPGA part number to use 9 | # b:BOARD - name of one board defined in supported_board.json file 10 | # B:BACKEND - name of the backend to be used (Vivado, VivadoAccelerator) 11 | # c:CLOCK_PERIOD - Clock period 12 | # io:s - User streaming I/O, otherwise use parallel I/O 13 | # r:REUSE_FACTOR - Reuse factor 14 | # s:STRATEGY - Latency-optimized or Resource-optimized strategy 15 | # t:AP_TYPE - Default precision 16 | # y:CONFIG_FILE - YAML config file to copy HLSConfig from 17 | # 18 | # Lines starting with "#" are ignored. 19 | # 20 | 21 | KERAS_1layer 22 | KERAS_3layer 23 | #KERAS_3layer:KERAS_3layer_70pruned_retrained_weights 24 | #KERAS_conv1d 25 | #KERAS_conv1d_small 26 | #KERAS_conv2d_model 27 | #KERAS_dense_16x100x100x100x100x100x5 28 | KERAS_3layer_batch_norm 29 | KERAS_3layer_binary_smaller 30 | KERAS_3layer_ternary_small 31 | 32 | # Pynq backend 33 | KERAS_3layer b:pynq-z2 B:VivadoAccelerator x:xc7z020clg400-1 s:Resource 34 | garnet_1layer x:xcku115-flvb2104-2-i y:garnet_1layer_config 35 | 36 | 37 | # Resource strategy 38 | KERAS_3layer r:2 s:Resource 39 | qkeras_mnist_dense r:112 s:Resource 40 | 41 | #Fails synthesis due to a problem with loop unrolling 42 | #jetTagger_Conv2D_Small:jetTagger_Conv2D_Small 43 | 44 | # Streaming IO 45 | #KERAS_1layer io:s 46 | KERAS_3layer io:s 47 | KERAS_conv1d_small io:s 48 | KERAS_conv2d_model io:s 49 | jetTagger_Conv2D_Small io:s 50 | jetTagger_Conv2D_Small_NoBatchNorm io:s 51 | 52 | 53 | #KERAS_1layer x:xcku115-flvf1924-2-i 54 | -------------------------------------------------------------------------------- /test/onnx-models.txt: -------------------------------------------------------------------------------- 1 | # ONNX models from examples directory that will be used for testing 2 | # 3 | # Synthax: 4 | # MODEL_NAME [x:PART] [c:CLOCK_PERIOD] [io:s] [r:REUSE_FACTOR] [t:AP_TYPE] [s:STRATEGY] 5 | # where 6 | # MODEL_NAME - Name of the file containing the model (without ".onnx") 7 | # x:PART - FPGA part number to use 8 | # c:CLOCK_PERIOD - Clock period 9 | # io:s - User streaming I/O, otherwise use parallel I/O 10 | # r:REUSE_FACTOR - Reuse factor 11 | # s:STRATEGY - Latency-optimized or Resource-optimized strategy 12 | # t:AP_TYPE - Default precision 13 | # 14 | # Lines starting with "#" are ignored. 
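# For example, an entry using these options could read: three_layer_keras x:xcku115-flvb2104-2-i c:5 r:2 s:Resource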
15 | # 16 | 17 | conv1d_small_keras 18 | conv2d_small_keras 19 | conv2d_small_keras 20 | conv2d_small_mp_keras 21 | dense_big_keras 22 | three_layer_bn_keras 23 | three_layer_bn_pytorch 24 | three_layer_keras 25 | three_layer_pytorch 26 | two_layer_keras 27 | two_layer_pytorch 28 | -------------------------------------------------------------------------------- /test/pytest/ci-template.yml: -------------------------------------------------------------------------------- 1 | .pytest: 2 | stage: test 3 | image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.6.1.base 4 | tags: 5 | - k8s-default 6 | before_script: 7 | - eval "$(conda shell.bash hook)" 8 | - conda activate hls4ml-testing 9 | - source /opt/intel/oneapi/setvars.sh --force 10 | - git config --global --add safe.directory /builds/fastmachinelearning/hls4ml 11 | - git submodule update --init --recursive hls4ml/templates/catapult/ 12 | - if [ $EXAMPLEMODEL == 1 ]; then git submodule update --init example-models; fi 13 | - pip install .[testing,sr,optimization] 14 | script: 15 | - cd test/pytest 16 | - pytest $PYTESTFILE -rA --cov-report xml --cov-report term --cov=hls4ml --junitxml=report.xml --randomly-seed=42 --randomly-dont-reorganize --randomly-dont-reset-seed 17 | artifacts: 18 | when: always 19 | reports: 20 | junit: 21 | - test/pytest/report.xml 22 | coverage_report: 23 | coverage_format: cobertura 24 | path: test/pytest/coverage.xml 25 | paths: 26 | - test/pytest/hls4mlprj*.tar.gz 27 | -------------------------------------------------------------------------------- /test/pytest/test_batchnorm.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import BatchNormalization 6 | from tensorflow.keras.models import Sequential 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | in_shape = 16 13 | atol = 5e-3 14 | 15 | 16 | @pytest.fixture(scope='module') 17 | def data(): 18 | np.random.seed(0) 19 | X = np.random.rand(100, in_shape) 20 | return X 21 | 22 | 23 | @pytest.fixture(scope='module') 24 | def model(request): 25 | model = Sequential() 26 | model.add(BatchNormalization(input_shape=(in_shape,), center=request.param, scale=request.param)) 27 | model.compile() 28 | return model 29 | 30 | 31 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 32 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult', 'oneAPI']) 33 | @pytest.mark.parametrize('model', [True, False], indirect=True) 34 | def test_batchnorm(model, data, backend, io_type): 35 | default_precision = 'fixed<32, 1>' 36 | 37 | center = model.layers[0].center 38 | scale = model.layers[0].scale 39 | config = hls4ml.utils.config_from_keras_model( 40 | model, default_precision=default_precision, granularity='name', backend=backend 41 | ) 42 | output_dir = str(test_root_path / f'hls4mlprj_batchnorm_{backend}_{io_type}_center{center}_scale{scale}') 43 | hls_model = hls4ml.converters.convert_from_keras_model( 44 | model, backend=backend, hls_config=config, io_type=io_type, output_dir=output_dir 45 | ) 46 | hls_model.compile() 47 | 48 | # Predict 49 | y_keras = np.squeeze(model.predict(data)) 50 | y_hls = hls_model.predict(data) 51 | np.testing.assert_allclose(y_keras, y_hls, rtol=0, atol=atol, verbose=True) 52 | -------------------------------------------------------------------------------- /test/pytest/test_bram_factor.py: 
-------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | import tensorflow as tf 6 | from tensorflow.keras.layers import Activation, Dense 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) 14 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 15 | def test_bram_factor(backend, io_type): 16 | '''A copy of the test_dense from test_keras_api.py with BramFactor set to 0''' 17 | model = tf.keras.models.Sequential() 18 | model.add( 19 | Dense( 20 | 2, 21 | input_shape=(1,), 22 | name='Dense', 23 | use_bias=True, 24 | kernel_initializer=tf.keras.initializers.RandomUniform(minval=1, maxval=10), 25 | bias_initializer='zeros', 26 | kernel_regularizer=None, 27 | bias_regularizer=None, 28 | activity_regularizer=None, 29 | kernel_constraint=None, 30 | bias_constraint=None, 31 | ) 32 | ) 33 | model.add(Activation(activation='elu', name='Activation')) 34 | model.compile(optimizer='adam', loss='mse') 35 | 36 | X_input = np.random.rand(100, 1) 37 | 38 | keras_prediction = model.predict(X_input) 39 | 40 | config = hls4ml.utils.config_from_keras_model(model) 41 | config["Model"]["BramFactor"] = 0 42 | output_dir = str(test_root_path / f'hls4mlprj_bram_factor_{backend}_{io_type}') 43 | 44 | hls_model = hls4ml.converters.convert_from_keras_model( 45 | model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend 46 | ) 47 | 48 | hls_model.compile() 49 | 50 | hls_prediction = hls_model.predict(X_input) 51 | 52 | np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=1e-2, atol=0.01) 53 | 54 | # Check that there weights are actually remote 55 | model_brams = [var for var in hls_model.get_weight_variables() if var.storage.lower() == 'bram'] 56 | assert len(model_brams) == 2 57 | -------------------------------------------------------------------------------- /test/pytest/test_causalpadding.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Conv1D 6 | from tensorflow.keras.models import Sequential 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | atol = 5e-3 13 | 14 | 15 | @pytest.mark.parametrize('io_type', ['io_stream', 'io_parallel']) 16 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) 17 | def test_causalpadding(io_type, backend): 18 | model = Sequential() 19 | model.add(Conv1D(1, 5, padding="causal", input_shape=(100, 1))) 20 | model.compile() 21 | 22 | data = np.random.randint(0, 10, 100).astype(float) 23 | data = np.expand_dims(data, axis=0) 24 | data = np.expand_dims(data, axis=-1) 25 | 26 | config = hls4ml.utils.config_from_keras_model( 27 | model, default_precision='ap_fixed<32,16>', granularity='name', backend=backend 28 | ) 29 | odir = str(test_root_path / f'hls4mlprj_validpadding_{backend}_{io_type}') 30 | hls_model = hls4ml.converters.convert_from_keras_model( 31 | model, hls_config=config, io_type=io_type, output_dir=odir, backend=backend 32 | ) 33 | hls_model.compile() 34 | 35 | # Predict 36 | y_keras = model.predict(data).flatten() 37 | y_hls = hls_model.predict(data).flatten() 38 | np.testing.assert_allclose(y_keras, y_hls, rtol=0, atol=atol, verbose=True) 39 | -------------------------------------------------------------------------------- 
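The pytest modules in this directory stop at hls_model.compile() and a bit-accuracy check against Keras. A minimal sketch of taking one of the generated projects through C synthesis and reading back the report could look like the following (assuming a Vivado/Vitis toolchain is available on the PATH, hls_model is a ModelGraph produced by convert_from_keras_model as in the tests above, and read_vivado_report is hls4ml's report helper):

import hls4ml

# Run C synthesis on the already-written project (skip C simulation and IP export)
hls_model.build(csim=False, synth=True, export=False)

# Parse and print the synthesis report from the project's output directory
hls4ml.report.read_vivado_report(hls_model.config.get_output_dir())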
/test/pytest/test_clone_flatten.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Concatenate, Flatten, Input 6 | from tensorflow.keras.models import Model 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def data(): 15 | X = np.random.randint(-5, 5, (1, 2, 3), dtype='int32') 16 | return X 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def keras_model(): 21 | inp1 = Input(shape=(2, 3), name='input_1') 22 | x = Flatten()(inp1) 23 | y = Flatten()(inp1) 24 | out = Concatenate(axis=1)([x, y]) 25 | model = Model(inputs=inp1, outputs=out) 26 | return model 27 | 28 | 29 | @pytest.fixture 30 | @pytest.mark.parametrize('io_type', ['io_stream']) 31 | @pytest.mark.parametrize('backend', ['Vivado', 'Quartus', 'Catapult']) 32 | def hls_model(keras_model, backend, io_type): 33 | hls_config = hls4ml.utils.config_from_keras_model( 34 | keras_model, default_precision='ap_int<6>', granularity='name', backend=backend 35 | ) 36 | output_dir = str(test_root_path / f'hls4mlprj_clone_flatten_{backend}_{io_type}') 37 | hls_model = hls4ml.converters.convert_from_keras_model( 38 | keras_model, 39 | hls_config=hls_config, 40 | io_type=io_type, 41 | backend=backend, 42 | output_dir=output_dir, 43 | ) 44 | 45 | hls_model.compile() 46 | return hls_model 47 | 48 | 49 | @pytest.mark.parametrize('io_type', ['io_stream']) 50 | @pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) 51 | def test_accuracy(data, keras_model, hls_model): 52 | X = data 53 | model = keras_model 54 | # model under test predictions and accuracy 55 | y_keras = model.predict(X) 56 | y_hls4ml = hls_model.predict(X.astype('float32')).reshape(y_keras.shape) 57 | # "accuracy" of hls4ml predictions vs keras 58 | np.testing.assert_array_equal(y_keras, y_hls4ml, verbose=True) 59 | -------------------------------------------------------------------------------- /test/pytest/test_conv1d_narrow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Conv1D 6 | from tensorflow.keras.models import Sequential 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def data(): 15 | X = np.random.rand(10, 11, 3) 16 | return X 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def model(): 21 | model = Sequential() 22 | model.add(Conv1D(5, 9, input_shape=(11, 3))) 23 | model.compile() 24 | return model 25 | 26 | 27 | @pytest.mark.parametrize( 28 | 'narrowset', 29 | [ 30 | ('io_stream', 'latency', 'Encoded'), 31 | ('io_stream', 'resource', 'Encoded'), 32 | ('io_stream', 'latency', 'LineBuffer'), 33 | ('io_stream', 'resource', 'LineBuffer'), 34 | ('io_parallel', 'resource', 'Encoded'), 35 | ('io_parallel', 'latency', 'Encoded'), 36 | ('io_parallel', 'resource', 'LineBuffer'), 37 | ('io_parallel', 'latency', 'LineBuffer'), 38 | ], 39 | ) 40 | @pytest.mark.filterwarnings("error") 41 | def test_narrow(data, model, narrowset, capfd): 42 | ''' 43 | Check that the implementation does not have leftover data. 
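Here "narrow" means the kernel (width 9) is almost as wide as the input (width 11); the capfd check below asserts that the emulation does not print the "leftover data" warning for this configuration.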
44 | ''' 45 | io_type = narrowset[0] 46 | strategy = narrowset[1] 47 | conv = narrowset[2] 48 | X = data 49 | 50 | output_dir = str(test_root_path / f'hls4mlprj_conv1d_narrow_{io_type}_{strategy}_{conv}') 51 | 52 | config = hls4ml.utils.config_from_keras_model(model) 53 | config['Model']['Strategy'] = strategy 54 | config['Model']['ConvImplementation'] = conv 55 | 56 | hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, io_type=io_type, output_dir=output_dir) 57 | hls_model.compile() 58 | 59 | # model under test predictions and accuracy 60 | y_keras = model.predict(X) 61 | y_hls4ml = hls_model.predict(X) 62 | 63 | out, _ = capfd.readouterr() 64 | assert "leftover data" not in out 65 | np.testing.assert_allclose(y_keras.ravel(), y_hls4ml.ravel(), atol=0.05) 66 | -------------------------------------------------------------------------------- /test/pytest/test_conv2d_narrow.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Conv2D 6 | from tensorflow.keras.models import Sequential 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def data(): 15 | X = np.random.rand(10, 5, 5, 3) 16 | return X 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def model(): 21 | model = Sequential() 22 | model.add(Conv2D(5, (4, 4), input_shape=(5, 5, 3))) 23 | model.compile() 24 | return model 25 | 26 | 27 | @pytest.mark.parametrize( 28 | 'narrowset', 29 | [ 30 | ('io_stream', 'latency', 'Encoded'), 31 | ('io_stream', 'resource', 'Encoded'), 32 | ('io_stream', 'latency', 'LineBuffer'), 33 | ('io_stream', 'resource', 'LineBuffer'), 34 | ('io_parallel', 'resource', 'Encoded'), 35 | ('io_parallel', 'latency', 'Encoded'), 36 | ('io_parallel', 'resource', 'LineBuffer'), 37 | ('io_parallel', 'latency', 'LineBuffer'), 38 | ], 39 | ) 40 | @pytest.mark.filterwarnings("error") 41 | def test_narrow(data, model, narrowset, capfd): 42 | ''' 43 | Check that the implementation does not have leftover data. 
44 | ''' 45 | io_type = narrowset[0] 46 | strategy = narrowset[1] 47 | conv = narrowset[2] 48 | X = data 49 | 50 | output_dir = str(test_root_path / f'hls4mlprj_conv2d_narrow_{io_type}_{strategy}_{conv}') 51 | 52 | config = hls4ml.utils.config_from_keras_model(model) 53 | config['Model']['Strategy'] = strategy 54 | config['Model']['ConvImplementation'] = conv 55 | 56 | hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, io_type=io_type, output_dir=output_dir) 57 | hls_model.compile() 58 | 59 | # model under test predictions and accuracy 60 | y_keras = model.predict(X) 61 | y_hls4ml = hls_model.predict(X) 62 | 63 | out, _ = capfd.readouterr() 64 | assert "leftover data" not in out 65 | np.testing.assert_allclose(y_keras.ravel(), y_hls4ml.ravel(), atol=0.05) 66 | -------------------------------------------------------------------------------- /test/pytest/test_embed.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | from tensorflow.keras.layers import Embedding, Input 6 | from tensorflow.keras.models import Model 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def data(): 15 | X = np.random.randint(10, size=(32, 100)) 16 | return X 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def keras_model(): 21 | inputs = Input(shape=(100,), name='embedding_input') 22 | embedding = Embedding(13, 8, input_length=100, name='embedding')(inputs) 23 | model = Model(inputs=inputs, outputs=embedding) 24 | return model 25 | 26 | 27 | @pytest.fixture 28 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult', 'oneAPI']) 29 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 30 | def hls_model(keras_model, backend, io_type): 31 | hls_config = hls4ml.utils.config_from_keras_model( 32 | keras_model, default_precision='ap_fixed<16,6>', granularity='name', backend=backend 33 | ) 34 | hls_config['LayerName']['embedding_input']['Precision']['result'] = 'ap_uint<4>' 35 | out_dir = str(test_root_path / 'hls4mlprj_embed_{}_{}').format(backend, io_type) 36 | hls_model = hls4ml.converters.convert_from_keras_model( 37 | keras_model, backend=backend, hls_config=hls_config, io_type=io_type, output_dir=out_dir 38 | ) 39 | 40 | hls_model.compile() 41 | return hls_model 42 | 43 | 44 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult', 'oneAPI']) 45 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 46 | def test_embedding_accuracy(data, keras_model, hls_model): 47 | X = data 48 | model = keras_model 49 | # model under test predictions and accuracy 50 | y_keras = model.predict(X) 51 | y_hls4ml = hls_model.predict(X.astype(float)).reshape(y_keras.shape) 52 | # "accuracy" of hls4ml predictions vs keras 53 | np.testing.assert_allclose(y_keras, y_hls4ml, rtol=0, atol=1e-03, verbose=True) 54 | -------------------------------------------------------------------------------- /test/pytest/test_fetch_example.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import io 3 | from contextlib import redirect_stdout 4 | from pathlib import Path 5 | 6 | import pytest 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) 14 | def test_fetch_example_utils(backend): 15 | f = io.StringIO() 16 | with 
redirect_stdout(f): 17 | hls4ml.utils.fetch_example_list() 18 | out = f.getvalue() 19 | 20 | model_list = ast.literal_eval(out) # Check if we indeed got a dictionary back 21 | 22 | assert 'qkeras_mnist_cnn.json' in model_list['keras'] 23 | 24 | # This model has an example config that is also downloaded. Stored configurations don't set "Backend" value. 25 | config = hls4ml.utils.fetch_example_model('qkeras_mnist_cnn.json', backend=backend) 26 | config['KerasJson'] = 'qkeras_mnist_cnn.json' 27 | config['KerasH5'] 28 | config['Backend'] = backend 29 | config['OutputDir'] = str(test_root_path / f'hls4mlprj_fetch_example_{backend}') 30 | 31 | hls_model = hls4ml.converters.keras_v2_to_hls(config) 32 | hls_model.compile() # For now, it is enough if it compiles, we're only testing downloading works as expected 33 | -------------------------------------------------------------------------------- /test/pytest/test_keras_h5_loader.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | import tensorflow as tf 6 | 7 | import hls4ml 8 | 9 | test_root_path = Path(__file__).parent 10 | 11 | 12 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult']) 13 | def test_keras_h5_loader(backend): 14 | input_shape = (10,) 15 | model = tf.keras.models.Sequential( 16 | [ 17 | tf.keras.layers.InputLayer(input_shape=input_shape), 18 | tf.keras.layers.Activation(activation='relu'), 19 | ] 20 | ) 21 | 22 | hls_config = hls4ml.utils.config_from_keras_model(model, granularity='name') 23 | 24 | config = { 25 | 'OutputDir': str(test_root_path / f'hls4mlprj_KerasH5_loader_test_{backend}'), 26 | 'ProjectName': f'KerasH5_loader_test_{backend}', 27 | 'Backend': backend, 28 | 'ClockPeriod': 25.0, 29 | 'IOType': 'io_parallel', 30 | 'HLSConfig': hls_config, 31 | 'KerasH5': str(test_root_path / f'hls4mlprj_KerasH5_loader_test_{backend}/model.h5'), 32 | } 33 | 34 | model.save(config['KerasH5']) 35 | hls_model = hls4ml.converters.keras_v2_to_hls(config) 36 | hls_model.compile() 37 | data = np.random.rand(1000, 10).astype(np.float32) 38 | pred = hls_model.predict(data) 39 | np.testing.assert_allclose(pred, model.predict(data), rtol=5e-3, atol=5e-3) 40 | -------------------------------------------------------------------------------- /test/pytest/test_multi_dense.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | import tensorflow as tf 6 | from tensorflow.keras.layers import Dense 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.mark.parametrize( 14 | 'backend, strategy', 15 | [ 16 | ('Vivado', 'Latency'), 17 | ('Vivado', 'Resource'), 18 | ('Vitis', 'Latency'), 19 | ('Vitis', 'Resource'), 20 | ('Quartus', 'Resource'), 21 | ('oneAPI', 'Resource'), 22 | ('Catapult', 'Latency'), 23 | ('Catapult', 'Resource'), 24 | ], 25 | ) 26 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 27 | @pytest.mark.parametrize('shape', [(4, 3), (4, 1), (2, 3, 2), (1, 3, 1)]) 28 | def test_multi_dense(backend, strategy, io_type, shape): 29 | model = tf.keras.models.Sequential() 30 | model.add(Dense(7, input_shape=shape, activation='relu')) 31 | model.add(Dense(2, activation='relu')) 32 | model.compile(optimizer='adam', loss='mse') 33 | 34 | X_input = np.random.rand(100, *shape) 35 | X_input = np.round(X_input * 2**10) * 2**-10 # make it an exact 
ap_fixed<16,6> 36 | 37 | keras_prediction = model.predict(X_input) 38 | 39 | config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) 40 | config['Model']['Strategy'] = strategy 41 | shapestr = '_'.join(str(x) for x in shape) 42 | output_dir = str(test_root_path / f'hls4mlprj_multi_dense_{backend}_{strategy}_{io_type}_{shapestr}') 43 | 44 | hls_model = hls4ml.converters.convert_from_keras_model( 45 | model, hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type 46 | ) 47 | 48 | hls_model.compile() 49 | 50 | hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape) 51 | 52 | np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=1e-2, atol=0.01) 53 | -------------------------------------------------------------------------------- /test/pytest/test_optimization/test_knapsack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from hls4ml.optimization.dsp_aware_pruning.knapsack import solve_knapsack 5 | 6 | 7 | # In the simple case below, both implementations give the optimal answer 8 | # In general, the greedy algorithm will not give the optimal solution 9 | @pytest.mark.parametrize('implementation', ['dynamic', 'greedy', 'branch_bound', 'CBC_MIP']) 10 | def test_knapsack_1d(implementation): 11 | values = np.array([4, 5, 6, 8, 3]) 12 | weights = np.array([[2, 5, 3, 2, 5]]) 13 | capacity = np.array([8]) 14 | 15 | optimal, selected = solve_knapsack(values, weights, capacity, implementation=implementation) 16 | assert optimal == 18 17 | assert 0 in selected 18 | assert 2 in selected 19 | assert 3 in selected 20 | 21 | 22 | @pytest.mark.parametrize('implementation', ['greedy', 'branch_bound', 'CBC_MIP']) 23 | def test_multidimensional_knapsack(implementation): 24 | values = np.array([10, 2, 6, 12, 3]) 25 | weights = np.array([[3, 1, 4, 5, 5], [3, 2, 4, 1, 2]]) 26 | capacity = np.array([8, 7]) 27 | 28 | optimal, selected = solve_knapsack(values, weights, capacity, implementation=implementation) 29 | assert optimal == 22 30 | assert 0 in selected 31 | assert 3 in selected 32 | 33 | 34 | def test_knapsack_equal_weights(): 35 | values = np.array([10, 2, 6, 8, 3]) 36 | weights = np.array([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]) 37 | capacity = np.array([7, 7]) 38 | 39 | optimal, selected = solve_knapsack(values, weights, capacity) 40 | assert optimal == 18 41 | assert 0 in selected 42 | assert 3 in selected 43 | 44 | 45 | def test_knapsack_all_elements_fit(): 46 | values = np.array([10, 2, 6, 12, 3]) 47 | weights = np.array([[3, 1, 4, 5, 5], [3, 2, 4, 1, 2]]) 48 | capacity = np.array([19, 12]) 49 | 50 | optimal, selected = solve_knapsack(values, weights, capacity) 51 | assert optimal == 33 52 | assert selected == list(range(0, values.shape[0])) 53 | -------------------------------------------------------------------------------- /test/pytest/test_report/Vivado/vivado_hls.app: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /test/pytest/test_report/oneAPI/loop_attr.ndjson: -------------------------------------------------------------------------------- 1 | {"name":"loop_attributes", "id":312, "nodes":[1, 3, 2]} 2 | {"name":"Myproject", "id":1, "clk":"No", "fmax":"480.00", "type":"kernel", "children":[4, 5]} 3 | {"name":"Myproject.B0", "id":4, "af":"480.00", 
"br":"0", "ci":"0", "fo":"Disabled", "ii":"1", "ll":"1", "lllt":"17.000000", "lt":"17.000000", "mi":"n/a", "pl":"Yes", "tc":"0", "tn":"1", "type":"bb"} 4 | {"name":"Myproject.B1", "id":5, "af":"480.00", "br":"0", "ci":"0", "fo":"Enabled", "ii":"1", "ll":"1", "lllt":"176.000000", "lt":"176.000000", "mi":"1", "pl":"Yes", "tc":"0", "tn":"1", "details":[{"type":"text", "text":"Hyper-Optimized loop structure: enabled."}], "type":"loop"} 5 | {"name":"relu_config3>()", "id":3, "clk":"No", "fmax":"480.00", "debug":[[{"filename":"/home/enlupi/Work/code/hls4mlprj_report_oneAPI/src/firmware/nnet_utils/nnet_activation_stream.h", "line":32}]], "type":"kernel", "children":[8, 9]} 6 | {"name":"relu_config3>().B0", "id":8, "af":"480.00", "br":"0", "ci":"0", "fo":"Disabled", "ii":"1", "ll":"1", "lllt":"17.000000", "lt":"17.000000", "mi":"n/a", "pl":"Yes", "tc":"0", "tn":"1", "type":"bb"} 7 | {"name":"relu_config3>().B1", "id":9, "af":"480.00", "br":"0", "ci":"0", "fo":"Enabled", "ii":"1", "ll":"1", "lllt":"159.000000", "lt":"159.000000", "mi":"1", "pl":"Yes", "tc":"0", "tn":"1", "details":[{"type":"text", "text":"Hyper-Optimized loop structure: enabled."}], "type":"loop"} 8 | {"name":"bias_t)", "id":2, "clk":"No", "fmax":"480.00", "debug":[[{"filename":"/home/enlupi/Work/code/hls4mlprj_report_oneAPI/src/firmware/nnet_utils/nnet_dense_stream.h", "line":12}]], "type":"kernel", "children":[6, 7]} 9 | {"name":"bias_t).B0", "id":6, "af":"480.00", "br":"0", "ci":"0", "fo":"Disabled", "ii":"1", "ll":"1", "lllt":"17.000000", "lt":"17.000000", "mi":"n/a", "pl":"Yes", "tc":"0", "tn":"1", "type":"bb"} 10 | {"name":"bias_t).B1", "id":7, "af":"480.00", "br":"0", "ci":"0", "fo":"Enabled", "ii":"1", "ll":"1", "lllt":"200.000000", "lt":"200.000000", "mi":"1", "pl":"Yes", "tc":"0", "tn":"1", "details":[{"type":"text", "text":"Hyper-Optimized loop structure: enabled."}], "type":"loop"} 11 | -------------------------------------------------------------------------------- /test/pytest/test_report/oneAPI/quartus.ndjson: -------------------------------------------------------------------------------- 1 | {"quartusFitClockSummary":{"nodes":[{"name":"Quartus Fitter: Clock Frequency (MHz)","type":"system","id":1000,"clock1x":"597.73","clock fmax":"597.73","clock":"597.73","details":[{"text":"The actual frequency of the clock is 597.73 MHz after platform PLL adjustment. The maximum frequency for the clock is 597.73 MHz. 
"}]}]},"quartusFitResourceUsageSummary":{"nodes":[{"type":"system","id":1000,"name":"Quartus Fitter: Device Image","alm":"4520.5","alut":"4182","reg":"16419","dsp":"40","ram":"36","mlab":"52"},{"type":"kernel","id":1010,"name":"Myproject","alm":"4520.0","alut":"4181","reg":"16419","dsp":"40","ram":"36","mlab":"52"}]}} -------------------------------------------------------------------------------- /test/pytest/test_reshape.py: -------------------------------------------------------------------------------- 1 | """Test that reshape is properly handled by optimizers.""" 2 | 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import pytest 7 | import tensorflow as tf 8 | 9 | import hls4ml 10 | 11 | test_root_path = Path(__file__).parent 12 | 13 | 14 | def randX(batch_size, N): 15 | return np.random.rand(batch_size, N) 16 | 17 | 18 | @pytest.fixture(scope='module') 19 | def randX_20_10(): 20 | return randX(20, 10) 21 | 22 | 23 | @pytest.mark.parametrize('backend', ['Vivado', 'Quartus', 'Catapult', 'oneAPI']) 24 | @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) 25 | def test_reshape_parallel(randX_20_10, backend, io_type): 26 | model = tf.keras.models.Sequential( 27 | [ 28 | tf.keras.layers.Input(shape=(10,)), 29 | tf.keras.layers.Dense(10 * 3), 30 | tf.keras.layers.Reshape((10, 3)), 31 | tf.keras.layers.ReLU(), 32 | ] 33 | ) 34 | model.compile(optimizer='adam', loss='mse') 35 | config = hls4ml.utils.config_from_keras_model(model, default_precision='fixed<32,16>') 36 | prj_name = f'hls4mlprj_reshape_{backend}_{io_type}' 37 | output_dir = str(test_root_path / prj_name) 38 | hls_model = hls4ml.converters.convert_from_keras_model( 39 | model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend 40 | ) 41 | hls_model.compile() 42 | 43 | X = randX_20_10 44 | y_qkeras = model.predict(X) 45 | y_hls4ml = hls_model.predict(X) 46 | 47 | # check that the values are close 48 | np.testing.assert_allclose(y_qkeras.ravel(), y_hls4ml.ravel(), atol=0.02) 49 | -------------------------------------------------------------------------------- /test/pytest/test_softsign.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import numpy as np 4 | import pytest 5 | import tensorflow as tf 6 | from sklearn.metrics import accuracy_score 7 | 8 | import hls4ml 9 | 10 | test_root_path = Path(__file__).parent 11 | 12 | 13 | @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult']) 14 | @pytest.mark.parametrize('input_shape, io_type', [((8,), 'io_parallel'), ((8,), 'io_stream'), ((8, 8, 3), 'io_stream')]) 15 | def test_softsign(backend, input_shape, io_type): 16 | X = np.random.rand(1000, *input_shape) 17 | X = np.round(X * 2**10) * 2**-10 18 | model = tf.keras.models.Sequential() 19 | model.add(tf.keras.layers.Activation(input_shape=input_shape, activation='softsign', name='softsign')) 20 | model.compile() 21 | 22 | cfg = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='fixed<20,4>', backend=backend) 23 | # Since softsign implementation is lookup-based increasing the precision and size of the table helps with accuracy 24 | cfg['LayerName']['softsign']['table_t'] = 'fixed<20,4>' 25 | cfg['LayerName']['softsign']['table_size'] = 2048 26 | odir = str(test_root_path / f'hls4mlprj_softsign_{backend}_{io_type}_{str(input_shape)}') 27 | hls_model = hls4ml.converters.convert_from_keras_model( 28 | model, hls_config=cfg, io_type=io_type, output_dir=odir, 
--------------------------------------------------------------------------------
/test/pytest/test_weight_writer.py:
--------------------------------------------------------------------------------
1 | from glob import glob
2 | from pathlib import Path
3 | 
4 | import keras
5 | import numpy as np
6 | import pytest
7 | 
8 | import hls4ml
9 | 
10 | test_root_path = Path(__file__).parent
11 | 
12 | 
13 | @pytest.mark.parametrize('k', [0, 1])
14 | @pytest.mark.parametrize('i', [4, 8, 10])
15 | @pytest.mark.parametrize('f', [-2, 0, 2, 7, 14])
16 | def test_weight_writer(k, i, f):
17 |     k, b, i = k, k + i + f, k + i
18 |     w = np.array([[np.float32(2.0**-f)]])
19 |     u = '' if k else 'u'
20 |     dtype = f'{u}fixed<{b}, {i}>'
21 |     hls_config = {'LayerName': {'dense': {'Precision': {'weight': dtype}}}}
22 | 
23 |     model = keras.Sequential([keras.layers.Dense(1, input_shape=(1,), name='dense')])
24 |     model.layers[0].kernel.assign(keras.backend.constant(w))
25 |     output_dir = str(test_root_path / f'hls4ml_prj_test_weight_writer_{dtype}')
26 | 
27 |     model_hls = hls4ml.converters.convert_from_keras_model(
28 |         model, hls_config=hls_config, output_dir=output_dir, write_weights_txt=True
29 |     )
30 |     model_hls.write()
31 | 
32 |     w_paths = glob(str(Path(output_dir) / 'firmware/weights/w*.txt'))
33 |     assert len(w_paths) == 1
34 | 
35 |     w_loaded = np.loadtxt(w_paths[0], delimiter=',').reshape(1, 1)
36 |     assert np.all(w == w_loaded)
37 | 
--------------------------------------------------------------------------------
/test/pytorch-models.txt:
--------------------------------------------------------------------------------
1 | # PyTorch models from the examples directory that will be used for testing
2 | #
3 | # Syntax:
4 | # MODEL_NAME [x:XILINXPART] [c:CLOCK_PERIOD] [io:s] [r:REUSE_FACTOR] [t:AP_TYPE] [s:STRATEGY]
5 | # where
6 | # MODEL_NAME - Name of the file containing the model (without ".pt")
7 | # x:XILINXPART - Xilinx part number to use
8 | # c:CLOCK_PERIOD - Clock period
9 | # io:s - Use streaming I/O, otherwise use parallel I/O
10 | # r:REUSE_FACTOR - Reuse factor
11 | # s:STRATEGY - Latency-optimized or Resource-optimized strategy
12 | # t:AP_TYPE - Default precision
13 | #
14 | # Lines starting with "#" are ignored.
15 | #
16 | 
17 | two_layer_model
18 | three_layer_model
--------------------------------------------------------------------------------
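The two entries in pytorch-models.txt do not exercise the option syntax described in its header. A hypothetical entry using every field might look like the line below; the part number, clock period, reuse factor, strategy token, and precision are illustrative assumptions, not values taken from the repository.

```
three_layer_model x:xcku115-flvb2104-2-i c:5 io:s r:2 s:Resource t:ap_fixed<16,6>
```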