├── .gitignore
├── AIPUBuilder
    ├── Optimizer
    │   ├── __init__.py
    │   ├── analyzer
    │   │   ├── __init__.py
    │   │   ├── cosine.py
    │   │   └── running_time.py
    │   ├── config
    │   │   ├── __init__.py
    │   │   ├── cfg_fields.py
    │   │   └── parser.py
    │   ├── features
    │   │   ├── __init__.py
    │   │   ├── autosearch
    │   │   │   ├── __init__.py
    │   │   │   └── mixed_precision_naive_search.py
    │   │   ├── calibration
    │   │   │   ├── __init__.py
    │   │   │   ├── calibration.py
    │   │   │   ├── global_calibration
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── adaquant_zy.py
    │   │   │   │   ├── adaround.py
    │   │   │   │   ├── awq_zy.py
    │   │   │   │   ├── easy_quant.py
    │   │   │   │   ├── gptq_zy.py
    │   │   │   │   ├── mvn_correction.py
    │   │   │   │   ├── smooth_quant_zy.py
    │   │   │   │   └── svd_based_quant.py
    │   │   │   └── local_calibration
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── aciq_gauss.py
    │   │   │   │   ├── aciq_laplace.py
    │   │   │   │   ├── extrema.py
    │   │   │   │   ├── in_ir.py
    │   │   │   │   ├── kld.py
    │   │   │   │   ├── mean.py
    │   │   │   │   ├── nstd.py
    │   │   │   │   ├── percentile.py
    │   │   │   │   └── weighted_scale_param.py
    │   │   └── imagetiling
    │   │   │   ├── __init__.py
    │   │   │   └── image_tiling.py
    │   ├── framework
    │   │   ├── __init__.py
    │   │   ├── opt_register.py
    │   │   ├── pycore
    │   │   │   ├── __init__.py
    │   │   │   ├── pygraph.py
    │   │   │   ├── pyir.py
    │   │   │   ├── pynode.py
    │   │   │   ├── pytensor.py
    │   │   │   └── pytype.py
    │   │   └── qgraph.py
    │   ├── logger
    │   │   ├── __init__.py
    │   │   ├── aipu_logger.py
    │   │   ├── opt_log_management.py
    │   │   └── opt_logger.py
    │   ├── ops
    │   │   ├── LRN.py
    │   │   ├── __init__.py
    │   │   ├── abs.py
    │   │   ├── accidentalhits.py
    │   │   ├── acos.py
    │   │   ├── acosh.py
    │   │   ├── activation.py
    │   │   ├── adativepool.py
    │   │   ├── add.py
    │   │   ├── affine_grid.py
    │   │   ├── argminmax.py
    │   │   ├── asin.py
    │   │   ├── asinh.py
    │   │   ├── atan.py
    │   │   ├── atanh.py
    │   │   ├── basiclstm.py
    │   │   ├── batchtodepth.py
    │   │   ├── batchtospace.py
    │   │   ├── batchtospaceNd.py
    │   │   ├── bias_add.py
    │   │   ├── bitshift.py
    │   │   ├── bitwise.py
    │   │   ├── bn.py
    │   │   ├── bnll.py
    │   │   ├── boundingbox.py
    │   │   ├── cacheupdate.py
    │   │   ├── cast.py
    │   │   ├── ceil.py
    │   │   ├── celu.py
    │   │   ├── channelshuffle.py
    │   │   ├── clip.py
    │   │   ├── col2im.py
    │   │   ├── collapse_repeated.py
    │   │   ├── compress.py
    │   │   ├── concat.py
    │   │   ├── constant.py
    │   │   ├── control_op.py
    │   │   ├── conv.py
    │   │   ├── conv2d_integer.py
    │   │   ├── conv3d.py
    │   │   ├── convtranspose3d.py
    │   │   ├── convwinograd.py
    │   │   ├── cosh.py
    │   │   ├── cosine.py
    │   │   ├── count.py
    │   │   ├── crelu.py
    │   │   ├── crop.py
    │   │   ├── crop_and_resize.py
    │   │   ├── ctcgreedydecoder.py
    │   │   ├── cumulate.py
    │   │   ├── datastride.py
    │   │   ├── decodebox.py
    │   │   ├── deconv.py
    │   │   ├── depthtospace.py
    │   │   ├── depthwiseconv.py
    │   │   ├── dequantize.py
    │   │   ├── detectionoutput.py
    │   │   ├── dilation2d.py
    │   │   ├── div.py
    │   │   ├── div_mod.py
    │   │   ├── dummy.py
    │   │   ├── eltwise.py
    │   │   ├── elu.py
    │   │   ├── embedding_lookup_sparse.py
    │   │   ├── erf.py
    │   │   ├── erosion2d.py
    │   │   ├── exp.py
    │   │   ├── fake_quant_with_minmax_vars.py
    │   │   ├── fc.py
    │   │   ├── filter.py
    │   │   ├── filterbox.py
    │   │   ├── floor.py
    │   │   ├── fractionalpool.py
    │   │   ├── gather.py
    │   │   ├── gather_elements.py
    │   │   ├── gather_nd.py
    │   │   ├── gelu.py
    │   │   ├── gemm.py
    │   │   ├── generateproposal.py
    │   │   ├── get_valid_count.py
    │   │   ├── globalpooling.py
    │   │   ├── grid_sample.py
    │   │   ├── groupnorm.py
    │   │   ├── gruv1.py
    │   │   ├── gruv3.py
    │   │   ├── hardmax.py
    │   │   ├── hardsigmoid.py
    │   │   ├── hardswish.py
    │   │   ├── heatmapMaxkeypoint.py
    │   │   ├── inp.py
    │   │   ├── instancenorm.py
    │   │   ├── interp.py
    │   │   ├── intopk.py
    │   │   ├── isinf.py
    │   │   ├── isnan.py
    │   │   ├── layernorm.py
    │   │   ├── leakyrelu.py
    │   │   ├── log.py
    │   │   ├── logical.py
    │   │   ├── logsoftmax.py
    │   │   ├── lpnormalization.py
    │   │   ├── matmul.py
    │   │   ├── matmul_integer.py
    │   │   ├── maxpooling_withargmax.py
    │   │   ├── maxroipooling.py
    │   │   ├── maxunpool.py
    │   │   ├── meshgrid.py
    │   │   ├── mish.py
    │   │   ├── mod.py
    │   │   ├── moments.py
    │   │   ├── mul.py
    │   │   ├── multibox_transform_Loc.py
    │   │   ├── mvn.py
    │   │   ├── neg.py
    │   │   ├── nms.py
    │   │   ├── nonzero.py
    │   │   ├── noop.py
    │   │   ├── normal_moments.py
    │   │   ├── onehot.py
    │   │   ├── overlapadd.py
    │   │   ├── pad.py
    │   │   ├── permute.py
    │   │   ├── pooling.py
    │   │   ├── pooling3D.py
    │   │   ├── postnms1.py
    │   │   ├── postnms2.py
    │   │   ├── pow.py
    │   │   ├── prelu.py
    │   │   ├── proposal.py
    │   │   ├── pyramidroi.py
    │   │   ├── quantize.py
    │   │   ├── queryrebatch.py
    │   │   ├── reciprocal.py
    │   │   ├── reduce.py
    │   │   ├── region.py
    │   │   ├── regionfuse.py
    │   │   ├── relu.py
    │   │   ├── relu6.py
    │   │   ├── relu_family.py
    │   │   ├── repeat.py
    │   │   ├── reshape.py
    │   │   ├── resize.py
    │   │   ├── reversesequence.py
    │   │   ├── rgb2yuv.py
    │   │   ├── rms_norm.py
    │   │   ├── rnn.py
    │   │   ├── roialign.py
    │   │   ├── roipooling.py
    │   │   ├── round.py
    │   │   ├── rsqrt.py
    │   │   ├── scatter_elements.py
    │   │   ├── scatter_nd.py
    │   │   ├── segment_reduce.py
    │   │   ├── select.py
    │   │   ├── selu.py
    │   │   ├── shrink.py
    │   │   ├── sigmoid.py
    │   │   ├── sign.py
    │   │   ├── silu.py
    │   │   ├── sine.py
    │   │   ├── sinh.py
    │   │   ├── slice_operator.py
    │   │   ├── slotupdate.py
    │   │   ├── softmax.py
    │   │   ├── softplus.py
    │   │   ├── softsign.py
    │   │   ├── sort.py
    │   │   ├── spacetobatch.py
    │   │   ├── spacetodepth.py
    │   │   ├── split.py
    │   │   ├── sqrt.py
    │   │   ├── square.py
    │   │   ├── squared_difference.py
    │   │   ├── squeeze.py
    │   │   ├── stridedslice.py
    │   │   ├── sub.py
    │   │   ├── sufficientStatistics.py
    │   │   ├── swish.py
    │   │   ├── tan.py
    │   │   ├── tanh.py
    │   │   ├── tf_ops
    │   │   │   ├── __init__.py
    │   │   │   └── conv2d.py
    │   │   ├── thresholdrelu.py
    │   │   ├── tile.py
    │   │   ├── topk.py
    │   │   ├── transpose.py
    │   │   ├── trunc.py
    │   │   ├── unidirectional_rnn.py
    │   │   ├── unique.py
    │   │   ├── upsamplebyindex.py
    │   │   ├── where.py
    │   │   ├── yuv2rgb.py
    │   │   └── zerofraction.py
    │   ├── optmaster.py
    │   ├── passes
    │   │   ├── __init__.py
    │   │   ├── absorb_cast_to_clip.py
    │   │   ├── batch_modifications.py
    │   │   ├── check_quantization_info_s1.py
    │   │   ├── convert_resize_to_convolution.py
    │   │   ├── decompose_nonmonotonic_activations_s1.py
    │   │   ├── detect_inf_mask_nodes.py
    │   │   ├── eliminate_op.py
    │   │   ├── global_calibration_prepare.py
    │   │   ├── insert_op.py
    │   │   ├── merge_inserted_op.py
    │   │   ├── merge_matmul_mul_s1.py
    │   │   ├── optimize_x2_wdc.py
    │   │   ├── passes.py
    │   │   ├── set_unquantifiable.py
    │   │   ├── shrink_pow_exponent_s1.py
    │   │   ├── split_act_perchannel_matmul_s1.py
    │   │   ├── split_qkv_fc_s1.py
    │   │   ├── transfer_op_to_reshape_op_s3.py
    │   │   ├── tune_op_extra_params_s1.py
    │   │   └── unify_scales_for_multi_inputs_operator.py
    │   ├── plugins
    │   │   ├── __init__.py
    │   │   ├── aipubt_dataset_NumpyZipped.py
    │   │   ├── aipubt_dataset_OpTestNumpyZipped.py
    │   │   ├── aipubt_dataset_aishell.py
    │   │   ├── aipubt_dataset_bevformer.py
    │   │   ├── aipubt_dataset_bevformer_static.py
    │   │   ├── aipubt_dataset_cgtdnn.py
    │   │   ├── aipubt_dataset_coco.py
    │   │   ├── aipubt_dataset_cocokp.py
    │   │   ├── aipubt_dataset_fasterrcnnvoc.py
    │   │   ├── aipubt_dataset_generaldict.py
    │   │   ├── aipubt_dataset_iwslt.py
    │   │   ├── aipubt_dataset_librispeech.py
    │   │   ├── aipubt_dataset_llama2.py
    │   │   ├── aipubt_dataset_mpii.py
    │   │   ├── aipubt_dataset_mtcnn.py
    │   │   ├── aipubt_dataset_nhwcrgb2nhwcbgr.py
    │   │   ├── aipubt_dataset_numpy.py
    │   │   ├── aipubt_dataset_numpymultiinput.py
    │   │   ├── aipubt_dataset_numpymultiinputNCHW.py
    │   │   ├── aipubt_dataset_numpymultiinputwithoutbatchdim.py
    │   │   ├── aipubt_dataset_numpynchw2nhwc.py
    │   │   ├── aipubt_dataset_numpynhwc2nchw.py
    │   │   ├── aipubt_dataset_numpynhwcrgb2ncbgrhw.py
    │   │   ├── aipubt_dataset_numpywithdim.py
    │   │   ├── aipubt_dataset_random.py
    │   │   ├── aipubt_dataset_sphereface_lfw.py
    │   │   ├── aipubt_dataset_stable_diffusion_unet.py
    │   │   ├── aipubt_dataset_tensorfromnumpymultiinput.py
    │   │   ├── aipubt_dataset_tusimple.py
    │   │   ├── aipubt_dataset_vocnchw.py
    │   │   ├── aipubt_dataset_vocnhwc.py
    │   │   ├── aipubt_dataset_widerface.py
    │   │   ├── aipubt_metric_CosDistance.py
    │   │   ├── aipubt_metric_CosDistance_with_seqlen.py
    │   │   ├── aipubt_metric_Detr_mAP.py
    │   │   ├── aipubt_metric_EachCosDistance.py
    │   │   ├── aipubt_metric_FlattenCosDistance.py
    │   │   ├── aipubt_metric_IWSLT_BLEU.py
    │   │   ├── aipubt_metric_IWSLT_BLEU_2_gram.py
    │   │   ├── aipubt_metric_KeywordSpotting.py
    │   │   ├── aipubt_metric_LMHead.py
    │   │   ├── aipubt_metric_MaskRcnnCOCOmAP.py
    │   │   ├── aipubt_metric_MaxAbsError.py
    │   │   ├── aipubt_metric_MaxAbsError_with_seqlen.py
    │   │   ├── aipubt_metric_Ocr.py
    │   │   ├── aipubt_metric_OpTestCosDistance.py
    │   │   ├── aipubt_metric_RMSE.py
    │   │   ├── aipubt_metric_SSDmAP.py
    │   │   ├── aipubt_metric_WER.py
    │   │   ├── aipubt_metric_YOLOmAP.py
    │   │   ├── aipubt_metric_bevformer.py
    │   │   ├── aipubt_metric_centerface.py
    │   │   ├── aipubt_metric_centernet.py
    │   │   ├── aipubt_metric_cocokeypoint.py
    │   │   ├── aipubt_metric_delta1.py
    │   │   ├── aipubt_metric_f1mesure.py
    │   │   ├── aipubt_metric_facebox.py
    │   │   ├── aipubt_metric_fasterrcnnmAP.py
    │   │   ├── aipubt_metric_fcos_mAP.py
    │   │   ├── aipubt_metric_imdb.py
    │   │   ├── aipubt_metric_lightface.py
    │   │   ├── aipubt_metric_mAP.py
    │   │   ├── aipubt_metric_mIoU.py
    │   │   ├── aipubt_metric_mobiledetSSDmAP.py
    │   │   ├── aipubt_metric_pckh.py
    │   │   ├── aipubt_metric_poly_lanenet.py
    │   │   ├── aipubt_metric_psnr.py
    │   │   ├── aipubt_metric_retinafacebox.py
    │   │   ├── aipubt_metric_retinanetmAP.py
    │   │   ├── aipubt_metric_roc.py
    │   │   ├── aipubt_metric_sphereface.py
    │   │   ├── aipubt_metric_topk.py
    │   │   ├── aipubt_metric_widerface.py
    │   │   ├── aipubt_op_ssd_postprocess.py
    │   │   ├── aipubt_op_tile.py
    │   │   └── aipubt_qconfig_bevformer.py
    │   ├── qat
    │   │   ├── __init__.py
    │   │   ├── qatmain.py
    │   │   ├── readme.md
    │   │   ├── readme_CN.md
    │   │   └── src
    │   │   │   ├── __init__.py
    │   │   │   ├── config
    │   │   │       ├── __init__.py
    │   │   │       └── config.py
    │   │   │   ├── fuser
    │   │   │       ├── __init__.py
    │   │   │       ├── concat_fuser.py
    │   │   │       ├── convolution_fuser.py
    │   │   │       ├── eltwise_fuser.py
    │   │   │       ├── expand_fuser.py
    │   │   │       ├── fullyconnected_fuser.py
    │   │   │       ├── gelu_fuser.py
    │   │   │       ├── hardsigmoid_fuser.py
    │   │   │       ├── hardswish_fuser.py
    │   │   │       ├── layernorm_fuser.py
    │   │   │       ├── mha_fuser.py
    │   │   │       ├── multiheadattention_fuser.py
    │   │   │       ├── pooling_fuser.py
    │   │   │       ├── reshape_fuser.py
    │   │   │       └── transpose_fuser.py
    │   │   │   ├── ops
    │   │   │       ├── __init__.py
    │   │   │       ├── qat_activation.py
    │   │   │       ├── qat_base_operator.py
    │   │   │       ├── qat_batchnorm.py
    │   │   │       ├── qat_concat.py
    │   │   │       ├── qat_constant.py
    │   │   │       ├── qat_convolution.py
    │   │   │       ├── qat_eltwise.py
    │   │   │       ├── qat_expand.py
    │   │   │       ├── qat_fullyconnected.py
    │   │   │       ├── qat_gelu.py
    │   │   │       ├── qat_hardsigmoid.py
    │   │   │       ├── qat_hardswish.py
    │   │   │       ├── qat_input.py
    │   │   │       ├── qat_layernorm.py
    │   │   │       ├── qat_matmul.py
    │   │   │       ├── qat_multiheadattention.py
    │   │   │       ├── qat_pooling.py
    │   │   │       ├── qat_reshape.py
    │   │   │       ├── qat_softmax.py
    │   │   │       ├── qat_split.py
    │   │   │       └── qat_transpose.py
    │   │   │   ├── plugin
    │   │   │       ├── __init__.py
    │   │   │       ├── aipubt_train_mbv3.py
    │   │   │       ├── aipubt_train_resnet50.py
    │   │   │       └── aipubt_train_vitb16.py
    │   │   │   ├── qatfield.py
    │   │   │   ├── qatlogger.py
    │   │   │   ├── qatmaster.py
    │   │   │   ├── qatregister.py
    │   │   │   ├── qinfo.py
    │   │   │   ├── quantizer
    │   │   │       ├── __init__.py
    │   │   │       ├── basequantizer.py
    │   │   │       └── pytorchquantizer.py
    │   │   │   └── utils
    │   │   │       ├── __init__.py
    │   │   │       ├── cmp.py
    │   │   │       ├── common_utils.py
    │   │   │       ├── extra_params.py
    │   │   │       └── fuser_utils.py
    │   ├── qtlib_optimize.py
    │   ├── scripts
    │   │   ├── git_hooks
    │   │   │   ├── pre-commit
    │   │   │   └── pre-commit.d
    │   │   │   │   ├── format_pyfile
    │   │   │   │   └── python
    │   │   └── install_git_hooks.sh
    │   ├── test
    │   │   ├── feature_test
    │   │   │   ├── compare_gt_and_opt_op_forward.py
    │   │   │   ├── test_betensor_detile.py
    │   │   │   ├── test_compass_ir_to_torch_module.py
    │   │   │   └── test_merge_insert_op.py
    │   │   ├── model_test
    │   │   │   └── squeezenet
    │   │   │   │   ├── calibration2.npy
    │   │   │   │   ├── opt.cfg
    │   │   │   │   ├── run.sh
    │   │   │   │   ├── squeezenet_s.bin
    │   │   │   │   ├── squeezenet_s.txt
    │   │   │   │   ├── validation10.npy
    │   │   │   │   └── vlabel10.npy
    │   │   ├── op_test
    │   │   │   ├── atan_test.py
    │   │   │   ├── data.npy
    │   │   │   ├── ds_reshape_test.py
    │   │   │   ├── label.npy
    │   │   │   ├── opt.cfg
    │   │   │   ├── run.sh
    │   │   │   ├── single_eltwise_1.bin
    │   │   │   ├── single_eltwise_1.txt
    │   │   │   └── softmax_test.py
    │   │   └── plugin_test
    │   │   │   ├── aipubt_dataset_my_numpynhwcrgb2ncbgrhw.py
    │   │   │   ├── aipubt_metric_my_topk.py
    │   │   │   ├── aipubt_op_my_softmax.py
    │   │   │   ├── opt.cfg
    │   │   │   └── run.sh
    │   ├── tools
    │   │   ├── __init__.py
    │   │   ├── generate_plugin_template.py
    │   │   ├── opt_forward_main.py
    │   │   ├── optimizer_forward.py
    │   │   └── optimizer_main.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── dtype_utils.py
    │   │   ├── files_utils.py
    │   │   ├── math_utils.py
    │   │   ├── passes_utils.py
    │   │   ├── quant_tool_utils.py
    │   │   ├── random_utils.py
    │   │   └── string_utils.py
    │   └── version.py
    └── __init__.py
├── LICENSE
├── images
    ├── opt_flow.svg
    └── opt_uml.svg
├── readme.md
├── readme_CN.md
└── tutorial.pdf


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .vscode/
3 | .idea
4 | __pycache__
5 | report.html
6 | *.swp
7 | report.txt
8 | cython
9 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from AIPUBuilder.Optimizer.optmaster import *
5 | from AIPUBuilder.Optimizer.utils import *
6 | from AIPUBuilder.Optimizer.ops import *
7 | from AIPUBuilder.Optimizer.logger import *
8 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/analyzer/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from . cosine import *
5 | from . running_time import *
6 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/analyzer/running_time.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | def calculate_op_running_time(f_graph, q_graph):
 5 | 
 6 |     from AIPUBuilder.Optimizer.logger import OPT_DEBUG
 7 |     nname_id = {}
 8 |     for idx, n in enumerate(f_graph.nodes):
 9 |         nname_id.update({n.name: idx})
10 |     cost_times = {}
11 |     for n in q_graph.nodes:
12 |         key = f"{n.attrs['layer_id']} {str(n.type)[7:]}"
13 |         q_cost_time = n.attrs['cost_time']
14 |         f_cost_time = 0
15 |         if n.name in nname_id.keys():
16 |             fnodes = f_graph.nodes[nname_id[n.name]]
17 |             f_cost_time = fnodes.attrs.get('cost_time', -1)
18 |         ct = [f_cost_time, q_cost_time]
19 |         cost_times.update({key: ct})
20 | 
21 |     fall_times = sum([v[0] for v in cost_times.values()])
22 |     qall_times = sum([v[1] for v in cost_times.values()])
23 |     type_max_len = max([len(k) for k in cost_times.keys()]) if len(cost_times.keys()) > 0 else 0
24 |     for k, v in cost_times.items():
25 |         v.append(v[0] / fall_times * 100)
26 |         v.append(v[1] / qall_times * 100)
27 |         cost_times[k] = v
28 |         ostr = (f"layer_type={k:{type_max_len}} fp32_forward_time={v[0]:<8.6f}s, quant_forward_time={v[1]:<8.6f}s, "
29 |                 f"this_fp32/all_fp32={v[2]:<3.6f}%%, this_quant/all_quant={v[3]:<3.6f}%%")
30 |         OPT_DEBUG(ostr)
31 | 
32 |     # disable to calculate op running time
33 |     for n in f_graph.nodes:
34 |         n.attrs['calculate_running_time'] = False
35 |     for n in q_graph.nodes:
36 |         n.attrs['calculate_running_time'] = False
37 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/config/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from . parser import (arg_parser,
 5 |                       CfgParser,
 6 |                       get_info_from_graph,
 7 |                       filter_valid_properties,
 8 |                       fields_to_str,
 9 |                       show_cfg_fields,
10 |                       show_plugins)
11 | from . cfg_fields import *
12 | 
13 | 
14 | DEFAULT_CONFIG_FILE = 'opt_template.json'
15 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from AIPUBuilder.Optimizer.features.autosearch import NaiveAutoSearchMixedPrecision
5 | from AIPUBuilder.Optimizer.features.calibration import apply_calibration_strategy, apply_global_calibration, statistic_and_calibration
6 | from AIPUBuilder.Optimizer.features.imagetiling import *
7 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/autosearch/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from . mixed_precision_naive_search import NaiveAutoSearchMixedPrecision
5 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/calibration/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from . calibration import apply_calibration_strategy, apply_global_calibration, statistic_and_calibration
5 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/calibration/global_calibration/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from . easy_quant import easy_quant_global_calibration
 5 | from . adaround import adaround_global_calibration
 6 | from . adaquant_zy import adaquant_zy_global_calibration
 7 | from . svd_based_quant import svd_based_quant_global_calibration
 8 | from . gptq_zy import gptq_zy_global_calibration
 9 | from . smooth_quant_zy import smooth_quant_zy_global_calibration
10 | from . awq_zy import awq_zy_global_calibration
11 | from . mvn_correction import mvn_correction_global_calibration
12 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/calibration/local_calibration/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from . extrema import extrema_calibration
 5 | from . in_ir import in_ir_calibration
 6 | from . mean import mean_calibration
 7 | from . kld import nkld_calibration
 8 | from . nstd import nstd_calibration
 9 | from . weighted_scale_param import weighted_scale_param_calibration
10 | from . aciq_laplace import aciq_laplace_calibration
11 | from . aciq_gauss import aciq_gauss_calibration
12 | from . percentile import percentile_calibration
13 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/calibration/local_calibration/extrema.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | def extrema_calibration(t, *args):
 6 |     t.min = t.extrema_min
 7 |     t.max = t.extrema_max
 8 |     if t.extrema_min_key_axis is not None:
 9 |         t.min_key_axis = t.extrema_min_key_axis
10 |         t.max_key_axis = t.extrema_max_key_axis
11 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/calibration/local_calibration/in_ir.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | def in_ir_calibration(t, *args):
 6 |     t.min = t.extrema_min
 7 |     t.max = t.extrema_max
 8 |     if t.extrema_min_key_axis is not None:
 9 |         t.min_key_axis = t.extrema_min_key_axis
10 |         t.max_key_axis = t.extrema_max_key_axis
11 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/calibration/local_calibration/mean.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | def mean_calibration(t, *args):
 6 |     t.min = t.running_min
 7 |     t.max = t.running_max
 8 |     if t.running_min_key_axis is not None:
 9 |         t.min_key_axis = t.running_min_key_axis
10 |         t.max_key_axis = t.running_max_key_axis
11 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/calibration/local_calibration/nstd.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | def nstd_calibration(t, *args):
 8 |     # n = int(cstrategy[:-3])
 9 |     n = int(args[0][:-3])
10 |     t.min = max(t.running_min, t.running_mean - n * t.running_std)
11 |     t.max = min(t.running_max, t.running_mean + n * t.running_std)
12 |     if t.running_mean_key_axis is not None:
13 |         t.min_key_axis = torch.max(t.running_min_key_axis, t.running_mean_key_axis - n * t.running_std_key_axis)
14 |         t.max_key_axis = torch.min(t.running_max_key_axis, t.running_mean_key_axis + n * t.running_std_key_axis)
15 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/calibration/local_calibration/percentile.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | def percentile_calibration(t, *args):
 6 |     cstrategy = args[0]
 7 |     try:
 8 |         p = float(cstrategy[:-10])
 9 |     except:
10 |         p = 1.0
11 |     t.min = t.extrema_min * p
12 |     t.max = t.extrema_max * p
13 |     if t.extrema_min_key_axis is not None:
14 |         t.min_key_axis = t.extrema_min_key_axis * p
15 |         t.max_key_axis = t.extrema_max_key_axis * p
16 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/features/imagetiling/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from . image_tiling import featuremap_partition_for_data_parallel
5 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/framework/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | 
5 | from AIPUBuilder.Optimizer.framework.pycore import *
6 | from AIPUBuilder.Optimizer.framework.qgraph import *
7 | from AIPUBuilder.Optimizer.framework.opt_register import *
8 | from AIPUBuilder.Optimizer.logger import OPT_DEBUG, OPT_WARN, OPT_INFO, OPT_ERROR, OPT_FATAL
9 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/framework/pycore/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from AIPUBuilder.Optimizer.framework.pycore.pytype import *
5 | from AIPUBuilder.Optimizer.framework.pycore.pytensor import *
6 | from AIPUBuilder.Optimizer.framework.pycore.pynode import *
7 | from AIPUBuilder.Optimizer.framework.pycore.pygraph import *
8 | from AIPUBuilder.Optimizer.framework.pycore.pyir import *
9 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/logger/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from . opt_logger import *
5 | from . opt_log_management import opt_workflow_register, OPT_START, OPT_END
6 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/add.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.ops.eltwise import eltwise_quantize, eltwise
 7 | 
 8 | 
 9 | @op_register(OpType.Add)
10 | def add_forward(self, *args):
11 |     self.params['method'] = 'ADD'
12 |     eltwise(self, *args)
13 |     self.params.pop('method')
14 |     return self.outputs[0].betensor
15 | 
16 | 
@quant_register(OpType.Add)
def add_quantize(self, *args):
    """Quantize an Add node by delegating to ``eltwise_quantize``.

    ``params['method']`` is temporarily set to ``'ADD'`` for the duration of
    the ``eltwise_quantize`` call and removed afterwards.
    """
    self.params['method'] = 'ADD'
    try:
        eltwise_quantize(self, *args)
    finally:
        # Drop the temporary key even when quantization raises, so the node's
        # params are not left carrying a stray 'method' entry.
        self.params.pop('method')
22 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/argminmax.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | 
 7 | 
 8 | @op_register(OpType.ArgMinMax)
 9 | def argminmax(self, *args):
10 |     axis_ = self.get_param('axis')
11 |     method_ = self.get_param("method").upper()
12 |     select_last_index_ = self.get_param("select_last_index")
13 |     if method_ not in ['MAX', 'MIN']:
14 |         OPT_FATAL(f"please check method(now={method_}) in argminmax op, which only supports [MAX, MIN]")
15 |     inp = self.inputs[0].betensor
16 |     out = self.outputs[0]
17 |     if select_last_index_:
18 |         inp = torch.flip(inp, dims=[axis_])
19 | 
20 |     if method_ == 'MAX':
21 |         out.betensor = torch.argmax(inp, dim=axis_, keepdim=True)
22 |     elif method_ == 'MIN':
23 |         out.betensor = torch.argmin(inp, dim=axis_, keepdim=True)
24 | 
25 |     if select_last_index_:
26 |         out.betensor = inp.shape[axis_] - out.betensor - 1
27 |     return out.betensor
28 | 
29 | 
@quant_register(OpType.ArgMinMax)
def argminmax_quantize(self, *args):
    """Set quantization info on the ArgMinMax output.

    The output gets scale 1.0, zero point 0, and is marked ``qinvariant``.
    Its bit width is ``attrs['q_bits_activation']`` raised to at least 16,
    and its dtype comes from ``bits2dtype`` with signedness taken from
    ``self.force_dtype_int``.
    """
    activation_bits = self.attrs["q_bits_activation"]
    result = self.outputs[0]
    result.scale, result.zerop = 1., 0
    result.qbits = max(16, activation_bits)
    # ``False or x`` evaluates to ``x``: signed only when force_dtype_int is set.
    result.dtype = bits2dtype(result.qbits, is_signed=self.force_dtype_int)
    result.qinvariant = True
39 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/batchtodepth.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | from AIPUBuilder.Optimizer.logger import OPT_ERROR
 7 | 
 8 | 
 9 | @op_register(OpType.BatchToDepth)
def batchtodepth(self, *args):
    """Move groups of ``block_size`` batch entries into the last dimension.

    The input is unpacked as a 4-D ``(n, h, w, c)`` tensor and the result has
    shape ``(n // block_size, h, w, c * block_size)``. It is stored in
    ``self.outputs[0].betensor`` and returned.

    ``OPT_ERROR`` is called when the IR batch size of the input differs from
    ``self.current_batch_size``.
    """
    block_size_ = self.get_param('block_size')
    inp = self.inputs[0].betensor
    n, h, w, c = inp.shape
    if self.inputs[0].ir_shape[0] != self.current_batch_size:
        OPT_ERROR("batch size in calibratoin or metric dataset should be equal to batch size in IR")
    out_batch = n // block_size_
    # (n', bs, h, w, c) -> (n', h, w, bs, c) so the block index sits next to c
    y = inp.view(out_batch, block_size_, h, w, c).permute(0, 2, 3, 1, 4).contiguous()
    out = y.view(out_batch, h, w, c * block_size_)
    self.outputs[0].betensor = out
    return out
21 | 
22 | 
23 | @quant_register(OpType.BatchToDepth)
def batchtodepth_quantize(self, *args):
    """Copy the first input's quantization info to the first output.

    Copies ``scale``, ``zerop``, ``qbits``, ``dtype`` and ``qinvariant``,
    in that order.
    """
    src, dst = self.inputs[0], self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
32 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/batchtospace.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | from AIPUBuilder.Optimizer.logger import OPT_ERROR
 8 | 
 9 | 
10 | @op_register(OpType.BatchToSpace)
def batchtospace(self, *args):
    """Rearrange batch entries into spatial blocks, then crop.

    The input is unpacked as a 4-D ``(n, h, w, c)`` tensor. The batch
    dimension is split by ``block_size_y * block_size_x`` and folded into
    height and width, giving ``(n // (bx * by), h * by, w * bx, c)`` before
    the ``crop_*`` params are applied. The result is stored in
    ``self.outputs[0].betensor`` and returned.

    ``OPT_ERROR`` is called when the IR batch size differs from the batch
    size of the incoming tensor.
    """
    block_size_x = self.get_param('block_size_x')
    block_size_y = self.get_param('block_size_y')
    crop_left = self.get_param('crop_left')
    crop_right = self.get_param('crop_right')
    crop_top = self.get_param('crop_top')
    crop_bottom = self.get_param('crop_bottom')

    inp = self.inputs[0].betensor
    n, h, w, c = inp.shape
    if self.inputs[0].ir_shape[0] != inp.shape[0]:
        OPT_ERROR("batch size in calibratoin or metric dataset should be equal to batch size in IR")
    out_batch = n // (block_size_x * block_size_y)
    full_h = h * block_size_y
    full_w = w * block_size_x
    y = inp.view(block_size_y, block_size_x, out_batch, h, w, c)
    # (by, bx, n', h, w, c) -> (n', h, by, w, bx, c)
    y = y.permute(2, 3, 0, 4, 1, 5).contiguous()
    y = y.view(out_batch, full_h, full_w, c)
    out = y[:, crop_top:full_h - crop_bottom, crop_left:full_w - crop_right, :]
    self.outputs[0].betensor = out
    return out
30 | 
31 | 
32 | @quant_register(OpType.BatchToSpace)
def batchtospace_quantize(self, *args):
    """Copy the first input's quantization info to the first output.

    Copies ``scale``, ``zerop``, ``qbits``, ``dtype`` and ``qinvariant``,
    in that order.
    """
    src, dst = self.inputs[0], self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
41 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/batchtospaceNd.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | from AIPUBuilder.Optimizer.logger import OPT_ERROR
 8 | 
# Register the 'BatchToSpaceND' op type name; the decorators below refer to it as
# OpType.BatchToSpaceND (presumably made available by this call — TODO confirm).
register_optype('BatchToSpaceND')
10 | 
11 | 
12 | @op_register(OpType.BatchToSpaceND)
def batchtospaceNd(self, *args):
    """Rearrange batch entries into three spatial dimensions, then crop.

    Only 5-D inputs, unpacked as ``(n, d, h, w, c)``, are handled;
    ``OPT_FATAL`` is called otherwise. ``self.params['block_size']`` supplies
    the three block sizes and ``self.params['crops']`` a ``[begin, end]``
    pair for each of the three spatial axes. The result is stored in
    ``self.outputs[0].betensor`` and returned.
    """
    block_size = self.params['block_size']
    crops = self.params['crops']
    source = self.inputs[0]
    data = source.betensor

    if data.ndim != 5:
        OPT_FATAL(f"{self}, currently input dim only supoort 5-dim, more dimensions will be supported in the future!")
    if source.ir_shape[0] != data.shape[0]:
        OPT_ERROR(f"{self},batch size in calibratoin or metric dataset should be equal to batch size in IR")

    bs_z, bs_y, bs_x = block_size
    n, d, h, w, c = data.shape
    out_batch = n // (bs_z * bs_y * bs_x)
    full_d, full_h, full_w = d * bs_z, h * bs_y, w * bs_x

    blocks = data.view(bs_z, bs_y, bs_x, out_batch, d, h, w, c)
    # -> (out_batch, d, bs_z, h, bs_y, w, bs_x, c)
    blocks = blocks.permute(3, 4, 0, 5, 1, 6, 2, 7).contiguous()
    merged = blocks.view(out_batch, full_d, full_h, full_w, c)

    (d_begin, d_end), (h_begin, h_end), (w_begin, w_end) = crops[0], crops[1], crops[2]
    self.outputs[0].betensor = merged[:,
                                      d_begin: full_d - d_end,
                                      h_begin: full_h - h_end,
                                      w_begin: full_w - w_end,
                                      :]
    return self.outputs[0].betensor
33 | 
34 | 
35 | @quant_register(OpType.BatchToSpaceND)
def batchtospaceNd_quantize(self, *args):
    """Copy the first input's quantization info to the first output.

    Copies ``scale``, ``zerop``, ``qbits``, ``dtype`` and ``qinvariant``,
    in that order.
    """
    src, dst = self.inputs[0], self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
44 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/bias_add.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.ops.bn import *
 8 | import torch
 9 | 
10 | register_optype('BiasAdd')
11 | 
12 | 
13 | @quant_register(OpType.BiasAdd)
def bias_add_quantize(self, *args):
    """Quantize BiasAdd through the BatchNorm quantization routine.

    The weight and bias quantization modes are set to the activation mode,
    the ``weights_bk`` constant is exposed as ``weights`` for the call to
    ``batch_norm_quantize``, and both temporary constants are removed
    afterwards.
    """
    attrs, consts = self.attrs, self.constants
    # bias_add is equal to batchnorm with weights == 1
    attrs["q_mode_weight"] = attrs["q_mode_activation"]
    attrs["q_mode_bias"] = attrs["q_mode_weight"]
    consts["weights"] = consts["weights_bk"]
    batch_norm_quantize(self, *args)
    for temp_key in ('weights_bk', 'weights'):
        consts.pop(temp_key)
22 | 
23 | 
24 | @op_register(OpType.BiasAdd)
def bias_add_forward(self, *args):
    """Run BiasAdd as a BatchNorm whose weights are all ones.

    A temporary all-ones ``weights`` constant (shaped like the biases) is
    installed for the call to ``batch_norm`` and removed afterwards. While
    the node is not quantized, a ``weights_bk`` copy is also created once
    and left in ``self.constants``. When no ``axis`` param exists, the last
    input dimension is used for the duration of the call.

    Returns ``self.outputs[0].betensor``.
    """
    consts = self.constants

    def unit_weights(suffix):
        # all-ones tensor built from the biases' data, shape and IR dtype
        tensor = PyTensor(self.name + suffix)
        tensor.betensor = torch.ones_like(consts["biases"].betensor)
        tensor.ir_shape = consts["biases"].shape
        tensor.ir_dtype = consts["biases"].ir_dtype
        return tensor

    if not self.quantized and 'weights_bk' not in consts.keys():
        consts["weights_bk"] = unit_weights('/temp_weights_bk')
    consts["weights"] = unit_weights('/temp_weights')

    axis_injected = 'axis' not in self.params
    if axis_injected:
        self.params['axis'] = self.inputs[0].betensor.dim() - 1
    batch_norm(self, *args)
    if axis_injected:
        self.params.pop('axis')
    consts.pop('weights')
    return self.outputs[0].betensor
46 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/bn.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.ops.conv import *
 5 | from AIPUBuilder.Optimizer.ops.activation import apply_with_activation
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | from AIPUBuilder.Optimizer.framework import *
 8 | 
 9 | import torch
10 | 
11 | 
12 | @quant_register(OpType.BatchNorm)
def batch_norm_quantize(self, *args):
    """Quantize a BatchNorm node.

    Calls ``linear_op_quantize`` and then
    ``absorb_input_zp_to_bias_and_compress_bias_for_aiff``; both receive this
    node and the extra positional arguments unchanged.
    """
    linear_op_quantize(self, *args)
    absorb_input_zp_to_bias_and_compress_bias_for_aiff(self, *args)
16 | 
# Cf = If * Wf + Bf
# (Cq + Zc) / Sc = ((Iq + Zq) / Si) * ((Wq + Zw) / Sw) + (Bq + Zb) / Sb
# set Sb = Si * Sw
# Cq = ((Iq + Zq) * (Wq + Zw) + (Bq + Zb)) * (Sc / (Si * Sw)) - Zc
# where Z is zero point, S is scale
22 | 
23 | 
24 | @op_register(OpType.BatchNorm)
def batch_norm(self, *args):
    """Apply ``x * weights + bias`` along the ``axis`` param.

    Works on clones of the input, weights and biases. When the node is
    quantized, the weight and bias zero points are added first. If ``axis``
    is not the last dimension, the input is permuted so that it is, the
    affine transform is applied, and the result is permuted back.
    ``apply_with_activation`` runs before the back-permutation unless the
    output uses per-channel quantization, in which case it runs after.

    A negative ``axis`` is accepted: ``axis == -1`` previously produced an
    invalid permutation because ``orig_perm[axis + 1:]`` became the whole list.

    Returns ``self.outputs[0].betensor``.
    """
    inp = self.inputs[0].betensor.clone()
    weights = self.constants["weights"].betensor.clone()
    bias = self.constants['biases'].betensor.clone()
    axis = self.get_param('axis')
    if self.quantized:
        # input's zerop has been absorbed to bias.
        # inp += self.inputs[0].zerop
        weights += self.constants["weights"].broadcast_zerop
        bias += self.constants['biases'].broadcast_zerop

    if inp.shape[axis] != self.inputs[0].ir_shape[axis]:
        weights = PyTensor.detile(weights, axis)
        bias = PyTensor.detile(bias, axis)
    inp_dim = inp.dim()
    # Normalise only for the permutation logic; every other use of `axis`
    # keeps the value exactly as read from the params.
    channel_axis = axis + inp_dim if axis < 0 else axis
    orig_perm = list(range(inp_dim))
    perm = []
    if channel_axis != inp_dim - 1 and inp_dim > 0:
        # move the channel axis to the end so weights/bias broadcast over it
        perm = orig_perm[:channel_axis] + orig_perm[channel_axis + 1:] + [orig_perm[channel_axis]]
        inp = torch.permute(inp, perm)

    x = torch.add(torch.multiply(inp, weights.float()), bias)
    if not self.outputs[0].is_perchannel_quantization():
        x = apply_with_activation(self, x, *args)
    if len(perm):
        # inverse of `perm`: put the trailing channel axis back in place
        n_perm = orig_perm[:channel_axis] + [orig_perm[-1]] + orig_perm[channel_axis:-1]
        x = torch.permute(x, n_perm)
    if self.outputs[0].is_perchannel_quantization():
        x = apply_with_activation(self, x, *args)
    self.outputs[0].betensor = x
    return self.outputs[0].betensor
57 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/ceil.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | import torch
 8 | 
 9 | 
10 | @op_register(OpType.Ceil)
def ceil(self, *args):
    """Forward pass of Ceil.

    ``torch.ceil`` is placed in ``self.attrs['lambda_func']`` while
    ``activation_module.unknown_activation`` runs and removed afterwards.
    Returns ``self.outputs[0].betensor``.
    """
    attrs = self.attrs
    attrs['lambda_func'] = torch.ceil
    rounded = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = rounded
    attrs.pop('lambda_func')
    return self.outputs[0].betensor
16 | 
17 | 
18 | @quant_register(OpType.Ceil)
def ceil_quantize(self, *args):
    """Quantize Ceil via ``activation_module.unknown_quantize``.

    ``lambda_func`` (``torch.ceil``) and ``out_signed`` (``True``) are set in
    ``self.attrs`` for the duration of the call and removed afterwards.
    """
    attrs = self.attrs
    attrs['lambda_func'] = torch.ceil
    attrs['out_signed'] = True
    activation_module.unknown_quantize(self, *args)
    for temp_key in ('lambda_func', 'out_signed'):
        attrs.pop(temp_key)
25 | 
26 | 
27 | @approx_register(OpType.Ceil)
def ceil_approx(self, *args):
    """Approximation hook for Ceil; only sets ``self.params['is_perf_mode']`` to False."""
    # this is not currently used because it is the same as the float process
    self.params['is_perf_mode'] = False
31 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/channelshuffle.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.logger import OPT_ERROR, OPT_WARN
 7 | import torch
 8 | 
 9 | 
10 | @op_register(OpType.ChannelShuffle)
def channelshuffle(self, *args):
    """
    Shuffle the last dimension of the input across ``group`` groups.

    The last dimension is viewed as ``(group, size // group)``, the two are
    swapped, and the result is flattened back to the input shape. With
    ``splits == 1`` the shuffled tensor is stored in ``self.outputs[0]`` and
    returned; otherwise it is split into equal chunks along the last
    dimension, one per output, and the tuple of chunks is returned.

    now only support [n, h, w, c] input shape
    torch.nn.functional.channel_shuffle is alpha version in torch 1.7.1
    :param self:
    :param args:
    :return:
    """
    data = self.inputs[0].betensor
    group = self.get_param('group')
    splits = self.get_param('splits')

    full_shape = list(data.shape)
    rank = len(full_shape)
    grouped_shape = full_shape[:-1] + [group, full_shape[-1] // group]
    # identity permutation over rank + 1 axes with the last two exchanged
    swap_last_two = list(range(rank - 1)) + [rank, rank - 1]
    shuffled = torch.permute(data.reshape(grouped_shape), swap_last_two).reshape(full_shape)

    if splits == 1:
        self.outputs[0].betensor = shuffled
        return shuffled

    chunks = torch.split(shuffled, shuffled.shape[-1] // splits, dim=-1)
    for index, chunk in enumerate(chunks):
        self.outputs[index].betensor = chunk
    return chunks
41 | 
42 | 
43 | @quant_register(OpType.ChannelShuffle)
def channelshuffle_quantize(self, *args):
    """Copy the first input's quantization info to every output.

    Copies ``dtype``, ``scale``, ``zerop``, ``qbits``, ``qmin``, ``qmax``
    and ``qinvariant``, in that order.
    """
    src = self.inputs[0]
    copied_fields = ('dtype', 'scale', 'zerop', 'qbits', 'qmin', 'qmax', 'qinvariant')
    for dst in self.outputs:
        for field in copied_fields:
            setattr(dst, field, getattr(src, field))
54 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/datastride.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | 
 9 | 
10 | @op_register(OpType.DataStride)
def datastride(self, *args):
    """Forward pass of DataStride.

    The input is converted with ``nhwc2nchw``, sliding windows of size
    ``kernel_y`` x ``kernel_x`` with strides ``stride_y`` / ``stride_x`` are
    extracted along dims 2 and 3, and the window elements are interleaved
    back into the two spatial dims using ``kernel_y`` as the block size for
    both. The result is converted with ``nchw2nhwc``, stored in
    ``self.outputs[0].betensor`` and returned.
    """
    kernel_x = self.get_param('kernel_x')
    kernel_y = self.get_param('kernel_y')
    stride_x = self.get_param('stride_x')
    stride_y = self.get_param('stride_y')

    nchw = nhwc2nchw(self.inputs[0].betensor)

    windows = nchw.unfold(2, kernel_y, stride_y).unfold(3, kernel_x, stride_x)
    windows = windows.permute(0, 4, 5, 1, 2, 3).contiguous()
    windows = windows.view(nchw.shape[0], -1, windows.shape[-2], windows.shape[-1])

    N, C, H, W = windows.size()
    bs = kernel_y
    base_channels = C // (bs ** 2)
    tiled = windows.view(N, bs, bs, base_channels, H, W)  # (N, bs, bs, C//bs^2, H, W)
    tiled = tiled.permute(0, 3, 4, 1, 5, 2).contiguous()  # (N, C//bs^2, H, bs, W, bs)
    tiled = tiled.view(N, base_channels, H * bs, W * bs)  # (N, C//bs^2, H * bs, W * bs)

    result = nchw2nhwc(tiled)
    self.outputs[0].betensor = result
    return result
32 | 
33 | 
34 | @quant_register(OpType.DataStride)
def quantize_datastride(self, *args):
    """Copy the first input's quantization info to the first output.

    Copies ``dtype``, ``scale``, ``zerop``, ``qbits`` and ``qinvariant``,
    in that order.
    """
    src, dst = self.inputs[0], self.outputs[0]
    for field in ('dtype', 'scale', 'zerop', 'qbits', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
43 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/depthtospace.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.logger import OPT_WARN
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.utils import *
 8 | import torch
 9 | 
10 | 
11 | @op_register(OpType.DepthToSpace)
def depthtospace(self, *args):
    """Move blocks of the channel dimension into height and width.

    The input must be 4-D and is unpacked as ``(N, H, W, C)``. The output has
    shape ``(N, H * block_size_y, W * block_size_x, C // (bx * by))``. The
    optional ``mode`` param (default ``'DCR'``, upper-cased) selects how the
    channel dimension is decomposed:

    * ``DCR``: channels are viewed as ``(by, bx, C')``
    * ``CRD``: channels are viewed as ``(C', by, bx)``

    ``OPT_WARN`` is called when the two block sizes differ, and ``OPT_FATAL``
    for a non-4-D input or an unknown mode. The result is stored in
    ``self.outputs[0].betensor`` and returned.
    """
    block_size_x = self.get_param('block_size_x')
    block_size_y = self.get_param('block_size_y')
    mode = self.get_param('mode', optional=True, default_value='DCR').upper()
    # f-strings keep the message text unchanged for str layer ids and avoid a
    # TypeError from `str + int` should the id be stored as a number.
    if block_size_x != block_size_y:
        OPT_WARN(f"currently not support block_size_x != block_size_y in layer{self.attrs['layer_id']}",
                 op_name=str(self.type))
    inp = self.inputs[0].betensor
    if inp.dim() != 4:
        OPT_FATAL(f"currently only support 4 dims input in layer{self.attrs['layer_id']}",
                  op_name=str(self.type))
    # data format is NHWC
    N, H, W, C = inp.size()
    new_channel = C // (block_size_x * block_size_y)
    if mode == 'DCR':
        x = inp.view(N, H, W, block_size_y, block_size_x, new_channel)  # (N, H, W, bs, bs, C//bs^2)
        x = x.permute(0, 1, 3, 2, 4, 5).contiguous()  # (N, H, bs, W, bs, C//bs^2)
    elif mode == 'CRD':
        x = inp.view(N, H, W, new_channel, block_size_y, block_size_x)
        x = x.permute(0, 1, 4, 2, 5, 3).contiguous()  # (N, H, bs, W, bs, C//bs^2)
    else:
        OPT_FATAL("unsupported mode: %s for DepthToSpace in node:%s" % (mode, self.name))
    out = x.view(N, H * block_size_y, W * block_size_x, new_channel)
    self.outputs[0].betensor = out
    return out
38 | 
39 | 
40 | @quant_register(OpType.DepthToSpace)
def depthtospace_quantize(self, *args):
    """Copy the first input's quantization info to the first output.

    Copies ``scale``, ``zerop``, ``qbits``, ``dtype`` and ``qinvariant``,
    in that order.
    """
    src, dst = self.inputs[0], self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
49 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/depthwiseconv.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils.dtype_utils import *
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | from AIPUBuilder.Optimizer.framework import *
 7 | from AIPUBuilder.Optimizer.ops.conv import conv2d, conv2d_quantize
 8 | 
 9 | 
10 | @op_register(OpType.DepthwiseConv)
def depthwise_conv2d(self, *args):
    """Forward pass of DepthwiseConv; returns whatever ``conv2d`` returns for this node."""
    return conv2d(self, *args)
14 | 
15 | 
16 | @quant_register(OpType.DepthwiseConv)
def depthwise_conv2d_quantize(self, *args):
    """Quantize DepthwiseConv via ``conv2d_quantize``.

    If ``scale_value``, ``shift_value``, ``scale_type`` and ``shift_type``
    are all present in ``self.params`` afterwards, they are turned into
    ``scale`` / ``shift`` constants, each value repeated
    ``weights.ir_shape[0]`` times, and removed from ``self.params``.
    """
    conv2d_quantize(self, *args)
    params = self.params
    scalar_keys = ('scale_value', 'shift_value', 'scale_type', 'shift_type')
    if not all(key in params for key in scalar_keys):
        return

    self.constants['scale'] = PyTensor(f"{self.name}_scale",
                                       [params['scale_value'], ] * self.constants['weights'].ir_shape[0],
                                       dtype=params['scale_type'])
    self.constants['shift'] = PyTensor(f"{self.name}_shift",
                                       [params['shift_value'], ] * self.constants['weights'].ir_shape[0],
                                       dtype=params['shift_type'])
    for key in scalar_keys:
        params.pop(key)
28 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/dequantize.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | 
# Register the 'DeQuantize' op type name; the decorators below refer to it as
# OpType.DeQuantize (presumably made available by this call — TODO confirm).
register_optype('DeQuantize')
 9 | 
10 | 
11 | @quant_register(OpType.DeQuantize)
def dequantize_quant(self, *args):
    """Copy the first input's quantization info to the first output.

    Copies ``scale``, ``zerop``, ``qbits``, ``dtype``, ``qinvariant``,
    ``qmin`` and ``qmax``, in that order.
    """
    src, dst = self.inputs[0], self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant', 'qmin', 'qmax'):
        setattr(dst, field, getattr(src, field))
22 | 
23 | 
24 | @op_register(OpType.DeQuantize)
def dequantize_forward(self, *args):
    """Forward pass of DeQuantize.

    Passes the first input's data, ``broadcast_scale`` and
    ``broadcast_zerop`` to ``linear_dequantize``, stores the result in
    ``self.outputs[0].betensor`` and returns it.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    dequantized = linear_dequantize(src.betensor, src.broadcast_scale, src.broadcast_zerop)
    dst.betensor = dequantized
    return dst.betensor
30 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/dummy.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | import torch
 7 | 
# Register the 'Dummy' op type name; the decorators below refer to it as
# OpType.Dummy (presumably made available by this call — TODO confirm).
register_optype('Dummy')
 9 | 
10 | 
11 | @op_register(OpType.Dummy)
def dummy__forward(self, *args):
    """Copy each input tensor to the output at the same position.

    Outputs and inputs are paired positionally (extra entries on either side
    are ignored) and each output receives a clone of its input's data.
    Returns ``None``.
    """
    paired = zip(self.outputs, self.inputs)
    for dst, src in paired:
        dst.betensor = src.betensor.clone()
15 | 
16 | 
17 | @quant_register(OpType.Dummy)
def dummy__quantize(self, *args):
    """Call ``clone_qinfo`` on each output with the input at the same position.

    Outputs and inputs are paired positionally; extra entries on either side
    are ignored.
    """
    paired = zip(self.outputs, self.inputs)
    for dst, src in paired:
        dst.clone_qinfo(src)
21 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/erosion2d.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.logger import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | from AIPUBuilder.Optimizer.ops.dilation2d import *
 8 | import torch
 9 | 
10 | register_optype('Erosion')
11 | 
12 | 
13 | @op_register(OpType.Erosion)
def erosion(self, *args):
    """Forward pass of Erosion.

    Calls ``dilation_erosion_fun`` with ``+inf`` padding, ``torch.amin`` as
    the comparison function and ``weight_reverse=True``; the result is
    stored in ``self.outputs[0].betensor`` and returned.
    """
    options = dict(padding_value=float('inf'), compare_func=torch.amin, weight_reverse=True)
    eroded = dilation_erosion_fun(self, **options)
    self.outputs[0].betensor = eroded
    return eroded
18 | 
19 | 
20 | @quant_register(OpType.Erosion)
def erosion_quantize(self, *args):
    """Quantize Erosion by delegating to ``dilation_quantize`` with the same arguments."""
    dilation_quantize(self, *args)
23 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/exp.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.ops.softmax import softmax_approx
 7 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 8 | import torch
 9 | 
10 | 
11 | @quant_register(OpType.Exp)
def exp_quantize(self, *args):
    """Quantize Exp via ``activation_module.unknown_quantize``.

    ``lambda_func`` (``torch.exp``) and ``out_signed`` (the value of
    ``self.force_dtype_int``) are set in ``self.attrs`` for the duration of
    the call and removed afterwards.
    """
    attrs = self.attrs
    attrs['lambda_func'] = torch.exp
    attrs['out_signed'] = self.force_dtype_int
    activation_module.unknown_quantize(self, *args)
    for temp_key in ('lambda_func', 'out_signed'):
        attrs.pop(temp_key)
18 | 
19 | 
20 | @op_register(OpType.Exp)
def exp(self, *args):
    """Forward pass of Exp.

    A float forward function is placed in ``self.attrs['lambda_func']`` while
    ``activation_module.unknown_activation`` runs and removed afterwards.
    That function uses ``x3_aiff_exp_approximation`` with the ``lut``
    constant when the node is approximated and has a ``lut``; otherwise it
    uses ``torch.exp``. Returns ``self.outputs[0].betensor``.
    """
    def float_forward(inp_tensor):
        use_lut = self.approximated and "lut" in self.constants
        if not use_lut:
            return torch.exp(inp_tensor)
        lut = self.constants["lut"].betensor
        # 1.442695 is approximately log2(e); presumably the approximation
        # evaluates a base-2 exponential — TODO confirm against the helper.
        scaled = inp_tensor * 1.442695
        return x3_aiff_exp_approximation(scaled, lut)

    self.attrs['lambda_func'] = float_forward
    activated = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = activated
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor
34 | 
35 | 
36 | @approx_register(OpType.Exp)
def elu_approx(self, *args):
    """Approximation hook registered for ``OpType.Exp``; delegates to ``softmax_approx``.

    NOTE(review): despite the ``elu`` in its name, this function is
    registered for Exp by the decorator above.
    """
    softmax_approx(self, *args)
39 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/fc.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.ops.conv import *
 5 | from AIPUBuilder.Optimizer.ops.activation import apply_with_activation
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | from AIPUBuilder.Optimizer.framework import *
 8 | 
 9 | import torch.nn as nn
10 | 
11 | 
12 | @op_register(OpType.FullyConnected)
def fc(self, *args):
    """Forward pass of FullyConnected.

    Computation runs in float32 when the weights dtype is FP32/FP16 and in
    float64 otherwise. For a quantized node the weight and bias zero points
    are added first. If a ``remain_shift`` param exists and either the
    weights or the input are wider than 8 bits, the matmul is done without
    bias and the bias is passed to ``apply_with_activation`` together with
    the shift. Otherwise a plain linear layer is followed by
    ``apply_with_activation``.

    Returns ``self.outputs[0].betensor``.
    """
    w_const = self.constants['weights']
    b_const = self.constants["biases"]
    if w_const.dtype in [Dtype.FP32, Dtype.FP16]:
        # NOTE(review): .float() may return the same tensor when it is already
        # float32, so the in-place adds below could touch the stored constants
        # if this branch is ever taken for a quantized node — confirm.
        inp = self.inputs[0].betensor.float()
        bias = b_const.betensor.float()
        weights = w_const.betensor.float()
    else:
        inp = self.inputs[0].betensor.double()
        bias = b_const.betensor.clone().double()
        weights = w_const.betensor.clone().double()
    aasrb = self.get_param('remain_shift', optional=True, default_value=None)

    if self.quantized:
        # input's zerop has been absorbed to bias.
        weights += w_const.broadcast_zerop
        bias += b_const.broadcast_zerop
        if aasrb is not None:
            if dtype2bits(w_const.dtype) > 8 or dtype2bits(self.inputs[0].dtype) > 8:
                product = inp @ weights.T
                self.outputs[0].betensor = apply_with_activation(self, product, *args, aasrb=(aasrb, bias))
                return self.outputs[0].betensor

    linear_out = nn.functional.linear(inp, weights, bias)
    self.outputs[0].betensor = apply_with_activation(self, linear_out, *args)
    return self.outputs[0].betensor
40 | 
41 | 
42 | @quant_register(OpType.FullyConnected)
def fc_quantize(self, *args):
    """Quantize FullyConnected via ``conv2d_quantize``.

    If ``self.attrs`` holds a ``remain_shift`` entry afterwards, it is copied
    into ``self.params``.
    """
    conv2d_quantize(self, *args)
    if 'remain_shift' not in self.attrs:
        return
    self.params['remain_shift'] = self.attrs['remain_shift']
47 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/floor.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | import torch
 8 | 
 9 | 
10 | @op_register(OpType.Floor)
def floor(self, *args):
    """Forward pass of Floor.

    ``torch.floor`` is placed in ``self.attrs['lambda_func']`` while
    ``activation_module.unknown_activation`` runs and removed afterwards.
    Returns ``self.outputs[0].betensor``.
    """
    attrs = self.attrs
    attrs['lambda_func'] = torch.floor
    rounded = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = rounded
    attrs.pop('lambda_func')
    return self.outputs[0].betensor
16 | 
17 | 
18 | @quant_register(OpType.Floor)
def floor_quantize(self, *args):
    """Quantize Floor via ``activation_module.unknown_quantize``.

    ``lambda_func`` (``torch.floor``) and ``out_signed`` (``True``) are set
    in ``self.attrs`` for the duration of the call and removed afterwards.
    """
    attrs = self.attrs
    attrs['lambda_func'] = torch.floor
    attrs['out_signed'] = True
    activation_module.unknown_quantize(self, *args)
    for temp_key in ('lambda_func', 'out_signed'):
        attrs.pop(temp_key)
25 | 
26 | 
27 | @approx_register(OpType.Floor)
def floor_approx(self, *args):
    """Approximation hook for Floor; only sets ``self.params['is_perf_mode']`` to False."""
    # this is not currently used because it is the same as the float process
    self.params['is_perf_mode'] = False
31 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/gather_elements.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | 
 9 | 
10 | # IR
11 | # layer_id=2
12 | # layer_name=gather
13 | # layer_type=GatherElements
14 | # layer_bottom=[params,ids]
15 | # layer_bottom_shape=[[1,16320],[1,5]]
16 | # layer_bottom_type=[float,int32]
17 | # layer_top=[gather]
18 | # layer_top_shape=[[1,5]]
19 | # layer_top_type=[float]
20 | # axis=1
21 | 
22 | 
23 | # onnx gather_elements is same as torch gather
24 | @op_register(OpType.GatherElements)
def gather_elements(self, *args):
    """Gather elements of input 0 along ``axis`` using the indices in input 1.

    Both inputs must have the same rank (``OPT_FATAL`` is called otherwise).
    The indices are adjusted on a clone, leaving the input untouched:
    indices at or beyond the axis size become the last index, negative
    indices are shifted by the axis size, and any still negative become the
    last index. The result of ``torch.gather`` is stored in
    ``self.outputs[0].betensor`` and returned.
    """
    indices = self.inputs[1].betensor.clone()
    data = self.inputs[0].betensor

    if data.dim() != indices.dim():
        OPT_FATAL('layer_id=%s, type=%s, inp0 and inp1 rank are not same, please check' % (
            self.attrs['layer_id'], str(self.type)))

    axis = self.get_param('axis')
    axis_size = data.shape[axis]
    last_index = axis_size - 1

    indices[indices >= axis_size] = last_index
    wraps = indices < 0
    indices[wraps] = indices[wraps] + axis_size
    indices[indices < 0] = last_index

    gathered = torch.gather(data, axis, indices.long())
    self.outputs[0].betensor = gathered
    return self.outputs[0].betensor
46 | 
47 | 
48 | @quant_register(OpType.GatherElements)
def gather_elements_quantize(self, *args):
    """Copy the first input's quantization info to the first output.

    Copies ``dtype``, ``scale``, ``zerop``, ``qbits`` and ``qinvariant``,
    in that order.
    """
    src, dst = self.inputs[0], self.outputs[0]
    for field in ('dtype', 'scale', 'zerop', 'qbits', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
57 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/globalpooling.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.ops.pooling import pooling_quantize, pooling
 6 | import torch
 7 | 
 8 | 
 9 | @op_register(OpType.GlobalPool)
def globalpool_forward(self, *args):
    '''
    this op is used for ds forward to get the ds output shape when avgpool's output.ir_shape[1:3] == [1,1]

    The kernel is sized to cover dims 1 and 2 of the input plus the padding
    params (``pad_top`` / ``pad_bottom`` default to 0), written to
    ``self.params['kernel_y']`` / ``self.params['kernel_x']``, and the node is
    then handed to ``pooling``.

    :param self:
    :param args:
    :return: the value returned by ``pooling``
    '''
    in_shape = self.inputs[0].betensor.shape
    pad_left = self.get_param('pad_left')
    pad_right = self.get_param('pad_right')
    pad_top = self.get_param('pad_top', optional=True, default_value=0)
    pad_bottom = self.get_param('pad_bottom', optional=True, default_value=0)

    kernel_y = in_shape[1] + pad_top + pad_bottom
    kernel_x = in_shape[2] + pad_left + pad_right
    self.params['kernel_y'] = kernel_y
    self.params['kernel_x'] = kernel_x

    return pooling(self)
29 | 
30 | 
31 | @quant_register(OpType.GlobalPool)
def globalpool_quantize(self, *args):
    """Quantize GlobalPool by delegating to ``pooling_quantize`` (extra args are not forwarded)."""
    pooling_quantize(self)
34 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/gruv1.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.ops.gruv3 import gruv3_quantize, gruv3
 8 | from AIPUBuilder.Optimizer.logger import *
 9 | import torch.nn as nn
10 | 
11 | 
12 | @op_register(OpType.GRUv1)
def gruv1(self, *args):
    """Forward pass of GRUv1, implemented by ``gruv3``.

    ``self.params['version']`` is set to ``"GRUV1"`` for the duration of the
    call and removed afterwards. Returns ``None``.
    """
    params = self.params
    params['version'] = "GRUV1"
    gruv3(self, *args)
    params.pop('version')
17 | 
18 | 
19 | @quant_register(OpType.GRUv1)
def gruv1_quantize(self, *args):
    """Quantize GRUv1 via ``gruv3_quantize``.

    ``self.params['version']`` is set to ``"GRUV1"`` for the duration of the
    call and removed afterwards.
    """
    params = self.params
    params['version'] = "GRUV1"
    gruv3_quantize(self, *args)
    params.pop('version')
24 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/hardmax.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | 
 9 | # Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0 otherwise
10 | 
11 | register_optype('Hardmax')
12 | 
13 | 
14 | @quant_register(OpType.Hardmax)
def Hardmax_quantize(self, *args):
    """Fill in the output quantization info for Hardmax.

    Per-channel activation quantization is rejected with ``OPT_FATAL``. The
    output gets scale 1 and zero point 0, takes ``dtype``, ``qbits``,
    ``qmin`` and ``qmax`` from the input, and is marked ``qinvariant``.
    """
    if QuantMode.is_per_channel(self.attrs["q_mode_activation"]):
        OPT_FATAL("Currently not support per-channel quantization")

    inp = self.inputs[0]
    out = self.outputs[0]

    out.scale = 1
    out.zerop = 0
    out.dtype = inp.dtype
    out.qbits = inp.qbits
    out.qmin = inp.qmin
    out.qmax = inp.qmax
    out.qinvariant = True
31 | 
32 | 
@op_register(OpType.Hardmax)
def hardmax(self, *args):
    """Compute Hardmax of the first input along the ``axis`` parameter.

    The result has the input's shape and dtype. It holds 1 at the position
    that ``torch.argmax`` selects along ``axis`` and 0 everywhere else.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    inp = self.inputs[0]
    out = self.outputs[0]

    axis = int(self.get_param('axis'))  # only int

    data = inp.betensor
    winner = torch.argmax(data, axis, keepdim=True)
    # A single scatter writes the one 1 per slice. This replaces the former
    # per-index loop, which compared a tensor with `== None` and grew the
    # result through repeated torch.cat calls.
    out.betensor = torch.zeros_like(data).scatter_(axis, winner, 1)

    return out.betensor
50 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/hardswish.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | import torch
 8 | 
 9 | register_optype('Hardswish')
10 | 
11 | 
@quant_register(OpType.Hardswish)
def hardswish_quantize(self, *args):
    """Quantize Hardswish through the generic ``unknown_quantize`` routine.

    ``lambda_func`` and ``out_signed`` are placed in ``self.attrs`` for the
    duration of the call and removed again afterwards.
    """
    temporary_attrs = {
        'lambda_func': torch.nn.functional.hardswish,
        'out_signed': True,
    }
    for key, value in temporary_attrs.items():
        self.attrs[key] = value
    activation_module.unknown_quantize(self, *args)
    for key in temporary_attrs:
        self.attrs.pop(key)
19 | 
20 | 
@op_register(OpType.Hardswish)
def hardswish(self, *args):
    """Run Hardswish through the generic ``unknown_activation`` routine.

    ``lambda_func`` is placed in ``self.attrs`` for the duration of the call
    and removed afterwards.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    func_key = 'lambda_func'
    self.attrs[func_key] = torch.nn.functional.hardswish
    result = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = result
    self.attrs.pop(func_key)
    return self.outputs[0].betensor
27 | 
28 | 
@approx_register(OpType.Hardswish)
def hardswish_approx(self, *args):
    """Set the ``is_perf_mode`` parameter of a Hardswish node to True."""
    # By default, it is calculated directly on AIFF
    self.params['is_perf_mode'] = True
33 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/instancenorm.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.ops.groupnorm import groupnorm_quantize, groupnorm
 8 | import torch
 9 | 
10 | 
@quant_register(OpType.InstanceNorm)
def instancenorm_quantize(self, *args):
    """Quantize InstanceNorm by delegating to ``groupnorm_quantize``.

    Missing ``axis`` / ``group`` parameters are filled in before the call
    (``axis`` = every dimension index between the first and the last one,
    ``group`` = 1), and the filled-in entries are removed again afterwards.
    """
    filled_in = []
    if 'axis' not in self.params:
        rank = len(self.inputs[0].ir_shape)
        self.params['axis'] = list(range(1, rank - 1))
        filled_in.append('axis')
    if 'group' not in self.params:
        self.params['group'] = 1
        filled_in.append('group')

    groupnorm_quantize(self, *args)

    # Only drop what this function added itself.
    for key in filled_in:
        self.params.pop(key)
28 | 
29 | 
@op_register(OpType.InstanceNorm)
def instancenorm(self, *args):
    """Run InstanceNorm by delegating to ``groupnorm``.

    Missing ``axis``, ``group`` and ``axis_shape`` parameters are filled in
    before the call and the entries added here are removed afterwards. When
    ``axis_shape`` is missing, ``scale_shift_shape`` is set to the same list
    and removed together with it.
    """
    data = self.inputs[0].betensor
    rank = data.dim()
    filled_in = []

    if 'axis' not in self.params:
        # inp0.shape = [N,D1,D2,....,C]
        self.params['axis'] = list(range(1, rank - 1))
        filled_in.append('axis')
    if 'group' not in self.params:
        self.params['group'] = 1
        filled_in.append('group')
    if 'axis_shape' not in self.params:
        # 1 for every dimension except the last, which keeps its size.
        last_dim_shape = [1] * (rank - 1) + [data.shape[rank - 1]]
        self.params['axis_shape'] = last_dim_shape
        self.params['scale_shift_shape'] = last_dim_shape
        filled_in.extend(['axis_shape', 'scale_shift_shape'])

    groupnorm(self, *args)

    for key in filled_in:
        self.params.pop(key)
57 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/intopk.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | 
 8 | register_optype('InTopK')
 9 | 
10 | 
@op_register(OpType.InTopK)
def intopk(self, *args):
    """Check, per row, whether the target's value is among the top-k values.

    Input 0 holds the values and input 1 the per-row index into its second
    axis (squeezed when it has more than one dimension). ``k`` is capped at
    the size of the last axis of input 0. A size mismatch between the rows
    of input 0 and the targets is reported through ``OPT_FATAL``.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    k = self.get_param('k')

    largest = self.get_param("largest", optional=True, default_value=True)
    issorted = self.get_param("sorted", optional=True, default_value=True)

    predictions = self.inputs[0].betensor
    targets = self.inputs[1].betensor
    if targets.ndim > 1:
        targets = torch.squeeze(targets)  # rank 1
    k = min(k, predictions.shape[-1])
    topk_values, _ = torch.topk(predictions, k, -1, largest, issorted)
    if predictions.shape[0] != len(targets):
        OPT_FATAL("target must have the same size as input along predict'axis  ")
    hits = [
        predictions[row, int(targets[row])] in topk_values[row, :]
        for row in range(len(targets))
    ]
    self.outputs[0].betensor = torch.tensor(hits)
    return self.outputs[0].betensor
31 | 
32 | 
@quant_register(OpType.InTopK)
def intopk_quantize(self, *args):
    """Fill in the quantization info of the InTopK output.

    The output uses scale 1.0 and zero point 0, is flagged as
    quantization-invariant and is INT8 when ``self.force_dtype_int`` is set,
    UINT8 otherwise.
    """
    dst = self.outputs[0]
    dst.scale = 1.0
    dst.zerop = 0
    if self.force_dtype_int:
        dst.dtype = Dtype.INT8
    else:
        dst.dtype = Dtype.UINT8
    dst.qbits = dtype2bits(dst.dtype)
    dst.qinvariant = True
41 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/isinf.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | from AIPUBuilder.Optimizer.framework import *
 4 | 
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | 
 7 | register_optype('IsInf')
 8 | 
 9 | 
@op_register(OpType.IsInf)
def isinf(self, *args):
    """Mark the elements of the input that are infinite.

    ``detect_positive`` / ``detect_negative`` (both default True) select
    whether ``+inf`` and ``-inf`` are reported.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    detect_negative = self.get_param('detect_negative', optional=True, default_value=True)
    detect_positive = self.get_param('detect_positive', optional=True, default_value=True)

    data = self.inputs[0].betensor
    found = torch.zeros_like(data, device=data.device).bool()
    if detect_positive:
        found = found | (data == float('inf'))
    if detect_negative:
        found = found | (data == float('-inf'))
    wrapped = PyTensor('out', found.int(), self.outputs[0].dtype)
    self.outputs[0].betensor = wrapped.betensor
    return self.outputs[0].betensor
25 | 
26 | 
@quant_register(OpType.IsInf)
def isinf_quantize(self, *args):
    """Fill in the quantization info of the IsInf output.

    The output is an unsigned 8-bit, quantization-invariant tensor with
    scale 1.0 and zero point 0; its range comes from ``dtype2range``.
    """
    dst = self.outputs[0]
    dst.scale = 1.0
    dst.zerop = 0
    dst.qbits = 8
    dst.dtype = bits2dtype(dst.qbits, is_signed=False)
    dst.qinvariant = True
    value_range = dtype2range(dst.dtype)
    dst.qmin, dst.qmax = value_range
36 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/isnan.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | from AIPUBuilder.Optimizer.framework import *
 4 | 
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | 
 7 | register_optype('IsNaN')
 8 | 
 9 | 
@op_register(OpType.IsNaN)
def isnan(self, *args):
    """Mark the NaN elements of the input.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    nan_flags = torch.isnan(self.inputs[0].betensor).int()
    wrapped = PyTensor('out', nan_flags, self.outputs[0].dtype)
    self.outputs[0].betensor = wrapped.betensor
    return self.outputs[0].betensor
15 | 
16 | 
@quant_register(OpType.IsNaN)
def isnan_quantize(self, *args):
    """Fill in the quantization info of the IsNaN output.

    The output is an unsigned 8-bit, quantization-invariant tensor with
    scale 1.0 and zero point 0; its range comes from ``dtype2range``.
    """
    dst = self.outputs[0]
    dst.scale = 1.0
    dst.zerop = 0
    dst.qbits = 8
    dst.dtype = bits2dtype(dst.qbits, is_signed=False)
    dst.qinvariant = True
    value_range = dtype2range(dst.dtype)
    dst.qmin, dst.qmax = value_range
26 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/layernorm.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.ops.groupnorm import groupnorm_quantize, groupnorm
 8 | import torch
 9 | 
10 | 
@quant_register(OpType.LayerNorm)
def layernorm_quantize(self, *args):
    """Quantize a LayerNorm node by delegating to ``groupnorm_quantize``."""
    groupnorm_quantize(self, *args)
14 | 
15 | 
@op_register(OpType.LayerNorm)
def layernorm(self, *args):
    """Run LayerNorm by delegating to ``groupnorm``.

    Missing parameters are filled in before the call and the entries added
    here are removed afterwards: ``axis`` defaults to ``[-1]`` and ``group``
    to 1. When ``axis_shape`` is missing, it and ``scale_shift_shape`` are
    derived from the input shape and removed together afterwards.
    """
    filled_in = []
    if 'axis' not in self.params:
        self.params['axis'] = [-1]
        filled_in.append('axis')
    if 'group' not in self.params:
        self.params['group'] = 1
        filled_in.append('group')
    if 'axis_shape' not in self.params:
        axes = self.params['axis']
        data = self.inputs[0].betensor
        rank = data.dim()
        # Map negative axes onto their non-negative equivalents.
        normalized = [ax + rank if ax < 0 else ax for ax in axes]
        # Keep the input size on the normalized axes, 1 elsewhere.
        self.params['axis_shape'] = [
            data.shape[dim] if dim in normalized else 1 for dim in range(rank)
        ]
        # Keep the input size only on the last listed axis.
        self.params['scale_shift_shape'] = [
            data.shape[dim] if dim == normalized[-1] else 1 for dim in range(rank)
        ]
        filled_in.extend(['axis_shape', 'scale_shift_shape'])

    groupnorm(self, *args)

    for key in filled_in:
        self.params.pop(key)
44 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/maxunpool.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.ops.upsamplebyindex import upsamplebyindex_quantize, upsamplebyindex
 8 | 
 9 | 
@quant_register(OpType.MaxUnpool)
def maxunpool_quantize(self, *args):
    """Quantize a MaxUnpool node by delegating to ``upsamplebyindex_quantize``."""
    upsamplebyindex_quantize(self, *args)
13 | 
14 | 
@op_register(OpType.MaxUnpool)
def maxunpool(self, *args):
    """Run MaxUnpool by delegating to ``upsamplebyindex``.

    The delegate's return value is not propagated; this function returns None.
    """
    upsamplebyindex(self, *args)
18 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/mul.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.ops.eltwise import eltwise_quantize, eltwise
 7 | 
 8 | 
 9 | @op_register(OpType.Mul)
10 | def mul_forward(self, *args):
11 |     self.params['method'] = 'MUL'
12 |     eltwise(self, *args)
13 |     self.params.pop('method')
14 |     return self.outputs[0].betensor
15 | 
16 | 
@quant_register(OpType.Mul)
def mul_quantize(self, *args):
    """Quantize Mul through the shared ``eltwise_quantize`` routine.

    A ``'method'`` entry set to ``'MUL'`` is placed in ``self.params`` for
    the duration of the call and removed afterwards.
    """
    method_key = 'method'
    self.params[method_key] = 'MUL'
    eltwise_quantize(self, *args)
    self.params.pop(method_key)
22 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/neg.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | 
 8 | 
 9 | @op_register(OpType.Negative)
10 | def neg(self, *args):
11 |     inp = self.inputs[0]
12 |     out = self.outputs[0]
13 |     if self.quantized:
14 |         y = torch.neg(inp.betensor + inp.zerop) - out.zerop
15 |         out.betensor = torch.clamp(y, out.qmin, out.qmax)
16 |     else:
17 |         out.betensor = torch.neg(inp.betensor)
18 |     return out.betensor
19 | 
20 | 
@quant_register(OpType.Negative)
def neg_quantize(self, *args):
    """Derive the quantization info of the Negative output from its input.

    Scale, bit width and the qinvariant flag are copied from the input and
    the output dtype is the signed type of the input's bit width. The zero
    point is ``-zerop + 1`` for a signed input; for an unsigned input the
    output range is refreshed from the new dtype and the zero point becomes
    ``-zerop - qmax``.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    # The attribute is still looked up, but its value is not needed below.
    _ = self.attrs["q_mode_activation"]
    dst.qinvariant = src.qinvariant
    dst.scale = src.scale
    dst.qbits = src.qbits
    dst.dtype = bits2dtype(dtype2bits(src.dtype), is_signed=True)
    dst.zerop = src.zerop
    if not is_signed(src.dtype):
        dst.qmin, dst.qmax = dtype2range(dst.dtype)
        dst.zerop = -1 * src.zerop - dst.qmax
    else:
        dst.zerop = -1 * src.zerop + 1
36 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/nonzero.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | from AIPUBuilder.Optimizer.framework import *
 4 | 
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | 
 7 | register_optype('NonZero')
 8 | 
 9 | 
@op_register(OpType.NonZero)
def nonzero(self, *args):
    """Collect the indices of the non-zero input elements.

    Output 0 receives ``torch.nonzero`` of the input with its two axes
    swapped; output 1 receives the size of output 0's second axis as a
    one-element tensor.

    Returns:
        A tuple ``(outputs[0].betensor, outputs[1].betensor)``.
    """
    coords = self.outputs[0]
    coords.betensor = torch.nonzero(self.inputs[0].betensor)
    coords.betensor = coords.betensor.permute(1, 0)
    found_count = coords.betensor.shape[1]
    self.outputs[1].betensor = torch_tensor(found_count, coords.device).reshape([1])
    return coords.betensor, self.outputs[1].betensor
16 | 
17 | 
@quant_register(OpType.NonZero)
def nonzero_quantize(self, *args):
    """Fill in the quantization info of both NonZero outputs.

    Output 0 is unsigned and quantization-invariant with scale 1.0 and zero
    point 0. Its bit width is the larger of ``q_bits_activation`` and
    ``ceil(log2(L))``, where ``L`` is the largest entry of the input's
    ``ir_shape``. Output 1 clones the quantization info of output 0.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    act_bits = self.attrs["q_bits_activation"]
    longest_dim = max(src.ir_shape)
    bits_needed = torch.log2(torch.tensor(longest_dim)).ceil()
    dst.scale = 1.0
    dst.zerop = 0
    dst.qbits = max(act_bits, bits_needed)
    dst.dtype = bits2dtype(dst.qbits, is_signed=False)
    dst.qinvariant = True
    dst.qmin, dst.qmax = dtype2range(dst.dtype)
    self.outputs[1].clone_qinfo(self.outputs[0])
32 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/noop.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | 
 7 | register_optype('NoOp')
 8 | 
 9 | 
@op_register(OpType.NoOp)
def noop(self, *args):
    """Assign the first input's tensor to every output.

    Returns:
        A list with the ``betensor`` of each output.
    """
    for dst in self.outputs:
        dst.betensor = self.inputs[0].betensor
    return [dst.betensor for dst in self.outputs]
15 | 
16 | 
@quant_register(OpType.NoOp)
def noop_quantize(self, *args):
    """Clone the first input's quantization info onto every output."""
    src = self.inputs[0]
    for dst in self.outputs:
        dst.clone_qinfo(src)
22 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/permute.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | import torch
 7 | 
 8 | 
 9 | @op_register(OpType.Permute)
10 | def permute(self, *args):
11 |     inp = self.inputs[0].betensor
12 |     out = self.outputs[0]
13 |     perm = self.get_param('perm')
14 |     if not isinstance(perm, list):
15 |         perm = list(reversed([i for i in range(len(inp.shape))]))
16 |     out.betensor = inp.permute(perm)
17 |     return out.betensor
18 | 
19 | 
@quant_register(OpType.Permute)
def permute_quantize(self, *args):
    """Copy the input's quantization info to the Permute output.

    dtype, scale, zero point, bit width, value range and the qinvariant flag
    are copied field by field.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    dst.dtype = src.dtype
    dst.scale = src.scale
    dst.zerop = src.zerop
    dst.qbits = src.qbits
    value_range = (src.qmin, src.qmax)
    dst.qmin, dst.qmax = value_range
    dst.qinvariant = src.qinvariant
30 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/quantize.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | 
 8 | register_optype('Quantize')
 9 | 
10 | 
@quant_register(OpType.Quantize)
def quantize_quant(self, *args):
    """Copy the input's quantization info to the Quantize output.

    Scale, zero point, bit width, dtype, the qinvariant flag and the value
    range are copied field by field, in that order.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    copied_fields = ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant', 'qmin', 'qmax')
    for field in copied_fields:
        setattr(dst, field, getattr(src, field))
22 | 
23 | 
@op_register(OpType.Quantize)
def quantize_forward(self, *args):
    """Quantize the input tensor with the output's quantization info.

    The rounding function is selected by the optional ``round_mode``
    parameter (default ``"ROUND_TO_EVEN"``). When the output has no ``qmin``
    the input tensor is passed through unchanged.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    round_mode = self.get_param('round_mode', optional=True, default_value="ROUND_TO_EVEN")
    round_func = get_round_mode_func(round_mode)
    if dst.qmin is not None:
        dst.betensor = linear_quantize_clip(
            src.betensor, dst.broadcast_scale, dst.broadcast_zerop,
            dst.qmin, dst.qmax, round_func=round_func)
    else:
        dst.betensor = src.betensor
    return dst.betensor
36 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/queryrebatch.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | 
 7 | register_optype('QueryRebatch')
 8 | 
 9 | 
@op_register(OpType.QueryRebatch)
def queryrebatch_forward(self, *args):
    """Gather columns of input 0 with each index input into a zero-padded output.

    Inputs 1..N are used as index tensors into axis 1 of input 0. The output
    is created with the IR shape of output 0, except that axis 2 is set to
    the longest index tensor. Slot ``i`` along axis 1 receives the gather
    for index input ``i``; positions beyond its length stay zero.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    if len(self.inputs) - 1 != self.outputs[0].ir_shape[1]:
        OPT_ERROR("please check the queryrebatch IR, the len(input) - 1 should be equal to output[0].shape[1]")

    index_inputs = self.inputs[1:]
    longest = max(t.betensor.shape[0] for t in index_inputs)
    padded_shape = list(self.outputs[0].ir_shape)
    padded_shape[2] = longest
    self.outputs[0].betensor = torch.zeros(padded_shape).to(self.inputs[0].device)
    for slot, index_t in enumerate(index_inputs):
        indices = index_t.betensor
        gathered = self.inputs[0].betensor[:, indices.long()]
        self.outputs[0].betensor[:, slot, :indices.shape[0]] = gathered

    return self.outputs[0].betensor
25 | 
26 | 
@quant_register(OpType.QueryRebatch)
def queryrebatch_quantize(self, *args):
    """Clone the first input's quantization info onto the output."""
    src = self.inputs[0]
    self.outputs[0].clone_qinfo(src)
30 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/reciprocal.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | 
 9 | 
@quant_register(OpType.Reciprocal)
def reciprocal_quantize(self, *args):
    """Quantize Reciprocal by building a lookup table over the input range.

    The output quantization parameters come from
    ``get_linear_quant_params_from_tensor``. The table is stored in
    ``self.constants["lut"]`` and holds the requantized reciprocal of evenly
    spaced, dequantized input levels. Per-channel activation quantization is
    reported through ``OPT_FATAL``.
    """
    act_mode = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(act_mode) == True:
        OPT_FATAL("Currently not support per-channel quantization")
    act_bits = self.attrs["q_bits_activation"]

    src = self.inputs[0]
    dst = self.outputs[0]

    dst.qbits = act_bits
    signed_out = is_signed(src.dtype) or self.force_dtype_int
    device = src.betensor.device
    dst.scale, dst.zerop, dst.qmin, dst.qmax, dst.dtype = get_linear_quant_params_from_tensor(
        dst, act_mode, dst.qbits, signed_out)

    # Table size is 2 ** min(input bits, configured 'lut_items_in_bits').
    entries = 2 ** min(src.qbits, int(self.get_attrs('lut_items_in_bits')))
    levels = torch.linspace(src.qmin, src.qmax, steps=entries, device=device)
    table = linear_dequantize(levels, src.scale, src.zerop)
    table = torch.reciprocal(table)
    table = linear_quantize_clip(table, dst.scale, dst.zerop, dst.qmin, dst.qmax)
    table_np = table.cpu().numpy().astype(dtype2nptype(dst.dtype))
    self.constants["lut"] = PyTensor(self.name+"/reciprocal_lut", table_np)
    dst.qinvariant = False
31 | 
32 | 
@op_register(OpType.Reciprocal)
def reciprocal(self, *args):
    """Compute the element-wise reciprocal of the input.

    In quantized mode the result is read from ``self.constants["lut"]``
    through ``lookup_lut_powerof2``, indexed by the input shifted by
    ``inp.qmin``. Otherwise ``torch.reciprocal`` is applied directly.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    if not self.quantized:
        dst.betensor = torch.reciprocal(src.betensor)
        return dst.betensor

    flat_index = torch.reshape(src.betensor - src.qmin, (-1,))
    table = self.constants["lut"].betensor
    table_dtype = self.constants["lut"].dtype
    looked_up = lookup_lut_powerof2(
        flat_index, table, src.qbits, False, dtype2bits(table_dtype), is_signed(table_dtype))
    dst.betensor = torch.reshape(looked_up, src.betensor.shape)

    return dst.betensor
49 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/relu.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | 
 9 | 
@quant_register(OpType.RELU)
def relu_quantize(self, *args):
    """Quantize RELU and record its requantization scale and shift.

    The output quantization parameters come from
    ``get_linear_quant_params_from_tensor``; the output is signed only when
    ``self.force_dtype_int`` is set. The ``out.scale / inp.scale`` ratio is
    approximated by a scale/shift pair, which is stored through
    ``set_ir_field`` under ``scale``/``shift`` when it holds multiple values
    and under ``scale_value``/``shift_value`` otherwise.
    """
    act_mode = self.attrs["q_mode_activation"]
    act_bits = self.attrs["q_bits_activation"]

    src = self.inputs[0]
    dst = self.outputs[0]
    dst.qbits = act_bits
    signed_out = self.force_dtype_int
    dst.scale, dst.zerop, dst.qmin, dst.qmax, dst.dtype = get_linear_quant_params_from_tensor(
        dst, act_mode, dst.qbits, signed_out)
    dst.qinvariant = src.qinvariant

    do_scale, do_scale_type, do_shift, do_shift_type = get_scale_approximation_params(
        dst.scale / src.scale, mult_bits=dst.qbits, force_shift_positive=self.force_shift_positive)
    multi_scale = is_torch_tensor_with_multi_data(do_scale)
    multi_shift = is_torch_tensor_with_multi_data(do_shift)
    self.set_ir_field('scale' if multi_scale else 'scale_value', do_scale, do_scale_type)
    self.set_ir_field('shift' if multi_shift else 'shift_value', do_shift, do_shift_type)
    if not multi_scale:
        self.params["shift_type"] = do_shift_type
        self.params["scale_type"] = do_scale_type
32 | 
33 | 
@op_register(OpType.RELU)
def relu(self, *args):
    """Apply RELU to the input.

    In quantized mode the rectified, zero-point-shifted input is requantized
    with the node's scale/shift IR fields (defaults 1 and 0). Otherwise RELU
    is applied to the input cast to float.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    if not self.quantized:
        dst.betensor = torch.nn.functional.relu(src.betensor.float())
        return dst.betensor

    # Yf = relu(Xf)
    # (Yq+Zy)/Sy = relu((Xq+Zx)/Sx)
    # Yq = relu(Xq+Zx) * Sy/Sx - Zy
    # on cpu device, torch.relu does not support 'clamp_min_cpu' for half, so use inp.betensor.float()
    rectified = torch.nn.functional.relu(src.betensor.float() + src.zerop)
    do_shift = self.get_ir_field(['shift_value', 'shift'], default_value=0)
    do_scale = self.get_ir_field(['scale_value', 'scale'], default_value=1)
    dst.betensor = linear_requantize(
        rectified, do_scale, do_shift, dst.zerop, dst.qmin, dst.qmax, dst.key_axis)
    return dst.betensor
50 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/relu6.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | 
 9 | 
@quant_register(OpType.RELU6)
def relu6_quantize(self, *args):
    """Quantize RELU6 and record its requantization scale and shift.

    The output quantization parameters come from
    ``get_linear_quant_params_from_tensor``; the output is signed only when
    ``self.force_dtype_int`` is set. The ``out.scale / inp.scale`` ratio is
    approximated by an integer scale/shift pair stored in ``self.params``.
    Per-channel activation quantization is reported through ``OPT_FATAL``.
    """
    act_mode = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(act_mode) == True:
        OPT_FATAL("Currently not support per-channel quantization")
    act_bits = self.attrs["q_bits_activation"]

    src = self.inputs[0]
    dst = self.outputs[0]
    dst.qbits = act_bits
    signed_out = self.force_dtype_int
    dst.scale, dst.zerop, dst.qmin, dst.qmax, dst.dtype = get_linear_quant_params_from_tensor(
        dst, act_mode, dst.qbits, signed_out)
    do_scale, do_scale_type, do_shift, do_shift_type = get_scale_approximation_params(
        dst.scale / src.scale, mult_bits=dst.qbits, force_shift_positive=self.force_shift_positive)
    self.params["shift_value"] = int(do_shift)
    self.params["shift_type"] = do_shift_type
    self.params["scale_value"] = int(do_scale)
    self.params["scale_type"] = do_scale_type
    dst.qinvariant = src.qinvariant
30 | 
31 | 
@op_register(OpType.RELU6)
def relu6(self, *args):
    """Apply RELU6 to the input.

    In quantized mode the zero-point-shifted input is rectified and then
    requantized into ``[out.qmin, out.qmax]``. Scale and shift come from the
    ``scale``/``shift`` constants when a ``shift`` constant exists, otherwise
    from ``self.params``. In float mode ``relu6`` is applied directly.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    if not self.quantized:
        dst.betensor = torch.nn.functional.relu6(src.betensor)
        return dst.betensor

    # Yf = relu(Xf)
    # (Yq+Zy)/Sy = relu((Xq+Zx)/Sx)
    # Yq = relu(Xq+Zx) * Sy/Sx - Zy
    rectified = torch.nn.functional.relu(src.betensor + src.zerop)
    if "shift" in self.constants:
        do_shift = self.constants["shift"].betensor
        do_scale = self.constants["scale"].betensor
    else:
        do_shift = self.params["shift_value"]
        do_scale = self.params["scale_value"]
    dst.betensor = linear_requantize(rectified, do_scale, do_shift, dst.zerop, dst.qmin, dst.qmax)
    return dst.betensor
53 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/relu_family.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.ops.relu import *
 5 | from AIPUBuilder.Optimizer.ops.clip import *
 6 | 
 7 | 
 8 | def relu_family(self, *args):
 9 |     if {'max_clamp_value', 'min_clamp_value'}.issubset(self.params.keys()):
10 |         return clip(self, *args)
11 |     else:
12 |         return relu(self, *args)
13 | 
14 | 
def relu_family_quantize(self, *args):
    """Dispatch quantization to ``clip_quantize`` or ``relu_quantize``.

    ``clip_quantize`` is used when ``self.params`` contains both
    ``max_clamp_value`` and ``min_clamp_value``; otherwise ``relu_quantize``
    is used. Nothing is returned.
    """
    param_names = self.params.keys()
    has_clamp_range = all(name in param_names for name in ('max_clamp_value', 'min_clamp_value'))
    quantizer = clip_quantize if has_clamp_range else relu_quantize
    quantizer(self, *args)
20 | 
21 | 
def relu_family_out_signed(self, *args):
    """Report whether the output of a relu-family node is signed.

    When ``self.params`` contains both ``max_clamp_value`` and
    ``min_clamp_value`` the answer is delegated to ``clip_out_signed``;
    otherwise the output is treated as unsigned.

    Returns:
        The result of ``clip_out_signed`` in the clamp case, else ``False``.
    """
    if {'max_clamp_value', 'min_clamp_value'}.issubset(self.params.keys()):
        # The delegate's answer used to be discarded, so this branch
        # returned None instead of the signedness computed by the clip op.
        return clip_out_signed(self, *args)
    return False
27 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/resize.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.ops.interp import *
 7 | 
 8 | 
 9 | @op_register(OpType.Resize)
10 | def resize(self, *args):
11 |     return interp(self, *args)
12 | 
13 | 
@quant_register(OpType.Resize)
def resize_quantize(self, *args):
    """Quantize a Resize node by delegating to ``interp_quantize``."""
    interp_quantize(self, *args)
17 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/round.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | import torch
 8 | 
 9 | 
@quant_register(OpType.Round)
def round_quantize(self, *args):
    """Quantize Round by building a lookup table over the input range.

    The output quantization parameters come from
    ``get_linear_quant_params_from_tensor``. The table is stored in
    ``self.constants["lut"]`` and holds the requantized ``torch.round`` of
    evenly spaced, dequantized input levels. Per-channel activation
    quantization is reported through ``OPT_FATAL``.
    """
    act_mode = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(act_mode) == True:
        OPT_FATAL("Currently not support per-channel quantization")
    act_bits = self.attrs["q_bits_activation"]

    src = self.inputs[0]
    dst = self.outputs[0]

    dst.qbits = act_bits
    signed_out = is_signed(src.dtype) or self.force_dtype_int
    device = src.betensor.device
    dst.scale, dst.zerop, dst.qmin, dst.qmax, dst.dtype = get_linear_quant_params_from_tensor(
        dst, act_mode, dst.qbits, signed_out)

    # Table size is 2 ** min(input bits, configured 'lut_items_in_bits').
    entries = 2 ** min(src.qbits, int(self.get_attrs('lut_items_in_bits')))
    levels = torch.linspace(src.qmin, src.qmax, steps=entries, device=device)
    table = linear_dequantize(levels, src.scale, src.zerop)
    table = torch.round(table)
    table = linear_quantize_clip(table, dst.scale, dst.zerop, dst.qmin, dst.qmax)
    table_np = table.cpu().numpy().astype(dtype2nptype(dst.dtype))
    self.constants["lut"] = PyTensor(self.name+"/round_lut", table_np)
    dst.qinvariant = False
31 | 
32 | 
@op_register(OpType.Round)
def round(self, *args):
    """Run Round through the generic ``unknown_activation`` routine.

    ``lambda_func`` (``torch.round``) is placed in ``self.attrs`` for the
    duration of the call and removed afterwards.

    Returns:
        The tensor stored in ``self.outputs[0].betensor``.
    """
    func_key = 'lambda_func'
    self.attrs[func_key] = torch.round
    result = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = result
    self.attrs.pop(func_key)
    return self.outputs[0].betensor
39 | 
40 | 
@approx_register(OpType.Round)
def round_approx(self, *args):
    """Set the ``is_perf_mode`` parameter of a Round node to False."""
    # this is not currently used because it is the same as the float process
    self.params['is_perf_mode'] = False
45 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/rsqrt.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | 
 9 | 
@quant_register(OpType.Rsqrt)
def rsqrt_quantize(self, *args):
    """Set the output quantization parameters and build the Rsqrt lookup table.

    The table is built by dequantizing evenly spaced points between the input's
    ``qmin`` and ``qmax``, applying ``torch.rsqrt`` and re-quantizing (with
    clipping) using the output parameters. It is stored in
    ``self.constants["lut"]``.

    Per-channel activation quantization is reported through ``OPT_FATAL``.
    """
    q_mode_activation = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(q_mode_activation):
        OPT_FATAL("Currently not support per-channel quantization")
    q_bits_activation = self.attrs["q_bits_activation"]

    inp = self.inputs[0]
    out = self.outputs[0]
    out.qbits = q_bits_activation
    # The output is unsigned unless an integer dtype is forced on this node.
    out_sign = self.force_dtype_int
    dev = inp.betensor.device
    out.scale, out.zerop, out.qmin, out.qmax, out.dtype = get_linear_quant_params_from_tensor(
        out, q_mode_activation, out.qbits, out_sign)

    # The number of table entries is capped by the 'lut_items_in_bits' attribute.
    lsteps = 2 ** min(inp.qbits, int(self.get_attrs('lut_items_in_bits')))
    lut = linear_dequantize(torch.linspace(inp.qmin, inp.qmax, steps=lsteps, device=dev), inp.scale, inp.zerop)
    # NOTE(review): non-positive dequantized inputs produce inf/nan here;
    # confirm that linear_quantize_clip handles them as intended.
    lut = torch.rsqrt(lut)
    lut = linear_quantize_clip(lut, out.scale, out.zerop, out.qmin, out.qmax)
    self.constants["lut"] = PyTensor(self.name+"/rsqrt_lut", lut.cpu().numpy().astype(dtype2nptype(out.dtype)))
    out.qinvariant = False
30 | 
31 | 
@op_register(OpType.Rsqrt)
def rsqrt(self, *args):
    """Forward pass of Rsqrt.

    The float path applies ``torch.rsqrt`` directly. The quantized path looks
    the flattened, ``qmin``-offset input up in the ``lut`` constant and
    reshapes the result back to the input shape.
    """
    inp = self.inputs[0]
    out = self.outputs[0]
    if not self.quantized:
        out.betensor = torch.rsqrt(inp.betensor)
        return out.betensor

    # Offset by qmin before the lookup (presumably so indices start at 0).
    indices = torch.reshape(inp.betensor - inp.qmin, (-1,))
    lut_const = self.constants["lut"]
    looked_up = lookup_lut_powerof2(indices, lut_const.betensor, inp.qbits, False,
                                    dtype2bits(self.constants["lut"].dtype),
                                    is_signed(self.constants["lut"].dtype))
    out.betensor = torch.reshape(looked_up, inp.betensor.shape)
    return out.betensor
47 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/sigmoid.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | from AIPUBuilder.Optimizer.ops.silu import silu_approx
 8 | from AIPUBuilder.Optimizer.utils.math_utils import *
 9 | import torch
10 | 
11 | register_optype('Sigmoid')
12 | 
13 | 
@op_register(OpType.Sigmoid)
def sigmoid_forward(self, *args):
    """Forward pass of Sigmoid, delegated to the generic activation forward.

    The float function handed over through the temporary ``lambda_func``
    attribute uses the float lookup table (mirror mode) when the node is
    approximated and has a ``lut`` constant, otherwise ``torch.sigmoid``.
    """
    def approximated_float_forward(self, inp_tensor):
        use_lut = self.approximated and "lut" in self.constants
        if not use_lut:
            return torch.sigmoid(inp_tensor)
        table = self.constants["lut"].betensor
        return lookup_float_index_lut(inp_tensor, table,
                                      self.params['index_scale_value'],
                                      self.params['index_offset_value'],
                                      mirror_mode=True,
                                      value_offset_for_mirror_mode=self.params['value_offset_value'])

    def float_func(x):
        return approximated_float_forward(self, x)

    self.attrs['lambda_func'] = float_func
    result = activation_module.unknown_activation(self, *args)
    out = self.outputs[0]
    out.betensor = result
    self.attrs.pop('lambda_func')
    return out.betensor
31 | 
32 | 
@quant_register(OpType.Sigmoid)
def sigmoid_quantize(self, *args):
    """Quantize Sigmoid through the generic activation quantizer.

    Temporarily publishes ``lambda_func``, ``out_signed`` and ``mirror_offset``
    in ``self.attrs`` and removes them once ``unknown_quantize`` has returned.
    """
    def sigmoid_func(x):
        return torch.sigmoid(x)

    # sigmoid(0) is used as mirror offset only for these node types.
    needs_offset = self.type in [OpType.BasicLSTM, OpType.GRUv3, OpType.GRUv1]
    mirror_offset = torch.sigmoid(torch.tensor(0.0)).item() if needs_offset else 0.0

    self.attrs['lambda_func'] = sigmoid_func
    # Unsigned output unless an integer dtype is forced on this node.
    self.attrs['out_signed'] = self.force_dtype_int
    self.attrs['mirror_offset'] = mirror_offset

    activation_module.unknown_quantize(self, *args)

    self.attrs.pop('lambda_func')
    self.attrs.pop('out_signed')
    self.attrs.pop('mirror_offset')
47 | 
48 | 
@approx_register(OpType.Sigmoid)
def sigmoid_approx(self, *args):
    """Approximation hook for Sigmoid; delegates to ``silu_approx``."""
    silu_approx(self, *args)
52 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/sign.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | 
 8 | 
 9 | @op_register(OpType.Sign)
10 | def sign(self, *args):
11 |     inp = self.inputs[0]
12 |     out = self.outputs[0]
13 |     out.betensor = torch.sign((inp.betensor.float() + inp.zerop) if self.quantized else inp.betensor)  # -1, 0, 1
14 |     return out.betensor
15 | 
16 | 
@quant_register(OpType.Sign)
def sign_quantize(self, *args):
    """Set the output quantization parameters for Sign.

    The output uses scale 1 and zero point 0, keeps the input bit width with a
    signed integer dtype, and is marked ``qinvariant``.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    dst.scale, dst.zerop = 1, 0
    dst.qbits = src.qbits
    dst.dtype = bits2dtype(dst.qbits, is_signed=True, use_float=False)
    dst.qinvariant = True
26 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/sine.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.ops.cosine import trigonometric_quantize, trigonometric_forward, cosine_approx
 7 | 
 8 | import torch
 9 | 
10 | 
@quant_register(OpType.Sine)
def sine_quantize(self, *args):
    """Quantize Sine by delegating to ``trigonometric_quantize`` with ``torch.sin``."""
    trigonometric_quantize(self, torch.sin)
14 | 
15 | 
@approx_register(OpType.Sine)
def sine_approx(self, *args):
    """Approximation hook for Sine; delegates to ``cosine_approx``."""
    cosine_approx(self, *args)
19 | 
20 | 
@op_register(OpType.Sine)
def sine(self, *args):
    """Forward pass of Sine.

    Quantized mode: inputs wider than 8 bits go through
    ``trigonometric_forward``; narrower inputs use a direct table lookup on the
    ``qmin``-offset values. Float mode: uses the float lookup table (mirror
    mode) when the node is approximated and has a ``lut`` constant, otherwise
    ``torch.sin``.
    """
    inp = self.inputs[0]
    out = self.outputs[0]

    if not self.quantized:
        if self.approximated and "lut" in self.constants:
            table = self.constants["lut"].betensor
            inp_tensor = inp.betensor.float()
            # Subtract the truncated number of whole 2*pi periods before the lookup.
            periods = (inp_tensor * (1/(2*torch.pi))).int()
            remainder = inp_tensor - periods*2*torch.pi
            out.betensor = lookup_float_index_lut(
                remainder, table, self.params['index_scale_value'], self.params['index_offset_value'],
                mirror_mode=True, value_offset_for_mirror_mode=self.params['value_offset_value'])
        else:
            out.betensor = torch.sin(inp.betensor)
        return out.betensor

    if inp.qbits > 8:
        shifted = inp.betensor.long() + inp.zerop
        table = self.constants["lut"].betensor
        out.betensor = trigonometric_forward(self, shifted, table)
    else:
        offset_input = inp.betensor - inp.qmin
        table = self.constants["lut"].betensor
        flat = torch.reshape(offset_input, (-1,))
        looked_up = lookup_lut_powerof2(flat, table, inp.qbits, False,
                                        dtype2bits(self.constants["lut"].dtype),
                                        is_signed(self.constants["lut"].dtype))
        out.betensor = torch.reshape(looked_up, inp.betensor.shape)
    return out.betensor
50 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/slice_operator.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.ops.stridedslice import *
 7 | 
 8 | 
@op_register(OpType.Slice)
def slice_forward(self, *args):
    """Forward pass of Slice; returns the result of ``stridedslice``."""
    return stridedslice(self, *args)
12 | 
13 | 
@quant_register(OpType.Slice)
def slice_quantize(self, *args):
    """Quantize Slice by delegating to ``stridedslice_quantize``."""
    stridedslice_quantize(self, *args)
17 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/softplus.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | import torch
 8 | 
# Registers the 'Softplus' op type name; the decorators below reference
# OpType.Softplus, presumably made available by this call (confirm in framework).
register_optype('Softplus')
10 | 
11 | 
@quant_register(OpType.Softplus)
def softplus_quantize(self, *args):
    """Quantize Softplus through the generic activation quantizer.

    ``lambda_func`` and ``out_signed`` are published in ``self.attrs`` for the
    duration of the call and removed afterwards.
    """
    attrs = self.attrs
    attrs['lambda_func'] = torch.nn.functional.softplus
    # Unsigned output unless an integer dtype is forced on this node.
    attrs['out_signed'] = self.force_dtype_int
    activation_module.unknown_quantize(self, *args)
    for key in ('lambda_func', 'out_signed'):
        self.attrs.pop(key)
19 | 
20 | 
@op_register(OpType.Softplus)
def softplus(self, *args):
    """Forward pass of Softplus, delegated to the generic activation forward.

    The float function handed over through the temporary ``lambda_func``
    attribute uses the float lookup table (non-mirror mode) when the node is
    approximated and has a ``lut`` constant, otherwise
    ``torch.nn.functional.softplus``.
    """
    def approximated_float_forward(self, inp_tensor):
        use_lut = self.approximated and "lut" in self.constants
        if not use_lut:
            return torch.nn.functional.softplus(inp_tensor)
        table = self.constants["lut"].betensor
        return lookup_float_index_lut(
            inp_tensor, table, self.params['index_scale_value'], self.params['index_offset_value'],
            mirror_mode=False, value_offset_for_mirror_mode=self.params['value_offset_value'])

    def float_func(x):
        return approximated_float_forward(self, x)

    self.attrs['lambda_func'] = float_func
    result = activation_module.unknown_activation(self, *args)
    out = self.outputs[0]
    out.betensor = result
    self.attrs.pop('lambda_func')
    return out.betensor
35 | 
36 | 
@approx_register(OpType.Softplus)
def softplus_approx(self, *args):
    """Approximation hook for Softplus, delegated to ``unknown_approx``.

    Publishes a range callback (``set_min_max``), the float function and an
    unsigned-output flag in ``self.attrs`` for the call, then removes them.
    """
    def set_min_max(inp, use_dynamic_lut):
        # Fixed range [-6, 20] unless a dynamic table is requested, in which
        # case the lower bound is at most -6 and the upper bound is inp.max.
        if not use_dynamic_lut:
            return -6, 20
        if inp.min < -6:
            lower = inp.min
        else:
            lower = -6
        upper = inp.max
        return lower, upper

    temporary = (
        ('set_min_max', set_min_max),
        ('lambda_func', torch.nn.functional.softplus),
        ('out_signed', False),
    )
    for key, value in temporary:
        self.attrs[key] = value
    activation_module.unknown_approx(self, *args)
    self.attrs.pop('lambda_func')
    self.attrs.pop('set_min_max')
    self.attrs.pop('out_signed')
55 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/sort.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | import torch
 8 | 
 9 | 
10 | '''
11 | # IR
12 | layer_id=1
13 | layer_name=sort
14 | layer_type=Sort
15 | layer_bottom=score
16 | layer_bottom_shape=[1,5000]
17 | layer_bottom_type=int16
18 | layer_top=out_score_ptr,keep
19 | layer_top_shape=[1,5000],[1,5000]
20 | layer_top_type=int16,uint16
21 | '''
22 | 
23 | 
@op_register(OpType.Sort)
def sort(self, *args):
    """Forward pass of Sort.

    Sorts the first input along ``axis`` (default -1), descending by default,
    and stores the sorted values in output 0 and the indices in output 1.
    Returns the ``betensor`` of every output as a list.
    """
    axis = self.get_param('axis', optional=True, default_value=-1)
    is_descending = self.get_param('descending', optional=True, default_value=True)
    data = self.inputs[0].betensor
    sorted_values, sorted_indices = torch.sort(data, dim=axis, descending=is_descending)
    self.outputs[0].betensor = sorted_values
    self.outputs[1].betensor = sorted_indices
    return [tensor.betensor for tensor in self.outputs]
33 | 
34 | 
@quant_register(OpType.Sort)
def sort_quantize(self, *args):
    """Set the quantization parameters of both Sort outputs.

    Output 0 copies the input's quantization fields. Output 1 (the indices)
    uses at least 16 bits, scale 1.0, zero point 0.0 and is ``qinvariant``.
    """
    src = self.inputs[0]
    values_out = self.outputs[0]
    for field in ('dtype', 'scale', 'zerop', 'qmin', 'qmax', 'qinvariant', 'qbits'):
        setattr(values_out, field, getattr(src, field))

    activation_bits = self.attrs['q_bits_activation']
    indices_out = self.outputs[1]
    indices_out.qbits = max(16, activation_bits)
    # Unsigned indices unless an integer dtype is forced on this node.
    indices_out.dtype = bits2dtype(indices_out.qbits, self.force_dtype_int)
    indices_out.scale = 1.0
    indices_out.zerop = 0.0
    indices_out.qinvariant = True
54 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/spacetobatch.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | import torch
 8 | 
 9 | 
@op_register(OpType.SpaceToBatch)
def spacetobatch(self, *args):
    """Forward pass of SpaceToBatch on an NHWC input.

    The input is padded on its H and W axes, split into
    ``block_size_y`` x ``block_size_x`` blocks, and the in-block offsets are
    moved into the batch dimension. The result has shape
    ``(n * block_size_x * block_size_y, h // block_size_y, w // block_size_x, c)``
    where ``n, h, w, c`` is the padded shape. It is stored in
    ``self.outputs[0].betensor`` and returned.
    """
    block_size_x = self.get_param('block_size_x')
    block_size_y = self.get_param('block_size_y')
    pad_left = self.get_param('pad_left')
    pad_right = self.get_param('pad_right')
    pad_top = self.get_param('pad_top')
    pad_bottom = self.get_param('pad_bottom')
    inp = self.inputs[0].betensor
    if self.inputs[0].shape[0] != inp.shape[0]:
        OPT_ERROR("batch size in calibration or metric dataset should be equal to batch size in IR")
    # inp is NHWC format; torch pads starting from the last dimension, so the
    # leading (0, 0) pair leaves the channel axis untouched.
    paddings = (0, 0, pad_left, pad_right, pad_top, pad_bottom)
    # TODO: support per-channel zerop and pad the per-channel zerop
    pad_value = -self.inputs[0].zerop[0] if self.quantized else 0
    y = torch.nn.functional.pad(inp, paddings, value=pad_value)
    n, h, w, c = y.shape
    # reshape (rather than view) so a non-contiguous padded tensor is also accepted
    y = y.reshape(n, h//block_size_y, block_size_y, w//block_size_x, block_size_x, c)
    y = y.permute(2, 4, 0, 1, 3, 5).contiguous()
    out = y.view(n*block_size_x*block_size_y, h//block_size_y, w//block_size_x, c)
    self.outputs[0].betensor = out
    return out
33 | 
34 | 
@quant_register(OpType.SpaceToBatch)
def spacetobatch_quantize(self, *args):
    """Copy the input's quantization fields to the output unchanged."""
    src = self.inputs[0]
    dst = self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
44 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/spacetodepth.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | import torch
 8 | 
 9 | 
def space_to_depth(x, bs):
    """Rearrange ``bs`` x ``bs`` spatial blocks of an NCHW tensor into channels.

    Args:
        x: 4-D tensor unpacked as (N, C, H, W).
        bs: block size applied to both H and W.

    Returns:
        Tensor of shape (N, C * bs**2, H // bs, W // bs).
    """
    batch, channels, height, width = x.size()
    out_h, out_w = height // bs, width // bs
    blocks = x.view(batch, channels, out_h, bs, out_w, bs)   # (N, C, H//bs, bs, W//bs, bs)
    blocks = blocks.permute(0, 3, 5, 1, 2, 4).contiguous()   # (N, bs, bs, C, H//bs, W//bs)
    return blocks.view(batch, channels * (bs ** 2), out_h, out_w)
16 | 
17 | 
@op_register(OpType.SpaceToDepth)
def spacetodepth(self, *args):
    """Forward pass of SpaceToDepth on an NHWC input.

    The input is converted to NCHW, rearranged by ``space_to_depth`` using
    ``block_size_x`` for both spatial axes, and converted back to NHWC. A
    warning is logged when ``block_size_x`` and ``block_size_y`` differ.
    """
    block_size_x = self.get_param('block_size_x')
    block_size_y = self.get_param('block_size_y')
    if block_size_x != block_size_y:
        # str() keeps the concatenation valid when layer_id is not a string.
        OPT_WARN("currently not support block_size_x != block_size_y in layer" +
                 str(self.attrs['layer_id']), op_name=str(self.type))
    inp = self.inputs[0].betensor
    # data format is NHWC
    inp = nhwc2nchw(inp)
    o = space_to_depth(inp, block_size_x)
    self.outputs[0].betensor = nchw2nhwc(o)
    return self.outputs[0].betensor
31 | 
32 | 
@quant_register(OpType.SpaceToDepth)
def spacetodepth_quantize(self, *args):
    """Copy the input's quantization fields to the output unchanged."""
    src = self.inputs[0]
    dst = self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/split.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | 
 8 | 
 9 | @op_register(OpType.Split)
10 | # IR
11 | # layer_type=Split
12 | # layer_bottom=rpn_class/concat_0
13 | # layer_bottom_shape=[1,261888,2]
14 | # layer_bottom_type=float32
15 | # layer_top=split_out0,split_out1
16 | # layer_top_shape=[1,261888,1],[1,261888,1]
17 | # layer_top_type=float32,float32
18 | # axis=2
19 | # num_split=2
20 | def split(self, *args):
21 |     axis = self.get_param('axis')
22 |     inp_betensors = self.inputs[0].betensor
23 |     split_sizes = self.get_param('splits')
24 |     out = torch.split(inp_betensors, split_sizes, dim=axis)
25 | 
26 |     for i, outp in enumerate(out):
27 |         self.outputs[i].betensor = outp
28 |     return out
29 | 
30 | 
@quant_register(OpType.Split)
def split_quantize(self, *args):
    """Copy the input's quantization fields to every Split output.

    An error is reported through ``OPT_ERROR`` for each output whose
    ``key_axis`` differs from the input's.
    """
    copied_fields = ('dtype', 'scale', 'zerop', 'qbits', 'qinvariant')
    for i in range(len(self.outputs)):
        src = self.inputs[0]
        dst = self.outputs[i]
        for field in copied_fields:
            setattr(dst, field, getattr(src, field))

        if dst.key_axis != src.key_axis:
            OPT_ERROR(f"split input and out[{i}] key_axis is difference")
44 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/sqrt.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | 
 9 | 
@quant_register(OpType.Sqrt)
def sqrt_quantize(self, *args):
    """Set the output quantization parameters and build the Sqrt lookup table.

    The table is built by dequantizing evenly spaced points between the input's
    ``qmin`` and ``qmax``, applying ``torch.sqrt`` and re-quantizing (with
    clipping) using the output parameters. It is stored in
    ``self.constants["lut"]``.

    Per-channel activation quantization is reported through ``OPT_FATAL``.
    """
    q_mode_activation = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(q_mode_activation):
        OPT_FATAL("Currently not support per-channel quantization")
    q_bits_activation = self.attrs["q_bits_activation"]

    inp = self.inputs[0]
    out = self.outputs[0]
    out.qbits = q_bits_activation
    # The output is unsigned unless an integer dtype is forced on this node.
    out_sign = self.force_dtype_int
    dev = inp.betensor.device
    out.scale, out.zerop, out.qmin, out.qmax, out.dtype = get_linear_quant_params_from_tensor(
        out, q_mode_activation, out.qbits, out_sign)

    # The number of table entries is capped by the 'lut_items_in_bits' attribute.
    lsteps = 2 ** min(inp.qbits, int(self.get_attrs('lut_items_in_bits')))
    lut = linear_dequantize(torch.linspace(inp.qmin, inp.qmax, steps=lsteps, device=dev), inp.scale, inp.zerop)
    # NOTE(review): negative dequantized inputs produce nan here; confirm that
    # linear_quantize_clip handles them as intended.
    lut = torch.sqrt(lut)
    lut = linear_quantize_clip(lut, out.scale, out.zerop, out.qmin, out.qmax)
    self.constants["lut"] = PyTensor(self.name+"/sqrt_lut", lut.cpu().numpy().astype(dtype2nptype(out.dtype)))
    out.qinvariant = False
30 | 
31 | 
@op_register(OpType.Sqrt)
def sqrt(self, *args):
    """Forward pass of Sqrt.

    The float path applies ``torch.sqrt`` to the input cast to float. The
    quantized path looks the flattened, ``qmin``-offset input up in the ``lut``
    constant and reshapes the result back to the input shape.
    """
    inp = self.inputs[0]
    out = self.outputs[0]
    if not self.quantized:
        out.betensor = torch.sqrt(inp.betensor.float())
        return out.betensor

    # Offset by qmin before the lookup (presumably so indices start at 0).
    indices = torch.reshape(inp.betensor - inp.qmin, (-1,))
    lut_const = self.constants["lut"]
    looked_up = lookup_lut_powerof2(indices, lut_const.betensor, inp.qbits, False,
                                    dtype2bits(self.constants["lut"].dtype),
                                    is_signed(self.constants["lut"].dtype))
    out.betensor = torch.reshape(looked_up, inp.betensor.shape)
    return out.betensor
47 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/square.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | import torch
 8 | 
 9 | 
@quant_register(OpType.Square)
def square_quantize(self, *args):
    """Quantize Square through the generic activation quantizer.

    ``lambda_func`` and ``out_signed`` are published in ``self.attrs`` for the
    duration of the call and removed afterwards.
    """
    attrs = self.attrs
    attrs['lambda_func'] = torch.square
    # Unsigned output unless an integer dtype is forced on this node.
    attrs['out_signed'] = self.force_dtype_int
    activation_module.unknown_quantize(self, *args)
    for key in ('lambda_func', 'out_signed'):
        self.attrs.pop(key)
17 | 
18 | 
@op_register(OpType.Square)
def square(self, *args):
    """Forward pass of Square, delegated to the generic activation forward.

    ``torch.square`` is exposed through the temporary ``lambda_func`` attribute
    for the duration of the call and removed afterwards.
    """
    self.attrs['lambda_func'] = torch.square
    result = activation_module.unknown_activation(self, *args)
    out = self.outputs[0]
    out.betensor = result
    self.attrs.pop('lambda_func')
    return out.betensor
25 | 
26 | 
@approx_register(OpType.Square)
def square_approx(self, *args):
    """Approximation hook for Square: sets the ``is_perf_mode`` param to True."""
    # By default, it is calculated directly on AIFF
    self.params['is_perf_mode'] = True
31 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/squeeze.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | import torch
 8 | 
 9 | 
@op_register(OpType.Squeeze)
def squeeze(self, *args):
    """Forward pass of Squeeze.

    Squeezes the first input along the ``axis`` param with ``torch.squeeze``,
    stores the result in ``self.outputs[0].betensor`` and returns it.
    """
    axis = self.get_param('axis')
    inp = self.inputs[0].betensor
    out = torch.squeeze(inp, dim=axis)
    self.outputs[0].betensor = out
    return out
18 | 
19 | 
@quant_register(OpType.Squeeze)
def squeeze_quantize(self, *args):
    """Copy the input's quantization fields to the output unchanged."""
    src = self.inputs[0]
    dst = self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))
29 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/sub.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | from AIPUBuilder.Optimizer.ops.eltwise import eltwise_quantize, eltwise
 6 | 
 7 | 
 8 | @op_register(OpType.Sub)
 9 | def sub_forward(self, *args):
10 |     self.params['method'] = 'SUB'
11 |     eltwise(self, *args)
12 |     self.params.pop('method')
13 | 
14 |     return self.outputs[0].betensor
15 | 
16 | 
@quant_register(OpType.Sub)
def sub_quantize(self, *args):
    """Quantize Sub through the shared eltwise quantizer.

    The ``method`` param is set to ``'SUB'`` for the duration of the call and
    removed afterwards.
    """
    params = self.params
    params['method'] = 'SUB'
    eltwise_quantize(self, *args)
    self.params.pop('method')
22 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/swish.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.ops.silu import silu_approx
 7 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 8 | 
 9 | import torch
10 | 
11 | register_optype('Swish')
12 | 
13 | 
def swish_func(x, alpha):
    """Return ``x * sigmoid(alpha * x)`` computed element-wise."""
    gate = torch.sigmoid(alpha * x)
    return x * gate
16 | 
17 | 
@quant_register(OpType.Swish)
def swish_quantize(self, *args):
    """Quantize Swish through the generic activation quantizer.

    ``lambda_func`` (reading the ``alpha`` param on every call) and a signed
    output flag are published in ``self.attrs`` for the call and removed
    afterwards.
    """
    def float_func(x):
        return swish_func(x, self.get_param('alpha'))

    attrs = self.attrs
    attrs['lambda_func'] = float_func
    attrs['out_signed'] = True
    activation_module.unknown_quantize(self, *args)
    for key in ('lambda_func', 'out_signed'):
        self.attrs.pop(key)
25 | 
26 | 
@op_register(OpType.Swish)
def swish(self, *args):
    """Forward pass of Swish, delegated to the generic activation forward.

    The float function handed over through the temporary ``lambda_func``
    attribute multiplies the input by a float table lookup (mirror mode) when
    the node is approximated and has a ``lut`` constant; otherwise it calls
    ``swish_func`` with the ``alpha`` param.
    """
    def approximated_float_forward(self, inp_tensor):
        use_lut = self.approximated and "lut" in self.constants
        if not use_lut:
            return swish_func(inp_tensor, self.get_param('alpha'))
        table = self.constants["lut"].betensor
        gate = lookup_float_index_lut(
            inp_tensor, table, self.params['index_scale_value'], self.params['index_offset_value'],
            mirror_mode=True, value_offset_for_mirror_mode=self.params['value_offset_value'])
        return inp_tensor * gate

    def float_func(x):
        return approximated_float_forward(self, x)

    self.attrs['lambda_func'] = float_func
    result = activation_module.unknown_activation(self, *args)
    out = self.outputs[0]
    out.betensor = result
    self.attrs.pop('lambda_func')
    return out.betensor
41 | 
42 | 
@approx_register(OpType.Swish)
def swish_approx(self, *args):
    """Approximation hook for Swish; delegates to ``silu_approx``."""
    silu_approx(self, *args)
46 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/tan.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | import torch
 8 | 
 9 | 
@quant_register(OpType.Tan)
def tan_quantize(self, *args):
    """Quantize Tan through the generic activation quantizer.

    ``lambda_func`` and a signed output flag are published in ``self.attrs``
    for the duration of the call and removed afterwards.
    """
    attrs = self.attrs
    attrs['lambda_func'] = torch.tan
    attrs['out_signed'] = True
    activation_module.unknown_quantize(self, *args)
    for key in ('lambda_func', 'out_signed'):
        self.attrs.pop(key)
17 | 
18 | 
@op_register(OpType.Tan)
def tan(self, *args):
    """Forward pass of Tan, delegated to the generic activation forward.

    ``torch.tan`` is exposed through the temporary ``lambda_func`` attribute
    for the duration of the call and removed afterwards.
    """
    self.attrs['lambda_func'] = torch.tan
    result = activation_module.unknown_activation(self, *args)
    out = self.outputs[0]
    out.betensor = result
    self.attrs.pop('lambda_func')
    return out.betensor
25 | 
26 | 
@approx_register(OpType.Tan)
def tan_approx(self, *args):
    """Approximation hook for Tan: sets the ``is_perf_mode`` param to False."""
    # By default, it is calculated directly on TPC
    self.params['is_perf_mode'] = False
31 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/tf_ops/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/thresholdrelu.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | import torch
 8 | 
 9 | # y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
10 | 
11 | register_optype('THRESHOLDEDRELU')
12 | 
13 | 
@quant_register(OpType.THRESHOLDEDRELU)
def thresholdedrelu_quantize(self, *args):
    """Quantize ThresholdedRelu through the generic activation quantizer.

    ``alpha`` is read once; the output is treated as signed only when
    ``alpha`` is negative. ``lambda_func`` and ``out_signed`` are published in
    ``self.attrs`` for the call and removed afterwards.
    """
    alpha = float(self.get_param("alpha"))

    def float_func(x):
        return torch.nn.functional.threshold(x, alpha, 0)

    self.attrs['lambda_func'] = float_func
    self.attrs['out_signed'] = alpha < 0.0
    activation_module.unknown_quantize(self, *args)
    for key in ('lambda_func', 'out_signed'):
        self.attrs.pop(key)
22 | 
23 | 
@op_register(OpType.THRESHOLDEDRELU)
def thresholdedrelu(self, *args):
    """Forward pass of ThresholdedRelu, delegated to the generic activation forward.

    The float function applies ``torch.nn.functional.threshold`` with the
    ``alpha`` param (read on every call) and a replacement value of 0.
    """
    def float_forward(self, inp_tensor):
        threshold = float(self.get_param("alpha"))
        return torch.nn.functional.threshold(inp_tensor, threshold, 0)

    def float_func(x):
        return float_forward(self, x)

    self.attrs['lambda_func'] = float_func
    result = activation_module.unknown_activation(self, *args)
    out = self.outputs[0]
    out.betensor = result
    self.attrs.pop('lambda_func')
    return out.betensor
34 | 
35 | 
@approx_register(OpType.THRESHOLDEDRELU)
def thresholdrelu_approx(self, *args):
    """Approximation hook for ThresholdedRelu: sets the ``is_perf_mode`` param to True."""
    # By default, it is calculated directly on AIFF
    self.params['is_perf_mode'] = True
40 | 
41 | 
def threshold_out_signed(self):
    """Return False when the node's ``alpha`` param is >= 0, True otherwise."""
    threshold = float(self.get_param("alpha"))
    is_non_negative = threshold >= 0
    return not is_non_negative
45 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/tile.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | from AIPUBuilder.Optimizer.logger import OPT_WARN
 8 | 
 9 | 
@op_register(OpType.Tile)
def tile(self, *args):
    """Forward pass of Tile.

    Repeats the first input according to the ``repeats`` param using
    ``Tensor.repeat``, stores the result in ``self.outputs[0].betensor`` and
    returns it. A warning is logged when the number of repeat factors differs
    from the input rank.
    """
    inp = self.inputs[0].betensor
    out = self.outputs[0]
    reps = self.get_param('repeats')
    if inp.dim() != len(reps):
        OPT_WARN('please check the dim between input.dim and len(repeats) in Tile Op')
    out.betensor = inp.repeat(reps)
    return out.betensor
20 | 
21 | 
@quant_register(OpType.Tile)
def tile_quantize(self, *args):
    """Set the output quantization parameters for Tile.

    All quantization fields are copied from the input. When the output has a
    ``key_axis``, scale and zero point are additionally repeated: by the
    ``repeats`` entry of that axis if the input also has a ``key_axis``,
    otherwise by the output's ``ir_shape`` along that axis.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant', 'qmin', 'qmax'):
        setattr(dst, field, getattr(src, field))

    if dst.key_axis is None:
        return

    axis = dst.key_axis
    if src.key_axis is not None:
        factor = self.params['repeats'][axis]
    else:
        factor = dst.ir_shape[axis]
    dst.scale = src.scale.repeat(factor)
    dst.zerop = src.zerop.repeat(factor)
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/transpose.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.ops.permute import *
 7 | 
 8 | 
@op_register(OpType.Transpose)
def transpose(self, *args):
    """Forward pass of Transpose; returns the result of ``permute``."""
    return permute(self, *args)
12 | 
13 | 
@quant_register(OpType.Transpose)
def transpose_quantize(self, *args):
    """Quantize Transpose by delegating to ``permute_quantize``."""
    permute_quantize(self, *args)
17 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/trunc.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | from AIPUBuilder.Optimizer.utils import *
 6 | import AIPUBuilder.Optimizer.ops.activation as activation_module
 7 | import torch
 8 | 
# Register the 'Trunc' op type name. Presumably this is what makes
# OpType.Trunc resolvable for the decorators below -- confirm against
# register_optype.
register_optype('Trunc')
10 | 
11 | 
@op_register(OpType.Trunc)
def trunc(self, *args):
    """Forward pass of Trunc.

    Installs ``torch.trunc`` as the node's temporary ``lambda_func`` attribute,
    runs the generic ``unknown_activation`` forward with it, and stores the
    result in the first output tensor.

    :return: the betensor of the first output.
    """
    self.attrs['lambda_func'] = torch.trunc
    try:
        self.outputs[0].betensor = activation_module.unknown_activation(self, *args)
    finally:
        # Always drop the temporary attribute, even if the forward raised,
        # so a failed call does not leave a stale callable on the node.
        self.attrs.pop('lambda_func')
    return self.outputs[0].betensor
18 | 
19 | 
@quant_register(OpType.Trunc)
def trunc_quantize(self, *args):
    """Quantize Trunc through the generic ``unknown_quantize`` helper.

    ``lambda_func`` (``torch.trunc``) and ``out_signed`` (True) are set on the
    node's attrs only for the duration of the helper call.
    """
    self.attrs['lambda_func'] = torch.trunc
    self.attrs['out_signed'] = True
    try:
        activation_module.unknown_quantize(self, *args)
    finally:
        # Remove both temporary attributes even when quantization fails.
        self.attrs.pop('lambda_func')
        self.attrs.pop('out_signed')
27 | 
28 | 
@approx_register(OpType.Trunc)
def trunc_approx(self, *args):
    """Approximation hook for Trunc.

    Not currently used, because the approximated path is the same as the
    float process; it only switches ``is_perf_mode`` off in the node params.
    """
    self.params['is_perf_mode'] = False
33 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/ops/where.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | from AIPUBuilder.Optimizer.utils import *
 7 | from AIPUBuilder.Optimizer.ops.select import *
 8 | import torch
 9 | 
10 | register_optype('Where')
11 | 
12 | 
@op_register(OpType.Where)
def where_forward(self, *args):
    """Forward pass of Where.

    With more than one input the op behaves like Select and is delegated to
    ``select_forward``. With a single input it returns the coordinates of the
    non-zero elements of ``betensor + zerop``, arranged like ``tf.where``:
    one row per element, one column per input dimension.

    The output always has ``input.numel()`` rows. Rows past the valid ones
    are padding: the first padding row is filled with the maximum value of the
    output dtype range, all remaining padding rows are zeros.

    :return: the betensor of the first output.
    """
    if len(self.inputs) > 1:
        return select_forward(self, *args)

    inp = self.inputs[0]
    out = self.outputs[0]
    data = inp.betensor
    device = data.device

    indexes = torch.where(data + inp.zerop)
    input_dim = data.dim()
    invalid_num = data.numel() - indexes[0].numel()

    # Stack the per-dimension index vectors directly into (valid_num, input_dim)
    # on the tensor's own device instead of round-tripping through numpy.
    valid_rows = torch.stack(indexes, dim=1).long().to(device)

    _, invalid_value = dtype2range(out.dtype)
    # Only the first padding row carries the marker value; the rest are zeros.
    first_invalid_num = min(1, invalid_num)
    invalid_tensor = torch.ones([first_invalid_num, input_dim], device=device).long() * invalid_value

    other_invalid_num = max(0, invalid_num - 1)
    other_invalid_tensor = torch.zeros([other_invalid_num, input_dim], device=device).long()

    out.betensor = torch.cat([valid_rows, invalid_tensor, other_invalid_tensor], dim=0)
    return out.betensor
37 | 
38 | 
@quant_register(OpType.Where)
def where_quantize(self, *args):
    """Quantize Where.

    With more than one input the op is quantized like Select. In the
    single-input (index-producing) form the output keeps the dtype recorded in
    ``layer_top_type_original`` and is marked quantization-invariant with
    scale 1.0 and zero point 0.
    """
    if len(self.inputs) > 1:
        select_quantize(self, *args)
        return

    out = self.outputs[0]
    out.dtype = str2dtype(self.attrs['layer_top_type_original'][0])
    out.qbits = dtype2bits(out.dtype)
    out.scale = 1.0
    out.zerop = 0
    out.qinvariant = True
52 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/passes/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from . passes import *
5 | from . convert_resize_to_convolution import convert_resize_to_convolution
6 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/passes/check_quantization_info_s1.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | 
 6 | 
 7 | def check_quantization_info(graph: PyGraph, config=None):
 8 |     """
 9 |     check the 16bits quantization should be symmetric
10 |     :param graph:
11 |     :param config:
12 |     :return:
13 |     """
14 |     for node in graph.nodes:
15 |         activation_mode = node.attrs.get('q_mode_activation')
16 |         activation_bits = node.attrs.get('q_bits_activation')
17 |         if activation_bits >= 16 and QuantMode.is_asymmetric(activation_mode):
18 |             sym_activation_mode = QuantMode.to_symmetric(activation_mode)
19 |             node.attrs['q_mode_activation'] = sym_activation_mode
20 |             OPT_DEBUG(f"{node} changes quantization method of activation tensor "
21 |                       f"from {activation_mode} to {sym_activation_mode}")
22 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/passes/detect_inf_mask_nodes.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | import torch
 6 | 
 7 | 
 8 | def detect_inf_mask_nodes(graph, config):
 9 |     if config.enable_pass_detect_inf_mask_nodes:
10 |         # filter batchnorm weight and bias's inf,-inf
11 |         for i, n in enumerate(graph.nodes):
12 |             if n.type == OpType.BatchNorm:
13 |                 aa = n.constants['weights'].betensor + n.constants['biases'].betensor
14 |                 if torch.sum(aa) == 0 and torch.max(n.constants['biases'].betensor) < -65536:
15 |                     n.constants['weights'].betensor = torch.zeros(
16 |                         n.constants['weights'].ir_shape, device=n.constants['weights'].device) + 32767
17 |                     n.constants['biases'].betensor = torch.zeros(
18 |                         n.constants['biases'].ir_shape, device=n.constants['weights'].device) - 32767
19 |             if n.type == OpType.Constant:
20 |                 n.constants['weights'].betensor[n.constants['weights'].betensor < -32767] = -32768
21 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/passes/global_calibration_prepare.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.logger import *
 7 | 
 8 | 
 9 | def global_calibration_prepare(graph: PyGraph, config):
10 |     for method in config.global_calibration:
11 |         mname = method[0]
12 |         if 'smooth_quant_zy' == mname:
13 |             for node in graph.nodes:
14 |                 if node.type in [OpType.FullyConnected, ]:
15 |                     node.inputs[0].key_axis = len(node.inputs[0].ir_shape) - 1
16 |         elif 'awq_zy' == mname:
17 |             # def add_inp_abs_plh_for_fc(n: PyNode):
18 |             #     inp_abs = n.inputs[0].betensor.abs().float()
19 |             #     if len(n.placeholders) < 1:
20 |             #         plh = PyTensor(n.name+'/inp_abs', dtype=Dtype.FP32)
21 |             #         n.placeholders.append(plh)
22 |             #     n.placeholders[0].betensor = inp_abs
23 |             #     n.placeholders[0].key_axis = len(node.inputs[0].ir_shape) - 1
24 |             for node in graph.nodes:
25 |                 if node.type in [OpType.FullyConnected, ]:
26 |                     node.inputs[0].key_axis = len(node.inputs[0].ir_shape) - 1
27 |                     # node.forward_hook = add_inp_abs_plh_for_fc
28 |         else:
29 |             pass
30 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/passes/optimize_x2_wdc.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils import *
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.logger import *
 7 | 
 8 | 
 9 | def optimize_x2_wdc(graph: PyGraph, config=None):
10 |     for node in graph.nodes:
11 |         if not str(node.attrs['optimize_wdc_for_x2']).lower() == 'true':
12 |             continue
13 |         q_mode_weight = node.attrs['q_mode_weight']
14 |         q_bits_weight = node.attrs['q_bits_weight']
15 |         for k, w in node.constants.items():
16 |             for trial in range(4):
17 |                 w.scale, w.zerop, w.qmin, w.qmax, w.dtype =\
18 |                     get_linear_quant_params_from_tensor(w,
19 |                                                         q_mode_weight, q_bits_weight, is_signed=True)
20 |                 # We only try to scale up weight 4 times to keep acc
21 |                 fake_quant = linear_quantize_clip(w.betensor, w.broadcast_scale, w.broadcast_zerop, w.qmin, w.qmax)
22 |                 comp_rate = simulate_x2_wdc(fake_quant, q_bits_weight)
23 |                 if comp_rate < 0.9:
24 |                     OPT_DEBUG(f"weight {k} gets comp rate of {comp_rate} at step {trial}")
25 |                     break
26 |                 else:
27 |                     w.min *= 2
28 |                     w.max *= 2
29 |                     OPT_INFO(f"Scaling up {node.name}'s tensor {k} by 2 times to adapt WDC, acc may be affacted")
30 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/passes/shrink_pow_exponent_s1.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | 
 6 | 
 7 | def shrink_pow_exponent(graph, config=None):
 8 |     def criteria(n):
 9 |         if n is not None and n.type == OpType.Pow:
10 |             pow_parent = None
11 |             for parent in n.parents:
12 |                 for outp in parent.outputs:
13 |                     if outp.name == n.inputs[1].name:
14 |                         pow_parent = parent
15 |                         break
16 |                 if pow_parent:
17 |                     break
18 |             pow_nods, count_root, count_constant = pow_parent.get_ancestors()
19 |             if count_root > 0 and count_root == count_constant:
20 |                 for node in pow_nods:
21 |                     node.forward()
22 |                 unq = n.inputs[1].betensor.unique()
23 |                 if unq.numel() == 1:
24 |                     return True
25 |         return False
26 |     # for powN: collect all inputs edge , if all of them == constant, then exponent should be N
27 |     for n in graph.nodes:
28 |         if criteria(n):
29 |             unq = n.inputs[1].betensor.unique()
30 |             n.params['exponent'] = float(unq[0])
31 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/passes/transfer_op_to_reshape_op_s3.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | 
 8 | def criteria(n):
 9 |     ret = False
10 |     if n.type == OpType.Cast:
11 |         if (n.parents[0].attrs['q_mode_activation'] == n.attrs['q_mode_activation'] and
12 |             len(n.inputs) > 0 and len(n.outputs) > 0 and
13 |                 n.inputs[0].dtype == n.outputs[0].dtype):
14 |             ret = True
15 |     elif n.type == OpType.FakeQuantWithMinMaxVars:
16 |         ret = True
17 |     else:
18 |         pass
19 |     return ret
20 | 
21 | 
def transfer_op_to_reshape_op(g, config):
    """Replace ops that ``criteria`` accepts with lightweight Reshape nodes.

    The replacement Reshape keeps the original node's name and a clone of its
    attrs, is flagged as ``additional``, and reshapes to the original first
    output's ``ir_shape``. All replacements are collected first and only then
    applied through ``g.replace_node_safely``.
    """
    def make_reshape(src):
        # Build the Reshape stand-in for one matching node.
        reshape = PyNode(src.name, OpType.Reshape)
        reshape.additional = True
        reshape.attrs.update(src.attrs.clone())
        reshape.params['shape'] = src.outputs[0].ir_shape
        return reshape

    replacements = [(n, make_reshape(n)) for n in g.nodes if n is not None and criteria(n)]
    for old_node, new_node in replacements:
        g.replace_node_safely(old_node, new_node)
37 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_cocokp.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import numpy as np
 5 | import torch
 6 | 
 7 | from AIPUBuilder.Optimizer.framework import *
 8 | 
 9 | from torch.utils.data import Dataset
10 | from collections import defaultdict
11 | 
12 | 
@register_plugin(PluginType.Dataset, '1.0')
class CocokpDataset(Dataset):
    """
    This CocoDataset plugin is used for yolov4_onnx/yolov4_tflite/fasterrcnn_tensorflow models in Optimizer.
    """

    def __init__(self, data_file=None, label_file=None):
        """
        :param data_file: a .npy file
        :param label_file: a dict format in .npy file and format is {
                        area: list box area,
                        bbox: list of boxes,
                        keypoint: list of 17*3}
        """
        # The data is memory-mapped and then converted to float32.
        self.data = np.load(data_file, mmap_mode='c').astype(np.float32)
        self.label = None
        if label_file is not None:
            # The label file stores a pickled dict wrapped in a 0-d array.
            self.label = np.load(label_file, allow_pickle=True).item()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[image, label]``; the label is ``{}`` when no label file was given."""
        image_data = self.data[idx]
        sample = [image_data, {}]
        if self.label is not None:
            sample[1] = self.label[idx]
        return sample

    @staticmethod
    def collate_fn(batch):
        """Merge samples into ``(batch_data, batch_label)``.

        ``batch_data`` stacks all images along a new leading dimension (None
        for an empty batch). ``batch_label`` holds one dict of tensors per
        sample that has a non-empty label; samples without labels are skipped.
        """
        # A single stack avoids re-copying the growing batch for every sample.
        images = [torch.tensor(sample[0]) for sample in batch]
        batch_data = torch.stack(images, 0) if images else None

        batch_label = [
            {k: torch.tensor(v) for k, v in sample[1].items()}
            for sample in batch
            if sample[1]
        ]
        return batch_data, batch_label
61 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_iwslt.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from torch.utils.data import Dataset
 8 | import numpy as np
 9 | 
10 | 
@register_plugin(PluginType.Dataset, '1.0')
class IWSLTDataset(Dataset):
    """
    This IWSLTDataset plugin is used for the transformer_tensorflow model in Optimizer.
    """

    # when used as calibration dataset, label_file can be omitted.
    def __init__(self, data_file, label_file=None):
        """
        :param data_file: npy file with the input samples.
        :param label_file: optional npy file with the labels.
        """
        self.data = None
        self.label = None
        try:
            # Preferred path: copy-on-write memory map of a plain array.
            self.data = np.load(data_file, mmap_mode='c')
        except ValueError:
            # Fall back to a pickled load when the plain load is rejected.
            self.data = np.load(data_file, allow_pickle=True)
        if label_file is None:
            return
        self.label = np.load(label_file, allow_pickle=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[data, label]``; the label is ``-inf`` when no label file was given."""
        item = self.data[idx]
        target = float("-inf") if self.label is None else self.label[idx]
        return [item, target]
36 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_mpii.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from torch.utils.data import Dataset
 8 | import numpy as np
 9 | 
10 | 
@register_plugin(PluginType.Dataset, '1.0')
class MpiiDataset(Dataset):
    """
    This MpiiDataset plugin is mainly used for stacked_hourglass_tensorflow model in Optimizer.
    MPII Human Pose dataset is a state of the art benchmark for evaluation of articulated human pose estimation.
    The dataset includes around 25K images containing over 40K people with annotated body joints.
    http://human-pose.mpi-inf.mpg.de/
    """

    def __init__(self, data_file=None, label_file=None):
        '''
        :param data_file: ndarray in npy file
        :param label_file: a dict format in npy file and the keys of dict include
        ['__header__', '__version__', '__globals__', 'jnt_missing', 'pos_gt_src', 'headboxes_src', 'center', 'scale'].
        '''
        self.data = np.load(data_file, allow_pickle=True)
        self.label = None
        if label_file is None:
            return
        self.label = np.load(label_file, allow_pickle=True).tolist()
        # Keep only the real annotation keys; names containing '__' are skipped.
        self.keys = [name for name in self.label if '__' not in name]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[data, label_dict]``; the dict is empty when no label file was given."""
        item = self.data[idx]
        if self.label is None:
            return [item, {}]

        # 'center' and 'scale' are indexed on their first axis, every other
        # entry on its last axis.
        first_axis_keys = ['center', 'scale']
        pick_label = {}
        for name in self.keys:
            values = self.label[name]
            pick_label[name] = values[idx] if name in first_axis_keys else values[..., idx]
        return [item, pick_label]
46 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_nhwcrgb2nhwcbgr.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | from torch.utils.data import Dataset
 9 | import numpy as np
10 | 
11 | 
@register_plugin(PluginType.Dataset, '1.0')
class NUMPYNHWCRGB2BGR(Dataset):
    """
    This NUMPYNHWCRGB2BGR dataset plugin converts RGB to BGR along the channel (last) dimension of NHWC data.
    """

    def __init__(self, data_file, label_file=None):
        """
        :param data_file: ndarray in npy file.
        :param label_file: optional ndarray in npy file.
        """
        self.data = None
        self.label = None
        try:
            rgb = np.load(data_file, mmap_mode='c')
            # rgb -> bgr: reverse the last axis and materialise the result.
            self.data = np.flip(rgb, -1).copy()
        except Exception:
            OPT_FATAL('the data of NUMPYNHWCRGB2BGR plugin should be Numpy.ndarray and allow_pickle=False.')
        if label_file is None:
            return
        try:
            self.label = np.load(label_file, mmap_mode='c')
        except ValueError:
            # Labels that cannot be loaded as a plain array are loaded pickled.
            self.label = np.load(label_file, allow_pickle=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[[data], label]``; the label is ``-inf`` when no label file was given."""
        inputs = [self.data[idx]]
        target = float("-inf") if self.label is None else self.label[idx]
        return [inputs, target]
41 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_numpy.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | from torch.utils.data import Dataset
 9 | import numpy as np
10 | import torch
11 | 
12 | 
@register_plugin(PluginType.Dataset, '1.0')
class NumpyDataset(Dataset):
    """
    This NumpyDataset plugin is mainly used for image classification domain models which have one input.
    The data in the npy file has the same data layout as the model input.

    All preprocessing is assumed to be done before the data is saved to the npy file
    if the CompassIR does not contain a preprocess part.
    """
    # when used as calibration dataset, label_file can be omitted.

    def __init__(self, data_file, label_file=None):
        '''
        :param data_file: ndarray in npy file.
        :param label_file: ndarray in npy file.
        '''
        self.data = None
        self.label = None

        try:
            self.data = np.load(data_file, mmap_mode='c')
        except Exception:
            OPT_FATAL('the data of NumpyDataset plugin should be Numpy.ndarray and allow_pickle=False.')
        if label_file is None:
            return
        try:
            self.label = np.load(label_file, mmap_mode='c')
        except ValueError:
            # Labels that cannot be loaded as a plain array are loaded pickled.
            self.label = np.load(label_file, allow_pickle=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[[data], label]``; the label is ``-inf`` when no label file was given."""
        inputs = [self.data[idx]]
        target = float("-inf") if self.label is None else self.label[idx]
        return [inputs, target]
50 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_numpynchw2nhwc.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | from torch.utils.data import Dataset
 9 | import numpy as np
10 | 
11 | 
@register_plugin(PluginType.Dataset, '1.0')
class NumpyNCHW2NHWCDataset(Dataset):
    """
    This NumpyNCHW2NHWCDataset plugin is used when the dataset is stored in NCHW format but the CompassIR needs NHWC.
    The plugin transposes the NCHW data to NHWC, which meets the CompassIR requirement.

    All preprocessing is assumed to be done before the data is saved to the npy file
    if the CompassIR does not contain a preprocess part.
    """

    def __init__(self, data_file, label_file=None):
        '''
        :param data_file: ndarray in npy file.
        :param label_file: ndarray in npy file.
        '''
        self.data = None
        self.label = None
        try:
            nchw = np.load(data_file, mmap_mode='c')
            # NCHW -> NHWC
            self.data = np.transpose(nchw, [0, 2, 3, 1])
        except Exception:
            OPT_FATAL('the data of NumpyNCHW2NHWCDataset plugin should be Numpy.ndarray and allow_pickle=False.')
        if label_file is None:
            return
        try:
            self.label = np.load(label_file, mmap_mode='c')
        except ValueError:
            # Labels that cannot be loaded as a plain array are loaded pickled.
            self.label = np.load(label_file, allow_pickle=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[[data], label]``; the label is ``-inf`` when no label file was given."""
        inputs = [self.data[idx]]
        target = float("-inf") if self.label is None else self.label[idx]
        return [inputs, target]
47 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_numpynhwc2nchw.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | from torch.utils.data import Dataset
 9 | import numpy as np
10 | 
11 | 
@register_plugin(PluginType.Dataset, '1.0')
class NumpyNHWC2NCHWDataset(Dataset):
    """
    This NumpyNHWC2NCHWDataset plugin is used when the dataset is stored in NHWC format but the CompassIR needs NCHW.
    The plugin transposes the NHWC data to NCHW, which meets the CompassIR requirement.

    All preprocessing is assumed to be done before the data is saved to the npy file
    if the CompassIR does not contain a preprocess part.
    """

    def __init__(self, data_file, label_file=None):
        '''
        :param data_file: ndarray in npy file.
        :param label_file: ndarray in npy file.
        '''
        self.data = None
        self.label = None
        try:
            nhwc = np.load(data_file, mmap_mode='c')
            # NHWC -> NCHW
            self.data = np.transpose(nhwc, [0, 3, 1, 2])
        except Exception:
            OPT_FATAL('the data of NumpyNHWC2NCHWDataset plugin should be Numpy.ndarray and allow_pickle=False.')
        if label_file is None:
            return
        try:
            self.label = np.load(label_file, mmap_mode='c')
        except ValueError:
            # Labels that cannot be loaded as a plain array are loaded pickled.
            self.label = np.load(label_file, allow_pickle=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[[data], label]``; the label is ``-inf`` when no label file was given."""
        inputs = [self.data[idx]]
        target = float("-inf") if self.label is None else self.label[idx]
        return [inputs, target]
47 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_numpynhwcrgb2ncbgrhw.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | from torch.utils.data import Dataset
 9 | import numpy as np
10 | 
11 | 
@register_plugin(PluginType.Dataset, '1.0')
class NumpyNHWCRGB2NCBGRHWDataset(Dataset):
    """
    This NumpyNHWCRGB2NCBGRHWDataset plugin first changes RGB to BGR along the channel dimension and then
    transposes the NHWC data to NCHW, which meets the CompassIR requirement.

    All preprocessing is assumed to be done before the data is saved to the npy file
    if the CompassIR does not contain a preprocess part.
    """

    def __init__(self, data_file, label_file=None):
        '''
        :param data_file: ndarray in npy file.
        :param label_file: ndarray in npy file.
        '''
        self.data = None
        self.label = None
        try:
            rgb_nhwc = np.load(data_file, mmap_mode='c')
            # rgb -> bgr on the last axis, then NHWC -> NCHW
            bgr_nhwc = np.flip(rgb_nhwc, -1).copy()
            self.data = np.transpose(bgr_nhwc, [0, 3, 1, 2])
        except Exception:
            OPT_FATAL('the data of NumpyNHWCRGB2NCBGRHWDataset plugin should be Numpy.ndarray and allow_pickle=False.')
        if label_file is None:
            return
        try:
            self.label = np.load(label_file, mmap_mode='c')
        except ValueError:
            # Labels that cannot be loaded as a plain array are loaded pickled.
            self.label = np.load(label_file, allow_pickle=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[[data], label]``; the label is ``-inf`` when no label file was given."""
        inputs = [self.data[idx]]
        target = float("-inf") if self.label is None else self.label[idx]
        return [inputs, target]
48 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_random.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from torch.utils.data import Dataset
 8 | 
 9 | 
@register_plugin(PluginType.Dataset, '1.0')
class RandomDataset(Dataset):
    """
    This RandomDataset plugin generates random data/labels from the given sample shape and label shape.

    Every sample and label is drawn from a standard normal distribution and stored as float32.
    """

    def __init__(self, sample_shape, label_shape=(), num_sample=1, seed=None):
        """
        :param sample_shape: shape of one generated sample.
        :param label_shape: shape of one generated label; ``()`` gives a 0-d array.
        :param num_sample: number of (sample, label) pairs to generate.
        :param seed: optional seed for numpy's global random state; any value
                     other than None (including 0) is applied.
        """
        import numpy as np
        # Compare with None so that seed=0 is honoured as well.
        if seed is not None:
            np.random.seed(seed)
        self.num_sample = num_sample
        self.sample_shape = sample_shape
        self.label_shape = label_shape
        self.data = []
        self.label = []
        for _ in range(num_sample):
            # np.random.randn() returns a plain Python float for an empty
            # shape, so convert through np.asarray instead of calling .astype.
            self.data.append(np.asarray(np.random.randn(*tuple(self.sample_shape)), dtype=np.float32))
            self.label.append(np.asarray(np.random.randn(*tuple(self.label_shape)), dtype=np.float32))

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]

    def __len__(self):
        return len(self.data)
36 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_sphereface_lfw.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | from torch.utils.data import Dataset
 9 | import numpy as np
10 | import torch
11 | 
12 | 
@register_plugin(PluginType.Dataset, '1.0')
class SphereFaceLFWDataset(Dataset):
    """
    This SphereFaceLFWDataset plugin is mainly used for sphereface_caffe model.
    The data in the npy file has the same data layout as the model input.

    All preprocessing is assumed to be done before the data is saved to the npy file
    if the CompassIR does not contain a preprocess part.
    """

    def __init__(self, data_file, label_file=None):
        '''
        :param data_file: ndarray in npy file.
        :param label_file: npy file holding either a dict (its values become
                           the per-sample labels, in key order) or a plain
                           array indexed by sample.
        '''
        self.data = None
        self.label = []

        self.data = np.load(data_file, mmap_mode='c')
        if label_file is not None:
            try:
                label = np.load(label_file, mmap_mode='c')
            except ValueError:
                # A pickled dict is rejected by the plain load; unwrap it here.
                label = np.load(label_file, allow_pickle=True).item()
            if hasattr(label, 'keys'):
                # Mapping-like labels: collect the values in key order.
                self.label = [label[key] for key in list(label.keys())]
            else:
                # Plain array labels have no keys(); use them per sample.
                self.label = list(label)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[[data], label]``; the label is ``-inf`` when no labels were loaded."""
        sample = [[self.data[idx]], float("-inf")]
        if len(self.label) != 0:
            sample[1] = self.label[idx]
        return sample
48 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_stable_diffusion_unet.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import numpy as np
 5 | from torch.utils.data import Dataset, DataLoader
 6 | from AIPUBuilder.Optimizer.framework import *
 7 | 
 8 | 
 9 | @register_plugin(PluginType.Dataset, '0.01')
10 | class StableDiffusionUNetDataset(Dataset):
11 |     def __init__(self, data_file, label_file=None):
12 |         dataset = np.load(data_file, allow_pickle=True).item()
13 |         keys = list(dataset.keys())
14 |         self.s_dataset = dataset[keys[0]]
15 |         self.t_dataset = dataset[keys[1]]
16 |         self.h_dataset = dataset[keys[2]]
17 |         '''
18 |         self.s_dataset = dataset['input1']
19 |         self.t_dataset = dataset['input2']
20 |         self.h_dataset = dataset['input3']
21 |         '''
22 | 
23 |     def __getitem__(self, idx):
24 |         sample = [[self.s_dataset[idx], self.t_dataset[idx], self.h_dataset[idx]], float("-inf")]
25 | 
26 |         return sample
27 | 
28 |     def __len__(self):
29 |         return len(self.s_dataset)
30 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_dataset_tusimple.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | import numpy as np
 6 | 
 7 | from AIPUBuilder.Optimizer.framework import *
 8 | 
 9 | from torch.utils.data import Dataset
10 | from collections import defaultdict
11 | 
12 | 
@register_plugin(PluginType.Dataset, '1.0')
class tusimpleDataset(Dataset):
    """
    Dataset plugin reading samples from an npy file, optionally paired with lane labels.
    """

    def __init__(self, data_file=None, label_file=None):
        """
        :param data_file: npy file holding the samples; they are converted to float32 on load.
        :param label_file: optional npy file holding a pickled object that is indexed by sample
                           position in __getitem__.
        """
        samples = np.load(data_file, mmap_mode='c')
        self.data = samples.astype(np.float32)
        if label_file is not None:
            # NOTE: allow_pickle=True unpickles the file content, so only load trusted files.
            self.label = np.load(label_file, allow_pickle=True).item()
        else:
            self.label = None

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Return ``[image, info]``; ``info`` stays an empty dict when no label file was given,
        otherwise it carries 'image_name', 'lanes' and 'h_samples' as numpy arrays.
        """
        image = self.data[idx]
        if self.label is None:
            return [image, {}]

        entry = self.label[idx]
        info = {
            # the sample index doubles as the image name
            'image_name': np.array(idx),
            'lanes': np.array(entry[0]),
            'h_samples': np.array(entry[1]),
        }
        return [image, info]
38 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_CosDistance_with_seqlen.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | 
 9 | 
@register_plugin(PluginType.Metric, '1.0')
class CosDistancewseqlenMetric(OptBaseMetric):
    """
    Cosine-similarity metric used for the RNNT_encoder model in Optimizer.

    The label has two elements: [label_value(tensor), actual_len]. For every batch entry only
    the leading actual_len part of the predicted value and of the label value is compared.
    """

    def __init__(self):
        self.sim = []
        self.cos = torch.nn.CosineSimilarity(dim=-1)

    def __call__(self, pred, target):
        """Record one cosine similarity per batch entry of ``pred[0]`` against ``target[0]``."""
        predictions = pred[0].cpu()
        labels = target[0].cpu()
        lengths = target[1].cpu()
        for row in range(labels.shape[0]):
            valid = lengths[row]
            # drop everything behind the actual length, then compare as flat vectors
            clipped_pred = predictions[row][:valid].reshape([-1])
            clipped_label = labels[row][:valid].reshape([-1])
            self.sim.append(self.cos(clipped_pred, clipped_label))

    def reset(self):
        """Forget all recorded similarities."""
        self.sim = []

    def compute(self):
        """Return the mean of all recorded similarities as a float."""
        # every recorded entry is a single value, so the stacked tensor has rank 1
        stacked = torch.Tensor(self.sim)
        return float(stacked.mean(0))

    def report(self):
        """Return a one-line summary of the mean similarity."""
        score = self.compute()
        return "cosine similarity is %f" % (score)
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_FlattenCosDistance.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | import numpy
 9 | 
10 | # For OPT OP Test
11 | # It uses the mean value over all outputs of all batches
12 | 
13 | 
@register_plugin(PluginType.Metric, '0.01')
class FlattenCosDistanceMetric(OptBaseMetric):
    """
    Cosine-similarity metric for op tests.

    Each output is flattened per batch entry and compared with its target; the reported value is
    the mean over all outputs and all calls. Scalar outputs are compared for equality instead.
    """

    def __init__(self):
        self.cos = torch.nn.CosineSimilarity()
        self.sim = []

    def __call__(self, pred, target):
        """
        Record one similarity value per output for this call.

        :param pred: sequence of predicted outputs.
        :param target: sequence of reference outputs, paired with ``pred`` by position.
        """
        sim_per_output = []
        for o_p, o_t in zip(pred, target):
            if len(o_p.shape):
                b = o_p.shape[0]
                x = o_p.reshape(b, -1).float()
                y = o_t.reshape(b, -1).float()
                sim = numpy.mean(self.cos(x, y).cpu().flatten().numpy())
            else:  # if output is a scalar
                x = o_p
                y = o_t[0]
                # Store a plain Python value rather than a tensor so that numpy.array() in
                # compute()/report() receives host-side numbers only.
                sim = (x == y).item()

            sim_per_output.append(sim)
        self.sim.append(sim_per_output)

    def reset(self):
        """Forget all recorded similarities."""
        self.sim = []

    def compute(self):
        """Return the mean similarity over all outputs and all calls."""
        sim = numpy.array(self.sim)
        return numpy.mean(sim)

    def report(self):
        """Return the overall mean followed by one line per output."""
        txt = ''
        sims = numpy.array(self.sim).T  # [output, per call result]
        txt += "cosine similarity is %f" % numpy.mean(sims)
        for i, sim in enumerate(sims):
            txt += "\noutput %d: cosine similarity is %f" % (i, numpy.mean(sim))
        return txt
50 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_KeywordSpotting.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | import torch
 9 | 
10 | 
@register_plugin(PluginType.Metric, '1.0')
class KeywordSpottingMetric(OptBaseMetric):
    """
    Accuracy metric used for the kws_gru/kws_lstm models in Optimizer.
    accuracy = correct / total.
    """

    def __init__(self, K=1):
        # K is accepted but not read anywhere in this class: the comparison is always top-1.
        self.total = 0
        self.correct = 0

    def __call__(self, pred, target):
        """Count the batch entries whose top-1 index in ``pred[0]`` equals that of ``target``."""
        _, predicted = torch.topk(pred[0], 1, dim=-1)  # NHWC
        _, expected = torch.topk(target, 1, dim=-1)  # NHWC
        batch = predicted.shape[0]
        for row in range(batch):
            matched = predicted[row][0] == expected[row][0]
            if matched:
                self.correct += 1
        self.total += batch

    def reset(self):
        """Clear the counters."""
        self.total = 0
        self.correct = 0

    def compute(self):
        """Return correct / total, or ``-inf`` (after logging an error) when nothing was counted."""
        try:
            return float(self.correct) / float(self.total)
        except ZeroDivisionError:
            OPT_ERROR('zeroDivisionError: kws acc total label = 0')
            return float("-inf")

    def report(self):
        """Return a one-line summary of the accuracy."""
        accuracy = self.compute()
        return "accuracy is %f" % (accuracy)
45 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_LMHead.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | import torch
 6 | 
 7 | 
 8 | @register_plugin(PluginType.Metric, '1.0')
 9 | class LMHeadMetric(OptBaseMetric):
10 |     '''
11 |     Compare logits PPL with label
12 |     CrossEntropyLoss([batch, vocab_size], [batch, 1(token id)]) -> [batch, 1(neg log liklihood)]
13 |     PPL = exp([batch, 1(nll)].mean())
14 |     '''
15 | 
16 |     def __init__(self):
17 |         self.nlls = []
18 |         self.loss = torch.nn.CrossEntropyLoss()
19 | 
20 |     def __call__(self, pred, target):
21 |         vocab = pred[0][:, -1, :]  # [batch, seqlen, vocabsize] -> [batch, vocabsize]
22 |         nll = self.loss(vocab, target[0][:, 0])
23 |         self.nlls.append(nll)
24 | 
25 |     def reset(self):
26 |         self.nlls = []
27 | 
28 |     def compute(self):
29 |         total_nll = torch.tensor(self.nlls)
30 |         return torch.exp(total_nll.mean())
31 | 
32 |     def report(self):
33 |         return f"Correct/Total: {self.compute()}"
34 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_MaxAbsError.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | import numpy
 9 | 
10 | # For OPT OP Test
11 | 
12 | 
@register_plugin(PluginType.Metric, '0.01')
class MaxAbsErrorMetric(OptBaseMetric):
    """
    Maximum-absolute-error metric for op tests.

    Each call records, per output, the largest absolute difference between prediction and
    target; the reported value is the mean of those maxima.
    """

    def __init__(self):
        self.errors = []

    def __call__(self, pred, target):
        """Record the largest absolute element-wise difference of every output."""
        per_output = []
        for output, label in zip(pred, target):
            difference = output.float().reshape(-1) - label.float().reshape(-1)
            worst = torch.max(torch.abs(difference))
            per_output.append(worst.cpu().numpy())
        self.errors.append(per_output)

    def reset(self):
        """Forget all recorded errors."""
        self.errors = []

    def compute(self):
        """Return the mean of all recorded maxima."""
        return numpy.mean(numpy.array(self.errors))

    def report(self):
        """Return the overall mean followed by one line per output."""
        per_output = numpy.array(self.errors).T  # [output, per call result]
        summary = "maximum absolute error is %f" % numpy.mean(per_output)
        details = "".join(
            "\noutput %d: maximum absolute error is %f" % (index, numpy.mean(values))
            for index, values in enumerate(per_output)
        )
        return summary + details
40 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_MaxAbsError_with_seqlen.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | import torch
 8 | import numpy
 9 | 
10 | 
@register_plugin(PluginType.Metric, '1.0')
class MaxAbsErrorwseqlenMetric(OptBaseMetric):
    """
    Maximum-absolute-error metric used for the RNNT_encoder model in Optimizer.

    The label has two elements: [label_value(tensor), actual_len]. Only the leading actual_len
    part of the predicted value and of the label value enters the error.
    """

    def __init__(self):
        self.errors = []

    def __call__(self, pred, target):
        """Record the largest absolute difference within the actual length, per zipped entry."""
        per_output = []
        # NOTE(review): the outputs in `pred` are zipped with the entries of target[0] and
        # target[1]; the prediction is cut along its second axis, the label along its first.
        # This looks like it assumes a batch size of 1 - confirm against the caller.
        for output, label, length in zip(pred, target[0], target[1]):
            length = length.cpu()
            clipped_out = output.float()[:, :length].reshape(-1)
            clipped_label = label.float()[:length].reshape(-1)
            worst = torch.max(torch.abs(clipped_out - clipped_label))
            per_output.append(worst.cpu().numpy())
        self.errors.append(per_output)

    def reset(self):
        """Forget all recorded errors."""
        self.errors = []

    def compute(self):
        """Return the mean of all recorded maxima."""
        return numpy.mean(numpy.array(self.errors))

    def report(self):
        """Return the overall mean followed by one line per output."""
        per_output = numpy.array(self.errors).T  # [output, per call result]
        summary = "maximum absolute error is %f" % numpy.mean(per_output)
        details = "".join(
            "\noutput %d: maximum absolute error is %f" % (index, numpy.mean(values))
            for index, values in enumerate(per_output)
        )
        return summary + details
45 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_OpTestCosDistance.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.framework import *
 5 | import torch
 6 | import numpy
 7 | 
 8 | 
 9 | @register_plugin(PluginType.Metric, '0.01')
10 | class OpTestCosDistanceMetric(OptBaseMetric):
11 |     def __init__(self):
12 |         self.cos = torch.nn.CosineSimilarity()
13 |         self.sim = []
14 | 
15 |     def __call__(self, pred, target):
16 |         sim_per_output = []
17 |         for o_p, o_t in zip(pred, target):
18 |             if len(o_p.shape):
19 |                 b = o_p.shape[0]
20 |                 x = o_p.reshape(b, -1).float()
21 |                 y = o_t.reshape(b, -1).float()
22 |                 sim = numpy.mean(self.cos(x, y).cpu().flatten().numpy())
23 |             else:  # if output is a scalar
24 |                 x = o_p
25 |                 y = o_t[0]
26 |                 sim = (x == y).item()
27 | 
28 |             sim_per_output.append(sim)
29 |         self.sim.append(sim_per_output)
30 | 
31 |     def reset(self):
32 |         self.sim = []
33 | 
34 |     def compute(self):
35 |         sim = numpy.array(self.sim)
36 |         return numpy.mean(sim)
37 | 
38 |     def report(self):
39 |         txt = ''
40 |         sims = numpy.array(self.sim).T  # [output, per call result]
41 |         txt += "cosine similarity is %f" % numpy.mean(sims)
42 |         for i, sim in enumerate(sims):
43 |             txt += "\noutput %d: cosine similarity is %f" % (i, numpy.mean(sim))
44 |         return txt
45 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_RMSE.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import os
 5 | import sys
 6 | import cv2
 7 | import torch
 8 | import numpy as np
 9 | 
10 | from AIPUBuilder.Optimizer.framework import *
11 | from AIPUBuilder.Optimizer.logger import *
12 | 
13 | 
@register_plugin(PluginType.Metric, '0.01')
class RMSEMetric(OptBaseMetric):
    """
    Root-mean-square-error metric used for the dinov2-small-nyu model in Optimizer.
    """

    def __init__(self):
        self.total_loss = 0.0
        self.total = 0

    def __call__(self, pred, target):
        """
        Resize ``pred[0]`` to the target's size and add the RMSE against ``target / 1000`` to
        the running sum.
        """
        image_size = target.shape[1:]
        resized = torch.nn.functional.interpolate(
            pred[0].unsqueeze(1),
            size=image_size,
            mode="bicubic",
            align_corners=False
        )
        prediction = resized.squeeze().cpu().numpy()

        reference = target.cpu().numpy().reshape(image_size)
        # NOTE(review): the division by 1000 presumably converts the label unit
        # (e.g. millimetres to metres) - confirm against the dataset.
        squared_error = (prediction - reference / 1000.) ** 2
        loss = np.sqrt(np.mean(squared_error))
        self.total += 1
        self.total_loss += loss

    def reset(self):
        """Clear the accumulated loss and the call counter."""
        self.total_loss = 0.0
        self.total = 0

    def compute(self):
        """Return the accumulated loss divided by the number of calls."""
        return self.total_loss / self.total

    def report(self):
        """Return a one-line summary of the average loss."""
        average = self.compute()
        return "rmse accuracy is %f" % (average)
49 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_SSDmAP.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.plugins.aipubt_metric_mAP import mAPMetric
 6 | from AIPUBuilder.Optimizer.framework import *
 7 | 
 8 | from AIPUBuilder.Optimizer.logger import OPT_FATAL
 9 | 
10 | 
@register_plugin(PluginType.Metric, '1.0')
class SSDmAPMetric(mAPMetric):
    """
    mAP metric used for SSD models in Optimizer.

    It selects six of the nine model outputs and delegates the mAP computation to mAPMetric.
    We assume the iou_threshold=0.5.
    """

    def __init__(self, class_num=90, start_id=0):
        super().__init__(class_num, start_id)

    def __call__(self, pred, target):
        """Forward outputs 2, 4, 5, 6, 7 and 8 of ``pred`` to the base metric."""
        assert len(pred) == 9, OPT_FATAL('please check the outputs number(should be 9)')
        selected = [pred[index] for index in (2, 4, 5, 6, 7, 8)]
        super().__call__(selected, target)

    def reset(self):
        """Reset the state kept by the base metric."""
        super().reset()

    def compute(self):
        """Compute the mAP through the base metric, cache it in ``self.mAP`` and return it."""
        result = super().compute()
        self.mAP = result
        return result

    def report(self):
        """Return a one-line summary of the mAP."""
        score = self.compute()
        return "SSD mAP accuracy is %f" % (score)
36 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_WER.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | import editdistance
 9 | import numpy as np
10 | 
11 | 
12 | '''
13 | WER: word error rate
14 | Word Error Rate =  100 * (insertions + substitutions + deletions) / (total words in correct transcript)
15 | '''
16 | 
17 | 
@register_plugin(PluginType.Metric, '1.0')
class WERMetric(OptBaseMetric):
    """
    Word-error-rate metric used for the deepspeech_official/wavenet models in Optimizer.

    Word error rate (WER) is a common metric of the performance of a speech recognition or
    machine translation system:
    Word Error Rate = (insertions + substitutions + deletions) / (total words in correct transcript)
    The value is kept as a ratio here; it is not multiplied by 100.
    """

    def __init__(self, EOF=''):
        # EOF: string form of the end-of-sequence value; when empty, the last predicted value
        # of each sample is taken as the end-of-sequence value instead.
        self.EOF = EOF
        self.WER = 0
        self.predictions = []

    def __call__(self, preds, targets):
        '''
        Record the edit distance between prediction and label, divided by the label length,
        for every batch entry.

        :param preds: sequence whose first element holds the predicted ids.
        :param targets: list(padding_label, act_label_len), padding_label.shape=(batch_size, padding_len);
                        act_label_len gives the actual label length of every batch entry.
        :return:
        '''
        decoded = preds[0].cpu().numpy()
        references = targets[0].cpu().numpy()
        lengths = targets[1].cpu().numpy()
        for row in range(references.shape[0]):
            hypothesis = decoded[row].reshape([-1])
            if len(self.EOF) > 0:
                eof_value = int(self.EOF)
            else:
                eof_value = hypothesis[-1]
            # strip every occurrence of the end-of-sequence value before comparing
            hypothesis = hypothesis[hypothesis != eof_value]
            reference = references[row][:lengths[row]].reshape([-1])
            distance = editdistance.eval(hypothesis, reference)
            self.predictions.append(distance / len(reference))

    def reset(self):
        """Forget all recorded error rates."""
        self.WER = 0
        self.predictions = []

    def compute(self):
        """Average the recorded error rates, cache the result in ``self.WER`` and return it."""
        self.WER = np.average(np.array(self.predictions))
        return self.WER

    def report(self):
        """Return a one-line summary of the word error rate."""
        rate = self.compute()
        return "ASR Word Error Rate(WER) is %f" % (rate)
59 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_delta1.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | import torch
 9 | import math
10 | 
11 | 
@register_plugin(PluginType.Metric, '1.0')
class delta1Metric(OptBaseMetric):
    """
    delta1 metric used for the fast_depth_onnx model in Optimizer.

    delta1 is the share of selected elements whose ratio max(pred / target, target / pred)
    is below 1.25; the reported value is the batch-weighted mean over all calls.
    """

    def __init__(self):
        self.delta1_sum = 0
        self.num = 0

    def __call__(self, pred, target):
        """Accumulate the delta1 value of ``pred[0]`` against ``target``, weighted by batch size."""
        output = pred[0]
        batch_size = output.shape[0]
        # keep every element where the target or the prediction is positive
        valid = ((target > 0) + (output > 0)) > 0

        scaled_pred = output[valid] * 1000
        scaled_target = target[valid] * 1000

        ratio = torch.max(scaled_pred / scaled_target, scaled_target / scaled_pred)
        within = (ratio < 1.25).float()
        delta1 = float(within.mean())

        self.num += batch_size
        self.delta1_sum += batch_size * delta1

    def reset(self):
        """Clear the accumulated sum and the sample counter."""
        self.delta1_sum = 0
        self.num = 0

    def compute(self):
        """Return the accumulated delta1 sum divided by the number of samples."""
        return self.delta1_sum / self.num

    def report(self):
        """Return a one-line summary of the delta1 value."""
        score = self.compute()
        return "delta1 accuracy is %f" % (score)
46 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_f1mesure.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.plugins.aipubt_metric_mIoU import mIoUMetricBase
 7 | import torch
 8 | import cv2
 9 | import numpy as np
10 | 
11 | 
@register_plugin(PluginType.Metric, '1.0')
class F1scoreMetric(mIoUMetricBase):
    """
    F1-measure metric used for the onnx_sne_roadseg models in Optimizer (kitti dataset).

    The score is derived from ``self.confusion_matrix``, which is not set in this class and is
    expected to be maintained by mIoUMetricBase.
    """

    def __init__(self, layout='NHWC'):
        super().__init__()
        # Only the NCHW layout overrides the channel axis; otherwise the base-class value stays.
        if layout == 'NCHW':
            self.channel_axis = 1

    def __call__(self, pred, target):
        """Unwrap a list-typed target to its first element and delegate to the base metric."""
        label = target[0] if isinstance(target, list) else target
        super().__call__(pred, label)

    def reset(self):
        """Reset the state kept by the base metric."""
        super().reset()

    def compute(self):
        """Return the F1 score of the class at index 1 of the confusion matrix."""
        conf = self.confusion_matrix
        diagonal = np.diag(conf)
        # index 1 selects the second class in both ratios
        pred = (diagonal / conf.sum(0).astype(np.float32))[1]
        recall = (diagonal / conf.sum(1).astype(np.float32))[1]
        return 2*(recall*pred)/(recall+pred)

    def report(self):
        """Return a one-line summary of the F1 score."""
        score = self.compute()
        return "F1 score is %f" % (score)
41 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_imdb.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.logger import *
 7 | import torch
 8 | 
 9 | 
@register_plugin(PluginType.Metric, '1.0')
class IMDBMetric(OptBaseMetric):
    """
    This IMDBMetric is used for the metric of robert-a models in Optimizer.
    accuracy = correct / total.
    The IMDb data set is a sentiment analysis data set with two classes; its test set has
    25000 samples (each sample is a movie review), split evenly between the positive and the
    negative class (12500/12500).
    Correct predictions are counted separately per class: index 0 for label 0, index 1 for label 1.
    """

    def __init__(self):
        self.correct = [0, 0]
        self.total = 0

    def __call__(self, pred, target):
        """
        Count, per class, the samples whose predicted class equals the label.

        :param pred: sequence whose first element holds the class scores; argmax is taken over axis 1.
        :param target: labels, one per sample.
        """
        pt = torch.argmax(pred[0], 1)
        # Flatten so that labels given as (batch,), (batch, 1) or a single value are indexed
        # uniformly per sample.
        gt = torch.as_tensor(target).reshape(-1)
        batch = pt.shape[0]
        for idx in range(batch):
            # Compare sample by sample: comparing the whole tensors inside the loop only
            # works for batch size 1 and raises for larger batches.
            label = gt[idx].item()
            if pt[idx].item() != label:
                continue
            if label == 0:
                self.correct[0] += 1
            elif label == 1:
                self.correct[1] += 1

        self.total += batch

    def reset(self):
        """Clear the per-class counters and the sample counter."""
        self.correct = [0, 0]
        self.total = 0

    def compute(self):
        """Return the overall accuracy, or ``-inf`` (after logging an error) when nothing was counted."""
        try:
            acc = float(self.correct[0]+self.correct[1]) / float(self.total)
            return acc
        except ZeroDivisionError:
            OPT_ERROR('zeroDivisionError: imdb acc total label = 0')
            return float("-inf")

    def report(self):
        """Return a one-line summary of the accuracy."""
        return "imdb sentiment acc is %f " % (self.compute())
51 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_metric_topk.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | import torch
 9 | 
10 | 
@register_plugin(PluginType.Metric, '1.0')
class TopKMetric(OptBaseMetric):
    """
    Top-K accuracy metric used for image classification models in Optimizer.
    By default this plugin computes Top1.
    """

    def __init__(self, K='1', with_argmax=False):
        # When the model output already holds class indices (with_argmax), K is forced to 1.
        self.with_argmax = with_argmax
        self.K = 1 if with_argmax else int(K)
        self.total = 0
        self.correct = 0

    def __call__(self, pred, target):
        """Count the samples whose label is among the candidate classes taken from ``pred[0]``."""
        if self.with_argmax:
            # the output is used directly as class indices
            candidates = pred[0].reshape([pred[0].shape[0], -1]).cpu().numpy().astype('int32')
        else:
            _, candidates = torch.topk(pred[0].reshape([pred[0].shape[0], -1]), self.K, dim=-1)  # NHWC
        for row in range(target.numel()):
            if target[row] in candidates[row]:
                self.correct += 1
        self.total += target.numel()

    def reset(self):
        """Clear the counters."""
        self.total = 0
        self.correct = 0

    def compute(self):
        """Return correct / total, or ``-inf`` (after logging an error) when nothing was counted."""
        try:
            return float(self.correct) / float(self.total)
        except ZeroDivisionError:
            OPT_ERROR('zeroDivisionError: Topk acc total label = 0')
            return float("-inf")

    def report(self):
        """Return a one-line summary of the top-K accuracy."""
        accuracy = self.compute()
        return "top-%d accuracy is %f" % (self.K, accuracy)
48 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/plugins/aipubt_op_tile.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | 
 7 | from AIPUBuilder.Optimizer.logger import *
 8 | 
 9 | # Delete the comment on the next line to enable this plugin
10 | # @op_register(OpType.Tile)
11 | 
12 | 
def tile(self, *args):
    """
    Forward function of the Tile op: repeat the input tensor along its dimensions.

    The repeat counts are read from ``self.params['multipliers']`` or, if that is absent, from
    ``self.params['reps']``; a comma-separated string is converted to a list of ints. The result
    is stored in ``self.outputs[0].betensor`` and returned.

    :param args: unused.
    :return: the repeated tensor.
    :raises KeyError: if neither 'multipliers' nor 'reps' is present in ``self.params``.
    """
    if 'multipliers' in self.params:
        # Look up the same key that was just tested.
        key = 'multipliers'
    elif 'reps' in self.params:
        key = 'reps'
    else:
        message = "Tile op needs 'multipliers' or 'reps' param."
        OPT_ERROR(message)
        # Without repeat counts there is nothing to compute; fail explicitly instead of
        # running into an unbound local variable below.
        raise KeyError(message)

    reps = self.params[key]
    if isinstance(reps, str):
        reps = [int(r) for r in reps.split(',')]
    inp_t = self.inputs[0].betensor
    out_t = inp_t.repeat(reps)
    self.outputs[0].betensor = out_t

    return out_t
29 | 
30 | # Delete the comment on the next line to enable this plugin
31 | # @op_register(OpType.Tile)
32 | 
33 | 
def tile_quantize(self, *args):
    """
    Quantize function of the Tile op: the output takes over the quantization info of the input.

    Copies dtype, scale, zerop, qbits and qinvariant from ``self.inputs[0]`` to
    ``self.outputs[0]``, in that order.
    """
    source = self.inputs[0]
    destination = self.outputs[0]
    for field in ('dtype', 'scale', 'zerop', 'qbits', 'qinvariant'):
        setattr(destination, field, getattr(source, field))
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/qat/__init__.py


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/qatmain.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import sys
 5 | from AIPUBuilder.Optimizer.plugins import *
 6 | try:
 7 |     from AIPUBuilder.Optimizer.plugins_internal import *
 8 | except:
 9 |     pass
10 | 
11 | from AIPUBuilder.Optimizer.framework import (traverse_opt_plugins,
12 |                                              QUANTIZE_DATASET_DICT,
13 |                                              QUANTIZE_METRIC_DICT)
14 | from AIPUBuilder.Optimizer.config import arg_parser
15 | from src import AIPUQATMaster
16 | from src.qatlogger import QAT_INFO
17 | 
18 | 
def main():
    """
    Entry point of the QAT flow.

    Registers the optimizer plugins, parses the arguments and runs AIPUQATMaster with them.
    Exceptions raised along the way propagate to the caller unchanged.

    :return: 0 on success or when arg_parser returns True, 1 when arg_parser returns False.
    """
    traverse_opt_plugins()
    args = arg_parser(metric_dict=QUANTIZE_METRIC_DICT,
                      dataset_dict=QUANTIZE_DATASET_DICT)  # pylint: disable=undefined-variable
    # arg_parser may hand back a bool instead of parsed arguments; it is mapped straight to an
    # exit status (presumably for options that need no run - confirm in arg_parser).
    if isinstance(args, bool):
        return 0 if args else 1

    qat_master = AIPUQATMaster(args)
    qat_master.run()
    QAT_INFO("running QAT Done.")
    return 0
32 | 
33 | 
34 | if __name__ == '__main__':
35 |     ret = main()
36 |     sys.exit(ret)
37 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from .config import *
 5 | from .plugin import *
 6 | from .quantizer import *
 7 | from .qatmaster import AIPUQATMaster
 8 | 
 9 | 
10 | __OPT_QAT_VERSION__ = '0.1'
11 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/config/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from .config import QATConfig, get_device, default_device
5 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from ..utils import is_match
 5 | from .concat_fuser import ConcatFusion
 6 | from .convolution_fuser import ConvBNActFusion
 7 | from .eltwise_fuser import MulFusion, AddFusion
 8 | from .expand_fuser import ExpandFusion
 9 | from .fullyconnected_fuser import LinearBNActFusion
10 | from .gelu_fuser import GeLUFusion
11 | from .hardswish_fuser import HardswishFusion
12 | from .hardsigmoid_fuser import HardsigmoidFusion
13 | from .layernorm_fuser import LayerNormFusion
14 | from .multiheadattention_fuser import MultiheadAttentionFusion
15 | from .mha_fuser import MHAFusion
16 | from .pooling_fuser import AvgPool2dFusion, MaxPool2dFusion
17 | from .reshape_fuser import ReshapeFusion
18 | from .transpose_fuser import TransposeFusion
19 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/concat_fuser.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import copy
 5 | import torch
 6 | import torch.nn as nn
 7 | from operator import eq
 8 | 
 9 | from ..qatregister import register_fusion_pattern
10 | from ..qatlogger import QAT_WARN
11 | from ..ops import QConcat
12 | from ..utils import replace_node_module
13 | 
14 | 
@register_fusion_pattern((torch.cat))
@register_fusion_pattern((torch.concat))
class ConcatFusion:
    """Fusion handler that swaps a ``torch.cat``/``torch.concat`` call for a ``QConcat`` module."""

    def __init__(self, quantizer, node):
        """Remember the concat node and work out the concatenation axis.

        Args:
            quantizer: quantizer driving the fusion (not used here).
            node: graph node whose target is ``torch.cat`` or ``torch.concat``.

        Raises:
            AssertionError: if ``node`` does not target a concat function.
        """
        self.cat_node = None
        if eq(node.target, torch.concat) or eq(node.target, torch.cat):
            self.cat_node = node
        assert self.cat_node is not None, '[FATAL]: cat_node can not be None!'
        if 'dim' in self.cat_node.kwargs:
            self.dim = self.cat_node.kwargs['dim']
        elif len(self.cat_node.args) > 1:
            self.dim = self.cat_node.args[1]
        else:
            # torch.cat concatenates along dim 0 when no dim is given, so the
            # fused module must fall back to the same axis.
            self.dim = 0
            QAT_WARN("not found the dim parameters in cat node, using dim=0.")

    def fuse(self, graph_module, modules):
        """Insert a ``QConcat`` call_module node and remove the original concat node.

        Args:
            graph_module: graph module that owns the concat node.
            modules: module mapping handed in by the caller (not used here).
        """
        qname = self.cat_node.name + "_QConcat"
        q_cat = QConcat(dim=self.dim, name=qname)
        fused_graph = graph_module.graph
        with fused_graph.inserting_after(self.cat_node):
            graph_module.add_module(qname, q_cat)
            # Only positional args are forwarded; a keyword `dim` has already
            # been captured into the QConcat instance above.
            new_node = fused_graph.call_module(qname, args=self.cat_node.args)
        self.cat_node.replace_all_uses_with(new_node)
        fused_graph.erase_node(self.cat_node)
40 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/expand_fuser.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import copy
 5 | import torch
 6 | import torch.nn as nn
 7 | 
 8 | from ..qatregister import register_fusion_pattern
 9 | from ..qatlogger import QAT_FATAL, QAT_INFO, QAT_ERROR
10 | from ..ops import QExpand
11 | from ..utils import replace_node_module
12 | from types import BuiltinFunctionType
13 | 
14 | 
@register_fusion_pattern((torch.Tensor.expand))  # 0, the highest priority
class ExpandFusion:
    """Fusion handler that swaps a ``Tensor.expand`` call for a ``QExpand`` module."""

    def __init__(self, quantizer, node):
        """Store the expand node; the quantizer argument is not used."""
        self.expand_node = node
        assert self.expand_node is not None, '[FATAL]: expand_node can not be None!'

    def fuse(self, graph_module, modules):
        """Insert a ``QExpand`` call_module node and erase the original expand node.

        When every size argument is an ``int`` the sizes are baked into the
        module and only the input tensor is forwarded; otherwise all original
        arguments are forwarded and the module is created with an empty size.
        """
        QAT_INFO(f"begin to expand fuse")
        graph = graph_module.graph
        node = self.expand_node
        tensor_only = (node.args[0],)
        size_args = node.args[1:]
        if all(isinstance(item, int) for item in size_args):
            call_args = tensor_only
            size = size_args
        else:
            call_args = node.args
            size = []
        module_name = node.name + "_QExpand"
        q_expand = QExpand(size=size)
        with graph.inserting_after(node):
            graph_module.add_module(module_name, q_expand)
            replacement = graph.call_module(module_name, args=call_args)
        node.replace_all_uses_with(replacement)
        graph.erase_node(node)
40 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/gelu_fuser.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import copy
 5 | import torch
 6 | import torch.nn as nn
 7 | 
 8 | from ..qatlogger import QAT_FATAL
 9 | from ..qatregister import register_fusion_pattern
10 | from ..ops import QGeLU
11 | from ..utils import replace_node_module
12 | 
13 | 
@register_fusion_pattern((nn.GELU))
class GeLUFusion:
    """Fusion handler that replaces an ``nn.GELU`` module with ``QGeLU``."""

    def __init__(self, quantizer, node):
        """Validate that ``node`` targets an ``nn.GELU`` and keep node and module."""
        matched = isinstance(quantizer.modules[node.target], (nn.GELU))
        self.gelu_node = node if matched else None
        if self.gelu_node is None:
            QAT_FATAL(f"gelu_node can not be None!")
        self.gelu_module = quantizer.modules[self.gelu_node.target]

    def fuse(self, graph_module, modules):
        """Build a ``QGeLU`` with the same ``approximate`` setting and install it via ``replace_node_module``."""
        replacement = QGeLU(name=self.gelu_node.name,
                            approximate=self.gelu_module.approximate)
        replace_node_module(self.gelu_node, modules, replacement)
27 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/hardsigmoid_fuser.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import copy
 5 | import torch
 6 | import torch.nn as nn
 7 | 
 8 | from ..qatlogger import QAT_FATAL
 9 | from ..qatregister import register_fusion_pattern
10 | from ..ops import QHardSigmoid
11 | from ..utils import replace_node_module
12 | 
13 | 
@register_fusion_pattern((nn.Hardsigmoid))
class HardsigmoidFusion:
    """Fusion handler that replaces an ``nn.Hardsigmoid`` module with ``QHardSigmoid``."""

    def __init__(self, quantizer, node):
        """Validate that ``node`` targets an ``nn.Hardsigmoid`` and keep node and module."""
        matched = isinstance(quantizer.modules[node.target], (nn.Hardsigmoid))
        self.hardsigmoid_node = node if matched else None
        if self.hardsigmoid_node is None:
            QAT_FATAL(f"hardsigmoid_node can not be None!")
        self.hardsigmoid_module = quantizer.modules[self.hardsigmoid_node.target]

    def fuse(self, graph_module, modules):
        """Build a ``QHardSigmoid`` named after the node and install it via ``replace_node_module``."""
        replacement = QHardSigmoid(name=self.hardsigmoid_node.name)
        replace_node_module(self.hardsigmoid_node, modules, replacement)
27 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/hardswish_fuser.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import copy
 5 | import torch
 6 | import torch.nn as nn
 7 | 
 8 | from ..qatlogger import QAT_FATAL
 9 | from ..qatregister import register_fusion_pattern
10 | from ..ops import QHardSwish
11 | from ..utils import replace_node_module
12 | 
13 | 
@register_fusion_pattern((nn.Hardswish))
class HardswishFusion:
    """Fusion handler that replaces an ``nn.Hardswish`` module with ``QHardSwish``."""

    def __init__(self, quantizer, node):
        """Validate that ``node`` targets an ``nn.Hardswish`` and keep node and module."""
        matched = isinstance(quantizer.modules[node.target], (nn.Hardswish))
        self.hardswish_node = node if matched else None
        if self.hardswish_node is None:
            QAT_FATAL(f"hardswish_node can not be None!")
        self.hardswish_module = quantizer.modules[self.hardswish_node.target]

    def fuse(self, graph_module, modules):
        """Build a ``QHardSwish`` named after the node and install it via ``replace_node_module``."""
        replacement = QHardSwish(name=self.hardswish_node.name)
        replace_node_module(self.hardswish_node, modules, replacement)
27 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/layernorm_fuser.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import copy
 5 | import torch
 6 | import torch.nn as nn
 7 | 
 8 | from ..qatregister import register_fusion_pattern
 9 | from ..ops import QLayerNorm
10 | from ..utils import extract_linear_hyperparams, replace_node_module
11 | 
12 | 
@register_fusion_pattern((nn.LayerNorm))
class LayerNormFusion:
    """Fusion handler that replaces an ``nn.LayerNorm`` module with ``QLayerNorm``."""

    def __init__(self, quantizer, node):
        """Validate that ``node`` targets an ``nn.LayerNorm`` and keep node, name and module.

        Raises:
            AssertionError: if the module behind ``node`` is not an ``nn.LayerNorm``.
        """
        self.ln_node = None
        if isinstance(quantizer.modules[node.target], nn.LayerNorm):
            self.ln_node = node
        # Validate before dereferencing, so a mismatch reports the intended
        # message rather than an AttributeError on None.
        assert self.ln_node is not None, '[FATAL]: layernorm node can not be None!'
        self.ln_name = self.ln_node.name
        self.ln_module = quantizer.modules[self.ln_node.target]

    def _extract_hyperparams(self, m, name=None):
        """Collect the constructor keyword arguments for ``QLayerNorm`` from module ``m``.

        Returns:
            dict: ``name``, ``normalized_shape``, ``eps`` and a boolean ``bias``
            that is true only when ``m.bias`` exists and is a ``Parameter``.
        """
        return {
            'name': name,
            'normalized_shape': m.normalized_shape,
            'eps': m.eps,
            'bias': hasattr(m, 'bias') and isinstance(m.bias, torch.nn.Parameter),
        }

    def fuse(self, graph_module, modules):
        """Create the ``QLayerNorm``, hand it the original weight/bias and install it."""
        hyper_params = self._extract_hyperparams(self.ln_module, self.ln_name)
        qln = QLayerNorm(**hyper_params)
        qln.weight.data = self.ln_module.weight
        if qln.bias is not None:
            qln.bias.data = self.ln_module.bias

        replace_node_module(self.ln_node, modules, qln)
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/pooling_fuser.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import copy
 5 | import torch
 6 | import torch.nn as nn
 7 | 
 8 | from ..qatlogger import QAT_FATAL
 9 | from ..qatregister import register_fusion_pattern
10 | from ..ops import QAveragePooling2D, QMaxPooling2D
11 | from ..utils import replace_node_module
12 | 
13 | 
@register_fusion_pattern((nn.AdaptiveAvgPool2d))
@register_fusion_pattern((nn.AvgPool2d))  # 0, the highest priority
class AvgPool2dFusion:
    """Fusion handler that replaces an average-pooling module with ``QAveragePooling2D``."""

    def __init__(self, quantizer, node):
        """Validate that ``node`` targets ``nn.AvgPool2d``/``nn.AdaptiveAvgPool2d`` and keep it."""
        target_module = quantizer.modules[node.target]
        is_avg_pool = isinstance(target_module, (nn.AvgPool2d, nn.AdaptiveAvgPool2d))
        self.avg_node = node if is_avg_pool else None

        if self.avg_node is None:
            QAT_FATAL(f"avg_node can not be None!")

        self.avg_module = quantizer.modules[self.avg_node.target]
        self.avg_name = self.avg_node.name

    def fuse(self, graph_module, modules):
        """Wrap a deep copy of the pooling module in ``QAveragePooling2D`` and install it."""
        pooling_copy = copy.deepcopy(self.avg_module)
        replacement = QAveragePooling2D(pooling_copy, self.avg_name)
        replace_node_module(self.avg_node, modules, replacement)
33 | 
34 | 
@register_fusion_pattern((nn.AdaptiveMaxPool2d))
@register_fusion_pattern((nn.MaxPool2d))  # 0, the highest priority
class MaxPool2dFusion:
    """Fusion handler that replaces a max-pooling module with ``QMaxPooling2D``."""

    def __init__(self, quantizer, node):
        """Validate that ``node`` targets a max-pooling module and keep node, module and name.

        Raises:
            AssertionError: if the module behind ``node`` is neither
                ``nn.MaxPool2d`` nor ``nn.AdaptiveMaxPool2d``.
        """
        self.max_node = None

        # Must accept exactly the module types this class is registered for.
        if isinstance(quantizer.modules[node.target], (nn.MaxPool2d, nn.AdaptiveMaxPool2d)):
            self.max_node = node

        assert self.max_node is not None, '[FATAL]: max_node can not be None!'

        self.max_module = quantizer.modules[self.max_node.target]
        self.max_name = self.max_node.name

    def fuse(self, graph_module, modules):
        """Wrap a deep copy of the pooling module in ``QMaxPooling2D`` and install it."""
        max_module = copy.deepcopy(self.max_module)
        q_max = QMaxPooling2D(max_module, self.max_name)
        replace_node_module(self.max_node, modules, q_max)
53 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/fuser/transpose_fuser.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import copy
 5 | import torch
 6 | import torch.nn as nn
 7 | 
 8 | from ..qatregister import register_fusion_pattern
 9 | from ..qatlogger import QAT_FATAL, QAT_INFO, QAT_ERROR
10 | from ..ops import QTranspose
11 | from ..utils import replace_node_module
12 | from types import BuiltinFunctionType
13 | 
14 | 
@register_fusion_pattern((torch.permute))  # 0, the highest priority
@register_fusion_pattern((torch.Tensor.permute))  # 0, the highest priority
class TransposeFusion:
    """Fusion handler that swaps a ``permute`` call for a ``QTranspose`` module."""

    def __init__(self, quantizer, node):
        """Store the permute node; the quantizer argument is not used."""
        self.transpose_node = node
        assert self.transpose_node is not None, '[FATAL]: transpose_node can not be None!'

    def fuse(self, graph_module, modules):
        """Insert a ``QTranspose`` call_module node and erase the original permute node.

        When every argument after the input is an ``int`` they are baked into
        the module as ``perm`` and only the input is forwarded; otherwise all
        original arguments are forwarded and ``perm`` starts out empty.
        """
        graph = graph_module.graph
        node = self.transpose_node
        tensor_only = (node.args[0],)
        axis_args = node.args[1:]
        if all(isinstance(item, int) for item in axis_args):
            call_args = tensor_only
            perm = axis_args
        else:
            call_args = node.args
            perm = []
        qname = node.name + "_QTranspose"
        q_transpose = QTranspose(name=qname, perm=perm)
        with graph.inserting_after(node):
            graph_module.add_module(qname, q_transpose)
            replacement = graph.call_module(qname, args=call_args)
        node.replace_all_uses_with(replacement)
        graph.erase_node(node)
41 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from .qat_base_operator import QBaseOperator
 5 | 
 6 | from .qat_batchnorm import QBatchNorm
 7 | from .qat_concat import QConcat
 8 | from .qat_constant import QConstant
 9 | from .qat_convolution import QConvolution2D
10 | from .qat_eltwise import QElementwiseAdd, QElementwiseMul
11 | from .qat_expand import QExpand
12 | from .qat_fullyconnected import QFullyConnected
13 | from .qat_gelu import QGeLU
14 | from .qat_hardsigmoid import QHardSigmoid
15 | from .qat_hardswish import QHardSwish
16 | from .qat_input import QInput
17 | from .qat_layernorm import QLayerNorm
18 | from .qat_matmul import QMatMul
19 | from .qat_multiheadattention import QMultiHeadAttention
20 | from .qat_pooling import QAveragePooling2D, QMaxPooling2D
21 | from .qat_reshape import QReshape
22 | from .qat_softmax import QSoftmax
23 | from .qat_split import QSplit
24 | from .qat_transpose import QTranspose
25 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_activation.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from torch import nn
 6 | from .qat_base_operator import QBaseOperator
 7 | from ..config import QATConfig
 8 | 
 9 | 
class QActivation(QBaseOperator):
    """Placeholder activation operator: records a name and a method, does no computation."""

    def __init__(self, name, method, dtype=None) -> None:
        """Initialise the base operator with ``dtype`` and store ``name`` and ``method``."""
        super().__init__(dtype)
        self.name = name
        self.method = method

    def forward(self, inputs):
        """Not implemented; returns ``None``."""
        return None

    def serialize(self, input):
        """Not implemented; returns ``None``."""
        return None
25 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_concat.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from torch import nn
 6 | from ..qatregister import register_operator
 7 | from ..config import QATConfig
 8 | from .qat_base_operator import QBaseOperator, check_args
 9 | 
10 | 
@register_operator()
class QConcat(QBaseOperator):
    """Concatenation operator whose output goes through ``fake_quant``."""

    def __init__(self, dim=0, dtype=None, name='') -> None:
        """Store the concat axis and fetch ``activation_qinfo`` from ``QATConfig``."""
        super().__init__(dtype, name)
        self.dim = dim
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, inputs, *args):
        """Concatenate ``inputs`` along ``self.dim`` and fake-quantize the result.

        A first extra positional argument, when present, overrides (and is
        stored as) ``self.dim``.
        """
        if args:
            self.dim = args[0]
        merged = torch.cat(inputs, dim=self.dim)
        return self.fake_quant(merged, self.activation_qinfo)

    def serialize(self, inputs):
        """Emit ``ops.concat`` on ``self.dim`` with the output quantization attached."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        return ops.concat(inputs, axis=self.dim, quantization=quantization)
30 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_constant.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | import torch
 6 | from torch import nn
 7 | from torch.nn.parameter import Parameter
 8 | from ..qatlogger import QAT_ERROR
 9 | from ..qinfo import QuantStage
10 | from ..qatregister import register_operator
11 | from ..config import QATConfig
12 | from .qat_base_operator import QBaseOperator
13 | 
14 | 
@register_operator()
class QConstant(QBaseOperator):
    """Constant operator holding its data in a ``weight`` buffer."""

    def __init__(self, name, data, dtype=None) -> None:
        """Register ``data`` as the ``weight`` buffer and fetch ``activation_qinfo``.

        ``QAT_ERROR`` is called when ``data`` is ``None``.
        """
        super().__init__(dtype)
        self.name = name
        if data is None:
            QAT_ERROR(f"when instances one QConstant, the data arg is None")
        self.register_buffer('weight', data)
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    def forward(self):
        """Return the constant after ``fake_quant``.

        Non-floating-point data marks ``activation_qinfo`` as ``qinvariant``.
        """
        if not torch.is_floating_point(self.weight):
            self.activation_qinfo.qinvariant = True
        outputs = self.fake_quant(self.weight, self.activation_qinfo)
        return outputs

    def serialize(self):
        """Emit ``ops.constant`` for the stored data (cast to float32).

        In non-``'fp'`` IR mode ``forward`` is run once with ``quant_stage``
        temporarily set to ``QuantStage.QAT`` before the output quantization is
        read (presumably to refresh the quantization info — confirm against
        ``QBaseOperator``).
        """
        from AIPUBuilder import ops
        from AIPUBuilder.core import Tensor
        weight = Tensor(self.name + "_weight", self.weight.cpu().numpy().astype('float32'))
        if self.ir_mode == 'fp':
            return ops.constant(weight)

        saved_stage = self.quant_stage
        self.quant_stage = QuantStage.QAT
        try:
            self.forward()
        finally:
            # Restore the stage even when forward() raises, so the operator
            # is never left stuck in the temporary QAT stage.
            self.quant_stage = saved_stage
        out_q = self.get_quantization(self.activation_qinfo)
        return ops.constant(weight, quantization=out_q)
45 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_expand.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from ..qatlogger import QAT_ERROR
 5 | from ..qatregister import register_operator
 6 | from ..config import QATConfig
 7 | from .qat_base_operator import QBaseOperator, check_args
 8 | 
 9 | 
@register_operator()
class QExpand(QBaseOperator):
    """Expand operator; its serialized output reuses the input quantization."""

    def __init__(self, size, dtype=None) -> None:
        """Store the target ``size`` and fetch ``activation_qinfo`` from ``QATConfig``."""
        super().__init__(dtype)
        self._use_input_QConfig = True
        self.size = size
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, input, *args):
        """Expand ``input`` to ``self.size`` and fake-quantize the result.

        Extra positional arguments, when present, replace ``self.size``.
        ``QAT_ERROR`` is called when no size is available from either source.
        """
        if len(self.size) == 0 and len(args) == 0:
            QAT_ERROR(f"QExpand meets the len(size) == 0.")
        if len(args):
            self.size = list(args)
        expanded = input.expand(self.size)
        return self.fake_quant(expanded, self.activation_qinfo)

    def serialize(self, inputs):
        """Emit ``ops.tile`` with per-axis repeat counts derived from ``self.size``.

        An axis gets a repeat of 1 when its requested size is ``-1`` or equals
        the input extent on that axis; otherwise the requested size is used.
        """
        from AIPUBuilder import ops
        repeats = []
        for axis, extent in enumerate(self.size):
            current = inputs.shape[axis]
            unchanged = extent == -1 or extent == current
            repeats.append(1 if unchanged else extent)
        tiled = ops.tile(inputs, repeats)
        tiled.quantization = inputs.quantization
        return tiled
34 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_gelu.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from ..qatregister import register_operator
 6 | from ..config import QATConfig
 7 | from .qat_base_operator import QBaseOperator, check_args
 8 | 
 9 | 
@register_operator()
class QGeLU(QBaseOperator):
    """GELU operator whose output goes through ``fake_quant``."""

    def __init__(self, name, approximate, dtype=None) -> None:
        """Store ``name`` and ``approximate`` and fetch ``activation_qinfo`` from ``QATConfig``."""
        super().__init__(dtype)

        self.name = name
        self.approximate = approximate
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, inputs):
        """Apply ``torch.nn.functional.gelu`` with the stored ``approximate`` mode, then fake-quantize."""
        activated = torch.nn.functional.gelu(input=inputs, approximate=self.approximate)
        return self.fake_quant(activated, self.activation_qinfo)

    def serialize(self, input):
        """Emit ``ops.gelu`` with the output quantization attached."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        return ops.gelu(input, quantization=quantization)
30 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_hardsigmoid.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from ..qatregister import register_operator
 6 | from ..qinfo import CMode
 7 | from ..config import QATConfig
 8 | from .qat_base_operator import QBaseOperator, check_args
 9 | 
10 | 
@register_operator()
class QHardSigmoid(QBaseOperator):
    """Hard-sigmoid operator with output clipped to [clip_min, clip_max] and fake-quantized."""

    def __init__(self, name, dtype=None) -> None:
        """Set the name, method tag and clip range; force ``cmode`` of the qinfo to ``'extrema'``."""
        super().__init__(dtype)

        self.name = name
        self.method = "HARDSIGMOID"
        self.clip_min = 0.0
        self.clip_max = 1.0
        self.activation_qinfo = QATConfig.get('activation_qinfo')
        self.activation_qinfo.cmode = 'extrema'

    def forward(self, inputs):
        """Apply hardsigmoid, clip to the stored range and fake-quantize."""
        activated = torch.nn.functional.hardsigmoid(inputs)
        device = activated.device
        # Bounds are built as tensors on the same device as the activation.
        lower = torch.tensor(self.clip_min, device=device)
        upper = torch.tensor(self.clip_max, device=device)
        clipped = torch.maximum(lower, torch.minimum(activated, upper))
        return self.fake_quant(clipped, self.activation_qinfo)

    def serialize(self, input):
        """Emit ``ops.hard_sigmoid`` with the output quantization and name the op after this module."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        result = ops.hard_sigmoid(input, quantization=quantization)
        result.op.name = self.name
        return result
37 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_hardswish.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from torch import nn
 6 | from AIPUBuilder.Optimizer.framework import OpType
 7 | from ..qatregister import register_operator
 8 | from ..config import QATConfig
 9 | from .qat_base_operator import QBaseOperator, check_args
10 | 
11 | 
@register_operator()
class QHardSwish(QBaseOperator):
    """Hard-swish operator whose output goes through ``fake_quant``."""

    def __init__(self, name, dtype=None) -> None:
        """Set the name and method tag; force ``cmode`` of the qinfo to ``'extrema'``."""
        super().__init__(dtype)

        self.name = name
        self.method = "HARDSWISH"
        self.activation_qinfo = QATConfig.get('activation_qinfo')
        self.activation_qinfo.cmode = 'extrema'

    def forward(self, inputs):
        """Apply ``torch.nn.functional.hardswish`` and fake-quantize the result."""
        activated = torch.nn.functional.hardswish(inputs)
        return self.fake_quant(activated, self.activation_qinfo)

    def serialize(self, input):
        """Emit ``ops.hard_swish`` with the output quantization and name the op after this module."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        result = ops.hard_swish(input, quantization=quantization)
        result.op.name = self.name
        return result
33 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_matmul.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from ..qatregister import register_operator
 6 | from ..config import QATConfig
 7 | from .qat_base_operator import QBaseOperator, check_args
 8 | 
 9 | 
@register_operator()
class QMatMul(QBaseOperator):
    """Matrix-multiply operator with optional operand transposition and fake-quantized output."""

    def __init__(self,
                 name,
                 trans_a=False,
                 trans_b=False,
                 dtype=None) -> None:
        """Store the transpose flags and fetch ``activation_qinfo`` from ``QATConfig``.

        Args:
            name: operator name forwarded to the base class.
            trans_a: swap the last two dims of the first operand before multiplying.
            trans_b: swap the last two dims of the second operand before multiplying.
            dtype: forwarded to the base class.
        """
        super().__init__(dtype, name=name)
        self.trans_a = trans_a
        self.trans_b = trans_b
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @staticmethod
    def _swap_last_two_dims(tensor):
        """Return ``tensor`` with its last two dims swapped, promoting 0-D/1-D inputs to 2-D first."""
        if tensor.dim() == 0:
            tensor = tensor.unsqueeze(0).unsqueeze(0)
        elif tensor.dim() == 1:
            tensor = tensor.unsqueeze(0)
        return tensor.transpose(-1, -2)

    @check_args
    def forward(self, x, y):
        """Multiply ``x`` by ``y`` (after the requested transposes) and fake-quantize the product."""
        if self.trans_a:
            x = self._swap_last_two_dims(x)
        if self.trans_b:
            # The rank check must look at y itself, not at x.
            y = self._swap_last_two_dims(y)
        outputs = torch.matmul(x, y)
        outputs = self.fake_quant(outputs, self.activation_qinfo)
        return outputs

    def serialize(self, input0, input1):
        """Emit ``ops.matmul`` with the transpose flags and the output quantization."""
        from AIPUBuilder import ops
        out_q = self.get_quantization(self.activation_qinfo)
        return ops.matmul(input0, input1, self.trans_a, self.trans_b, quantization=out_q)
44 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_softmax.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from ..qatregister import register_operator
 6 | from ..config import QATConfig
 7 | from .qat_base_operator import QBaseOperator, check_args
 8 | 
 9 | 
@register_operator()
class QSoftmax(QBaseOperator):
    """Softmax operator whose output goes through ``fake_quant``."""

    def __init__(self, name, dim=-1, dtype=None) -> None:
        """Store the softmax axis and fetch ``activation_qinfo`` from ``QATConfig``."""
        super().__init__(dtype, name=name)
        self.dim = dim
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, inputs, *args):
        """Apply softmax along ``self.dim`` and fake-quantize the result.

        A first extra positional argument, when present, overrides (and is
        stored as) ``self.dim``.
        """
        if args:
            self.dim = args[0]
        probabilities = torch.nn.functional.softmax(inputs, self.dim)
        return self.fake_quant(probabilities, self.activation_qinfo)

    def serialize(self, inputs):
        """Emit ``ops.softmax`` on ``self.dim`` with the output quantization attached."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        return ops.softmax(inputs, axis=self.dim, quantization=quantization)
31 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_split.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from ..qatregister import register_operator
 6 | from ..config import QATConfig
 7 | from .qat_base_operator import QBaseOperator, check_args
 8 | 
 9 | 
@register_operator()
class QSplit(QBaseOperator):
    """Split operator; every output chunk goes through ``fake_quant``."""

    def __init__(self,
                 name,
                 split_size_or_sections,
                 dim=0,
                 dtype=None) -> None:
        """Store the split spec and axis and fetch ``activation_qinfo`` from ``QATConfig``.

        Args:
            name: operator name forwarded to the base class.
            split_size_or_sections: chunk size (int) or explicit section sizes,
                as accepted by ``torch.split``.
            dim: axis to split along.
            dtype: forwarded to the base class.
        """
        super().__init__(dtype, name=name)
        self._use_input_QConfig = True
        self.dim = dim
        self.split_size_or_sections = split_size_or_sections
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, inputs, *args):
        """Split ``inputs`` with ``torch.split`` and fake-quantize each chunk.

        Extra positional arguments override (and are stored as) the split spec
        (first) and the axis (second).

        Returns:
            list: the fake-quantized chunks.
        """
        self.split_size_or_sections = args[0] if len(args) else self.split_size_or_sections
        self.dim = args[1] if len(args) > 1 else self.dim
        outputs = torch.split(inputs, self.split_size_or_sections, self.dim)
        outputs = list(outputs)
        for i, out in enumerate(outputs):
            outputs[i] = self.fake_quant(out, self.activation_qinfo)
        return outputs

    def serialize(self, inputs):
        """Emit ``ops.split`` matching what ``torch.split`` produced in ``forward``.

        An integer chunk size is turned into a number of equal parts when it
        divides the axis extent. Otherwise ``torch.split`` yields a smaller
        trailing chunk, so the explicit list of chunk sizes is passed instead.
        NOTE(review): assumes ``ops.split`` reads a list as per-chunk sizes, as
        the pass-through of a list-valued spec below already implies — confirm.
        """
        from AIPUBuilder import ops
        splits = self.split_size_or_sections
        if isinstance(splits, int):
            extent = inputs.shape[self.dim]
            count, remainder = divmod(extent, splits)
            if remainder:
                # Plain floor division would silently drop the last, shorter chunk.
                splits = [splits] * count + [remainder]
            else:
                splits = count

        return ops.split(inputs, splits=splits, axis=self.dim)
41 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/ops/qat_transpose.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import torch
 5 | from ..qatlogger import QAT_ERROR
 6 | from ..qatregister import register_operator
 7 | from ..config import QATConfig
 8 | from .qat_base_operator import QBaseOperator, check_args
 9 | 
10 | 
@register_operator()
class QTranspose(QBaseOperator):
    """QAT operator that permutes tensor dimensions and fake-quantizes the result.

    ``_use_input_QConfig`` is set to True at construction; the activation quantization
    info is read once from ``QATConfig`` under the key ``'activation_qinfo'``.
    """

    def __init__(self,
                 name,
                 perm,
                 dtype=None) -> None:
        """Store the permutation.

        :param name: operator name, forwarded to the base class.
        :param perm: dimension order later handed to ``torch.permute``.
        :param dtype: forwarded to the base class.
        """
        super().__init__(dtype, name=name)
        self.perm = perm
        self._use_input_QConfig = True
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    def forward(self, inputs, perm=None):
        """Permute ``inputs`` and fake-quantize the result.

        A non-None ``perm`` replaces the stored permutation (and stays stored afterwards).
        An error is logged through ``QAT_ERROR`` when the stored permutation is empty and
        no override is given; execution continues after the log call.
        """
        stored_perm_is_empty = len(self.perm) == 0
        if stored_perm_is_empty and perm is None:
            QAT_ERROR("QTranspose meets the len(perm) == 0.")
        self.perm = self.perm if perm is None else perm
        permuted = torch.permute(inputs, self.perm)
        return self.fake_quant(permuted, self.activation_qinfo)

    def serialize(self, inputs):
        """Emit the equivalent ``ops.transpose`` call using the stored permutation as a list."""
        from AIPUBuilder import ops
        return ops.transpose(inputs, list(self.perm))
33 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/plugin/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | 
5 | from .aipubt_train_resnet50 import ResNet50TrainLoop
6 | from .aipubt_train_mbv3 import MobilenetV3TrainLoop
7 | from .aipubt_train_vitb16 import ViTB16TrainLoop
8 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/qatlogger.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.logger import OPT_INFO, OPT_ERROR, OPT_WARN, OPT_DEBUG, OPT_FATAL
 6 | 
 7 | 
def QAT_INFO(*args, **kwargs):
    """Forward all arguments to ``OPT_INFO`` with ``prefix_header='[QAT]'`` added.

    Passing ``prefix_header`` in ``kwargs`` raises ``TypeError`` (duplicate keyword).
    """
    OPT_INFO(*args, **kwargs, prefix_header='[QAT]')
10 | 
11 | 
def QAT_ERROR(*args, **kwargs):
    """Forward all arguments to ``OPT_ERROR`` with ``prefix_header='[QAT]'`` added.

    Passing ``prefix_header`` in ``kwargs`` raises ``TypeError`` (duplicate keyword).
    """
    OPT_ERROR(*args, **kwargs, prefix_header='[QAT]')
14 | 
15 | 
def QAT_WARN(*args, **kwargs):
    """Forward all arguments to ``OPT_WARN`` with ``prefix_header='[QAT]'`` added.

    Passing ``prefix_header`` in ``kwargs`` raises ``TypeError`` (duplicate keyword).
    """
    OPT_WARN(*args, **kwargs, prefix_header='[QAT]')
18 | 
19 | 
def QAT_DEBUG(*args, **kwargs):
    """Forward all arguments to ``OPT_DEBUG`` with ``prefix_header='[QAT]'`` added.

    Passing ``prefix_header`` in ``kwargs`` raises ``TypeError`` (duplicate keyword).
    """
    OPT_DEBUG(*args, **kwargs, prefix_header='[QAT]')
22 | 
23 | 
def QAT_FATAL(*args, **kwargs):
    """Forward all arguments to ``OPT_FATAL`` with ``prefix_header='[QAT]'`` added.

    Passing ``prefix_header`` in ``kwargs`` raises ``TypeError`` (duplicate keyword).
    """
    OPT_FATAL(*args, **kwargs, prefix_header='[QAT]')
26 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/qatregister.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import abc
 5 | from collections import OrderedDict
 6 | from .qatlogger import QAT_WARN
 7 | 
 8 | QAT_FUSION_PATTERNS = OrderedDict()
 9 | QAT_COMPASS_OPERATORS = OrderedDict()
10 | 
11 | 
def register_fusion_pattern(pattern):
    """Return a decorator that records the decorated object under ``pattern``.

    The object is stored in ``QAT_FUSION_PATTERNS[pattern]`` and returned unchanged.
    If ``pattern`` is already registered a warning is logged and the entry is replaced.
    """
    def decorator(fn):
        already_registered = pattern in QAT_FUSION_PATTERNS
        if already_registered:
            QAT_WARN(f"QAT Pattern {pattern} has already registered, and will be overwritten")
        QAT_FUSION_PATTERNS[pattern] = fn
        return fn

    return decorator
19 | 
20 | 
def register_operator(optype=None):
    """Return a decorator that records the decorated object in ``QAT_COMPASS_OPERATORS``.

    The decorated object itself is the key; the stored value is ``optype``, or ``True``
    when ``optype`` is None. Registering the same object again logs a warning and
    replaces the stored value. The object is returned unchanged.
    """
    def decorator(fn):
        if fn in QAT_COMPASS_OPERATORS:
            QAT_WARN(f"QAT compass operater({fn}) has already registered, and will be overwritten")
        QAT_COMPASS_OPERATORS[fn] = True if optype is None else optype
        return fn

    return decorator
31 | 
32 | 
def get_default_fusion_patterns():
    """Return the module-level ``QAT_FUSION_PATTERNS`` registry itself (not a copy)."""
    return QAT_FUSION_PATTERNS
35 | 
36 | 
def get_compass_supported_operators():
    """Return the keys of ``QAT_COMPASS_OPERATORS`` as a tuple, in registration order."""
    return tuple(QAT_COMPASS_OPERATORS)
40 | 
41 | 
class QATBaseTrainLoop(object):
    """Base class for train-loop callables; ``__call__`` is a no-op here."""

    def set_stage(self, model, stage='qat'):
        """Set ``quant_stage`` on every registered QAT operator found in ``model``.

        :param model: object whose ``modules()`` iterator is walked.
        :param stage: stage name, converted with ``QuantStage.str_to_quantstage``.
        """
        from .qinfo import QuantStage
        registered_ops = tuple(QAT_COMPASS_OPERATORS)
        for module in model.modules():
            if not isinstance(module, registered_ops):
                continue
            module.quant_stage = QuantStage.str_to_quantstage(stage)

    def __call__(self, model, *args, **kwargs):
        """Do nothing; presumably overridden by concrete train loops - confirm against subclasses."""
        pass
54 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/quantizer/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from .pytorchquantizer import PytorchQuantizer
5 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/quantizer/basequantizer.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | class QATBaseQuantizer(object):
 9 | 
10 |     def __init__(self, config, *args, **kwargs):
11 |         self.config = config
12 |         self.ir_mode = ""
13 | 
14 |     def forward(self, model, input):
15 |         output = model(*input)
16 |         if isinstance(output, torch.Tensor):
17 |             output = [output]
18 |         return output
19 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
4 | from .common_utils import convert2tuple
5 | from .cmp import check_result
6 | from .extra_params import *
7 | from .fuser_utils import is_match, replace_node_module
8 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/utils/cmp.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import numpy as np
 5 | from AIPUBuilder.Optimizer.utils import cosine_distance
 6 | from ..qatlogger import QAT_INFO
 7 | 
 8 | 
 9 | def check_result(actual, desired):
10 |     assert len(actual) == len(desired), "actual: %d vs desired %d" % (
11 |         len(actual),
12 |         len(desired),
13 |     )
14 | 
15 |     ret = True
16 |     for idx in range(len(actual)):
17 |         cos = cosine_distance(actual[idx], desired[idx])
18 |         QAT_INFO(f"cosine distance of {idx} output: {cos}")
19 |         # ret = np.testing.assert_allclose(
20 |         #     actual[idx].detach().cpu().numpy(),
21 |         #     desired[idx].detach().cpu().numpy(),
22 |         #     rtol=1e-5,
23 |         #     atol=1e-5) and ret
24 | 
25 |     return ret
26 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/qat/src/utils/common_utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 
def convert2tuple(input):
    """Return ``input`` unchanged if it is a tuple, otherwise the pair ``(input, input)``."""
    if isinstance(input, tuple):
        return input
    return (input, input)
7 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/scripts/git_hooks/pre-commit:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # A hook script to verify and check what is about to be committed.
 4 | # Called by "git commit" with no arguments.
 5 | # The hook exits with non-zero status and stops the commit if extra
 6 | # changes are needed.
 7 | 
 8 | STYLE=$(git config --get hooks.clangformat.style)
 9 | if [ -n "${STYLE}" ] ; then
10 |   STYLEARG="-style=${STYLE}"
11 | else
12 |   STYLEARG=""
13 | fi
14 | 
15 | format_file() {
16 |   file="${1}"
17 |   if [ -f $file ]; then
18 |     clang-format -i ${STYLEARG} ${1}
19 |     git add ${1}
20 |   fi
21 | }
22 | 
23 | current_workspace=`pwd`
24 | hook_failed=0
25 | case "${1}" in
26 |   --about )
27 |     echo "Runs clang-format on source files"
28 |     ;;
29 |   * )
30 |     # Only check the files in status A(added), C(copied) and M(modified).
31 |     # Ignore D(deleted), U(unmerged), T(type changed), R(renamed) and X(unknown).
32 |     files_to_check=`git diff-index --cached --name-only --diff-filter=ACM HEAD`
33 |     for file in ${files_to_check} ; do
34 |       # Convert to lower case
35 |       lc_file=${file,,}
36 |       if [[ ${lc_file} =~ .*\.(cpp|cc|h|hpp) ]]
37 |       then
38 |         format_file "${file}"
39 |       elif [[ ${lc_file} =~ .*\.py ]]
40 |       then
41 |         # Call format_pyfile to check format of Parser python code
42 |         basedir=`dirname $0`
43 |         ${basedir}/pre-commit.d/format_pyfile "${current_workspace}/${file}"
44 |         exit_status=$?
45 |         if [[ ${exit_status} != 0 ]]
46 |         then
47 |           hook_failed=1
48 |         fi
49 |       fi
50 |     done
51 |     ;;
52 | esac
53 | 
54 | if [[ ${hook_failed} != 0 ]]
55 | then
56 |   echo "----------- pre-commit hook failed -----------"
57 |   exit 1
58 | fi
59 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/scripts/git_hooks/pre-commit.d/format_pyfile:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # The script will use autopep8 to scan and format python files.
 4 | #
 5 | # Usage:
 6 | # format_pyfile file-to-check
 7 | #
 8 | # Arguments:
 9 | # - file-to-check: the path of one file or paths of multiple files
10 | #                  that need to be checked and reformatted.
11 | # Returns:
12 | #   0 means pass checking and no differences;
13 | #   1 means error exit, which requires checking input file(s);
14 | #   2 means differences exist and file(s) has/have been reformatted.
15 | 
# Print the usage text. The script name is passed as an argument rather than
# embedded in the format string, so '%' or '\' in the path cannot corrupt it.
print_usage() {
  printf 'Usage:\n  %s file-to-check [file-to-check]\n\n' "$0"
}

failed_checking=0
failed_files=()

if [[ $# -eq 0 ]]
then
  printf 'Error: No input file(s) provided to %s\n\n' "$0"
  print_usage
  exit 1
fi

# Iterate over "$@" so that paths containing spaces stay single arguments.
for file in "$@"
do
  if [[ ! -f ${file} ]]
  then
    printf 'Error: %s is not a regular file\n\n' "${file}"
    print_usage
    exit 1
  fi

  # Use autopep8 to check code format for parser's python files
  printf 'autopep8: Checking %s ' "${file}"
  # Options of autopep8:
  #  --in-place: make changes to files in place
  #  --exit-code: return 2 if differences exist
  autopep8_output=$(autopep8 --verbose --in-place --exit-code --max-line-length=120 "${file}" 2>&1)
  exit_code=$?
  if [[ ${exit_code} = 2 ]]
  then
    printf '[modified]\n'
    failed_files+=("${file}")
    failed_checking=1
  elif [[ ${exit_code} = 0 ]]
  then
    printf '[passed]\n'
  else
    # The tool output is passed as data (%s): it may contain '%' or backslashes.
    printf '[error]\nAdditional output of autopep8:\n%s\n\n' "${autopep8_output}"
    exit 1
  fi
done

if [[ ${failed_checking} = 1 ]]
then
  printf 'File(s) did not comply with code format.\n'
  printf 'Please add changes made by autopep8 for the following file(s):\n'
  for file in "${failed_files[@]}"
  do
    printf '    %s\n' "${file}"
  done
  printf '\n'
  exit 2
else
  exit 0
fi
70 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/scripts/git_hooks/pre-commit.d/python:
--------------------------------------------------------------------------------
 1 | #!/bin/python3
 2 | from __future__ import with_statement
 3 | import os
 4 | import re
 5 | import shutil
 6 | import subprocess
 7 | import sys
 8 | import tempfile
 9 | 
10 | 
def system(*args, **kwargs):
    """Run ``args`` as a subprocess and return what ``communicate()`` read from stdout.

    ``stdout`` defaults to ``subprocess.PIPE``; every keyword argument is forwarded to
    ``subprocess.Popen``. The return value is None when the caller redirects stdout
    elsewhere. Stderr output and the exit status are not returned.
    """
    if 'stdout' not in kwargs:
        kwargs['stdout'] = subprocess.PIPE
    captured, _ignored_err = subprocess.Popen(args, **kwargs).communicate()
    return captured
16 | 
17 | 
def main():
    """Run ``pep8`` over the staged version of every added/modified ``.py`` file.

    The staged content of each matching file (``git show :<name>``) is written into a
    temporary directory, ``pep8 .`` is run there, and any output it produces is printed
    before exiting with status 1. The temporary directory is removed even if one of the
    steps raises.
    """
    # Raw string so that '\s' and '\.' reach the regex engine unchanged; the trailing
    # '$' stops names such as "x.pyc" from being captured as "x.py".
    modified = re.compile(r'^[AM]+\s+(?P<name>.*\.py)$', re.MULTILINE)
    status = system('git', 'status', '--porcelain').decode("utf-8")
    files = modified.findall(status)

    tempdir = tempfile.mkdtemp()
    try:
        for name in files:
            filename = os.path.join(tempdir, name)
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, 'w') as f:
                system('git', 'show', ':' + name, stdout=f)
        output = system('pep8', '.', cwd=tempdir)
    finally:
        shutil.rmtree(tempdir)
    if output:
        # system() returns bytes; decode so the report is readable instead of b'...'.
        print(output.decode("utf-8", errors="replace"))
        sys.exit(1)
36 | 
37 | 
if __name__ == '__main__':
    # NOTE(review): the call to main() below is commented out, so executing this
    # script currently does nothing.
    pass
    # main()


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/scripts/install_git_hooks.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Install the repository's git hooks: copy everything under <script dir>/git_hooks
# into the hooks directory of the git repository the command is run from.

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Resolve the git directory first: if this fails (e.g. not inside a repository)
# the copy below must not fall back to the absolute path "/hooks/".
GIT_DIR_PATH=$(git rev-parse --git-dir) || {
  echo "error: not inside a git repository, hooks were not installed" >&2
  exit 1
}

echo "installing git hooks..."
mkdir -p "${GIT_DIR_PATH}/hooks" || exit 1
cp -r "${SCRIPT_DIR}"/git_hooks/* "${GIT_DIR_PATH}/hooks/" || exit 1
echo "git hooks setup done!"


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/model_test/squeezenet/calibration2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/model_test/squeezenet/calibration2.npy


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/model_test/squeezenet/opt.cfg:
--------------------------------------------------------------------------------
 1 | [Common]
 2 | #the paths for this model's IR
 3 | graph = ./squeezenet_s.txt
 4 | bin = ./squeezenet_s.bin
 5 | model_name = squeezenet_caffe
 6 | #the name of dataset plugin for this model's input dataset
 7 | #if omitted, will use all zeros as input data for executing forward
 8 | dataset = numpynhwcrgb2ncbgrhwdataset
 9 | #the path of dataset used for calibration during quantization
10 | #if omitted, will use all zeros as input data for executing calibration
11 | calibration_data = ./calibration2.npy
12 | #the batch_size used for calibration during quantization
13 | calibration_batch_size = 1
14 | #the name of metric plugins for computing accuracy metrics for this model
15 | #if omitted, will not compute accuracy metrics
16 | metric = TopKMetric
17 | #the path of dataset (and corresponding labels) used for computing accuracy metrics for this model
18 | #if omitted, will not compute accuracy metrics
19 | data = ./validation10.npy
20 | label = ./vlabel10.npy
21 | #the batch_size used for computing accuracy metrics for this model
22 | metric_batch_size = 2
23 | #the quantization method used for weights, default to 'per_tensor_symmetric_restricted_range'
24 | quantize_method_for_weight = per_channel_symmetric_restricted_range
25 | #the quantization method used for activations, default to 'per_tensor_symmetric_full_range'
26 | quantize_method_for_activation = per_tensor_asymmetric
27 | #the bits used for quantizing weight tensors, default to 8
28 | weight_bits = 8
29 | #the bits used for quantizing bias tensors, default to 32
30 | bias_bits = 32
31 | #the bits used for quantizing activation tensors, default to 8
32 | activation_bits = 8
33 | #Maximal LUT items (in bits, as only support LUT with 2**N items) amount when representing nonlinear functions in quantization, 
34 | #default to 8, suggest to set to 10+ when quantizing activations to 16bit
35 | lut_items_in_bits = 8
36 | #the output directory path, default to pwd
37 | output_dir = ./
38 | #the dataloader thread numbers for torch dataset, default to 0, 
39 | #which means multi-threading is not used to accelerate data loading
40 | dataloader_workers=4
41 | 
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/model_test/squeezenet/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | 
3 | export AIPUBUILDER_LOG=2
4 | export PYTHONPATH=../../../../../:$PYTHONPATH
5 | 
6 | python3 ../../../tools/optimizer_main.py --cfg ./opt.cfg
7 | 
8 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/model_test/squeezenet/squeezenet_s.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/model_test/squeezenet/squeezenet_s.bin


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/model_test/squeezenet/validation10.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/model_test/squeezenet/validation10.npy


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/model_test/squeezenet/vlabel10.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/model_test/squeezenet/vlabel10.npy


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/op_test/data.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/op_test/data.npy


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/op_test/label.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/op_test/label.npy


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/op_test/opt.cfg:
--------------------------------------------------------------------------------
 1 | [Common]
 2 | #the paths for this model's IR
 3 | graph = ./single_eltwise_1.txt
 4 | bin = ./single_eltwise_1.bin
 5 | model_name = single_eltwise_1
 6 | #the name of dataset plugin for this model's input dataset
 7 | #if omitted, will use all zeros as input data for executing forward
 8 | dataset = NumpyMultiInputDataset
 9 | #the path of dataset used for calibration during quantization
10 | #if omitted, will use all zeros as input data for executing calibration
11 | calibration_data = ./data.npy
12 | #the batch_size used for calibration during quantization
13 | calibration_batch_size = 1
14 | #the name of metric plugins for computing accuracy metrics for this model
15 | #if omitted, will not compute accuracy metrics
16 | metric = CosDistanceMetric,MaxAbsErrorMetric
17 | #the path of dataset (and corresponding labels) used for computing accuracy metrics for this model
18 | #if omitted, will not compute accuracy metrics
19 | data = ./data.npy
20 | label = ./label.npy
21 | #the batch_size used for computing accuracy metrics for this model
22 | metric_batch_size = 1
23 | #the quantization method used for weights, default to 'per_tensor_symmetric_restricted_range'
24 | quantize_method_for_weight = per_channel_symmetric_restricted_range
25 | #the quantization method used for activations, default to 'per_tensor_symmetric_full_range'
26 | quantize_method_for_activation = per_tensor_asymmetric
27 | #the bits used for quantizing weight tensors, default to 8
28 | weight_bits = 8
29 | #the bits used for quantizing bias tensors, default to 32
30 | bias_bits = 32
31 | #the bits used for quantizing activation tensors, default to 8
32 | activation_bits = 8
33 | #Maximal LUT items (in bits, as only support LUT with 2**N items) amount when representing nonlinear functions in quantization, 
34 | #default to 8, suggest to set to 10+ when quantizing activations to 16bit
35 | lut_items_in_bits = 8
36 | #the output directory path, default to pwd
37 | output_dir = ./
38 | #the dataloader thread numbers for torch dataset, default to 0, 
39 | #which means multi-threading is not used to accelerate data loading
40 | dataloader_workers=4
41 | 
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/op_test/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | 
3 | export AIPUBUILDER_LOG=2
4 | export PYTHONPATH=../../../../:$PYTHONPATH
5 | 
6 | python3 ../../tools/optimizer_main.py --cfg ./opt.cfg
7 | 
8 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/op_test/single_eltwise_1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/op_test/single_eltwise_1.bin


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/op_test/single_eltwise_1.txt:
--------------------------------------------------------------------------------
 1 | model_name=eltwise
 2 | layer_number=3
 3 | input_tensors=[Placeholder1,Placeholder2]
 4 | output_tensors=[eltwise]
 5 | 
 6 | layer_id=0
 7 | layer_name=Placeholder1
 8 | layer_type=Input
 9 | layer_bottom=[]
10 | layer_bottom_shape=[]
11 | layer_bottom_type=[]
12 | layer_top=[Placeholder1]
13 | layer_top_shape=[[1,4,4,16]]
14 | layer_top_type=[float32]
15 | 
16 | layer_id=1
17 | layer_name=Placeholder2
18 | layer_type=Input
19 | layer_bottom=[]
20 | layer_bottom_shape=[]
21 | layer_bottom_type=[]
22 | layer_top=[Placeholder2]
23 | layer_top_shape=[[1,4,4,16]]
24 | layer_top_type=[float32]
25 | 
26 | layer_id=2
27 | layer_name=eltwise
28 | layer_type=Eltwise
29 | layer_bottom=[Placeholder1,Placeholder2]
30 | layer_bottom_shape=[[1,4,4,16],[1,4,4,16]]
31 | layer_bottom_type=[float32,float32]
32 | layer_top=[eltwise]
33 | layer_top_shape=[[1,4,4,16]]
34 | layer_top_type=[float32]
35 | method=ADD
36 | with_activation=NONE
37 | 
38 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/plugin_test/aipubt_dataset_my_numpynhwcrgb2ncbgrhw.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.logger import *
 7 | from torch.utils.data import Dataset
 8 | import numpy as np
 9 | 
10 | 
@register_plugin(PluginType.Dataset, '2.0')
class MyNumpyNHWCRGB2NCBGRHWDataset(Dataset):
    """Example dataset plugin backed by ``.npy`` files.

    The data array is reversed along its last axis and its axes are reordered with
    ``[0, 3, 1, 2]`` at load time (per the class name: NHWC/RGB input to NCHW/BGR).
    """

    def __init__(self, data_file, label_file=None):
        '''
        :param data_file: ndarray in npy file.
        :param label_file: ndarray in npy file.
        '''
        OPT_INFO('Customized dataset plugin is enabled.')
        self.data = None
        self.label = None
        try:
            # mmap_mode='c' is numpy's copy-on-write memory map mode.
            self.data = np.load(data_file, mmap_mode='c')
            # Reverse the last axis, then move it to position 1.
            self.data = np.flip(self.data, -1).copy()
            self.data = np.transpose(self.data, [0, 3, 1, 2])
        except Exception:
            OPT_FATAL('the data of MyNumpyNHWCRGB2NCBGRHWDataset plugin should be Numpy.ndarray and allow_pickle=False.')
        if label_file is None:
            return
        try:
            self.label = np.load(label_file, mmap_mode='c')
        except ValueError:
            # Retry with pickle support when the memory-mapped load raises ValueError.
            self.label = np.load(label_file, allow_pickle=True)

    def __len__(self):
        """Return the length of the loaded data array."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[[data[idx]], label]``; the label is ``-inf`` when no labels were loaded."""
        item = self.data[idx]
        label = float("-inf") if self.label is None else self.label[idx]
        return [[item], label]
41 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/plugin_test/aipubt_metric_my_topk.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.logger import *
 7 | import torch
 8 | 
 9 | 
@register_plugin(PluginType.Metric, '2.0')
class MyTopKMetric(OptBaseMetric):
    """Example metric plugin that accumulates top-K accuracy over batches."""

    def __init__(self, K='1'):
        """Start with zeroed counters; ``K`` is converted with ``int``."""
        self.K = int(K)
        self.total = 0
        self.correct = 0
        OPT_INFO(f'Customized metric plugin is enabled. k={K}')

    def __call__(self, pred, target):
        """Update the counters from one batch.

        ``pred[0]`` is flattened to ``(pred[0].shape[0], -1)`` and its top-K indices along
        the last dimension are compared with ``target`` element by element.
        """
        scores = pred[0]
        flat_scores = scores.reshape([scores.shape[0], -1])  # NHWC
        _, topk_indices = torch.topk(flat_scores, self.K, dim=-1)
        sample_count = target.numel()
        for i in range(sample_count):
            if target[i] in topk_indices[i]:
                self.correct += 1
        self.total += sample_count

    def reset(self):
        """Zero both counters."""
        self.total = 0
        self.correct = 0

    def compute(self):
        """Return ``correct / total``; log an error and return ``-inf`` when total is zero."""
        try:
            return float(self.correct) / float(self.total)
        except ZeroDivisionError:
            OPT_ERROR('zeroDivisionError: Topk acc total label = 0')
            return float("-inf")

    def report(self):
        """Return a one-line summary of the current accuracy."""
        k = self.K
        accuracy = self.compute()
        return "top-%d accuracy is %f" % (k, accuracy)
39 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/plugin_test/aipubt_op_my_softmax.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | 
 5 | from AIPUBuilder.Optimizer.framework import *
 6 | from AIPUBuilder.Optimizer.logger import *
 7 | from AIPUBuilder.Optimizer.ops.softmax import softmax, softmax_quantize
 8 | 
 9 | # for optype out of IR guide's definition, use register_optype(xxtype_name_string) to register optype firstly
10 | # register_optype('Softmax')
11 | 
12 | 
@op_register(OpType.Softmax, version='2.0')
def my_softmax(self, *args):
    """Forward function registered for ``OpType.Softmax``.

    Logs that the customized forward is active, then delegates to the built-in
    ``softmax`` and returns its result.
    """
    OPT_INFO('Customized OP forward function is enabled.')
    return softmax(self, *args)
17 | 
18 | 
@quant_register(OpType.Softmax, version='2.0')
def my_softmax_quantize(self, *args):
    """Quantize function registered for ``OpType.Softmax``.

    Logs that the customized quantize function is active, then dispatches on the size
    of the first input along the node's ``axis`` parameter: sizes below 8 go to
    ``customized_softmax_quantize_func``, everything else to the built-in
    ``softmax_quantize``.
    """
    OPT_INFO('Customized OP quantize function is enabled.')
    inp = self.inputs[0]
    axis = self.get_param('axis')
    shape_value_in_axis = inp.betensor.shape[axis]
    # NOTE(review): 8 looks like an example threshold for this demo plugin - confirm.
    if shape_value_in_axis < 8:
        customized_softmax_quantize_func(self, *args)
    else:
        softmax_quantize(self, *args)
30 | 
31 | 
def customized_softmax_quantize_func(self, *args):
    """Customization hook; currently just delegates to the built-in ``softmax_quantize``."""
    softmax_quantize(self, *args)
34 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/plugin_test/opt.cfg:
--------------------------------------------------------------------------------
 1 | [Common]
 2 | #the paths for this model's IR
 3 | graph = ../model_test/squeezenet/squeezenet_s.txt
 4 | bin = ../model_test/squeezenet/squeezenet_s.bin
 5 | model_name = squeezenet_caffe
 6 | #the name of dataset plugin for this model's input dataset
 7 | #if omitted, will use all zeros as input data for executing forward
 8 | dataset = MyNumpyNHWCRGB2NCBGRHWDataset
 9 | #the path of dataset used for calibration during quantization
10 | #if omitted, will use all zeros as input data for executing calibration
11 | calibration_data = ../model_test/squeezenet/calibration2.npy
12 | #the batch_size used for calibration during quantization
13 | calibration_batch_size = 1
14 | #the name of metric plugins for computing accuracy metrics for this model
15 | #if omitted, will not compute accuracy metrics
16 | metric = MyTopKMetric(1),MyTopKMetric(5)
17 | #the path of dataset (and corresponding labels) used for computing accuracy metrics for this model
18 | #if omitted, will not compute accuracy metrics
19 | data = ../model_test/squeezenet/validation10.npy
20 | label = ../model_test/squeezenet/vlabel10.npy
21 | #the batch_size used for computing accuracy metrics for this model
22 | metric_batch_size = 2
23 | #the quantization method used for weights, default to 'per_tensor_symmetric_restricted_range'
24 | quantize_method_for_weight = per_channel_symmetric_restricted_range
25 | #the quantization method used for activations, default to 'per_tensor_symmetric_full_range'
26 | quantize_method_for_activation = per_tensor_asymmetric
27 | #the bits used for quantizing weight tensors, default to 8
28 | weight_bits = 8
29 | #the bits used for quantizing bias tensors, default to 32
30 | bias_bits = 32
31 | #the bits used for quantizing activation tensors, default to 8
32 | activation_bits = 8
33 | #Maximal LUT items (in bits, as only support LUT with 2**N items) amount when representing nonlinear functions in quantization,
34 | #default to 8, suggest to set to 10+ when quantizing activations to 16bit
35 | lut_items_in_bits = 8
36 | #the output directory path, default to pwd
37 | output_dir = ./
38 | #the dataloader thread numbers for torch dataset, default to 0,
39 | #which means multi-threading is not used to accelerate data loading
40 | dataloader_workers=0
41 | 
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/test/plugin_test/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 | 
3 | export AIPUBUILDER_LOG=2
4 | export PYTHONPATH=../../../../:$PYTHONPATH
5 | export AIPUPLUGIN_PATH=./
6 | echo $AIPUPLUGIN_PATH
7 | python3 ../../tools/optimizer_main.py --cfg ./opt.cfg
8 | 
9 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/tools/optimizer_main.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import sys
 5 | from AIPUBuilder.Optimizer.plugins import *
 6 | try:
 7 |     from AIPUBuilder.Optimizer.plugins_internal import *
 8 | except:
 9 |     pass
10 | from AIPUBuilder.Optimizer.framework import *
11 | from AIPUBuilder.Optimizer.config import *
12 | from AIPUBuilder.Optimizer.optmaster import *
13 | from AIPUBuilder.Optimizer.logger import OPT_START, OPT_END
14 | 
15 | 
def OPT_WORK(argv):
    """
    Run one optimization job and return its report.

    The graph is parsed from `argv.graph` and `argv.bin`, an `OptMaster` is
    built from that graph together with `argv`, and the value produced by
    calling the `OptMaster` instance is returned.
    """
    quant_graph = QuantizeGraph.parse(argv.graph, argv.bin)
    return OptMaster(quant_graph, argv)()
21 | 
22 | 
def main():
    """
    Command-line entry point of the optimizer.

    Returns:
        0 on success. When `arg_parser` returns a bool instead of parsed
        arguments, 0 is returned for True and 1 for False.

    Raises:
        Any exception raised while running is re-raised after `OPT_END()` has
        been called without a report.
    """
    try:
        traverse_opt_plugins()
        argv = arg_parser(metric_dict=QUANTIZE_METRIC_DICT, dataset_dict=QUANTIZE_DATASET_DICT)
        if isinstance(argv, bool):
            # return 0/1 value for tvm calling the optimizer
            return 0 if argv else 1

        OPT_START(argv)
        report = OPT_WORK(argv)
        OPT_END(report)
        return 0
    except Exception:
        OPT_END()
        # Bare `raise` re-raises the active exception without adding an extra
        # traceback entry for this line.
        raise
37 | 
38 | 
if __name__ == '__main__':
    # Use the value returned by main() as the process exit status.
    sys.exit(main())
43 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.utils.dtype_utils import *
 5 | from AIPUBuilder.Optimizer.utils.quant_tool_utils import *
 6 | from AIPUBuilder.Optimizer.utils.files_utils import *
 7 | from AIPUBuilder.Optimizer.utils.math_utils import *
 8 | from AIPUBuilder.Optimizer.utils.string_utils import *
 9 | from AIPUBuilder.Optimizer.utils.random_utils import *
10 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/utils/files_utils.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import os
 5 | 
 6 | __all__ = ['make_path', 'make_dir_path']
 7 | 
 8 | 
 9 | def make_path(path):
10 |     dpath = os.path.dirname(path)
11 |     if dpath != '' and not os.path.exists(dpath):
12 |         os.makedirs(dpath)
13 |     return path
14 | 
15 | 
def make_dir_path(path):
    """
    Make sure the directory `path` exists, then return `path`.

    Missing intermediate directories are created as well. Nothing is done when
    something already exists at `path`.

    Args:
        path: the directory path that should exist.

    Returns:
        `path`, unchanged.
    """
    if not os.path.exists(path):
        # exist_ok=True: do not fail if the directory gets created by someone
        # else between the existence check above and this call.
        os.makedirs(path, exist_ok=True)
    return path
20 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/utils/passes_utils.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2024 Arm Technology (China) Co. Ltd.
 3 | 
 4 | import functools
 5 | from AIPUBuilder.Optimizer.framework import PyGraph
 6 | from AIPUBuilder.Optimizer.logger import OPT_DEBUG
 7 | 
 8 | __all__ = ['passes_run', 'PASSES', 'ENABLE_PASSES']
 9 | 
# Every decorated pass: function name -> decorated (wrapped) callable.
PASSES = {}
# Passes that have run because their cfg switch was truthy: function name -> undecorated callable.
ENABLE_PASSES = {}


def passes_run(func):
    """
    Decorator gating a pass on the `enable_pass_<function name>` switch of the cfg object.

    The wrapped function only does something for two call shapes:
    `f(graph, cfg)` (two positional arguments) or `f(graph, name=cfg)` (one
    positional and exactly one keyword argument), where `graph` is a `PyGraph`
    and `cfg` is a `CfgParser`. For any other call shape the pass is not
    invoked at all.

    For a recognised call:
      * if the cfg has no `enable_pass_<name>` attribute, the pass always runs;
      * if the attribute exists and is truthy, the pass runs and is recorded
        in `ENABLE_PASSES`;
      * if the attribute exists and is falsy, the pass is skipped.

    The wrapper never returns the value produced by the pass (it always
    returns None). Each decorated function is recorded in `PASSES` under its
    name the first time that name is seen.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Imported at call time rather than at module import time.
        from AIPUBuilder.Optimizer.config import CfgParser

        # Locate the cfg object for the two supported call shapes.
        hparams = None
        if len(args) == 2:
            if isinstance(args[0], PyGraph) and isinstance(args[1], CfgParser):
                hparams = args[1]
        elif len(args) == 1 and len(kwargs) == 1 and isinstance(args[0], PyGraph):
            only_kwarg = list(kwargs.values())[0]
            if isinstance(only_kwarg, CfgParser):
                hparams = only_kwarg
        if hparams is None:
            return

        pass_name = func.__name__
        switch = f"enable_pass_{pass_name}"
        has_switch = hasattr(hparams, switch)
        if has_switch and not hparams.__getattr__(switch):
            # switch present but turned off
            return

        # Either a fixed enable pass (no switch, like shrink_pow_exponent) or a switched-on pass.
        func(*args, **kwargs)
        OPT_DEBUG(f"now run pass: {pass_name}")
        if has_switch:
            ENABLE_PASSES.setdefault(pass_name, func)

    PASSES.setdefault(func.__name__, wrapper)
    return wrapper
42 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/utils/string_utils.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | from AIPUBuilder.Optimizer.logger import *
 5 | 
 6 | 
 7 | def list_any_to_str(s):
 8 |     if isinstance(s, list):
 9 |         lstr = '['
10 |         for x in s:
11 |             lstr += str(x) + ','
12 |         if len(lstr) > 1:
13 |             lstr = lstr[:-1] + ']'
14 |         else:
15 |             lstr += ']'
16 |         return lstr
17 |     else:
18 |         return str(s)
19 | 
20 | 
def timestamp_string():
    """
    Build a string from the current timestamp and a random number.

    The result has the form '_<timestamp>_<random>_', where both parts are the
    `str()` of a float with every '.' replaced by '_'.
    """
    import random
    from datetime import datetime

    stamp = str(datetime.now().timestamp()).replace('.', '_')
    noise = str(random.random()).replace('.', '_')
    return f"_{stamp}_{noise}_"
25 | 
26 | 
def string_to_base_type(s: str):
    """
    Convert a textual value to bool, int or float when it looks like one.

    Surrounding whitespace is stripped first. Then, in this order:
      * 'true' / 'false' (any letter case) become True / False;
      * an optionally signed run of digits becomes an int;
      * an optionally signed decimal with at least one leading digit
        (e.g. '3', '3.', '3.5') becomes a float;
      * anything else is returned as the stripped string.

    Forms such as '.5' or '1e5' are not converted and come back as strings.

    Args:
        s: the text to convert.

    Returns:
        A bool, int, float, or the stripped string.
    """
    import re
    opt_v = s.strip()
    upper = opt_v.upper()
    if upper == "FALSE":
        return False
    if upper == "TRUE":
        return True
    # Raw strings keep `\d` and `\.` as regex escapes instead of (invalid)
    # string escape sequences.
    if re.fullmatch(r'[-+]?\d+', opt_v):
        return int(opt_v)
    if re.fullmatch(r'[-+]?[0-9]+\.?[0-9]*', opt_v):
        return float(opt_v)
    return opt_v
39 | 


--------------------------------------------------------------------------------
/AIPUBuilder/Optimizer/version.py:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: Apache-2.0
 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
 3 | 
 4 | #!/usr/bin/python3
 5 | # -*- coding: UTF-8 -*-
 6 | 
# Base version string.
__OPT_VERSION__ = '1.3'
__build_number__ = None     # placeholder for build script
# When a build number is set, append it to the base version as an extra
# dot-separated component (e.g. '1.3' -> '1.3.<build number>').
if __build_number__ is not None:
    __OPT_VERSION__ = __OPT_VERSION__+"."+str(__build_number__)
# Name string defined alongside the version.
__OPT_NAME__ = 'Compass-Optimizer'
12 | 


--------------------------------------------------------------------------------
/AIPUBuilder/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-License-Identifier: Apache-2.0
2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 


--------------------------------------------------------------------------------
/tutorial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/tutorial.pdf


--------------------------------------------------------------------------------