├── .gitignore ├── AIPUBuilder ├── Optimizer │ ├── __init__.py │ ├── analyzer │ │ ├── __init__.py │ │ ├── cosine.py │ │ └── running_time.py │ ├── config │ │ ├── __init__.py │ │ ├── cfg_fields.py │ │ └── parser.py │ ├── features │ │ ├── __init__.py │ │ ├── autosearch │ │ │ ├── __init__.py │ │ │ └── mixed_precision_naive_search.py │ │ ├── calibration │ │ │ ├── __init__.py │ │ │ ├── calibration.py │ │ │ ├── global_calibration │ │ │ │ ├── __init__.py │ │ │ │ ├── adaquant_zy.py │ │ │ │ ├── adaround.py │ │ │ │ ├── awq_zy.py │ │ │ │ ├── easy_quant.py │ │ │ │ ├── gptq_zy.py │ │ │ │ ├── mvn_correction.py │ │ │ │ ├── smooth_quant_zy.py │ │ │ │ └── svd_based_quant.py │ │ │ └── local_calibration │ │ │ │ ├── __init__.py │ │ │ │ ├── aciq_gauss.py │ │ │ │ ├── aciq_laplace.py │ │ │ │ ├── extrema.py │ │ │ │ ├── in_ir.py │ │ │ │ ├── kld.py │ │ │ │ ├── mean.py │ │ │ │ ├── nstd.py │ │ │ │ ├── percentile.py │ │ │ │ └── weighted_scale_param.py │ │ └── imagetiling │ │ │ ├── __init__.py │ │ │ └── image_tiling.py │ ├── framework │ │ ├── __init__.py │ │ ├── opt_register.py │ │ ├── pycore │ │ │ ├── __init__.py │ │ │ ├── pygraph.py │ │ │ ├── pyir.py │ │ │ ├── pynode.py │ │ │ ├── pytensor.py │ │ │ └── pytype.py │ │ └── qgraph.py │ ├── logger │ │ ├── __init__.py │ │ ├── aipu_logger.py │ │ ├── opt_log_management.py │ │ └── opt_logger.py │ ├── ops │ │ ├── LRN.py │ │ ├── __init__.py │ │ ├── abs.py │ │ ├── accidentalhits.py │ │ ├── acos.py │ │ ├── acosh.py │ │ ├── activation.py │ │ ├── adativepool.py │ │ ├── add.py │ │ ├── affine_grid.py │ │ ├── argminmax.py │ │ ├── asin.py │ │ ├── asinh.py │ │ ├── atan.py │ │ ├── atanh.py │ │ ├── basiclstm.py │ │ ├── batchtodepth.py │ │ ├── batchtospace.py │ │ ├── batchtospaceNd.py │ │ ├── bias_add.py │ │ ├── bitshift.py │ │ ├── bitwise.py │ │ ├── bn.py │ │ ├── bnll.py │ │ ├── boundingbox.py │ │ ├── cacheupdate.py │ │ ├── cast.py │ │ ├── ceil.py │ │ ├── celu.py │ │ ├── channelshuffle.py │ │ ├── clip.py │ │ ├── col2im.py │ │ ├── collapse_repeated.py │ │ ├── 
compress.py │ │ ├── concat.py │ │ ├── constant.py │ │ ├── control_op.py │ │ ├── conv.py │ │ ├── conv2d_integer.py │ │ ├── conv3d.py │ │ ├── convtranspose3d.py │ │ ├── convwinograd.py │ │ ├── cosh.py │ │ ├── cosine.py │ │ ├── count.py │ │ ├── crelu.py │ │ ├── crop.py │ │ ├── crop_and_resize.py │ │ ├── ctcgreedydecoder.py │ │ ├── cumulate.py │ │ ├── datastride.py │ │ ├── decodebox.py │ │ ├── deconv.py │ │ ├── depthtospace.py │ │ ├── depthwiseconv.py │ │ ├── dequantize.py │ │ ├── detectionoutput.py │ │ ├── dilation2d.py │ │ ├── div.py │ │ ├── div_mod.py │ │ ├── dummy.py │ │ ├── eltwise.py │ │ ├── elu.py │ │ ├── embedding_lookup_sparse.py │ │ ├── erf.py │ │ ├── erosion2d.py │ │ ├── exp.py │ │ ├── fake_quant_with_minmax_vars.py │ │ ├── fc.py │ │ ├── filter.py │ │ ├── filterbox.py │ │ ├── floor.py │ │ ├── fractionalpool.py │ │ ├── gather.py │ │ ├── gather_elements.py │ │ ├── gather_nd.py │ │ ├── gelu.py │ │ ├── gemm.py │ │ ├── generateproposal.py │ │ ├── get_valid_count.py │ │ ├── globalpooling.py │ │ ├── grid_sample.py │ │ ├── groupnorm.py │ │ ├── gruv1.py │ │ ├── gruv3.py │ │ ├── hardmax.py │ │ ├── hardsigmoid.py │ │ ├── hardswish.py │ │ ├── heatmapMaxkeypoint.py │ │ ├── inp.py │ │ ├── instancenorm.py │ │ ├── interp.py │ │ ├── intopk.py │ │ ├── isinf.py │ │ ├── isnan.py │ │ ├── layernorm.py │ │ ├── leakyrelu.py │ │ ├── log.py │ │ ├── logical.py │ │ ├── logsoftmax.py │ │ ├── lpnormalization.py │ │ ├── matmul.py │ │ ├── matmul_integer.py │ │ ├── maxpooling_withargmax.py │ │ ├── maxroipooling.py │ │ ├── maxunpool.py │ │ ├── meshgrid.py │ │ ├── mish.py │ │ ├── mod.py │ │ ├── moments.py │ │ ├── mul.py │ │ ├── multibox_transform_Loc.py │ │ ├── mvn.py │ │ ├── neg.py │ │ ├── nms.py │ │ ├── nonzero.py │ │ ├── noop.py │ │ ├── normal_moments.py │ │ ├── onehot.py │ │ ├── overlapadd.py │ │ ├── pad.py │ │ ├── permute.py │ │ ├── pooling.py │ │ ├── pooling3D.py │ │ ├── postnms1.py │ │ ├── postnms2.py │ │ ├── pow.py │ │ ├── prelu.py │ │ ├── proposal.py │ │ ├── pyramidroi.py │ │ ├── 
quantize.py │ │ ├── queryrebatch.py │ │ ├── reciprocal.py │ │ ├── reduce.py │ │ ├── region.py │ │ ├── regionfuse.py │ │ ├── relu.py │ │ ├── relu6.py │ │ ├── relu_family.py │ │ ├── repeat.py │ │ ├── reshape.py │ │ ├── resize.py │ │ ├── reversesequence.py │ │ ├── rgb2yuv.py │ │ ├── rms_norm.py │ │ ├── rnn.py │ │ ├── roialign.py │ │ ├── roipooling.py │ │ ├── round.py │ │ ├── rsqrt.py │ │ ├── scatter_elements.py │ │ ├── scatter_nd.py │ │ ├── segment_reduce.py │ │ ├── select.py │ │ ├── selu.py │ │ ├── shrink.py │ │ ├── sigmoid.py │ │ ├── sign.py │ │ ├── silu.py │ │ ├── sine.py │ │ ├── sinh.py │ │ ├── slice_operator.py │ │ ├── slotupdate.py │ │ ├── softmax.py │ │ ├── softplus.py │ │ ├── softsign.py │ │ ├── sort.py │ │ ├── spacetobatch.py │ │ ├── spacetodepth.py │ │ ├── split.py │ │ ├── sqrt.py │ │ ├── square.py │ │ ├── squared_difference.py │ │ ├── squeeze.py │ │ ├── stridedslice.py │ │ ├── sub.py │ │ ├── sufficientStatistics.py │ │ ├── swish.py │ │ ├── tan.py │ │ ├── tanh.py │ │ ├── tf_ops │ │ │ ├── __init__.py │ │ │ └── conv2d.py │ │ ├── thresholdrelu.py │ │ ├── tile.py │ │ ├── topk.py │ │ ├── transpose.py │ │ ├── trunc.py │ │ ├── unidirectional_rnn.py │ │ ├── unique.py │ │ ├── upsamplebyindex.py │ │ ├── where.py │ │ ├── yuv2rgb.py │ │ └── zerofraction.py │ ├── optmaster.py │ ├── passes │ │ ├── __init__.py │ │ ├── absorb_cast_to_clip.py │ │ ├── batch_modifications.py │ │ ├── check_quantization_info_s1.py │ │ ├── convert_resize_to_convolution.py │ │ ├── decompose_nonmonotonic_activations_s1.py │ │ ├── detect_inf_mask_nodes.py │ │ ├── eliminate_op.py │ │ ├── global_calibration_prepare.py │ │ ├── insert_op.py │ │ ├── merge_inserted_op.py │ │ ├── merge_matmul_mul_s1.py │ │ ├── optimize_x2_wdc.py │ │ ├── passes.py │ │ ├── set_unquantifiable.py │ │ ├── shrink_pow_exponent_s1.py │ │ ├── split_act_perchannel_matmul_s1.py │ │ ├── split_qkv_fc_s1.py │ │ ├── transfer_op_to_reshape_op_s3.py │ │ ├── tune_op_extra_params_s1.py │ │ └── unify_scales_for_multi_inputs_operator.py │ ├── 
plugins │ │ ├── __init__.py │ │ ├── aipubt_dataset_NumpyZipped.py │ │ ├── aipubt_dataset_OpTestNumpyZipped.py │ │ ├── aipubt_dataset_aishell.py │ │ ├── aipubt_dataset_bevformer.py │ │ ├── aipubt_dataset_bevformer_static.py │ │ ├── aipubt_dataset_cgtdnn.py │ │ ├── aipubt_dataset_coco.py │ │ ├── aipubt_dataset_cocokp.py │ │ ├── aipubt_dataset_fasterrcnnvoc.py │ │ ├── aipubt_dataset_generaldict.py │ │ ├── aipubt_dataset_iwslt.py │ │ ├── aipubt_dataset_librispeech.py │ │ ├── aipubt_dataset_llama2.py │ │ ├── aipubt_dataset_mpii.py │ │ ├── aipubt_dataset_mtcnn.py │ │ ├── aipubt_dataset_nhwcrgb2nhwcbgr.py │ │ ├── aipubt_dataset_numpy.py │ │ ├── aipubt_dataset_numpymultiinput.py │ │ ├── aipubt_dataset_numpymultiinputNCHW.py │ │ ├── aipubt_dataset_numpymultiinputwithoutbatchdim.py │ │ ├── aipubt_dataset_numpynchw2nhwc.py │ │ ├── aipubt_dataset_numpynhwc2nchw.py │ │ ├── aipubt_dataset_numpynhwcrgb2ncbgrhw.py │ │ ├── aipubt_dataset_numpywithdim.py │ │ ├── aipubt_dataset_random.py │ │ ├── aipubt_dataset_sphereface_lfw.py │ │ ├── aipubt_dataset_stable_diffusion_unet.py │ │ ├── aipubt_dataset_tensorfromnumpymultiinput.py │ │ ├── aipubt_dataset_tusimple.py │ │ ├── aipubt_dataset_vocnchw.py │ │ ├── aipubt_dataset_vocnhwc.py │ │ ├── aipubt_dataset_widerface.py │ │ ├── aipubt_metric_CosDistance.py │ │ ├── aipubt_metric_CosDistance_with_seqlen.py │ │ ├── aipubt_metric_Detr_mAP.py │ │ ├── aipubt_metric_EachCosDistance.py │ │ ├── aipubt_metric_FlattenCosDistance.py │ │ ├── aipubt_metric_IWSLT_BLEU.py │ │ ├── aipubt_metric_IWSLT_BLEU_2_gram.py │ │ ├── aipubt_metric_KeywordSpotting.py │ │ ├── aipubt_metric_LMHead.py │ │ ├── aipubt_metric_MaskRcnnCOCOmAP.py │ │ ├── aipubt_metric_MaxAbsError.py │ │ ├── aipubt_metric_MaxAbsError_with_seqlen.py │ │ ├── aipubt_metric_Ocr.py │ │ ├── aipubt_metric_OpTestCosDistance.py │ │ ├── aipubt_metric_RMSE.py │ │ ├── aipubt_metric_SSDmAP.py │ │ ├── aipubt_metric_WER.py │ │ ├── aipubt_metric_YOLOmAP.py │ │ ├── aipubt_metric_bevformer.py │ │ ├── 
aipubt_metric_centerface.py │ │ ├── aipubt_metric_centernet.py │ │ ├── aipubt_metric_cocokeypoint.py │ │ ├── aipubt_metric_delta1.py │ │ ├── aipubt_metric_f1mesure.py │ │ ├── aipubt_metric_facebox.py │ │ ├── aipubt_metric_fasterrcnnmAP.py │ │ ├── aipubt_metric_fcos_mAP.py │ │ ├── aipubt_metric_imdb.py │ │ ├── aipubt_metric_lightface.py │ │ ├── aipubt_metric_mAP.py │ │ ├── aipubt_metric_mIoU.py │ │ ├── aipubt_metric_mobiledetSSDmAP.py │ │ ├── aipubt_metric_pckh.py │ │ ├── aipubt_metric_poly_lanenet.py │ │ ├── aipubt_metric_psnr.py │ │ ├── aipubt_metric_retinafacebox.py │ │ ├── aipubt_metric_retinanetmAP.py │ │ ├── aipubt_metric_roc.py │ │ ├── aipubt_metric_sphereface.py │ │ ├── aipubt_metric_topk.py │ │ ├── aipubt_metric_widerface.py │ │ ├── aipubt_op_ssd_postprocess.py │ │ ├── aipubt_op_tile.py │ │ └── aipubt_qconfig_bevformer.py │ ├── qat │ │ ├── __init__.py │ │ ├── qatmain.py │ │ ├── readme.md │ │ ├── readme_CN.md │ │ └── src │ │ │ ├── __init__.py │ │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── config.py │ │ │ ├── fuser │ │ │ ├── __init__.py │ │ │ ├── concat_fuser.py │ │ │ ├── convolution_fuser.py │ │ │ ├── eltwise_fuser.py │ │ │ ├── expand_fuser.py │ │ │ ├── fullyconnected_fuser.py │ │ │ ├── gelu_fuser.py │ │ │ ├── hardsigmoid_fuser.py │ │ │ ├── hardswish_fuser.py │ │ │ ├── layernorm_fuser.py │ │ │ ├── mha_fuser.py │ │ │ ├── multiheadattention_fuser.py │ │ │ ├── pooling_fuser.py │ │ │ ├── reshape_fuser.py │ │ │ └── transpose_fuser.py │ │ │ ├── ops │ │ │ ├── __init__.py │ │ │ ├── qat_activation.py │ │ │ ├── qat_base_operator.py │ │ │ ├── qat_batchnorm.py │ │ │ ├── qat_concat.py │ │ │ ├── qat_constant.py │ │ │ ├── qat_convolution.py │ │ │ ├── qat_eltwise.py │ │ │ ├── qat_expand.py │ │ │ ├── qat_fullyconnected.py │ │ │ ├── qat_gelu.py │ │ │ ├── qat_hardsigmoid.py │ │ │ ├── qat_hardswish.py │ │ │ ├── qat_input.py │ │ │ ├── qat_layernorm.py │ │ │ ├── qat_matmul.py │ │ │ ├── qat_multiheadattention.py │ │ │ ├── qat_pooling.py │ │ │ ├── qat_reshape.py │ │ │ ├── 
qat_softmax.py │ │ │ ├── qat_split.py │ │ │ └── qat_transpose.py │ │ │ ├── plugin │ │ │ ├── __init__.py │ │ │ ├── aipubt_train_mbv3.py │ │ │ ├── aipubt_train_resnet50.py │ │ │ └── aipubt_train_vitb16.py │ │ │ ├── qatfield.py │ │ │ ├── qatlogger.py │ │ │ ├── qatmaster.py │ │ │ ├── qatregister.py │ │ │ ├── qinfo.py │ │ │ ├── quantizer │ │ │ ├── __init__.py │ │ │ ├── basequantizer.py │ │ │ └── pytorchquantizer.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── cmp.py │ │ │ ├── common_utils.py │ │ │ ├── extra_params.py │ │ │ └── fuser_utils.py │ ├── qtlib_optimize.py │ ├── scripts │ │ ├── git_hooks │ │ │ ├── pre-commit │ │ │ └── pre-commit.d │ │ │ │ ├── format_pyfile │ │ │ │ └── python │ │ └── install_git_hooks.sh │ ├── test │ │ ├── feature_test │ │ │ ├── compare_gt_and_opt_op_forward.py │ │ │ ├── test_betensor_detile.py │ │ │ ├── test_compass_ir_to_torch_module.py │ │ │ └── test_merge_insert_op.py │ │ ├── model_test │ │ │ └── squeezenet │ │ │ │ ├── calibration2.npy │ │ │ │ ├── opt.cfg │ │ │ │ ├── run.sh │ │ │ │ ├── squeezenet_s.bin │ │ │ │ ├── squeezenet_s.txt │ │ │ │ ├── validation10.npy │ │ │ │ └── vlabel10.npy │ │ ├── op_test │ │ │ ├── atan_test.py │ │ │ ├── data.npy │ │ │ ├── ds_reshape_test.py │ │ │ ├── label.npy │ │ │ ├── opt.cfg │ │ │ ├── run.sh │ │ │ ├── single_eltwise_1.bin │ │ │ ├── single_eltwise_1.txt │ │ │ └── softmax_test.py │ │ └── plugin_test │ │ │ ├── aipubt_dataset_my_numpynhwcrgb2ncbgrhw.py │ │ │ ├── aipubt_metric_my_topk.py │ │ │ ├── aipubt_op_my_softmax.py │ │ │ ├── opt.cfg │ │ │ └── run.sh │ ├── tools │ │ ├── __init__.py │ │ ├── generate_plugin_template.py │ │ ├── opt_forward_main.py │ │ ├── optimizer_forward.py │ │ └── optimizer_main.py │ ├── utils │ │ ├── __init__.py │ │ ├── dtype_utils.py │ │ ├── files_utils.py │ │ ├── math_utils.py │ │ ├── passes_utils.py │ │ ├── quant_tool_utils.py │ │ ├── random_utils.py │ │ └── string_utils.py │ └── version.py └── __init__.py ├── LICENSE ├── images ├── opt_flow.svg └── opt_uml.svg ├── readme.md ├── 
readme_CN.md └── tutorial.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .vscode/ 3 | .idea 4 | __pycache__ 5 | report.html 6 | *.swp 7 | report.txt 8 | cython 9 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from AIPUBuilder.Optimizer.optmaster import * 5 | from AIPUBuilder.Optimizer.utils import * 6 | from AIPUBuilder.Optimizer.ops import * 7 | from AIPUBuilder.Optimizer.logger import * 8 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/analyzer/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from . cosine import * 5 | from . running_time import * 6 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/analyzer/running_time.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 
def calculate_op_running_time(f_graph, q_graph):
    """Log per-layer forward times of the float and quantized graphs.

    For every node of ``q_graph`` the quantized ``cost_time`` attribute is paired
    with the ``cost_time`` of the float node that has the same name (0 when no
    such node exists, -1 when the float node has no ``cost_time`` attribute).
    Each pair is logged through ``OPT_DEBUG`` together with its share of the
    summed float / quantized time. Afterwards ``calculate_running_time`` is set
    to ``False`` on every node of both graphs.

    Args:
        f_graph: float graph; its ``nodes`` expose ``name`` and ``attrs``.
        q_graph: quantized graph; its ``nodes`` expose ``name``, ``type`` and
            ``attrs`` (with the keys ``layer_id`` and ``cost_time``).
    """
    from AIPUBuilder.Optimizer.logger import OPT_DEBUG

    # Index float nodes by name so quantized nodes can be matched to them.
    fnode_index = {node.name: idx for idx, node in enumerate(f_graph.nodes)}

    cost_times = {}
    for qnode in q_graph.nodes:
        # presumably [7:] strips an "OpType." prefix from the type's string form -- TODO confirm
        key = f"{qnode.attrs['layer_id']} {str(qnode.type)[7:]}"
        q_cost_time = qnode.attrs['cost_time']
        f_cost_time = 0
        if qnode.name in fnode_index:
            fnode = f_graph.nodes[fnode_index[qnode.name]]
            f_cost_time = fnode.attrs.get('cost_time', -1)
        cost_times[key] = [f_cost_time, q_cost_time]

    fall_times = sum(v[0] for v in cost_times.values())
    qall_times = sum(v[1] for v in cost_times.values())
    type_max_len = max((len(k) for k in cost_times), default=0)
    for k, v in cost_times.items():
        # A zero total (e.g. no quantized node matched a float node) would
        # otherwise raise ZeroDivisionError; report a 0% share instead.
        v.append(v[0] / fall_times * 100 if fall_times else 0.0)
        v.append(v[1] / qall_times * 100 if qall_times else 0.0)
        ostr = (f"layer_type={k:{type_max_len}} fp32_forward_time={v[0]:<8.6f}s, quant_forward_time={v[1]:<8.6f}s, "
                f"this_fp32/all_fp32={v[2]:<3.6f}%%, this_quant/all_quant={v[3]:<3.6f}%%")
        OPT_DEBUG(ostr)

    # disable to calculate op running time
    for graph in (f_graph, q_graph):
        for node in graph.nodes:
            node.attrs['calculate_running_time'] = False
cfg_fields import * 12 | 13 | 14 | DEFAULT_CONFIG_FILE = 'opt_template.json' 15 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/features/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from AIPUBuilder.Optimizer.features.autosearch import NaiveAutoSearchMixedPrecision 5 | from AIPUBuilder.Optimizer.features.calibration import apply_calibration_strategy, apply_global_calibration, statistic_and_calibration 6 | from AIPUBuilder.Optimizer.features.imagetiling import * 7 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/features/autosearch/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from . mixed_precision_naive_search import NaiveAutoSearchMixedPrecision 5 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/features/calibration/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from . calibration import apply_calibration_strategy, apply_global_calibration, statistic_and_calibration 5 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/features/calibration/global_calibration/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from . easy_quant import easy_quant_global_calibration 5 | from . adaround import adaround_global_calibration 6 | from . 
adaquant_zy import adaquant_zy_global_calibration 7 | from . svd_based_quant import svd_based_quant_global_calibration 8 | from . gptq_zy import gptq_zy_global_calibration 9 | from . smooth_quant_zy import smooth_quant_zy_global_calibration 10 | from . awq_zy import awq_zy_global_calibration 11 | from . mvn_correction import mvn_correction_global_calibration 12 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/features/calibration/local_calibration/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from . extrema import extrema_calibration 5 | from . in_ir import in_ir_calibration 6 | from . mean import mean_calibration 7 | from . kld import nkld_calibration 8 | from . nstd import nstd_calibration 9 | from . weighted_scale_param import weighted_scale_param_calibration 10 | from . aciq_laplace import aciq_laplace_calibration 11 | from . aciq_gauss import aciq_gauss_calibration 12 | from . percentile import percentile_calibration 13 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/features/calibration/local_calibration/extrema.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 
def extrema_calibration(t, *args):
    """Use the recorded extrema of ``t`` as its calibrated range.

    Copies ``t.extrema_min`` / ``t.extrema_max`` to ``t.min`` / ``t.max``. When
    ``t.extrema_min_key_axis`` is not ``None`` the per-key-axis extrema are
    copied to ``t.min_key_axis`` / ``t.max_key_axis`` too. Extra positional
    arguments are accepted and ignored.
    """
    t.min = t.extrema_min
    t.max = t.extrema_max
    if t.extrema_min_key_axis is None:
        return
    t.min_key_axis = t.extrema_min_key_axis
    t.max_key_axis = t.extrema_max_key_axis


def in_ir_calibration(t, *args):
    """Calibrate ``t`` from its extrema fields (same steps as ``extrema_calibration``).

    Copies ``t.extrema_min`` / ``t.extrema_max`` to ``t.min`` / ``t.max`` and,
    when ``t.extrema_min_key_axis`` is not ``None``, the key-axis extrema to
    ``t.min_key_axis`` / ``t.max_key_axis``. Extra positional arguments are
    accepted and ignored.
    """
    t.min = t.extrema_min
    t.max = t.extrema_max
    if t.extrema_min_key_axis is None:
        return
    t.min_key_axis = t.extrema_min_key_axis
    t.max_key_axis = t.extrema_max_key_axis


def mean_calibration(t, *args):
    """Use the running min/max statistics of ``t`` as its calibrated range.

    Copies ``t.running_min`` / ``t.running_max`` to ``t.min`` / ``t.max`` and,
    when ``t.running_min_key_axis`` is not ``None``, the key-axis running
    statistics to ``t.min_key_axis`` / ``t.max_key_axis``. Extra positional
    arguments are accepted and ignored.
    """
    t.min = t.running_min
    t.max = t.running_max
    if t.running_min_key_axis is None:
        return
    t.min_key_axis = t.running_min_key_axis
    t.max_key_axis = t.running_max_key_axis
def nstd_calibration(t, *args):
    """Clamp the calibrated range of ``t`` to ``mean +/- n * std``.

    ``args[0]`` is the strategy string; everything except its last three
    characters is parsed as the integer ``n`` (e.g. ``"3std"`` gives ``n == 3``).
    The resulting range never exceeds ``[t.running_min, t.running_max]``. When
    ``t.running_mean_key_axis`` is not ``None`` the same clamp is applied
    element-wise (via ``torch.max`` / ``torch.min``) to the key-axis statistics.

    Raises:
        ValueError: if the prefix of the strategy string is not an integer.
    """
    n = int(args[0][:-3])
    spread = n * t.running_std
    t.min = max(t.running_min, t.running_mean - spread)
    t.max = min(t.running_max, t.running_mean + spread)
    if t.running_mean_key_axis is not None:
        spread_key_axis = n * t.running_std_key_axis
        t.min_key_axis = torch.max(t.running_min_key_axis, t.running_mean_key_axis - spread_key_axis)
        t.max_key_axis = torch.min(t.running_max_key_axis, t.running_mean_key_axis + spread_key_axis)


def percentile_calibration(t, *args):
    """Scale the recorded extrema of ``t`` by the factor encoded in the strategy.

    ``args[0]`` is the strategy string; everything except its last ten
    characters is parsed as the float factor ``p`` (e.g. ``"0.5percentile"``
    gives ``p == 0.5``). If that prefix cannot be parsed, ``p`` falls back to
    ``1.0``. ``t.min`` / ``t.max`` are set to ``p`` times the extrema, and the
    key-axis extrema are scaled likewise when ``t.extrema_min_key_axis`` is not
    ``None``.
    """
    cstrategy = args[0]
    try:
        p = float(cstrategy[:-10])
    except (TypeError, ValueError):
        # Only parse failures select the default factor; a bare ``except`` would
        # also swallow KeyboardInterrupt / SystemExit.
        p = 1.0
    t.min = t.extrema_min * p
    t.max = t.extrema_max * p
    if t.extrema_min_key_axis is not None:
        t.min_key_axis = t.extrema_min_key_axis * p
        t.max_key_axis = t.extrema_max_key_axis * p
3 | 4 | 5 | from AIPUBuilder.Optimizer.framework.pycore import * 6 | from AIPUBuilder.Optimizer.framework.qgraph import * 7 | from AIPUBuilder.Optimizer.framework.opt_register import * 8 | from AIPUBuilder.Optimizer.logger import OPT_DEBUG, OPT_WARN, OPT_INFO, OPT_ERROR, OPT_FATAL 9 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/framework/pycore/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from AIPUBuilder.Optimizer.framework.pycore.pytype import * 5 | from AIPUBuilder.Optimizer.framework.pycore.pytensor import * 6 | from AIPUBuilder.Optimizer.framework.pycore.pynode import * 7 | from AIPUBuilder.Optimizer.framework.pycore.pygraph import * 8 | from AIPUBuilder.Optimizer.framework.pycore.pyir import * 9 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from . opt_logger import * 5 | from . opt_log_management import opt_workflow_register, OPT_START, OPT_END 6 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/ops/add.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 
@op_register(OpType.Add)
def add_forward(self, *args):
    """Forward of Add, delegated to the shared ``eltwise`` implementation.

    ``params['method']`` is set to ``'ADD'`` only for the duration of the call
    and removed again afterwards, also when ``eltwise`` raises.

    Returns:
        The ``betensor`` of the first output.
    """
    self.params['method'] = 'ADD'
    try:
        eltwise(self, *args)
    finally:
        # Never leave the temporary key behind on the node.
        self.params.pop('method')
    return self.outputs[0].betensor


@quant_register(OpType.Add)
def add_quantize(self, *args):
    """Quantization of Add, delegated to ``eltwise_quantize``.

    ``params['method']`` is set to ``'ADD'`` only for the duration of the call
    and removed again afterwards, also when ``eltwise_quantize`` raises.
    """
    self.params['method'] = 'ADD'
    try:
        eltwise_quantize(self, *args)
    finally:
        self.params.pop('method')


@op_register(OpType.ArgMinMax)
def argminmax(self, *args):
    """Forward of ArgMinMax: index of the max/min along ``axis`` (dimension kept).

    Reads the params ``axis``, ``method`` (``MAX`` or ``MIN``, case-insensitive)
    and ``select_last_index``. With ``select_last_index`` the input is flipped
    along ``axis`` before the search and the found index is mapped back.

    Returns:
        The ``betensor`` of the first output.
    """
    axis_ = self.get_param('axis')
    method_ = self.get_param("method").upper()
    select_last_index_ = self.get_param("select_last_index")
    if method_ not in ['MAX', 'MIN']:
        OPT_FATAL(f"please check method(now={method_}) in argminmax op, which only supports [MAX, MIN]")
    inp = self.inputs[0].betensor
    out = self.outputs[0]
    if select_last_index_:
        # Search the reversed tensor; the index is converted back below.
        inp = torch.flip(inp, dims=[axis_])

    if method_ == 'MAX':
        out.betensor = torch.argmax(inp, dim=axis_, keepdim=True)
    elif method_ == 'MIN':
        out.betensor = torch.argmin(inp, dim=axis_, keepdim=True)

    if select_last_index_:
        # Index i in the flipped tensor is index (size - 1 - i) in the original.
        out.betensor = inp.shape[axis_] - out.betensor - 1
    return out.betensor


@quant_register(OpType.ArgMinMax)
def argminmax_quantize(self, *args):
    """Quantization of ArgMinMax: the output holds indices, so it is not scaled.

    The output gets scale 1, zero point 0, at least 16 bits, and is marked
    ``qinvariant``. The dtype is signed only when ``self.force_dtype_int`` is
    truthy.
    """
    q_bits_activation = self.attrs["q_bits_activation"]
    out = self.outputs[0]
    out.scale = 1.
    out.zerop = 0
    out.qbits = max(16, q_bits_activation)
    out.dtype = bits2dtype(out.qbits, is_signed=False or self.force_dtype_int)
    out.qinvariant = True


@op_register(OpType.BatchToDepth)
def batchtodepth(self, *args):
    """Forward of BatchToDepth: fold ``block_size`` batch entries into channels.

    The 4-d input ``(n, h, w, c)`` is viewed as
    ``(n // block_size, block_size, h, w, c)``, the block dimension is moved
    next to the channels, and the result is viewed as
    ``(n // block_size, h, w, c * block_size)``. An error is logged when the IR
    batch size differs from ``self.current_batch_size``.

    Returns:
        The rearranged tensor, also stored as the first output's ``betensor``.
    """
    block_size_ = self.get_param('block_size')
    inp = self.inputs[0].betensor
    n, h, w, c = inp.shape
    if self.inputs[0].ir_shape[0] != self.current_batch_size:
        OPT_ERROR("batch size in calibratoin or metric dataset should be equal to batch size in IR")
    out_batch = n // block_size_
    y = inp.view(out_batch, block_size_, h, w, c).permute(0, 2, 3, 1, 4).contiguous()
    out = y.view(out_batch, h, w, c * block_size_)
    self.outputs[0].betensor = out
    return out


@quant_register(OpType.BatchToDepth)
def batchtodepth_quantize(self, *args):
    """Quantization of BatchToDepth: the output inherits the input's quantization fields."""
    inp = self.inputs[0]
    out = self.outputs[0]
    out.scale = inp.scale
    out.zerop = inp.zerop
    out.qbits = inp.qbits
    out.dtype = inp.dtype
    out.qinvariant = inp.qinvariant
@op_register(OpType.BatchToSpace)
def batchtospace(self, *args):
    """Forward of BatchToSpace: move batch blocks into the spatial dims, then crop.

    The 4-d input ``(n, h, w, c)`` is viewed as
    ``(block_size_y, block_size_x, n', h, w, c)`` with
    ``n' = n // (block_size_x * block_size_y)``, permuted to
    ``(n', h, block_size_y, w, block_size_x, c)`` and merged into
    ``(n', h * block_size_y, w * block_size_x, c)``. The ``crop_*`` params are
    then cut off the borders. An error is logged when the IR batch size differs
    from the batch size of the incoming tensor.

    Returns:
        The cropped tensor, also stored as the first output's ``betensor``.
    """
    block_size_x = self.get_param('block_size_x')
    block_size_y = self.get_param('block_size_y')
    crop_left = self.get_param('crop_left')
    crop_right = self.get_param('crop_right')
    crop_top = self.get_param('crop_top')
    crop_bottom = self.get_param('crop_bottom')

    inp = self.inputs[0].betensor
    n, h, w, c = inp.shape
    if self.inputs[0].ir_shape[0] != inp.shape[0]:
        OPT_ERROR("batch size in calibratoin or metric dataset should be equal to batch size in IR")
    out_batch = n // (block_size_x * block_size_y)
    full_h = h * block_size_y
    full_w = w * block_size_x
    y = inp.view(block_size_y, block_size_x, out_batch, h, w, c)
    y = y.permute(2, 3, 0, 4, 1, 5).contiguous()
    y = y.view(out_batch, full_h, full_w, c)
    out = y[:, crop_top:full_h - crop_bottom, crop_left:full_w - crop_right, :]
    self.outputs[0].betensor = out
    return out


@quant_register(OpType.BatchToSpace)
def batchtospace_quantize(self, *args):
    """Quantization of BatchToSpace: the output inherits the input's quantization fields."""
    inp = self.inputs[0]
    out = self.outputs[0]
    out.scale = inp.scale
    out.zerop = inp.zerop
    out.qbits = inp.qbits
    out.dtype = inp.dtype
    out.qinvariant = inp.qinvariant
register_optype('BatchToSpaceND')


@op_register(OpType.BatchToSpaceND)
def batchtospaceNd(self, *args):
    """Forward of BatchToSpaceND for 5-d inputs: batch blocks go to D/H/W, then crop.

    ``params['block_size']`` holds the three block factors and
    ``params['crops']`` the (begin, end) crop per spatial dimension. A fatal
    message is logged when the input is not 5-d and an error when the IR batch
    size differs from the batch size of the incoming tensor.

    Returns:
        The cropped tensor, also stored as the first output's ``betensor``.
    """
    blocks = self.params['block_size']
    crops = self.params['crops']

    src = self.inputs[0].betensor

    if src.ndim != 5:
        OPT_FATAL(f"{self}, currently input dim only supoort 5-dim, more dimensions will be supported in the future!")
    if self.inputs[0].ir_shape[0] != src.shape[0]:
        OPT_ERROR(f"{self},batch size in calibratoin or metric dataset should be equal to batch size in IR")

    blk_d, blk_h, blk_w = blocks
    batch, depth, height, width, chans = src.shape
    out_batch = batch // (blk_d * blk_h * blk_w)

    # (blk_d, blk_h, blk_w, out_batch, d, h, w, c)
    #   -> (out_batch, d, blk_d, h, blk_h, w, blk_w, c)
    #   -> (out_batch, d * blk_d, h * blk_h, w * blk_w, c)
    tiles = src.view(blk_d, blk_h, blk_w, out_batch, depth, height, width, chans)
    tiles = tiles.permute(3, 4, 0, 5, 1, 6, 2, 7).contiguous()
    merged = tiles.view(out_batch, depth * blk_d, height * blk_h, width * blk_w, chans)

    d_range = slice(crops[0][0], depth * blk_d - crops[0][1])
    h_range = slice(crops[1][0], height * blk_h - crops[1][1])
    w_range = slice(crops[2][0], width * blk_w - crops[2][1])
    self.outputs[0].betensor = merged[:, d_range, h_range, w_range, :]
    return self.outputs[0].betensor


@quant_register(OpType.BatchToSpaceND)
def batchtospaceNd_quantize(self, *args):
    """Quantization of BatchToSpaceND: the output inherits the input's quantization fields."""
    source = self.inputs[0]
    target = self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(target, field, getattr(source, field))
register_optype('BiasAdd')


@quant_register(OpType.BiasAdd)
def bias_add_quantize(self, *args):
    """Quantize BiasAdd as a BatchNorm whose weights are all ones.

    The weight and bias quantization modes are taken from the activation mode,
    the backed-up unit weights (``constants['weights_bk']``) are installed as
    ``constants['weights']`` for ``batch_norm_quantize``, and both temporary
    constants are removed afterwards.
    """
    activation_mode = self.attrs["q_mode_activation"]
    self.attrs["q_mode_weight"] = activation_mode
    self.attrs["q_mode_bias"] = activation_mode
    self.constants["weights"] = self.constants["weights_bk"]
    batch_norm_quantize(self, *args)
    for temp_key in ('weights_bk', 'weights'):
        self.constants.pop(temp_key)


def _install_unit_weights(node, key, name_suffix):
    """Store an all-ones tensor shaped like the node's biases under ``constants[key]``."""
    biases = node.constants["biases"]
    node.constants[key] = PyTensor(node.name + name_suffix)
    node.constants[key].betensor = torch.ones_like(biases.betensor)
    node.constants[key].ir_shape = biases.shape
    node.constants[key].ir_dtype = biases.ir_dtype


@op_register(OpType.BiasAdd)
def bias_add_forward(self, *args):
    """Forward of BiasAdd, computed by ``batch_norm`` with unit weights.

    On a non-quantized node, temporary all-ones ``weights`` are created (plus a
    ``weights_bk`` copy if none exists yet). If the node has no ``axis`` param,
    the last input dimension is used for the duration of the call. The
    temporary ``weights`` constant is removed before returning.

    Returns:
        The ``betensor`` of the first output.
    """
    if not self.quantized:
        if 'weights_bk' not in self.constants.keys():
            _install_unit_weights(self, 'weights_bk', '/temp_weights_bk')
        _install_unit_weights(self, 'weights', '/temp_weights')

    axis_was_missing = 'axis' not in self.params
    if axis_was_missing:
        self.params['axis'] = self.inputs[0].betensor.dim() - 1
    batch_norm(self, *args)
    if axis_was_missing:
        self.params.pop('axis')
    self.constants.pop('weights')
    return self.outputs[0].betensor
Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from AIPUBuilder.Optimizer.ops.conv import * 5 | from AIPUBuilder.Optimizer.ops.activation import apply_with_activation 6 | from AIPUBuilder.Optimizer.utils import * 7 | from AIPUBuilder.Optimizer.framework import * 8 | 9 | import torch 10 | 11 | 12 | @quant_register(OpType.BatchNorm) 13 | def batch_norm_quantize(self, *args): 14 | linear_op_quantize(self, *args) 15 | absorb_input_zp_to_bias_and_compress_bias_for_aiff(self, *args) 16 | 17 | #Cf = If * Wf + Bf 18 | # (Cq + Zc)/Sc = ((Iq + Zq) / Si) * ((Wq + Zw)/Sw) + (Bq + Zb)/Sb 19 | # set Sb = Si * Sw 20 | #Cq = ((Iq+Zq) * (Wq+Zw) + (Bq + Zb)) * (Sc/Si*Sw) - Zc 21 | # where Z is zero point, S is scale 22 | 23 | 24 | @op_register(OpType.BatchNorm) 25 | def batch_norm(self, *args): 26 | inp = self.inputs[0].betensor.clone() 27 | weights = self.constants["weights"].betensor.clone() 28 | bias = self.constants['biases'].betensor.clone() 29 | axis = self.get_param('axis') 30 | if self.quantized: 31 | # input's zerop has been absorbed to bias. 
32 | # inp += self.inputs[0].zerop 33 | weights += self.constants["weights"].broadcast_zerop 34 | bias += self.constants['biases'].broadcast_zerop 35 | 36 | if inp.shape[axis] != self.inputs[0].ir_shape[axis]: 37 | weights = PyTensor.detile(weights, axis) 38 | bias = PyTensor.detile(bias, axis) 39 | inp_dim = inp.dim() 40 | perm = [] 41 | if axis != inp_dim - 1 and inp_dim > 0: 42 | orig_perm = [p for p in range(inp_dim)] 43 | perm = orig_perm[:axis] + orig_perm[axis+1:] + [orig_perm[axis]] 44 | inp = torch.permute(inp, perm) 45 | 46 | x = torch.add(torch.multiply(inp, weights.float()), bias) 47 | if not self.outputs[0].is_perchannel_quantization(): 48 | x = apply_with_activation(self, x, *args) 49 | if len(perm): 50 | orig_perm = [p for p in range(inp_dim)] 51 | n_perm = orig_perm[:axis] + [orig_perm[-1]] + orig_perm[axis:-1] 52 | x = torch.permute(x, n_perm) 53 | if self.outputs[0].is_perchannel_quantization(): 54 | x = apply_with_activation(self, x, *args) 55 | self.outputs[0].betensor = x 56 | return self.outputs[0].betensor 57 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/ops/ceil.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 
from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.utils import *
import AIPUBuilder.Optimizer.ops.activation as activation_module
import torch


@op_register(OpType.Ceil)
def ceil(self, *args):
    """Forward pass of Ceil, routed through the generic activation helper.

    ``torch.ceil`` is installed as the temporary ``lambda_func`` attribute for
    the duration of the call. Returns ``self.outputs[0].betensor``.
    """
    self.attrs['lambda_func'] = torch.ceil
    result = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = result
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


@quant_register(OpType.Ceil)
def ceil_quantize(self, *args):
    """Quantize Ceil via the generic activation quantizer with a signed output."""
    # temporary attributes consumed by unknown_quantize
    self.attrs['lambda_func'] = torch.ceil
    self.attrs['out_signed'] = True
    activation_module.unknown_quantize(self, *args)
    for temp_key in ('lambda_func', 'out_signed'):
        self.attrs.pop(temp_key)


@approx_register(OpType.Ceil)
def ceil_approx(self, *args):
    """Approximation hook for Ceil: only clears the ``is_perf_mode`` parameter."""
    # this is not currently used because it is the same as the float process
    self.params['is_perf_mode'] = False
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/channelshuffle.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.logger import OPT_ERROR, OPT_WARN
import torch


@op_register(OpType.ChannelShuffle)
def channelshuffle(self, *args):
    """
    Shuffle the last (channel) dimension across ``group`` groups.

    now only support [n, h, w, c] input shape
    torch.nn.functional.channel_shuffle is alpha version in torch 1.7.1
    :param self: node providing the ``group`` and ``splits`` parameters
    :param args: unused
    :return: the shuffled tensor, or the tuple of chunks when ``splits != 1``
    """
    data = self.inputs[0].betensor
    group = self.get_param('group')
    splits = self.get_param('splits')

    full_shape = list(data.shape)
    rank = len(full_shape)
    # split channels into (group, channels_per_group) ...
    grouped_shape = full_shape[:-1] + [group, full_shape[-1] // group]
    # ... swap those two trailing axes ...
    swap_trailing = list(range(rank - 1)) + [rank, rank - 1]
    # ... and flatten back to the original shape
    out = torch.permute(data.reshape(grouped_shape), swap_trailing).reshape(full_shape)

    if splits == 1:
        self.outputs[0].betensor = out
        return out

    chunk = out.shape[-1] // splits
    out = torch.split(out, chunk, dim=-1)
    for idx, piece in enumerate(out):
        self.outputs[idx].betensor = piece
    return out


@quant_register(OpType.ChannelShuffle)
def channelshuffle_quantize(self, *args):
    """Give every output the quantization info of the single input."""
    src = self.inputs[0]
    copied = ('dtype', 'scale', 'zerop', 'qbits', 'qmin', 'qmax', 'qinvariant')
    for dst in self.outputs:
        for field in copied:
            setattr(dst, field, getattr(src, field))
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/datastride.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *

import torch


@op_register(OpType.DataStride)
def datastride(self, *args):
    """Extract strided kernel windows and lay them out as spatial blocks.

    The input is converted with ``nhwc2nchw``, windows of size
    ``(kernel_y, kernel_x)`` are taken with strides ``(stride_y, stride_x)``
    over dims 2 and 3, and the window elements are then redistributed into the
    two spatial dims. The result is converted back with ``nchw2nhwc``, stored
    in ``self.outputs[0].betensor`` and returned.
    """

    inpt = self.inputs[0].betensor
    inpt = nhwc2nchw(inpt)

    kernel_x = self.get_param('kernel_x')
    kernel_y = self.get_param('kernel_y')
    stride_x = self.get_param('stride_x')
    stride_y = self.get_param('stride_y')

    # each unfold appends one window dimension at the end of the tensor
    patches = inpt.unfold(2, kernel_y, stride_y).unfold(3, kernel_x, stride_x)
    # bring the two window dims in front of the channel dim, then merge them
    # with the channels into a single dim
    patches = patches.permute(0, 4, 5, 1, 2, 3).contiguous()
    patches = patches.view(inpt.shape[0], -1, patches.shape[-2], patches.shape[-1])
    N, C, H, W = patches.size()
    # NOTE(review): only kernel_y is used as the block size below, which looks
    # like it assumes kernel_x == kernel_y - confirm.
    bs = kernel_y
    patches = patches.view(N, bs, bs, C // (bs ** 2), H, W)  # (N, bs, bs, C//bs^2, H, W)
    patches = patches.permute(0, 3, 4, 1, 5, 2).contiguous()  # (N, C//bs^2, H, bs, W, bs)
    patches = patches.view(N, C // (bs ** 2), H * bs, W * bs)  # (N, C//bs^2, H * bs, W * bs)
    patches = nchw2nhwc(patches)
    self.outputs[0].betensor = patches
    return patches


@quant_register(OpType.DataStride)
def quantize_datastride(self, *args):
    """Copy dtype, scale, zero point, bit width and qinvariant from input to output."""
    inp = self.inputs[0]
    out = self.outputs[0]
    out.dtype = inp.dtype
    out.scale = inp.scale
    out.zerop = inp.zerop
    out.qbits = inp.qbits
    out.qinvariant = inp.qinvariant
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/depthtospace.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.logger import OPT_WARN
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *
import torch


@op_register(OpType.DepthToSpace)
def depthtospace(self, *args):
    """Rearrange channel data into spatial blocks for an NHWC input.

    Parameters read from the node: ``block_size_x``, ``block_size_y`` and the
    optional ``mode`` (``'DCR'`` by default, or ``'CRD'``), which selects how
    the channel dimension is factored into (block_y, block_x, new_channel).

    The output has shape ``(N, H * block_size_y, W * block_size_x,
    C // (block_size_x * block_size_y))``. It is stored in
    ``self.outputs[0].betensor`` and returned.
    """
    block_size_x = self.get_param('block_size_x')
    block_size_y = self.get_param('block_size_y')
    mode = self.get_param('mode', optional=True, default_value='DCR').upper()
    if block_size_x != block_size_y:
        OPT_WARN("currently not support block_size_x != block_size_y in layer" +
                 self.attrs['layer_id'], op_name=str(self.type))
    inp = self.inputs[0].betensor
    if inp.dim() != 4:
        OPT_FATAL("currently only support 4 dims input in layer" +
                  self.attrs['layer_id'], op_name=str(self.type))
    # data format is NHWC
    N, H, W, C = inp.size()
    new_channel = C // (block_size_x * block_size_y)
    # reshape (not view): the incoming tensor may be non-contiguous, in which
    # case view() raises.
    if mode == 'DCR':
        x = inp.reshape(N, H, W, block_size_y, block_size_x, new_channel)  # (N, H, W, bs, bs, C//bs^2)
        x = x.permute(0, 1, 3, 2, 4, 5).contiguous()  # (N, H, bs, W, bs, C//bs^2)
    elif mode == 'CRD':
        x = inp.reshape(N, H, W, new_channel, block_size_y, block_size_x)
        x = x.permute(0, 1, 4, 2, 5, 3).contiguous()  # (N, H, bs, W, bs, C//bs^2)
    else:
        OPT_FATAL("unsupported mode: %s for DepthToSpace in node:%s" % (mode, self.name))
    out = x.view(N, H * block_size_y, W * block_size_x, new_channel)
    self.outputs[0].betensor = out
    return out


@quant_register(OpType.DepthToSpace)
def depthtospace_quantize(self, *args):
    """Copy the input's quantization info to the output; the op only moves data."""
    inp = self.inputs[0]
    out = self.outputs[0]
    out.scale = inp.scale
    out.zerop = inp.zerop
    out.qbits = inp.qbits
    out.dtype = inp.dtype
    out.qinvariant = inp.qinvariant
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/depthwiseconv.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils.dtype_utils import *
from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.ops.conv import conv2d, conv2d_quantize


@op_register(OpType.DepthwiseConv)
def depthwise_conv2d(self, *args):
    """Forward pass of DepthwiseConv: hands the node to ``conv2d`` and returns its result."""
    return conv2d(self, *args)


@quant_register(OpType.DepthwiseConv)
def depthwise_conv2d_quantize(self, *args):
    """Quantize through ``conv2d_quantize``, then expand scalar scale/shift params.

    When the four parameters ``scale_value``, ``shift_value``, ``scale_type``
    and ``shift_type`` are all present, they are turned into ``scale`` and
    ``shift`` constants holding one entry per ``weights.ir_shape[0]`` and are
    removed from ``self.params``.
    """
    conv2d_quantize(self, *args)
    scalar_keys = ('scale_value', 'shift_value', 'scale_type', 'shift_type')
    if not all(key in self.params for key in scalar_keys):
        return

    scale_values = [self.params['scale_value'], ] * self.constants['weights'].ir_shape[0]
    self.constants['scale'] = PyTensor(f"{self.name}_scale", scale_values, dtype=self.params['scale_type'])
    shift_values = [self.params['shift_value'], ] * self.constants['weights'].ir_shape[0]
    self.constants['shift'] = PyTensor(f"{self.name}_shift", shift_values, dtype=self.params['shift_type'])
    for key in scalar_keys:
        self.params.pop(key)
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/dequantize.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *


register_optype('DeQuantize')


@quant_register(OpType.DeQuantize)
def dequantize_quant(self, *args):
    """Mirror the input's quantization fields onto the output."""
    src, dst = self.inputs[0], self.outputs[0]
    # same assignment order as the field list
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant', 'qmin', 'qmax'):
        setattr(dst, field, getattr(src, field))


@op_register(OpType.DeQuantize)
def dequantize_forward(self, *args):
    """Dequantize the input with its own broadcast scale and zero point.

    Returns ``self.outputs[0].betensor``.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    dequantized = linear_dequantize(src.betensor, src.broadcast_scale, src.broadcast_zerop)
    dst.betensor = dequantized
    return dst.betensor
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/dummy.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
import torch

register_optype('Dummy')


@op_register(OpType.Dummy)
def dummy__forward(self, *args):
    """Clone each input tensor into the output at the same position (no return value)."""
    for dst, src in zip(self.outputs, self.inputs):
        copied = src.betensor.clone()
        dst.betensor = copied


@quant_register(OpType.Dummy)
def dummy__quantize(self, *args):
    """Have each output clone the quantization info of its paired input."""
    for dst, src in zip(self.outputs, self.inputs):
        dst.clone_qinfo(src)
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/erosion2d.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.logger import *
from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.ops.dilation2d import *
import torch

register_optype('Erosion')


@op_register(OpType.Erosion)
def erosion(self, *args):
    """Forward pass of Erosion, implemented with the shared dilation/erosion helper.

    The helper is called with ``+inf`` padding, ``torch.amin`` as the reduction
    and reversed weights. The result is stored in ``self.outputs[0].betensor``
    and returned.
    """
    outp = dilation_erosion_fun(self, padding_value=float('inf'), compare_func=torch.amin, weight_reverse=True)
    self.outputs[0].betensor = outp
    return outp


@quant_register(OpType.Erosion)
def erosion_quantize(self, *args):
    """Quantize Erosion exactly like Dilation."""
    dilation_quantize(self, *args)
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/exp.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.ops.softmax import softmax_approx
import AIPUBuilder.Optimizer.ops.activation as activation_module
import torch


@quant_register(OpType.Exp)
def exp_quantize(self, *args):
    """Quantize Exp via the generic activation quantizer.

    The output is unsigned unless ``self.force_dtype_int`` is set.
    """
    self.attrs['lambda_func'] = torch.exp
    self.attrs['out_signed'] = False or self.force_dtype_int
    activation_module.unknown_quantize(self, *args)
    self.attrs.pop('lambda_func')
    self.attrs.pop('out_signed')


@op_register(OpType.Exp)
def exp(self, *args):
    """Forward pass of Exp, routed through the generic activation helper.

    When the node is marked ``approximated`` and has a ``lut`` constant, the
    LUT-based approximation is used; otherwise ``torch.exp``.
    Returns ``self.outputs[0].betensor``.
    """
    def approximated_float_forward(self, inp_tensor):
        # Float-domain function installed as 'lambda_func' below.
        if self.approximated and "lut" in self.constants:
            lut = self.constants["lut"].betensor
            # 1.442695 ~= log2(e); presumably the helper evaluates a base-2
            # exponential of its argument - TODO confirm.
            f_vdata = inp_tensor * 1.442695
            out = x3_aiff_exp_approximation(f_vdata, lut)
        else:
            out = torch.exp(inp_tensor)
        return out
    self.attrs['lambda_func'] = lambda x: approximated_float_forward(self, x)
    self.outputs[0].betensor = activation_module.unknown_activation(self, *args)
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


# NOTE(review): despite its name, this function is registered for OpType.Exp.
@approx_register(OpType.Exp)
def elu_approx(self, *args):
    """Approximation hook for Exp: reuses the softmax approximation setup."""
    softmax_approx(self, *args)
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/fc.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.ops.conv import *
from AIPUBuilder.Optimizer.ops.activation import apply_with_activation
from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *

import torch.nn as nn


@op_register(OpType.FullyConnected)
def fc(self, *args):
    """Forward pass of FullyConnected: ``inp @ weights.T + bias`` plus activation.

    Operands are converted to float when the weights are FP32/FP16 and to
    double otherwise. In quantized mode the weight and bias zero points are
    added first. When a ``remain_shift`` parameter exists and either the
    weights or the input are wider than 8 bits, the bias is not added here but
    handed to ``apply_with_activation`` together with the shift.

    Returns ``self.outputs[0].betensor``.
    """
    # NOTE(review): block nesting below was reconstructed from a collapsed
    # source - confirm against the upstream file.
    inp, bias, weights = None, None, None
    if self.constants['weights'].dtype in [Dtype.FP32, Dtype.FP16]:
        inp = self.inputs[0].betensor.float()
        bias = self.constants["biases"].betensor.float()
        weights = self.constants["weights"].betensor.float()
    else:
        inp = self.inputs[0].betensor.double()
        bias = self.constants["biases"].betensor.clone().double()
        weights = self.constants["weights"].betensor.clone().double()
    aasrb = self.get_param('remain_shift',
                           optional=True, default_value=None)

    if self.quantized:
        # input's zerop has been absorbed to bias.
        # inp += self.inputs[0].zerop
        weights += self.constants["weights"].broadcast_zerop
        bias += self.constants['biases'].broadcast_zerop
        if aasrb is not None and (dtype2bits(self.constants["weights"].dtype) > 8 or dtype2bits(self.inputs[0].dtype) > 8):

            # bias is passed along with the shift instead of being added here
            x = inp @ weights.T
            self.outputs[0].betensor = apply_with_activation(self, x,
                                                             *args, aasrb=(aasrb, bias))
            return self.outputs[0].betensor
    x = nn.functional.linear(inp, weights, bias,)
    self.outputs[0].betensor = apply_with_activation(self, x, *args)
    return self.outputs[0].betensor


@quant_register(OpType.FullyConnected)
def fc_quantize(self, *args):
    """Quantize via ``conv2d_quantize`` and publish ``remain_shift`` as a parameter if set."""
    conv2d_quantize(self, *args)
    if 'remain_shift' in self.attrs:
        self.params['remain_shift'] = self.attrs['remain_shift']
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/floor.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.framework import *
import AIPUBuilder.Optimizer.ops.activation as activation_module
from AIPUBuilder.Optimizer.utils import *
import torch


@op_register(OpType.Floor)
def floor(self, *args):
    """Forward pass of Floor, routed through the generic activation helper.

    ``torch.floor`` is installed as the temporary ``lambda_func`` attribute for
    the duration of the call. Returns ``self.outputs[0].betensor``.
    """
    self.attrs['lambda_func'] = torch.floor
    result = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = result
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


@quant_register(OpType.Floor)
def floor_quantize(self, *args):
    """Quantize Floor via the generic activation quantizer with a signed output."""
    # temporary attributes consumed by unknown_quantize
    self.attrs['lambda_func'] = torch.floor
    self.attrs['out_signed'] = True
    activation_module.unknown_quantize(self, *args)
    for temp_key in ('lambda_func', 'out_signed'):
        self.attrs.pop(temp_key)


@approx_register(OpType.Floor)
def floor_approx(self, *args):
    """Approximation hook for Floor: only clears the ``is_perf_mode`` parameter."""
    # this is not currently used because it is the same as the float process
    self.params['is_perf_mode'] = False
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/gather_elements.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.logger import *


# IR
# layer_id=2
# layer_name=gather
# layer_type=GatherElements
# layer_bottom=[params,ids]
# layer_bottom_shape=[[1,16320],[1,5]]
# layer_bottom_type=[float,int32]
# layer_top=[gather]
# layer_top_shape=[[1,5]]
# layer_top_type=[float]
# axis=1


# onnx gather_elements is same as torch gather
@op_register(OpType.GatherElements)
def gather_elements(self, *args):
    """Gather elements of input 0 along ``axis`` using the indices in input 1.

    Indices are sanitised on a clone before ``torch.gather`` is called:
    values >= the axis size are clamped to the last position, negative values
    are wrapped by adding the axis size, and values that are still negative
    after wrapping are set to the last position.

    Returns ``self.outputs[0].betensor``.
    """
    # clone so the masked writes below do not modify the input tensor
    indice_betensor = self.inputs[1].betensor.clone()
    inp0_betensors = self.inputs[0].betensor

    if inp0_betensors.dim() != indice_betensor.dim():
        OPT_FATAL('layer_id=%s, type=%s, inp0 and inp1 rank are not same, please check' % (
            self.attrs['layer_id'], str(self.type)))

    axis = self.get_param('axis')
    axis_dim = inp0_betensors.shape[axis]
    # too-large indices -> last valid position
    positive_bound_mask = indice_betensor >= axis_dim
    indice_betensor[positive_bound_mask] = axis_dim-1
    # negative indices count from the end
    negative_mask = indice_betensor < 0
    indice_betensor[negative_mask] = indice_betensor[negative_mask] + axis_dim
    # still negative after wrapping (below -axis_dim) -> last valid position
    negative_bound_mask = indice_betensor < 0
    indice_betensor[negative_bound_mask] = axis_dim-1

    self.outputs[0].betensor = torch.gather(
        inp0_betensors, axis, indice_betensor.long())

    return self.outputs[0].betensor


@quant_register(OpType.GatherElements)
def gather_elements_quantize(self, *args):
    """Copy dtype, scale, zero point, bit width and qinvariant from input 0 to the output."""
    inp = self.inputs[0]
    out = self.outputs[0]
    out.dtype = inp.dtype
    out.scale = inp.scale
    out.zerop = inp.zerop
    out.qbits = inp.qbits
    out.qinvariant = inp.qinvariant
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/globalpooling.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.ops.pooling import pooling_quantize, pooling
import torch


@op_register(OpType.GlobalPool)
def globalpool_forward(self, *args):
    '''
    Run pooling with a kernel that covers the whole padded input.

    this op is used for ds forward to get the ds output shape when avgpool's output.ir_shape[1:3] == [1,1]

    The kernel size is derived from input dims 1 and 2 plus the padding
    parameters and written to ``kernel_y`` / ``kernel_x`` before ``pooling``
    is called.

    :param self: node providing ``pad_left`` / ``pad_right`` and the optional
        ``pad_top`` / ``pad_bottom`` parameters (both default to 0)
    :param args: unused; not forwarded to ``pooling``
    :return: the value returned by ``pooling(self)``
    '''
    inp_shape = self.inputs[0].betensor.shape
    # order: (left, right, top, bottom)
    padding = (self.get_param('pad_left'),
               self.get_param('pad_right'),
               self.get_param('pad_top', optional=True, default_value=0),
               self.get_param('pad_bottom', optional=True, default_value=0))
    # (dim 1 + top + bottom, dim 2 + left + right)
    kernel_size = (inp_shape[1] + padding[2] + padding[3], inp_shape[2] + padding[0] + padding[1])
    self.params['kernel_y'] = kernel_size[0]
    self.params['kernel_x'] = kernel_size[1]

    out = pooling(self)
    return out


@quant_register(OpType.GlobalPool)
def globalpool_quantize(self, *args):
    """Quantize GlobalPool with the regular pooling quantizer."""
    pooling_quantize(self)
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/gruv1.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.ops.gruv3 import gruv3_quantize, gruv3
from AIPUBuilder.Optimizer.logger import *
import torch.nn as nn


@op_register(OpType.GRUv1)
def gruv1(self, *args):
    """Forward pass of GRUv1: runs ``gruv3`` with a temporary ``version`` parameter.

    Nothing is returned here; any result is whatever ``gruv3`` stores on the node.
    """
    self.params['version'] = "GRUV1"
    gruv3(self, *args)
    self.params.pop('version')


@quant_register(OpType.GRUv1)
def gruv1_quantize(self, *args):
    """Quantize GRUv1 through ``gruv3_quantize`` with a temporary ``version`` parameter."""
    self.params['version'] = "GRUV1"
    gruv3_quantize(self, *args)
    self.params.pop('version')
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/hardmax.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *

import torch

# Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0 otherwise

register_optype('Hardmax')


@quant_register(OpType.Hardmax)
def Hardmax_quantize(self, *args):
    """Quantize Hardmax: the 0/1 output keeps the input dtype with scale 1 and zero point 0.

    Per-channel activation quantization is rejected with ``OPT_FATAL``.
    """
    q_mode_activation = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(q_mode_activation):
        OPT_FATAL("Currently not support per-channel quantization")

    inp = self.inputs[0]
    out = self.outputs[0]

    out.scale = 1
    out.zerop = 0
    out.dtype = inp.dtype
    out.qbits = inp.qbits
    out.qmin = inp.qmin
    out.qmax = inp.qmax
    out.qinvariant = True


@op_register(OpType.Hardmax)
def hardmax(self, *args):
    """One-hot encode the position of the maximum along ``axis``.

    The output has the input's shape and dtype: 1 at the index that
    ``torch.argmax`` selects along ``axis``, 0 everywhere else.

    Returns ``self.outputs[0].betensor``.
    """
    inp = self.inputs[0]
    out = self.outputs[0]

    axis = int(self.get_param('axis'))  # only int

    argx = torch.argmax(inp.betensor, axis, keepdim=True)
    # A single scatter replaces the former per-index index_select/where/cat
    # loop (which also compared a tensor against None with `==`).
    one_hot = torch.zeros_like(inp.betensor)
    one_hot.scatter_(axis, argx, 1)
    out.betensor = one_hot

    return out.betensor
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/hardswish.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
import AIPUBuilder.Optimizer.ops.activation as activation_module
import torch

register_optype('Hardswish')


@quant_register(OpType.Hardswish)
def hardswish_quantize(self, *args):
    """Quantize Hardswish via the generic activation quantizer with a signed output."""
    self.attrs['lambda_func'] = torch.nn.functional.hardswish
    self.attrs['out_signed'] = True
    activation_module.unknown_quantize(self, *args)
    self.attrs.pop('lambda_func')
    self.attrs.pop('out_signed')


@op_register(OpType.Hardswish)
def hardswish(self, *args):
    """Forward pass of Hardswish, routed through the generic activation helper.

    Returns ``self.outputs[0].betensor``.
    """
    self.attrs['lambda_func'] = torch.nn.functional.hardswish
    self.outputs[0].betensor = activation_module.unknown_activation(self, *args)
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


@approx_register(OpType.Hardswish)
def hardswish_approx(self, *args):
    """Approximation hook for Hardswish: only sets the ``is_perf_mode`` parameter."""
    # By default, it is calculated directly on AIFF
    self.params['is_perf_mode'] = True
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/instancenorm.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.ops.groupnorm import groupnorm_quantize, groupnorm
import torch


@quant_register(OpType.InstanceNorm)
def instancenorm_quantize(self, *args):
    """Quantize InstanceNorm through ``groupnorm_quantize``.

    Missing ``axis`` / ``group`` parameters are filled in for the call
    (``axis`` = every dim except the first and last of the IR shape,
    ``group`` = 1) and removed again afterwards.
    """
    # the *flag variables record which parameters were injected here, so that
    # only those are removed after the call
    aflag = False
    if 'axis' not in self.params:
        aflag = True
        input_dim = len(self.inputs[0].ir_shape)
        axis_param = [axis for axis in range(1, input_dim - 1)]
        self.params['axis'] = axis_param
    gflag = False
    if 'group' not in self.params:
        gflag = True
        self.params['group'] = 1
    groupnorm_quantize(self, *args)
    if aflag:
        self.params.pop('axis')
    if gflag:
        self.params.pop('group')


@op_register(OpType.InstanceNorm)
def instancenorm(self, *args):
    """Forward pass of InstanceNorm, implemented with ``groupnorm``.

    Missing parameters are filled in for the call and removed afterwards:
    ``axis`` (every dim except the first and last), ``group`` (1), and
    ``axis_shape`` / ``scale_shift_shape`` (all ones except the last input
    dimension). Nothing is returned here.
    """
    aflag = False
    input_dim = self.inputs[0].betensor.dim()
    if 'axis' not in self.params:
        aflag = True
        # inp0.shape = [N,D1,D2,....,C]
        axis_param = [axis for axis in range(1, input_dim - 1)]
        self.params['axis'] = axis_param
    gflag = False
    if 'group' not in self.params:
        gflag = True
        self.params['group'] = 1
    axis_shape_flag = False
    if 'axis_shape' not in self.params:
        axis_shape_flag = True
        # [1, ..., 1, C]; the same list object is stored under both keys
        axis_shape = [1 for ax in range(input_dim-1)] + [self.inputs[0].betensor.shape[input_dim-1]]
        self.params['axis_shape'] = axis_shape
        self.params['scale_shift_shape'] = axis_shape
    groupnorm(self, *args)
    if aflag:
        self.params.pop('axis')
    if gflag:
        self.params.pop('group')
    if axis_shape_flag:
        self.params.pop('axis_shape')
        self.params.pop('scale_shift_shape')
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/intopk.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *
import torch

register_optype('InTopK')


@op_register(OpType.InTopK)
def intopk(self, *args):
    """Report, per row, whether the target's score is among the top-k scores.

    Input 0 holds the predictions (indexed as ``[row, class]``); input 1 holds
    one target class index per row. A row is True when the score at its target
    index equals one of the ``k`` values selected by ``torch.topk`` along the
    last dimension. The comparison is by value, so ties with a top-k score
    count as hits.

    Parameters read from the node: ``k`` and the optional ``largest`` /
    ``sorted`` flags (both default to True).

    Returns ``self.outputs[0].betensor``, a 1-D bool tensor on the same device
    as the predictions.
    """
    k = self.get_param('k')

    largest = self.get_param("largest", optional=True, default_value=True)
    issorted = self.get_param("sorted", optional=True, default_value=True)

    predictions = self.inputs[0].betensor
    # Flatten instead of squeeze: squeezing a [1, 1] target yields a 0-d
    # tensor, on which len() raises.
    targets = self.inputs[1].betensor.reshape(-1)
    if predictions.shape[0] != targets.numel():
        OPT_FATAL("target must have the same size as input along predict'axis ")

    k = min(k, predictions.shape[-1])
    topk_v, _ = torch.topk(predictions, k, -1, largest, issorted)

    class_idx = targets.long()
    # keep Python-style negative indexing, which the former per-row lookup allowed
    class_idx = torch.where(class_idx < 0, class_idx + predictions.shape[-1], class_idx)
    target_scores = torch.gather(predictions, -1, class_idx.unsqueeze(-1))
    # computed with tensor ops so the result stays on the input's device
    self.outputs[0].betensor = (topk_v == target_scores).any(dim=-1)
    return self.outputs[0].betensor


@quant_register(OpType.InTopK)
def intopk_quantize(self, *args):
    """Quantize InTopK: 8-bit output with scale 1.0 and zero point 0.

    The dtype is INT8 when ``self.force_dtype_int`` is set, otherwise UINT8.
    """
    out = self.outputs[0]
    out.scale = 1.0
    out.zerop = 0
    out.dtype = Dtype.INT8 if self.force_dtype_int else Dtype.UINT8
    out.qbits = dtype2bits(out.dtype)
    out.qinvariant = True
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/isinf.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *

register_optype('IsInf')


@op_register(OpType.IsInf)
def isinf(self, *args):
    """Mark the infinite elements of the input.

    The optional ``detect_negative`` / ``detect_positive`` parameters (both
    default to True) select which sign of infinity is reported. The mask is
    converted to int and wrapped in a ``PyTensor`` built with the output's
    dtype. Returns ``self.outputs[0].betensor``.
    """
    want_negative = self.get_param('detect_negative', optional=True, default_value=True)
    want_positive = self.get_param('detect_positive', optional=True, default_value=True)

    data = self.inputs[0].betensor
    hits = torch.zeros_like(data, device=data.device).bool()
    for enabled, target in ((want_positive, float('inf')), (want_negative, float('-inf'))):
        if enabled:
            hits = torch.bitwise_or(hits, data == target)
    wrapped = PyTensor('out', hits.int(), self.outputs[0].dtype)
    self.outputs[0].betensor = wrapped.betensor
    return self.outputs[0].betensor


@quant_register(OpType.IsInf)
def isinf_quantize(self, *args):
    """Quantize IsInf: unsigned 8-bit output with scale 1.0 and zero point 0."""
    result = self.outputs[0]
    result.scale = 1.0
    result.zerop = 0
    result.qbits = 8
    result.dtype = bits2dtype(result.qbits, is_signed=False)
    result.qinvariant = True
    value_range = dtype2range(result.dtype)
    result.qmin, result.qmax = value_range
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/isnan.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *

register_optype('IsNaN')


@op_register(OpType.IsNaN)
def isnan(self, *args):
    """Mark the NaN elements of the input.

    The ``torch.isnan`` mask is converted to int and wrapped in a ``PyTensor``
    built with the output's dtype. Returns ``self.outputs[0].betensor``.
    """
    out = torch.isnan(self.inputs[0].betensor).int()
    self.outputs[0].betensor = PyTensor('out', out, self.outputs[0].dtype).betensor
    return self.outputs[0].betensor


@quant_register(OpType.IsNaN)
def isnan_quantize(self, *args):
    """Quantize IsNaN: unsigned 8-bit output with scale 1.0 and zero point 0."""
    out = self.outputs[0]
    out.scale = 1.0
    out.zerop = 0
    out.qbits = 8
    out.dtype = bits2dtype(out.qbits, is_signed=False)
    out.qinvariant = True
    out.qmin, out.qmax = dtype2range(out.dtype)
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/layernorm.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.ops.groupnorm import groupnorm_quantize, groupnorm
import torch


@quant_register(OpType.LayerNorm)
def layernorm_quantize(self, *args):
    """Quantize LayerNorm with the group-norm quantizer."""
    groupnorm_quantize(self, *args)


@op_register(OpType.LayerNorm)
def layernorm(self, *args):
    """Forward pass of LayerNorm, implemented with ``groupnorm``.

    Missing parameters are filled in for the call and removed afterwards:
    ``axis`` ([-1]), ``group`` (1), ``axis_shape`` (input sizes on the
    normalised axes, 1 elsewhere) and ``scale_shift_shape`` (input size on the
    last listed axis, 1 elsewhere). Nothing is returned here.
    """
    # NOTE(review): block nesting was reconstructed from a collapsed source.
    # the *flag variables record which parameters were injected here
    aflag = False
    if 'axis' not in self.params:
        aflag = True
        self.params['axis'] = [-1]
    gflag = False
    if 'group' not in self.params:
        gflag = True
        self.params['group'] = 1
    axis_shape_flag = False
    if 'axis_shape' not in self.params:
        axis_shape_flag = True
        axis = self.params['axis']
        input_dim = self.inputs[0].betensor.dim()
        # map negative axes to their non-negative equivalents
        axis_positive = [ax + input_dim if ax < 0 else ax for ax in axis]
        axis_shape = [self.inputs[0].betensor.shape[ax] if ax in axis_positive else 1 for ax in range(input_dim)]
        self.params['axis_shape'] = axis_shape
        self.params['scale_shift_shape'] = [self.inputs[0].betensor.shape[ax]
                                            if ax == axis_positive[-1] else 1 for ax in range(input_dim)]
    groupnorm(self, *args)
    if aflag:
        self.params.pop('axis')
    if gflag:
        self.params.pop('group')
    if axis_shape_flag:
        self.params.pop('axis_shape')
        self.params.pop('scale_shift_shape')
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/maxunpool.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.ops.upsamplebyindex import upsamplebyindex_quantize, upsamplebyindex


@quant_register(OpType.MaxUnpool)
def maxunpool_quantize(self, *args):
    """Quantize MaxUnpool exactly like UpsampleByIndex."""
    upsamplebyindex_quantize(self, *args)


@op_register(OpType.MaxUnpool)
def maxunpool(self, *args):
    """Forward pass of MaxUnpool: delegates to ``upsamplebyindex`` (nothing is returned here)."""
    upsamplebyindex(self, *args)
# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/mul.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
@quant_register(OpType.Negative)
def neg_quantize(self, *args):
    """Derive the output quantization info for Negative from its input.

    Scale, bit width and ``qinvariant`` are copied from the input; the output
    dtype is the signed type of the input's bit width.  The quantized forward
    in this file computes ``-(x + inp.zerop) - out.zerop`` and clamps to
    ``[out.qmin, out.qmax]``, so the range is now set for signed inputs as well
    (previously it was only assigned on the unsigned path).
    """
    inp = self.inputs[0]
    out = self.outputs[0]
    out.qinvariant = inp.qinvariant
    out.scale = inp.scale
    out.qbits = inp.qbits
    out.dtype = bits2dtype(dtype2bits(inp.dtype), is_signed=True)
    out.qmin, out.qmax = dtype2range(out.dtype)
    if is_signed(inp.dtype):
        # Negating a signed range shifts it by one step; the +1 re-centres it.
        out.zerop = -1 * inp.zerop + 1
    else:
        # Unsigned input: offset by qmax so the negated values fit the signed range.
        out.zerop = -1 * inp.zerop - out.qmax
@quant_register(OpType.NonZero)
def nonzero_quantize(self, *args):
    """Set the quantization info of both NonZero outputs.

    The outputs are marked quantization-invariant (scale 1, zero point 0,
    unsigned).  The bit width is the larger of ``q_bits_activation`` and
    ceil(log2) of the largest input IR dimension, kept as a plain Python int so
    ``out.qbits`` never becomes a torch tensor.  Output 1 clones the
    quantization info of output 0.
    """
    out = self.outputs[0]
    q_bits_activation = self.attrs["q_bits_activation"]
    max_len = int(max(self.inputs[0].ir_shape))
    # (n - 1).bit_length() == ceil(log2(n)) for n >= 1; sizes <= 1 need 0 bits.
    index_bits = (max_len - 1).bit_length() if max_len > 1 else 0
    out.scale = 1.0
    out.zerop = 0
    out.qbits = max(q_bits_activation, index_bits)
    out.dtype = bits2dtype(out.qbits, is_signed=False)
    out.qinvariant = True
    out.qmin, out.qmax = dtype2range(out.dtype)
    self.outputs[1].clone_qinfo(self.outputs[0])
@op_register(OpType.Permute)
def permute(self, *args):
    """Reorder the dimensions of the first input according to param ``perm``.

    When ``perm`` is not a list, the dimension order is fully reversed.
    The result is stored in and returned as ``self.outputs[0].betensor``.
    """
    src = self.inputs[0].betensor
    dst = self.outputs[0]
    order = self.get_param('perm')
    if not isinstance(order, list):
        # Fallback: reverse all axes.
        order = list(range(len(src.shape)))[::-1]
    dst.betensor = src.permute(order)
    return dst.betensor
@op_register(OpType.Quantize)
def quantize_forward(self, *args):
    """Quantize the first input with the output tensor's quantization info.

    The rounding function is resolved from the optional ``round_mode`` param
    (default ``"ROUND_TO_EVEN"``).  If the output has no ``qmin`` the input is
    passed through untouched; otherwise it goes through
    ``linear_quantize_clip`` with the output's broadcast scale / zero point and
    its ``[qmin, qmax]`` range.
    """
    src, dst = self.inputs[0], self.outputs[0]
    mode = self.get_param('round_mode', optional=True, default_value="ROUND_TO_EVEN")
    rounder = get_round_mode_func(mode)

    if dst.qmin is None:
        # No quantization range available: plain pass-through.
        dst.betensor = src.betensor
        return dst.betensor

    dst.betensor = linear_quantize_clip(
        src.betensor, dst.broadcast_scale, dst.broadcast_zerop,
        dst.qmin, dst.qmax, round_func=rounder)
    return dst.betensor
@op_register(OpType.QueryRebatch)
def queryrebatch_forward(self, *args):
    """Gather slices of input 0 by each index input into one zero-padded output.

    Inputs 1..k are index tensors; for the i-th of them the entries it selects
    along dim 1 of input 0 are written to ``output[:, i, :len(index)]``.
    Dim 2 of the output is resized to the longest index tensor and the unused
    tail stays zero.  An error is reported when the number of index inputs
    differs from ``outputs[0].ir_shape[1]``.
    """
    source = self.inputs[0]
    index_inputs = self.inputs[1:]
    out = self.outputs[0]

    if len(self.inputs) - 1 != out.ir_shape[1]:
        OPT_ERROR("please check the queryrebatch IR, the len(input) - 1 should be equal to output[0].shape[1]")

    padded_shape = list(out.ir_shape)
    padded_shape[2] = max(t.betensor.shape[0] for t in index_inputs)
    out.betensor = torch.zeros(padded_shape).to(source.device)

    for slot, index_t in enumerate(index_inputs):
        count = index_t.betensor.shape[0]
        out.betensor[:, slot, :count] = source.betensor[:, index_t.betensor.long()]

    return out.betensor
@op_register(OpType.Reciprocal)
def reciprocal(self, *args):
    """Compute 1/x for the first input.

    Float path: ``torch.reciprocal``.  Quantized path: the input is offset by
    ``inp.qmin``, flattened, looked up in ``self.constants["lut"]`` via
    ``lookup_lut_powerof2`` and reshaped back to the input shape.
    """
    src = self.inputs[0]
    dst = self.outputs[0]

    if not self.quantized:
        dst.betensor = torch.reciprocal(src.betensor)
        return dst.betensor

    table = self.constants["lut"]
    # Shift so the smallest quantized value maps to LUT index 0.
    flat_index = torch.reshape(src.betensor - src.qmin, (-1,))
    looked_up = lookup_lut_powerof2(
        flat_index, table.betensor, src.qbits, False,
        dtype2bits(table.dtype), is_signed(table.dtype))
    dst.betensor = torch.reshape(looked_up, src.betensor.shape)
    return dst.betensor
@quant_register(OpType.RELU)
def relu_quantize(self, *args):
    """Compute output quantization info and requantization fields for RELU.

    The output is quantized with ``q_bits_activation`` bits, unsigned unless
    ``self.force_dtype_int`` is set.  The ratio ``out.scale / inp.scale`` is
    approximated as a multiplier plus shift and written through
    ``set_ir_field``; the ``scale`` / ``shift`` field names are used for
    multi-element tensors and ``scale_value`` / ``shift_value`` otherwise.
    """
    mode = self.attrs["q_mode_activation"]
    bits = self.attrs["q_bits_activation"]

    src = self.inputs[0]
    dst = self.outputs[0]
    dst.qbits = bits
    signed_out = False or self.force_dtype_int
    dst.scale, dst.zerop, dst.qmin, dst.qmax, dst.dtype = get_linear_quant_params_from_tensor(
        dst, mode, dst.qbits, signed_out)
    dst.qinvariant = src.qinvariant

    multiplier, multiplier_type, shift, shift_type = get_scale_approximation_params(
        dst.scale / src.scale, mult_bits=dst.qbits, force_shift_positive=self.force_shift_positive)

    scale_field = 'scale' if is_torch_tensor_with_multi_data(multiplier) else 'scale_value'
    shift_field = 'shift' if is_torch_tensor_with_multi_data(shift) else 'shift_value'
    self.set_ir_field(scale_field, multiplier, multiplier_type)
    self.set_ir_field(shift_field, shift, shift_type)

    if is_torch_tensor_with_multi_data(multiplier):
        return
    # Single-value case: also record the types next to the values in params.
    self.params["shift_type"] = shift_type
    self.params["scale_type"] = multiplier_type
@quant_register(OpType.RELU6)
def relu6_quantize(self, *args):
    """Compute output quantization info and requantization params for RELU6.

    Reports a fatal error for per-channel activation quantization.  The output
    is quantized with ``q_bits_activation`` bits (unsigned unless
    ``self.force_dtype_int`` is set) and the ratio ``out.scale / inp.scale`` is
    stored in ``self.params`` as an integer multiplier and shift with their
    types.
    """
    q_mode_activation = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(q_mode_activation) == True:
        OPT_FATAL("Currently not support per-channel quantization")
    q_bits_activation = self.attrs["q_bits_activation"]

    inp = self.inputs[0]
    out = self.outputs[0]
    out.qbits = q_bits_activation
    # `False or x` evaluates to x: signedness comes only from force_dtype_int.
    out_sign = False or self.force_dtype_int
    out.scale, out.zerop, out.qmin, out.qmax, out.dtype = get_linear_quant_params_from_tensor(
        out, q_mode_activation, out.qbits, out_sign)
    # Approximate the input->output rescale factor as multiplier and shift.
    do_scale, do_scale_type, do_shift, do_shift_type = get_scale_approximation_params(
        out.scale / inp.scale, mult_bits=out.qbits, force_shift_positive=self.force_shift_positive)
    # Stored as plain ints; presumably single values because per-channel
    # was rejected above - TODO confirm.
    self.params["shift_value"] = int(do_shift)
    self.params["shift_type"] = do_shift_type
    self.params["scale_value"] = int(do_scale)
    self.params["scale_type"] = do_scale_type
    out.qinvariant = inp.qinvariant
def relu_family_out_signed(self, *args):
    """Tell whether the relu-family output should be signed.

    When both ``max_clamp_value`` and ``min_clamp_value`` are present in
    ``self.params`` the node is handled as a clip and the answer comes from
    ``clip_out_signed``; that result is now actually returned (it used to be
    discarded, so the clip branch always yielded ``None``).  Otherwise the
    node is a plain relu and the function returns ``False``.
    """
    if {'max_clamp_value', 'min_clamp_value'}.issubset(self.params.keys()):
        return clip_out_signed(self, *args)
    return False
@quant_register(OpType.Round)
def round_quantize(self, *args):
    """Build the quantized lookup table for Round and set output quant info.

    Reports a fatal error for per-channel activation quantization.  The table
    samples the input range ``[inp.qmin, inp.qmax]`` with
    ``2 ** min(inp.qbits, lut_items_in_bits)`` points, dequantizes them,
    rounds, requantizes with the output parameters and stores the result as
    ``self.constants["lut"]``.
    """
    mode = self.attrs["q_mode_activation"]
    # Explicit comparison kept to preserve the original truth test.
    if QuantMode.is_per_channel(mode) == True:
        OPT_FATAL("Currently not support per-channel quantization")
    bits = self.attrs["q_bits_activation"]

    src = self.inputs[0]
    dst = self.outputs[0]
    dst.qbits = bits
    signed_out = is_signed(src.dtype) or self.force_dtype_int
    device = src.betensor.device
    dst.scale, dst.zerop, dst.qmin, dst.qmax, dst.dtype = get_linear_quant_params_from_tensor(
        dst, mode, dst.qbits, signed_out)

    entries = 2 ** min(src.qbits, int(self.get_attrs('lut_items_in_bits')))
    grid = torch.linspace(src.qmin, src.qmax, steps=entries, device=device)
    table = torch.round(linear_dequantize(grid, src.scale, src.zerop))
    table = linear_quantize_clip(table, dst.scale, dst.zerop, dst.qmin, dst.qmax)
    self.constants["lut"] = PyTensor(
        self.name+"/round_lut", table.cpu().numpy().astype(dtype2nptype(dst.dtype)))
    dst.qinvariant = False
@quant_register(OpType.Rsqrt)
def rsqrt_quantize(self, *args):
    """Build the quantized lookup table for Rsqrt and set output quant info.

    Reports a fatal error for per-channel activation quantization.  The output
    uses ``q_bits_activation`` bits and is unsigned unless
    ``self.force_dtype_int`` is set.  The table is stored as
    ``self.constants["lut"]``.
    """
    q_mode_activation = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(q_mode_activation) == True:
        OPT_FATAL("Currently not support per-channel quantization")
    q_bits_activation = self.attrs["q_bits_activation"]

    inp = self.inputs[0]
    out = self.outputs[0]
    out.qbits = q_bits_activation
    # `False or x` evaluates to x: signedness comes only from force_dtype_int.
    out_sign = False or self.force_dtype_int
    dev = inp.betensor.device
    out.scale, out.zerop, out.qmin, out.qmax, out.dtype = get_linear_quant_params_from_tensor(
        out, q_mode_activation, out.qbits, out_sign)
    # Table size is capped by the 'lut_items_in_bits' attribute.
    lsteps = 2 ** min(inp.qbits, int(self.get_attrs('lut_items_in_bits')))
    # Evenly sample the quantized input range and map it back to float.
    lut = linear_dequantize(torch.linspace(inp.qmin, inp.qmax, steps=lsteps, device=dev), inp.scale, inp.zerop)
    # NOTE(review): entries <= 0 give inf/NaN here; how linear_quantize_clip
    # treats them is not visible from this file - confirm.
    lut = torch.rsqrt(lut)
    lut = linear_quantize_clip(lut, out.scale, out.zerop, out.qmin, out.qmax)
    self.constants["lut"] = PyTensor(self.name+"/rsqrt_lut", lut.cpu().numpy().astype(dtype2nptype(out.dtype)))
    out.qinvariant = False
@op_register(OpType.Sigmoid)
def sigmoid_forward(self, *args):
    """Run Sigmoid through the shared activation forward.

    A float callable is installed in ``self.attrs['lambda_func']`` and
    ``activation_module.unknown_activation`` is invoked; its result is stored
    in and returned as ``self.outputs[0].betensor``.  The temporary attribute
    is removed even when the delegate raises, so a failed forward no longer
    leaves ``lambda_func`` behind on the node.
    """
    def approximated_float_forward(self, inp_tensor):
        # Use the approximation LUT when one was generated, else exact sigmoid.
        if self.approximated and "lut" in self.constants:
            lut = self.constants["lut"].betensor
            out = lookup_float_index_lut(inp_tensor, lut,
                                         self.params['index_scale_value'],
                                         self.params['index_offset_value'],
                                         mirror_mode=True,
                                         value_offset_for_mirror_mode=self.params['value_offset_value'])
        else:
            out = torch.sigmoid(inp_tensor)
        return out

    self.attrs['lambda_func'] = lambda x: approximated_float_forward(self, x)
    try:
        self.outputs[0].betensor = activation_module.unknown_activation(self, *args)
    finally:
        self.attrs.pop('lambda_func')
    return self.outputs[0].betensor
@op_register(OpType.Sign)
def sign(self, *args):
    """Return the element-wise sign (-1, 0 or 1) of the first input.

    On the quantized path the input is cast to float and the input zero point
    is added before taking the sign; on the float path the raw tensor is used.
    """
    src = self.inputs[0]
    dst = self.outputs[0]
    if self.quantized:
        values = src.betensor.float() + src.zerop
    else:
        values = src.betensor
    dst.betensor = torch.sign(values)  # -1, 0, 1
    return dst.betensor
@op_register(OpType.Sine)
def sine(self, *args):
    """Compute sin(x) for the first input.

    Quantized inputs wider than 8 bits go through ``trigonometric_forward``;
    narrower ones use a direct ``lookup_lut_powerof2`` on the input offset by
    ``inp.qmin``.  The float path uses the approximation LUT when the node is
    approximated and a LUT exists, after subtracting whole multiples of 2*pi
    (truncated toward zero); otherwise it calls ``torch.sin``.
    """
    src = self.inputs[0]
    dst = self.outputs[0]

    if self.quantized and src.qbits > 8:
        shifted = src.betensor.long() + src.zerop
        dst.betensor = trigonometric_forward(self, shifted, self.constants["lut"].betensor)
    elif self.quantized:
        flat_index = torch.reshape(src.betensor - src.qmin, (-1,))
        table = self.constants["lut"]
        looked_up = lookup_lut_powerof2(
            flat_index, table.betensor, src.qbits, False,
            dtype2bits(table.dtype), is_signed(table.dtype))
        dst.betensor = torch.reshape(looked_up, src.betensor.shape)
    elif self.approximated and "lut" in self.constants:
        table = self.constants["lut"].betensor
        angle = src.betensor.float()
        whole_turns = (angle * (1/(2*torch.pi))).int()
        remainder = angle - whole_turns*2*torch.pi
        dst.betensor = lookup_float_index_lut(
            remainder, table,
            self.params['index_scale_value'],
            self.params['index_offset_value'],
            mirror_mode=True,
            value_offset_for_mirror_mode=self.params['value_offset_value'])
    else:
        dst.betensor = torch.sin(src.betensor)
    return dst.betensor
@op_register(OpType.Softplus)
def softplus(self, *args):
    """Run Softplus through the shared activation forward.

    A float callable is installed in ``self.attrs['lambda_func']`` and
    ``activation_module.unknown_activation`` is invoked; its result is stored
    in and returned as ``self.outputs[0].betensor``.  The temporary attribute
    is removed even when the delegate raises, so a failed forward no longer
    leaves ``lambda_func`` behind on the node.
    """
    def approximated_float_forward(self, inp_tensor):
        # Use the approximation LUT when one was generated, else exact softplus.
        if self.approximated and "lut" in self.constants:
            lut = self.constants["lut"].betensor
            out = lookup_float_index_lut(
                inp_tensor, lut,
                self.params['index_scale_value'],
                self.params['index_offset_value'],
                mirror_mode=False,
                value_offset_for_mirror_mode=self.params['value_offset_value'])
        else:
            out = torch.nn.functional.softplus(inp_tensor)
        return out

    self.attrs['lambda_func'] = lambda x: approximated_float_forward(self, x)
    try:
        self.outputs[0].betensor = activation_module.unknown_activation(self, *args)
    finally:
        self.attrs.pop('lambda_func')
    return self.outputs[0].betensor
@op_register(OpType.Sort)
def sort(self, *args):
    """Sort the first input along an axis.

    Optional params: ``axis`` (default -1) and ``descending`` (default True).
    Output 0 receives the sorted values and output 1 the indices returned by
    ``torch.sort``; the betensors of all outputs are returned as a list.
    """
    axis = self.get_param('axis', optional=True, default_value=-1)
    is_descending = self.get_param('descending', optional=True, default_value=True)
    ordered, positions = torch.sort(self.inputs[0].betensor, dim=axis, descending=is_descending)
    self.outputs[0].betensor = ordered
    self.outputs[1].betensor = positions
    return [tensor.betensor for tensor in self.outputs]
@op_register(OpType.SpaceToBatch)
def spacetobatch(self, *args):
    """Pad an NHWC input spatially and move spatial blocks into the batch dim.

    Reads ``block_size_x/y`` and ``pad_left/right/top/bottom`` from the node
    params.  On the quantized path the padding value is the negated first
    element of the input zero point, otherwise 0.  The result has shape
    ``(n * block_size_x * block_size_y, h // block_size_y, w // block_size_x, c)``
    where ``n, h, w, c`` is the padded shape, and is stored in and returned as
    ``self.outputs[0].betensor``.
    """
    block_size_x = self.get_param('block_size_x')
    block_size_y = self.get_param('block_size_y')
    pad_left = self.get_param('pad_left')
    pad_right = self.get_param('pad_right')
    pad_top = self.get_param('pad_top')
    pad_bottom = self.get_param('pad_bottom')
    inp = self.inputs[0].betensor
    if self.inputs[0].shape[0] != inp.shape[0]:
        OPT_ERROR("batch size in calibration or metric dataset should be equal to batch size in IR")
    # inp is NHWC format; F.pad takes pairs starting from the last dim (C, W, H).
    paddings = (0, 0, pad_left, pad_right, pad_top, pad_bottom)
    # TODO: support per-channel zerop and pad the per-channel zerop
    pad_value = -self.inputs[0].zerop[0] if self.quantized else 0
    y = torch.nn.functional.pad(inp, paddings, value=pad_value)
    n, h, w, c = y.shape
    # Split H and W into (blocks, offset-in-block), then move both offsets to the front.
    y = y.view(n, h//block_size_y, block_size_y, w//block_size_x, block_size_x, c)
    y = y.permute(2, 4, 0, 1, 3, 5).contiguous()
    out = y.view(n*block_size_x*block_size_y, h//block_size_y, w//block_size_x, c)
    self.outputs[0].betensor = out
    return out
@quant_register(OpType.SpaceToBatch)
def spacetobatch_quantize(self, *args):
    """Propagate the input's quantization parameters to the output unchanged."""
    src = self.inputs[0]
    dst = self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/spacetodepth.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *
import torch


def space_to_depth(x, bs):
    """Fold every ``bs`` x ``bs`` spatial block of an NCHW tensor into channels.

    :param x: 4-D tensor unpacked as (N, C, H, W).
    :param bs: block size applied to both H and W.
    :return: tensor of shape (N, C * bs**2, H // bs, W // bs).
    """
    batch, channels, height, width = x.size()
    out_h = height // bs
    out_w = width // bs
    blocks = x.view(batch, channels, out_h, bs, out_w, bs)
    # Bring the two in-block offsets in front of the channel axis.
    blocks = blocks.permute(0, 3, 5, 1, 2, 4).contiguous()
    return blocks.view(batch, channels * (bs ** 2), out_h, out_w)


@op_register(OpType.SpaceToDepth)
def spacetodepth(self, *args):
    """Forward of SpaceToDepth on an NHWC input.

    Only ``block_size_x`` is used for the rearrangement; a warning is emitted
    when ``block_size_y`` differs from it.
    """
    block_size_x = self.get_param('block_size_x')
    block_size_y = self.get_param('block_size_y')
    if block_size_x != block_size_y:
        message = "currently not support block_size_x != block_size_y in layer" + self.attrs['layer_id']
        OPT_WARN(message, op_name=str(self.type))
    # data format is NHWC, the helper works on NCHW
    nchw_input = nhwc2nchw(self.inputs[0].betensor)
    rearranged = space_to_depth(nchw_input, block_size_x)
    self.outputs[0].betensor = nchw2nhwc(rearranged)
    return self.outputs[0].betensor


@quant_register(OpType.SpaceToDepth)
def spacetodepth_quantize(self, *args):
    """Propagate the input's quantization parameters to the output unchanged."""
    src = self.inputs[0]
    dst = self.outputs[0]
    for field in ('scale', 'zerop', 'qbits', 'dtype', 'qinvariant'):
        setattr(dst, field, getattr(src, field))

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/split.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *


@op_register(OpType.Split)
# IR
# layer_type=Split
# layer_bottom=rpn_class/concat_0
# layer_bottom_shape=[1,261888,2]
# layer_bottom_type=float32
# layer_top=split_out0,split_out1
# layer_top_shape=[1,261888,1],[1,261888,1]
# layer_top_type=float32,float32
# axis=2
# num_split=2
def split(self, *args):
    """Split the input along ``axis`` into chunks of the sizes in ``splits``.

    Each chunk is stored in the output with the same position.

    :return: the tuple of chunks produced by ``torch.split``.
    """
    axis = self.get_param('axis')
    data = self.inputs[0].betensor
    sizes = self.get_param('splits')
    pieces = torch.split(data, sizes, dim=axis)

    for position, piece in enumerate(pieces):
        self.outputs[position].betensor = piece
    return pieces


@quant_register(OpType.Split)
def split_quantize(self, *args):
    """Give every output the input's quantization parameters.

    An error is reported for each output whose ``key_axis`` differs from the
    input's ``key_axis``.
    """
    for i, out in enumerate(self.outputs):
        inp = self.inputs[0]
        for field in ('dtype', 'scale', 'zerop', 'qbits', 'qinvariant'):
            setattr(out, field, getattr(inp, field))

        if out.key_axis != inp.key_axis:
            OPT_ERROR(f"split input and out[{i}] key_axis is difference")

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/sqrt.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *

import torch


@quant_register(OpType.Sqrt)
def sqrt_quantize(self, *args):
    """Quantize Sqrt by building a lookup table over the input's quantized range.

    The output quantization parameters come from
    ``get_linear_quant_params_from_tensor``. The table has
    ``2 ** min(inp.qbits, lut_items_in_bits)`` entries and is stored in
    ``self.constants["lut"]``. Per-channel activation quantization is
    rejected with ``OPT_FATAL``.
    """
    q_mode_activation = self.attrs["q_mode_activation"]
    if QuantMode.is_per_channel(q_mode_activation):
        OPT_FATAL("Currently not support per-channel quantization")
    q_bits_activation = self.attrs["q_bits_activation"]

    inp = self.inputs[0]
    out = self.outputs[0]
    out.qbits = q_bits_activation
    # Output signedness is taken from force_dtype_int.
    out_sign = self.force_dtype_int
    dev = inp.betensor.device
    out.scale, out.zerop, out.qmin, out.qmax, out.dtype = get_linear_quant_params_from_tensor(
        out, q_mode_activation, out.qbits, out_sign)
    lsteps = 2 ** min(inp.qbits, int(self.get_attrs('lut_items_in_bits')))
    # Sample the quantized input range, evaluate sqrt in float, re-quantize.
    lut = linear_dequantize(torch.linspace(inp.qmin, inp.qmax, steps=lsteps, device=dev), inp.scale, inp.zerop)
    lut = torch.sqrt(lut)
    lut = linear_quantize_clip(lut, out.scale, out.zerop, out.qmin, out.qmax)
    self.constants["lut"] = PyTensor(self.name+"/sqrt_lut", lut.cpu().numpy().astype(dtype2nptype(out.dtype)))
    out.qinvariant = False


@op_register(OpType.Sqrt)
def sqrt(self, *args):
    """Forward of Sqrt: LUT lookup when quantized, ``torch.sqrt`` otherwise.

    :return: the output betensor.
    """
    inp = self.inputs[0]
    out = self.outputs[0]
    if self.quantized:
        # Shift by qmin so the smallest quantized value indexes from 0.
        x = inp.betensor
        x = x - inp.qmin
        lut = self.constants["lut"].betensor
        x = torch.reshape(x, (-1,))
        y = lookup_lut_powerof2(x, lut, inp.qbits, False, dtype2bits(
            self.constants["lut"].dtype), is_signed(self.constants["lut"].dtype))
        out.betensor = torch.reshape(y, inp.betensor.shape)
    else:
        out.betensor = torch.sqrt(inp.betensor.float())
    return out.betensor

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/square.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
import AIPUBuilder.Optimizer.ops.activation as activation_module
import torch


@quant_register(OpType.Square)
def square_quantize(self, *args):
    """Quantize Square through the generic activation quantizer.

    ``lambda_func`` and ``out_signed`` are placed in ``self.attrs`` only for
    the duration of the ``unknown_quantize`` call.
    """
    self.attrs['lambda_func'] = torch.square
    self.attrs['out_signed'] = self.force_dtype_int
    activation_module.unknown_quantize(self, *args)
    self.attrs.pop('lambda_func')
    self.attrs.pop('out_signed')


@op_register(OpType.Square)
def square(self, *args):
    """Forward of Square through the generic activation forward.

    :return: the output betensor.
    """
    self.attrs['lambda_func'] = torch.square
    self.outputs[0].betensor = activation_module.unknown_activation(self, *args)
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


@approx_register(OpType.Square)
def square_approx(self, *args):
    """Mark the node as performance mode for approximation."""
    # By default, it is calculated directly on AIFF
    self.params['is_perf_mode'] = True

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/squeeze.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *
import torch


@op_register(OpType.Squeeze)
def squeeze(self, *args):
    """Remove the dimension(s) given by the ``axis`` parameter from the input.

    :return: the squeezed tensor, also stored in ``self.outputs[0].betensor``.
    """
    axis = self.get_param('axis')
    inp = self.inputs[0].betensor
    out = torch.squeeze(inp, dim=axis)
    self.outputs[0].betensor = out
    return out


@quant_register(OpType.Squeeze)
def squeeze_quantize(self, *args):
    """Propagate the input's quantization parameters to the output unchanged."""
    inp = self.inputs[0]
    out = self.outputs[0]
    out.scale = inp.scale
    out.zerop = inp.zerop
    out.qbits = inp.qbits
    out.dtype = inp.dtype
    out.qinvariant = inp.qinvariant

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/sub.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.ops.eltwise import eltwise_quantize, eltwise


@op_register(OpType.Sub)
def sub_forward(self, *args):
    """Forward of Sub, delegated to ``eltwise`` with ``method`` set to 'SUB'.

    The ``method`` parameter is only present while ``eltwise`` runs.

    :return: the output betensor.
    """
    self.params['method'] = 'SUB'
    eltwise(self, *args)
    self.params.pop('method')

    return self.outputs[0].betensor


@quant_register(OpType.Sub)
def sub_quantize(self, *args):
    """Quantize Sub, delegated to ``eltwise_quantize`` with ``method`` 'SUB'."""
    self.params['method'] = 'SUB'
    eltwise_quantize(self, *args)
    self.params.pop('method')

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/swish.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.ops.silu import silu_approx
import AIPUBuilder.Optimizer.ops.activation as activation_module

import torch

register_optype('Swish')


def swish_func(x, alpha):
    """Return ``x * sigmoid(alpha * x)``."""
    gate = torch.sigmoid(alpha * x)
    return x * gate


@quant_register(OpType.Swish)
def swish_quantize(self, *args):
    """Quantize Swish through the generic activation quantizer.

    The float reference reads ``alpha`` each time it is invoked; the output
    is declared signed. Both temporary attrs are removed after the call.
    """
    temporary_attrs = {
        'lambda_func': lambda x: swish_func(x, self.get_param('alpha')),
        'out_signed': True,
    }
    for key, value in temporary_attrs.items():
        self.attrs[key] = value
    activation_module.unknown_quantize(self, *args)
    for key in temporary_attrs:
        self.attrs.pop(key)


@op_register(OpType.Swish)
def swish(self, *args):
    """Forward of Swish through the generic activation forward.

    When the node is approximated and owns a ``lut`` constant, the float path
    multiplies the input by a LUT lookup; otherwise it evaluates
    ``swish_func`` with the node's ``alpha``.

    :return: the output betensor.
    """
    def float_path(tensor):
        if not (self.approximated and "lut" in self.constants):
            return swish_func(tensor, self.get_param('alpha'))
        table = self.constants["lut"].betensor
        looked_up = lookup_float_index_lut(
            tensor, table, self.params['index_scale_value'], self.params['index_offset_value'],
            mirror_mode=True, value_offset_for_mirror_mode=self.params['value_offset_value'])
        return tensor * looked_up

    self.attrs['lambda_func'] = float_path
    result = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = result
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


@approx_register(OpType.Swish)
def swish_approx(self, *args):
    """Reuse the SiLU approximation for Swish."""
    silu_approx(self, *args)

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/tan.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
import AIPUBuilder.Optimizer.ops.activation as activation_module
import torch


@quant_register(OpType.Tan)
def tan_quantize(self, *args):
    """Quantize Tan through the generic activation quantizer (signed output)."""
    temporary_attrs = {'lambda_func': torch.tan, 'out_signed': True}
    for key, value in temporary_attrs.items():
        self.attrs[key] = value
    activation_module.unknown_quantize(self, *args)
    for key in temporary_attrs:
        self.attrs.pop(key)


@op_register(OpType.Tan)
def tan(self, *args):
    """Forward of Tan through the generic activation forward.

    :return: the output betensor.
    """
    self.attrs['lambda_func'] = torch.tan
    result = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = result
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


@approx_register(OpType.Tan)
def tan_approx(self, *args):
    """Disable performance mode for the Tan approximation."""
    # By default, it is calculated directly on TPC
    self.params['is_perf_mode'] = False

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/tf_ops/__init__.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/thresholdrelu.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
import AIPUBuilder.Optimizer.ops.activation as activation_module
import torch

# y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.

register_optype('THRESHOLDEDRELU')


@quant_register(OpType.THRESHOLDEDRELU)
def thresholdedrelu_quantize(self, *args):
    """Quantize ThresholdedRelu through the generic activation quantizer.

    ``alpha`` is read once up front; the output is declared signed only when
    ``alpha`` is negative.
    """
    alpha = float(self.get_param("alpha"))
    temporary_attrs = {
        'lambda_func': lambda x: torch.nn.functional.threshold(x, alpha, 0),
        'out_signed': alpha < 0.0,
    }
    for key, value in temporary_attrs.items():
        self.attrs[key] = value
    activation_module.unknown_quantize(self, *args)
    for key in temporary_attrs:
        self.attrs.pop(key)


@op_register(OpType.THRESHOLDEDRELU)
def thresholdedrelu(self, *args):
    """Forward of ThresholdedRelu through the generic activation forward.

    ``alpha`` is read every time the float path is evaluated.

    :return: the output betensor.
    """
    def float_path(tensor):
        threshold = float(self.get_param("alpha"))
        return torch.nn.functional.threshold(tensor, threshold, 0)

    self.attrs['lambda_func'] = float_path
    result = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = result
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


@approx_register(OpType.THRESHOLDEDRELU)
def thresholdrelu_approx(self, *args):
    """Mark the node as performance mode for approximation."""
    # By default, it is calculated directly on AIFF
    self.params['is_perf_mode'] = True


def threshold_out_signed(self):
    """Return False when ``alpha`` is >= 0 and True otherwise."""
    alpha = float(self.get_param("alpha"))
    return not (alpha >= 0)

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/tile.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.logger import OPT_WARN


@op_register(OpType.Tile)
def tile(self, *args):
    """Repeat the input along each dimension as given by ``repeats``.

    A warning is emitted when the number of repeat factors differs from the
    input's number of dimensions; the repeat is still attempted.

    :return: the output betensor.
    """
    inp = self.inputs[0].betensor
    out = self.outputs[0]
    # The repeat factors come from the node parameter only; the former
    # shape-derived estimate was never used and could divide by zero.
    reps = self.get_param('repeats')
    if inp.dim() != len(reps):
        OPT_WARN('please check the dim between input.dim and len(repeats) in Tile Op')
    out.betensor = inp.repeat(reps)
    return out.betensor


@quant_register(OpType.Tile)
def tile_quantize(self, *args):
    """Propagate quantization info to the output, tiling per-axis parameters.

    When the output has a ``key_axis``, scale and zero point are repeated:
    by the ``repeats`` entry of that axis if the input also has a
    ``key_axis``, otherwise by the output's IR extent on that axis.
    """
    inp = self.inputs[0]
    out = self.outputs[0]
    out.scale = inp.scale
    out.zerop = inp.zerop
    out.qbits = inp.qbits
    out.dtype = inp.dtype
    out.qinvariant = inp.qinvariant
    out.qmin = inp.qmin
    out.qmax = inp.qmax

    if out.key_axis is not None:
        ka = out.key_axis
        if inp.key_axis is not None:
            rep = self.params['repeats'][ka]
        else:
            rep = out.ir_shape[ka]
        out.scale = inp.scale.repeat(rep)
        out.zerop = inp.zerop.repeat(rep)

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/transpose.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.ops.permute import *


@op_register(OpType.Transpose)
def transpose(self, *args):
    """Forward of Transpose; identical to the Permute forward."""
    result = permute(self, *args)
    return result


@quant_register(OpType.Transpose)
def transpose_quantize(self, *args):
    """Quantize Transpose; identical to the Permute quantization."""
    permute_quantize(self, *args)

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/trunc.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.utils import *
import AIPUBuilder.Optimizer.ops.activation as activation_module
import torch

register_optype('Trunc')


@op_register(OpType.Trunc)
def trunc(self, *args):
    """Forward of Trunc through the generic activation forward.

    :return: the output betensor.
    """
    self.attrs['lambda_func'] = torch.trunc
    result = activation_module.unknown_activation(self, *args)
    self.outputs[0].betensor = result
    self.attrs.pop('lambda_func')
    return self.outputs[0].betensor


@quant_register(OpType.Trunc)
def trunc_quantize(self, *args):
    """Quantize Trunc through the generic activation quantizer (signed output)."""
    temporary_attrs = {'lambda_func': torch.trunc, 'out_signed': True}
    for key, value in temporary_attrs.items():
        self.attrs[key] = value
    activation_module.unknown_quantize(self, *args)
    for key in temporary_attrs:
        self.attrs.pop(key)


@approx_register(OpType.Trunc)
def trunc_approx(self, *args):
    """Disable performance mode for the Trunc approximation."""
    # this is not currently used because it is the same as the float process
    self.params['is_perf_mode'] = False

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/ops/where.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.ops.select import *
import torch

register_optype('Where')


@op_register(OpType.Where)
def where_forward(self, *args):
    """Forward of Where.

    With more than one input the node behaves like Select. With a single
    input the output lists the coordinates of every non-zero element of
    ``betensor + zerop``, one row per element, and is padded to
    ``input.numel()`` rows: the first padding row is filled with the second
    value returned by ``dtype2range(out.dtype)`` and any further padding rows
    are zero.

    :return: the output betensor.
    """
    if len(self.inputs) > 1:
        return select_forward(self, *args)

    inp = self.inputs[0]
    out = self.outputs[0]
    device = inp.betensor.device
    indexes = torch.where(inp.betensor + inp.zerop)
    input_dim = inp.betensor.dim()
    valid_num = indexes[0].numel()
    total_num = inp.betensor.numel()
    invalid_num = total_num - valid_num
    # Stack the per-dimension index vectors directly on the device instead of
    # round-tripping through CPU/numpy; the result has shape (input_dim, valid_num).
    y = torch.stack(indexes, dim=0).to(device=device, dtype=torch.long)
    _, invalid_value = dtype2range(out.dtype)
    # arrange indexes like tf.where results: valid rows, one marker row, zero rows
    first_invalid_num = min(1, invalid_num)
    invalid_tensor = torch.ones([first_invalid_num, input_dim], device=device).long() * invalid_value

    other_invalid_num = max(0, invalid_num - 1)
    other_invalid_tensor = torch.zeros([other_invalid_num, input_dim], device=device).long()

    out.betensor = torch.cat([y.permute(1, 0), invalid_tensor, other_invalid_tensor], dim=0)
    return out.betensor


@quant_register(OpType.Where)
def where_quantize(self, *args):
    """Quantize Where.

    With more than one input the Select quantization is used. Otherwise the
    output keeps its original IR dtype, with scale 1.0 and zero point 0, and
    is marked quantization-invariant.
    """
    if len(self.inputs) > 1:
        select_quantize(self, *args)
    else:
        out = self.outputs[0]
        str_type = self.attrs['layer_top_type_original'][0]
        out.dtype = str2dtype(str_type)
        out.qbits = dtype2bits(out.dtype)
        out.scale = 1.0
        out.zerop = 0
        out.qinvariant = True

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/passes/__init__.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from . passes import *
from . convert_resize_to_convolution import convert_resize_to_convolution

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/passes/check_quantization_info_s1.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *


def check_quantization_info(graph: PyGraph, config=None):
    """
    Force activation quantization with 16 or more bits to be symmetric.

    Nodes whose activation mode is asymmetric and whose activation bit width
    is at least 16 get their ``q_mode_activation`` attribute replaced by the
    symmetric counterpart. Nodes missing either attribute are left untouched.

    :param graph: graph whose nodes are checked and updated in place.
    :param config: unused.
    :return: None
    """
    for node in graph.nodes:
        activation_mode = node.attrs.get('q_mode_activation')
        activation_bits = node.attrs.get('q_bits_activation')
        # .get() yields None for absent attrs; comparing None with 16 would raise.
        if activation_mode is None or activation_bits is None:
            continue
        if activation_bits >= 16 and QuantMode.is_asymmetric(activation_mode):
            sym_activation_mode = QuantMode.to_symmetric(activation_mode)
            node.attrs['q_mode_activation'] = sym_activation_mode
            OPT_DEBUG(f"{node} changes quantization method of activation tensor "
                      f"from {activation_mode} to {sym_activation_mode}")

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/passes/detect_inf_mask_nodes.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *
import torch


def detect_inf_mask_nodes(graph, config):
    """Replace extreme mask constants by finite 16-bit-range values.

    Does nothing unless ``config.enable_pass_detect_inf_mask_nodes`` is set.
    For BatchNorm nodes whose weights and biases sum to zero and whose largest
    bias is below -65536, weights become 32767 and biases -32767 everywhere.
    For Constant nodes, every weight below -32767 is set to -32768.
    """
    if not config.enable_pass_detect_inf_mask_nodes:
        return

    # filter batchnorm weight and bias's inf,-inf
    for node in graph.nodes:
        if node.type == OpType.BatchNorm:
            weights = node.constants['weights']
            biases = node.constants['biases']
            combined = weights.betensor + biases.betensor
            if torch.sum(combined) == 0 and torch.max(biases.betensor) < -65536:
                weights.betensor = torch.zeros(weights.ir_shape, device=weights.device) + 32767
                # the new biases are created on the weights' device as well
                biases.betensor = torch.zeros(biases.ir_shape, device=weights.device) - 32767
        if node.type == OpType.Constant:
            values = node.constants['weights'].betensor
            values[values < -32767] = -32768

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/passes/global_calibration_prepare.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.logger import *


def _set_fc_input_key_axis(graph):
    """Set ``key_axis`` of every FullyConnected node's first input to its last IR axis."""
    for node in graph.nodes:
        if node.type in [OpType.FullyConnected, ]:
            first_input = node.inputs[0]
            first_input.key_axis = len(first_input.ir_shape) - 1


def global_calibration_prepare(graph: PyGraph, config):
    """Prepare the graph for the configured global calibration methods.

    For each entry of ``config.global_calibration`` whose first element is
    'smooth_quant_zy' or 'awq_zy', the first input of every FullyConnected
    node gets its ``key_axis`` set to the last axis. Other methods need no
    preparation.
    """
    for method in config.global_calibration:
        mname = method[0]
        if 'smooth_quant_zy' == mname:
            _set_fc_input_key_axis(graph)
        elif 'awq_zy' == mname:
            # A forward hook that stored abs(input) in a per-node placeholder
            # was sketched for this method but is currently disabled.
            _set_fc_input_key_axis(graph)

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/passes/optimize_x2_wdc.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.utils import *
from AIPUBuilder.Optimizer.framework import *
from AIPUBuilder.Optimizer.logger import *


def optimize_x2_wdc(graph: PyGraph, config=None):
    """Widen weight ranges until the simulated X2 WDC compression rate is below 0.9.

    Only nodes whose ``optimize_wdc_for_x2`` attribute reads 'true'
    (case-insensitive) are processed. Each constant is re-quantized up to
    four times; after every attempt that does not reach the target rate its
    ``min`` and ``max`` are doubled.

    :param graph: graph whose node constants are updated in place.
    :param config: unused.
    """
    for node in graph.nodes:
        wdc_enabled = str(node.attrs['optimize_wdc_for_x2']).lower() == 'true'
        if not wdc_enabled:
            continue
        weight_mode = node.attrs['q_mode_weight']
        weight_bits = node.attrs['q_bits_weight']
        for k, tensor in node.constants.items():
            # We only try to scale up weight 4 times to keep acc
            for trial in range(4):
                quant_params = get_linear_quant_params_from_tensor(tensor,
                                                                   weight_mode, weight_bits, is_signed=True)
                tensor.scale, tensor.zerop, tensor.qmin, tensor.qmax, tensor.dtype = quant_params
                fake_quant = linear_quantize_clip(
                    tensor.betensor, tensor.broadcast_scale, tensor.broadcast_zerop, tensor.qmin, tensor.qmax)
                comp_rate = simulate_x2_wdc(fake_quant, weight_bits)
                if comp_rate < 0.9:
                    OPT_DEBUG(f"weight {k} gets comp rate of {comp_rate} at step {trial}")
                    break
                tensor.min *= 2
                tensor.max *= 2
                OPT_INFO(f"Scaling up {node.name}'s tensor {k} by 2 times to adapt WDC, acc may be affacted")

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/passes/shrink_pow_exponent_s1.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *


def shrink_pow_exponent(graph, config=None):
    """Record a scalar ``exponent`` parameter on Pow nodes with a constant exponent.

    A Pow node qualifies when the producer of its second input only depends
    on constant roots and, after running those ancestors, the second input
    holds a single unique value. That value is stored in
    ``n.params['exponent']`` as a float.

    :param graph: graph whose Pow nodes are inspected and updated in place.
    :param config: unused.
    """
    def criteria(n):
        """Return True when the exponent input of Pow node *n* is one constant value."""
        if n is not None and n.type == OpType.Pow:
            # Locate the parent that produces the exponent (second) input.
            pow_parent = None
            for parent in n.parents:
                for outp in parent.outputs:
                    if outp.name == n.inputs[1].name:
                        pow_parent = parent
                        break
                if pow_parent:
                    break
            # No parent produces the exponent tensor: nothing can be evaluated.
            if pow_parent is None:
                return False
            pow_nods, count_root, count_constant = pow_parent.get_ancestors()
            if count_root > 0 and count_root == count_constant:
                # Every root is constant, so the exponent can be computed now.
                for node in pow_nods:
                    node.forward()
                unq = n.inputs[1].betensor.unique()
                if unq.numel() == 1:
                    return True
        return False
    # for powN: collect all inputs edge , if all of them == constant, then exponent should be N
    for n in graph.nodes:
        if criteria(n):
            unq = n.inputs[1].betensor.unique()
            n.params['exponent'] = float(unq[0])

# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/passes/transfer_op_to_reshape_op_s3.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | 8 | def criteria(n): 9 | ret = False 10 | if n.type == OpType.Cast: 11 | if (n.parents[0].attrs['q_mode_activation'] == n.attrs['q_mode_activation'] and 12 | len(n.inputs) > 0 and len(n.outputs) > 0 and 13 | n.inputs[0].dtype == n.outputs[0].dtype): 14 | ret = True 15 | elif n.type == OpType.FakeQuantWithMinMaxVars: 16 | ret = True 17 | else: 18 | pass 19 | return ret 20 | 21 | 22 | def transfer_op_to_reshape_op(g, config): 23 | # transform useless op to lightweight reshape op 24 | need_replace_ops = [] 25 | for n in g.nodes: 26 | if n is not None and criteria(n): 27 | # create reshape node 28 | transform_op = PyNode(n.name, OpType.Reshape) 29 | transform_op.additional = True 30 | # set attrs and params 31 | transform_op.attrs.update(n.attrs.clone()) 32 | transform_op.params['shape'] = n.outputs[0].ir_shape 33 | # record pairs 34 | need_replace_ops.append((n, transform_op)) 35 | for old, new in need_replace_ops: 36 | g.replace_node_safely(old, new) 37 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_cocokp.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from AIPUBuilder.Optimizer.framework import * 8 | 9 | from torch.utils.data import Dataset 10 | from collections import defaultdict 11 | 12 | 13 | @register_plugin(PluginType.Dataset, '1.0') 14 | class CocokpDataset(Dataset): 15 | """ 16 | This CocoDataset plugin is used for yolov4_onnx/yolov4_tflite/fasterrcnn_tensorflow models in Optimizer. 
17 | """ 18 | 19 | def __init__(self, data_file=None, label_file=None): 20 | """ 21 | :param data_file: a .npy file 22 | :param label_file: a dict format in .npy file and format is { 23 | are: list box area, 24 | bbox: list of boxes, 25 | keypoint: list of 17*3} 26 | """ 27 | self.data = np.load(data_file, mmap_mode='c').astype(np.float32) 28 | self.label = None 29 | if label_file is not None: 30 | self.label = np.load(label_file, allow_pickle=True).item() 31 | 32 | def __len__(self): 33 | return len(self.data) 34 | 35 | def __getitem__(self, idx): 36 | image_data = self.data[idx] 37 | sample = [image_data, {}] 38 | if self.label is not None: 39 | raw_label = self.label[idx] 40 | sample[1] = raw_label 41 | return sample 42 | 43 | @staticmethod 44 | def collate_fn(batch): 45 | batch_label = [] 46 | batch_data = None 47 | for batch_idx in range(len(batch)): 48 | els_sample = batch[batch_idx][0] 49 | single_data = torch.unsqueeze(torch.tensor(els_sample), 0) 50 | batch_data = single_data if batch_idx == 0 else torch.cat( 51 | (batch_data, single_data), 0) 52 | 53 | for idx, sample in enumerate(batch): 54 | if not sample[1]: 55 | continue 56 | label = {} 57 | for k, v in sample[1].items(): 58 | label[k] = torch.tensor(v) 59 | batch_label.append(label) 60 | return batch_data, batch_label 61 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_iwslt.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from torch.utils.data import Dataset 8 | import numpy as np 9 | 10 | 11 | @register_plugin(PluginType.Dataset, '1.0') 12 | class IWSLTDataset(Dataset): 13 | """ 14 | This IWSLTDataset plugin is used for the transformer_tensorflow model in Optimizer. 
15 | """ 16 | 17 | # when used as calibration dataset, label_file can be omitted. 18 | def __init__(self, data_file, label_file=None): 19 | self.data = None 20 | self.label = None 21 | try: 22 | self.data = np.load(data_file, mmap_mode='c') 23 | except ValueError: 24 | self.data = np.load(data_file, allow_pickle=True) 25 | if label_file is not None: 26 | self.label = np.load(label_file, allow_pickle=True) 27 | 28 | def __len__(self): 29 | return len(self.data) 30 | 31 | def __getitem__(self, idx): 32 | sample = [self.data[idx], float("-inf")] 33 | if self.label is not None: 34 | sample[1] = self.label[idx] 35 | return sample 36 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_mpii.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from torch.utils.data import Dataset 8 | import numpy as np 9 | 10 | 11 | @register_plugin(PluginType.Dataset, '1.0') 12 | class MpiiDataset(Dataset): 13 | """ 14 | This MpiiDataset plugin is mainly used for stacked_hourglass_tensorflow model in Optimizer. 15 | MPII Human Pose dataset is a state of the art benchmark for evaluation of articulated human pose estimation. 16 | The dataset includes around 25K images containing over 40K people with annotated body joints. 17 | http://human-pose.mpi-inf.mpg.de/ 18 | """ 19 | 20 | def __init__(self, data_file=None, label_file=None): 21 | ''' 22 | :param data_file: ndarray in npy file 23 | :param label_file: a dict format in npy file and the keys of dict include 24 | ['__header__', '__version__', '__globals__', 'jnt_missing', 'pos_gt_src', 'headboxes_src', 'center', 'scale']. 
25 | ''' 26 | self.data = np.load(data_file, allow_pickle=True) 27 | self.label = None 28 | if label_file is not None: 29 | self.label = np.load(label_file, allow_pickle=True).tolist() 30 | self.keys = [k for k in self.label if '__' not in k] 31 | 32 | def __len__(self): 33 | return len(self.data) 34 | 35 | def __getitem__(self, idx): 36 | sample = [self.data[idx], {}] 37 | if self.label is not None: 38 | pick_label = {} 39 | for k in self.keys: 40 | if k in ['center', 'scale']: 41 | pick_label[k] = self.label[k][idx] 42 | else: 43 | pick_label[k] = self.label[k][..., idx] 44 | sample[1] = pick_label 45 | return sample 46 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_nhwcrgb2nhwcbgr.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from AIPUBuilder.Optimizer.logger import * 8 | from torch.utils.data import Dataset 9 | import numpy as np 10 | 11 | 12 | @register_plugin(PluginType.Dataset, '1.0') 13 | class NUMPYNHWCRGB2BGR(Dataset): 14 | """ 15 | This NUMPYNHWCRGB2BGR dataset plugin is used for transfering rgb to bgr of channel dimension in NHWC datalayout format. 
16 | """ 17 | 18 | def __init__(self, data_file, label_file=None): 19 | self.data = None 20 | self.label = None 21 | try: 22 | self.data = np.load(data_file, mmap_mode='c') 23 | # rgb -> bgr 24 | self.data = np.flip(self.data, -1).copy() 25 | except Exception as e: 26 | OPT_FATAL('the data of NUMPYNHWCRGB2BGR plugin should be Numpy.ndarray and allow_pickle=False.') 27 | if label_file is not None: 28 | try: 29 | self.label = np.load(label_file, mmap_mode='c') 30 | except ValueError: 31 | self.label = np.load(label_file, allow_pickle=True) 32 | 33 | def __len__(self): 34 | return len(self.data) 35 | 36 | def __getitem__(self, idx): 37 | sample = [[self.data[idx]], float("-inf")] 38 | if self.label is not None: 39 | sample[1] = self.label[idx] 40 | return sample 41 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_numpy.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from AIPUBuilder.Optimizer.logger import * 8 | from torch.utils.data import Dataset 9 | import numpy as np 10 | import torch 11 | 12 | 13 | @register_plugin(PluginType.Dataset, '1.0') 14 | class NumpyDataset(Dataset): 15 | """ 16 | This NumpyDataset plugin is mainly used for image classification domain models which have one input. 17 | The data in npy file has the same datalayout with the input datalayout in model. 18 | 19 | Assume that all preprocesses of data have been done before save to npy file if the CompassIR doesnot have preprocess part. 20 | """ 21 | # when used as calibration dataset, label_file can be omitted. 22 | 23 | def __init__(self, data_file, label_file=None): 24 | ''' 25 | :param data_file: ndarray in npy file. 26 | :param label_file: ndarray in npy file. 
27 | ''' 28 | self.data = None 29 | self.label = None 30 | 31 | try: 32 | self.data = np.load(data_file, mmap_mode='c') 33 | except Exception as e: 34 | OPT_FATAL( 35 | 'the data of NumpyDataset plugin should be Numpy.ndarray and allow_pickle=False.') 36 | if label_file is not None: 37 | try: 38 | self.label = np.load(label_file, mmap_mode='c') 39 | except ValueError: 40 | self.label = np.load(label_file, allow_pickle=True) 41 | 42 | def __len__(self): 43 | return len(self.data) 44 | 45 | def __getitem__(self, idx): 46 | sample = [[self.data[idx]], float("-inf")] 47 | if self.label is not None: 48 | sample[1] = self.label[idx] 49 | return sample 50 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_numpynchw2nhwc.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from AIPUBuilder.Optimizer.logger import * 8 | from torch.utils.data import Dataset 9 | import numpy as np 10 | 11 | 12 | @register_plugin(PluginType.Dataset, '1.0') 13 | class NumpyNCHW2NHWCDataset(Dataset): 14 | """ 15 | This NumpyNCHW2NHWCDataset plugin is used for the dataset has NCHW data format, but the CompassIR needs NHWC data format. 16 | This plugin automatically transfers the NCHW data format to NHWC data format, which meets the CompassIR requirement. 17 | 18 | Assume that all preprocesses of data have been done before save to npy file if the CompassIR doesnot have preprocess part. 19 | """ 20 | 21 | def __init__(self, data_file, label_file=None): 22 | ''' 23 | :param data_file: ndarray in npy file. 24 | :param label_file: ndarray in npy file. 
25 | ''' 26 | self.data = None 27 | self.label = None 28 | try: 29 | self.data = np.load(data_file, mmap_mode='c') 30 | self.data = np.transpose(self.data, [0, 2, 3, 1]) 31 | except Exception as e: 32 | OPT_FATAL('the data of NumpyNCHW2NHWCDataset plugin should be Numpy.ndarray and allow_pickle=False.') 33 | if label_file is not None: 34 | try: 35 | self.label = np.load(label_file, mmap_mode='c') 36 | except ValueError: 37 | self.label = np.load(label_file, allow_pickle=True) 38 | 39 | def __len__(self): 40 | return len(self.data) 41 | 42 | def __getitem__(self, idx): 43 | sample = [[self.data[idx]], float("-inf")] 44 | if self.label is not None: 45 | sample[1] = self.label[idx] 46 | return sample 47 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_numpynhwc2nchw.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from AIPUBuilder.Optimizer.logger import * 8 | from torch.utils.data import Dataset 9 | import numpy as np 10 | 11 | 12 | @register_plugin(PluginType.Dataset, '1.0') 13 | class NumpyNHWC2NCHWDataset(Dataset): 14 | """ 15 | This NumpyNHWC2NCHWDataset plugin is used for the dataset has NHWC data format, but the CompassIR needs NCHW data format. 16 | This plugin automatically transfers the NHWC data format to NCHW data format, which meets the CompassIR requirement. 17 | 18 | Assume that all preprocesses of data have been done before save to npy file if the CompassIR doesnot have preprocess part. 19 | """ 20 | 21 | def __init__(self, data_file, label_file=None): 22 | ''' 23 | :param data_file: ndarray in npy file. 24 | :param label_file: ndarray in npy file. 
25 | ''' 26 | self.data = None 27 | self.label = None 28 | try: 29 | self.data = np.load(data_file, mmap_mode='c') 30 | self.data = np.transpose(self.data, [0, 3, 1, 2]) 31 | except Exception as e: 32 | OPT_FATAL('the data of NumpyNHWC2NCHWDataset plugin should be Numpy.ndarray and allow_pickle=False.') 33 | if label_file is not None: 34 | try: 35 | self.label = np.load(label_file, mmap_mode='c') 36 | except ValueError: 37 | self.label = np.load(label_file, allow_pickle=True) 38 | 39 | def __len__(self): 40 | return len(self.data) 41 | 42 | def __getitem__(self, idx): 43 | sample = [[self.data[idx]], float("-inf")] 44 | if self.label is not None: 45 | sample[1] = self.label[idx] 46 | return sample 47 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_numpynhwcrgb2ncbgrhw.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from AIPUBuilder.Optimizer.logger import * 8 | from torch.utils.data import Dataset 9 | import numpy as np 10 | 11 | 12 | @register_plugin(PluginType.Dataset, '1.0') 13 | class NumpyNHWCRGB2NCBGRHWDataset(Dataset): 14 | """ 15 | This NumpyNHWCRGB2NCBGRHWDataset plugin is used for changing the RGB to BGR in channel dimition and then 16 | transfering the NHWC data format to NCHW data format, which meets the CompassIR requirement. 17 | 18 | Assume that all preprocesses of data have been done before save to npy file if the CompassIR doesnot have preprocess part. 19 | """ 20 | 21 | def __init__(self, data_file, label_file=None): 22 | ''' 23 | :param data_file: ndarray in npy file. 24 | :param label_file: ndarray in npy file. 
25 | ''' 26 | self.data = None 27 | self.label = None 28 | try: 29 | self.data = np.load(data_file, mmap_mode='c') 30 | self.data = np.flip(self.data, -1).copy() 31 | self.data = np.transpose(self.data, [0, 3, 1, 2]) 32 | except Exception as e: 33 | OPT_FATAL('the data of NumpyNHWCRGB2NCBGRHWDataset plugin should be Numpy.ndarray and allow_pickle=False.') 34 | if label_file is not None: 35 | try: 36 | self.label = np.load(label_file, mmap_mode='c') 37 | except ValueError: 38 | self.label = np.load(label_file, allow_pickle=True) 39 | 40 | def __len__(self): 41 | return len(self.data) 42 | 43 | def __getitem__(self, idx): 44 | sample = [[self.data[idx]], float("-inf")] 45 | if self.label is not None: 46 | sample[1] = self.label[idx] 47 | return sample 48 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_random.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from torch.utils.data import Dataset 8 | 9 | 10 | @register_plugin(PluginType.Dataset, '1.0') 11 | class RandomDataset(Dataset): 12 | """ 13 | This RandomDataset plugin is based on the input data shape and label shape to generate the random data/label as dataset/labelset. 14 | 15 | Assume that all preprocesses of data have been done before save to npy file if the CompassIR doesnot have preprocess part. 
16 | """ 17 | 18 | def __init__(self, sample_shape, label_shape=(), num_sample=1, seed=None): 19 | import numpy as np 20 | if seed: 21 | np.random.seed(seed) 22 | self.num_sample = num_sample 23 | self.sample_shape = sample_shape 24 | self.label_shape = label_shape 25 | self.data = [] 26 | self.label = [] 27 | for _ in range(num_sample): 28 | self.data.append(np.random.randn(*tuple(self.sample_shape)).astype(np.float32)) 29 | self.label.append(np.random.randn(*tuple(self.label_shape)).astype(np.float32)) 30 | 31 | def __getitem__(self, idx): 32 | return self.data[idx], self.label[idx] 33 | 34 | def __len__(self): 35 | return len(self.data) 36 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_sphereface_lfw.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from AIPUBuilder.Optimizer.logger import * 8 | from torch.utils.data import Dataset 9 | import numpy as np 10 | import torch 11 | 12 | 13 | @register_plugin(PluginType.Dataset, '1.0') 14 | class SphereFaceLFWDataset(Dataset): 15 | """ 16 | This SphereFaceLFWDataset plugin is mainly used for sphereface_caffe model. 17 | The data in npy file has the same datalayout with the input datalayout in model. 18 | 19 | Assume that all preprocesses of data have been done before save to npy file if the CompassIR doesnot have preprocess part. 20 | """ 21 | 22 | def __init__(self, data_file, label_file=None): 23 | ''' 24 | :param data_file: ndarray in npy file. 25 | :param label_file: ndarray in npy file. 
26 | ''' 27 | self.data = None 28 | self.label = [] 29 | 30 | self.data = np.load(data_file, mmap_mode='c') 31 | if label_file is not None: 32 | try: 33 | label = np.load(label_file, mmap_mode='c') 34 | except ValueError: 35 | label = np.load(label_file, allow_pickle=True).item() 36 | keys = list(label.keys()) 37 | for key in keys: 38 | self.label.append(label[key]) 39 | 40 | def __len__(self): 41 | return len(self.data) 42 | 43 | def __getitem__(self, idx): 44 | sample = [[self.data[idx]], float("-inf")] 45 | if len(self.label) != 0: 46 | sample[1] = self.label[idx] 47 | return sample 48 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_stable_diffusion_unet.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | import numpy as np 5 | from torch.utils.data import Dataset, DataLoader 6 | from AIPUBuilder.Optimizer.framework import * 7 | 8 | 9 | @register_plugin(PluginType.Dataset, '0.01') 10 | class StableDiffusionUNetDataset(Dataset): 11 | def __init__(self, data_file, label_file=None): 12 | dataset = np.load(data_file, allow_pickle=True).item() 13 | keys = list(dataset.keys()) 14 | self.s_dataset = dataset[keys[0]] 15 | self.t_dataset = dataset[keys[1]] 16 | self.h_dataset = dataset[keys[2]] 17 | ''' 18 | self.s_dataset = dataset['input1'] 19 | self.t_dataset = dataset['input2'] 20 | self.h_dataset = dataset['input3'] 21 | ''' 22 | 23 | def __getitem__(self, idx): 24 | sample = [[self.s_dataset[idx], self.t_dataset[idx], self.h_dataset[idx]], float("-inf")] 25 | 26 | return sample 27 | 28 | def __len__(self): 29 | return len(self.s_dataset) 30 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_dataset_tusimple.py: 
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.


import numpy as np

from AIPUBuilder.Optimizer.framework import *

from torch.utils.data import Dataset
from collections import defaultdict


@register_plugin(PluginType.Dataset, '1.0')
class tusimpleDataset(Dataset):
    """
    Dataset plugin that serves samples from an npy data file, optionally paired
    with a pickled label object stored in a second npy file.
    """

    def __init__(self, data_file=None, label_file=None):
        '''
        :param data_file: path of the npy file holding the samples. Despite the None
            default it is passed straight to np.load, so a real path is required.
        :param label_file: optional npy file holding one pickled object, which is
            later indexed by the sample index.
        '''
        # Open copy-on-write, then materialize a float32 copy of the samples.
        raw_samples = np.load(data_file, mmap_mode='c')
        self.data = raw_samples.astype(np.float32)
        self.label = None if label_file is None else np.load(label_file, allow_pickle=True).item()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        '''
        :return: [sample, annotation]. annotation is an empty dict when no label file
            was given; otherwise it holds 'image_name', 'lanes' and 'h_samples'.
        '''
        image = self.data[idx]
        annotation = {}
        if self.label is not None:
            entry = self.label[idx]
            # The sample index doubles as the image name.
            annotation['image_name'] = np.array(idx)
            annotation['lanes'] = np.array(entry[0])
            annotation['h_samples'] = np.array(entry[1])
        return [image, annotation]


# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/plugins/aipubt_metric_CosDistance_with_seqlen.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.


from AIPUBuilder.Optimizer.framework import *

import torch


@register_plugin(PluginType.Metric, '1.0')
class CosDistancewseqlenMetric(OptBaseMetric):
    """
    This CosDistancewseqlenMetric is used for the metric of RNNT_encoder model in Optimizer.
    The label of metric has two elements: [label_value(tensor), actual_len(int)]. Only the first
    actual_len entries of each prediction and each label are compared by cosine similarity.
    """

    def __init__(self):
        self.cos = torch.nn.CosineSimilarity(dim=-1)
        self.sim = []

    def __call__(self, pred, target):
        '''
        :param pred: sequence of outputs; only pred[0] is used.
        :param target: [padded_label, actual_len], both indexed along the first axis.
        '''
        outputs = pred[0].cpu()
        labels = target[0].cpu()
        valid_lens = target[1].cpu()
        for row in range(labels.shape[0]):
            keep = valid_lens[row]
            # Trim both sides to the valid length, then flatten to 1-D vectors.
            trimmed_output = outputs[row][:keep].reshape([-1])
            trimmed_label = labels[row][:keep].reshape([-1])
            self.sim.append(self.cos(trimmed_output, trimmed_label))

    def reset(self):
        self.sim = []

    def compute(self):
        # self.sim holds one scalar similarity per sample; average them all.
        collected = torch.Tensor(self.sim)
        return float(torch.mean(collected, 0))

    def report(self):
        return "cosine similarity is %f" % (self.compute())


# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/plugins/aipubt_metric_FlattenCosDistance.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.


from AIPUBuilder.Optimizer.framework import *

import torch
import numpy

# For OPT OP Test
# It uses the mean value over all outputs of all batches.


@register_plugin(PluginType.Metric, '0.01')
class FlattenCosDistanceMetric(OptBaseMetric):
    """
    Cosine-similarity metric for op tests: every output is flattened per sample,
    and the similarity is averaged over samples, outputs and calls.
    """

    def __init__(self):
        # Default CosineSimilarity reduces along dim=1, i.e. the flattened axis below.
        self.cos = torch.nn.CosineSimilarity()
        self.sim = []

    def __call__(self, pred, target):
        '''
        :param pred: iterable of predicted outputs.
        :param target: iterable of reference outputs, paired with pred by position.
        '''
        sim_per_output = []
        for o_p, o_t in zip(pred, target):
            if len(o_p.shape):
                b = o_p.shape[0]
                x = o_p.reshape(b, -1).float()
                y = o_t.reshape(b, -1).float()
                sim = numpy.mean(self.cos(x, y).cpu().flatten().numpy())
            else:  # if output is a scalar
                # Store a plain float (1.0 on match, 0.0 otherwise) instead of a tensor,
                # so numpy.array(self.sim) also works for results living on an accelerator.
                sim = float(o_p == o_t[0])

            sim_per_output.append(sim)
        self.sim.append(sim_per_output)

    def reset(self):
        self.sim = []

    def compute(self):
        sim = numpy.array(self.sim)
        return numpy.mean(sim)

    def report(self):
        txt = ''
        sims = numpy.array(self.sim).T  # [output, per call result]
        txt += "cosine similarity is %f" % numpy.mean(sims)
        for i, sim in enumerate(sims):
            txt += "\noutput %d: cosine similarity is %f" % (i, numpy.mean(sim))
        return txt


# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/plugins/aipubt_metric_KeywordSpotting.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.


from AIPUBuilder.Optimizer.framework import *

from AIPUBuilder.Optimizer.logger import *
import torch


@register_plugin(PluginType.Metric, '1.0')
class KeywordSpottingMetric(OptBaseMetric):
    """
    This KeywordSpottingMetric is used for the metric of kws_gru/kws_lstm models in Optimizer.
    accuracy = correct / total.
    """

    def __init__(self, K=1):
        # NOTE(review): K is accepted but never read; the metric is always top-1.
        self.correct = 0
        self.total = 0

    def __call__(self, pred, target):
        '''
        :param pred: sequence of outputs; only pred[0] is used.
        :param target: label tensor; its arg-max along the last axis is the expected class.
        '''
        _, pt = torch.topk(pred[0], 1, dim=-1)  # top-1 index along the last axis
        _, gt = torch.topk(target, 1, dim=-1)  # top-1 index along the last axis
        batch = pt.shape[0]
        for idx in range(batch):
            if pt[idx][0] == gt[idx][0]:
                self.correct += 1
        self.total += batch

    def reset(self):
        self.correct = 0
        self.total = 0

    def compute(self):
        try:
            acc = float(self.correct) / float(self.total)
            return acc
        except ZeroDivisionError:
            # No sample has been seen yet: log it and return a sentinel value.
            OPT_ERROR('zeroDivisionError: kws acc total label = 0')
            return float("-inf")

    def report(self):
        return "accuracy is %f" % (self.compute())


# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/plugins/aipubt_metric_LMHead.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

from AIPUBuilder.Optimizer.framework import *
import torch


@register_plugin(PluginType.Metric, '1.0')
class LMHeadMetric(OptBaseMetric):
    '''
    Perplexity (PPL) of the logits at the last sequence position against the label token.

    For each call: CrossEntropyLoss([batch, vocab_size], [batch]) -> one scalar, the
    negative log likelihood averaged over the batch (default 'mean' reduction).
    PPL = exp(mean of the per-call values).
    '''

    def __init__(self):
        self.nlls = []
        self.loss = torch.nn.CrossEntropyLoss()

    def __call__(self, pred, target):
        '''
        :param pred: sequence of outputs; pred[0] is indexed as [batch, seqlen, vocabsize].
        :param target: sequence of labels; column 0 of target[0] is used as the token id.
        '''
        vocab = pred[0][:, -1, :]  # [batch, seqlen, vocabsize] -> [batch, vocabsize]
        nll = self.loss(vocab, target[0][:, 0])
        # Keep a plain float so no tensor (or its device memory) outlives the call.
        self.nlls.append(float(nll))

    def reset(self):
        self.nlls = []

    def compute(self):
        '''
        :return: 0-d tensor holding exp(mean NLL); NaN when nothing was accumulated.
        '''
        total_nll = torch.tensor(self.nlls)
        return torch.exp(total_nll.mean())

    def report(self):
        # The reported value is a perplexity, not a correct/total ratio.
        return f"perplexity (PPL) is {self.compute()}"


# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/plugins/aipubt_metric_MaxAbsError.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.


from AIPUBuilder.Optimizer.framework import *

import torch
import numpy

# For OPT OP Test


@register_plugin(PluginType.Metric, '0.01')
class MaxAbsErrorMetric(OptBaseMetric):
    """
    Maximum absolute element-wise error between each predicted output and its
    reference, averaged over outputs and calls.
    """

    def __init__(self):
        self.errors = []

    def __call__(self, pred, target):
        '''
        :param pred: iterable of predicted outputs.
        :param target: iterable of reference outputs, paired with pred by position.
        '''
        per_output = []
        for predicted, expected in zip(pred, target):
            delta = predicted.float().reshape(-1) - expected.float().reshape(-1)
            per_output.append(torch.max(torch.abs(delta)).cpu().numpy())
        self.errors.append(per_output)

    def reset(self):
        self.errors = []

    def compute(self):
        return numpy.mean(numpy.array(self.errors))

    def report(self):
        by_output = numpy.array(self.errors).T  # [output, per call result]
        lines = ["maximum absolute error is %f" % numpy.mean(by_output)]
        for index, values in enumerate(by_output):
            lines.append("output %d: maximum absolute error is %f" % (index, numpy.mean(values)))
        return "\n".join(lines)


# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/plugins/aipubt_metric_MaxAbsError_with_seqlen.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.


from AIPUBuilder.Optimizer.framework import *

import torch
import numpy


@register_plugin(PluginType.Metric, '1.0')
class MaxAbsErrorwseqlenMetric(OptBaseMetric):
    """
    This MaxAbsErrorwseqlenMetric is used for the metric of RNNT_encoder model in Optimizer.
    The label of metric has two elements: [label_value(tensor), actual_len(int)]. Only the first
    actual_len entries of prediction and label take part in the maximum absolute error.
    """

    def __init__(self):
        self.errors = []

    def __call__(self, pred, target):
        '''
        :param pred: iterable of predicted outputs.
        :param target: [label_value, actual_len]; both are iterated in step with pred.
        '''
        per_output = []
        for predicted, expected, valid_len in zip(pred, target[0], target[1]):
            valid_len = valid_len.cpu()
            # The prediction is trimmed on its second axis, the label on its first.
            trimmed_pred = predicted.float()[:, :valid_len].reshape(-1)
            trimmed_label = expected.float()[:valid_len].reshape(-1)
            worst = torch.max(torch.abs(trimmed_pred - trimmed_label))
            per_output.append(worst.cpu().numpy())
        self.errors.append(per_output)

    def reset(self):
        self.errors = []

    def compute(self):
        return numpy.mean(numpy.array(self.errors))

    def report(self):
        by_output = numpy.array(self.errors).T  # [output, per call result]
        lines = ["maximum absolute error is %f" % numpy.mean(by_output)]
        for index, values in enumerate(by_output):
            lines.append("output %d: maximum absolute error is %f" % (index, numpy.mean(values)))
        return "\n".join(lines)


# --------------------------------------------------------------------------------
# /AIPUBuilder/Optimizer/plugins/aipubt_metric_OpTestCosDistance.py:
# --------------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.
3 | 4 | from AIPUBuilder.Optimizer.framework import * 5 | import torch 6 | import numpy 7 | 8 | 9 | @register_plugin(PluginType.Metric, '0.01') 10 | class OpTestCosDistanceMetric(OptBaseMetric): 11 | def __init__(self): 12 | self.cos = torch.nn.CosineSimilarity() 13 | self.sim = [] 14 | 15 | def __call__(self, pred, target): 16 | sim_per_output = [] 17 | for o_p, o_t in zip(pred, target): 18 | if len(o_p.shape): 19 | b = o_p.shape[0] 20 | x = o_p.reshape(b, -1).float() 21 | y = o_t.reshape(b, -1).float() 22 | sim = numpy.mean(self.cos(x, y).cpu().flatten().numpy()) 23 | else: # if output is a scalar 24 | x = o_p 25 | y = o_t[0] 26 | sim = (x == y).item() 27 | 28 | sim_per_output.append(sim) 29 | self.sim.append(sim_per_output) 30 | 31 | def reset(self): 32 | self.sim = [] 33 | 34 | def compute(self): 35 | sim = numpy.array(self.sim) 36 | return numpy.mean(sim) 37 | 38 | def report(self): 39 | txt = '' 40 | sims = numpy.array(self.sim).T # [output, per call result] 41 | txt += "cosine similarity is %f" % numpy.mean(sims) 42 | for i, sim in enumerate(sims): 43 | txt += "\noutput %d: cosine similarity is %f" % (i, numpy.mean(sim)) 44 | return txt 45 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_metric_RMSE.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | import os 5 | import sys 6 | import cv2 7 | import torch 8 | import numpy as np 9 | 10 | from AIPUBuilder.Optimizer.framework import * 11 | from AIPUBuilder.Optimizer.logger import * 12 | 13 | 14 | @register_plugin(PluginType.Metric, '0.01') 15 | class RMSEMetric(OptBaseMetric): 16 | """ 17 | This RMSEMetric is used for the metric of dinov2-small-nyu model in Optimizer. 
18 | """ 19 | 20 | def __init__(self): 21 | self.total = 0 22 | self.total_loss = 0.0 23 | 24 | def __call__(self, pred, target): 25 | image_size = target.shape[1:] 26 | prediction = torch.nn.functional.interpolate( 27 | pred[0].unsqueeze(1), 28 | size=image_size, 29 | mode="bicubic", 30 | align_corners=False 31 | ).squeeze().cpu().numpy() 32 | 33 | target = target.cpu().numpy().reshape(image_size) 34 | loss = np.sqrt(np.mean((prediction - target / 1000.) ** 2)) 35 | # print(f"{self.total}: RMSE loss is {loss}") 36 | self.total += 1 37 | self.total_loss += loss 38 | 39 | def reset(self): 40 | self.total = 0 41 | self.total_loss = 0.0 42 | 43 | def compute(self): 44 | average_loss = self.total_loss / self.total 45 | return average_loss 46 | 47 | def report(self): 48 | return "rmse accuracy is %f" % (self.compute()) 49 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_metric_SSDmAP.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.plugins.aipubt_metric_mAP import mAPMetric 6 | from AIPUBuilder.Optimizer.framework import * 7 | 8 | from AIPUBuilder.Optimizer.logger import OPT_FATAL 9 | 10 | 11 | @register_plugin(PluginType.Metric, '1.0') 12 | class SSDmAPMetric(mAPMetric): 13 | """ 14 | This SSDmAPMetric is used for the metric of SSD models in Optimizer. 15 | This plugin computes the mAP of SSD models. 16 | We assume the iou_threshold=0.5. 
17 | """ 18 | 19 | def __init__(self, class_num=90, start_id=0): 20 | super().__init__(class_num, start_id) 21 | 22 | def __call__(self, pred, target): 23 | assert len(pred) == 9, OPT_FATAL('please check the outputs number(should be 9)') 24 | pred_post = [pred[2], pred[4], pred[5], pred[6], pred[7], pred[8]] 25 | super().__call__(pred_post, target) 26 | 27 | def reset(self): 28 | super().reset() 29 | 30 | def compute(self): 31 | self.mAP = super().compute() 32 | return self.mAP 33 | 34 | def report(self): 35 | return "SSD mAP accuracy is %f" % (self.compute()) 36 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_metric_WER.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from AIPUBuilder.Optimizer.logger import * 8 | import editdistance 9 | import numpy as np 10 | 11 | 12 | ''' 13 | WER: word error rate 14 | Work Error Rate = 100 * (insertions + substitutions + deletions) / (total words in correct transcript) 15 | ''' 16 | 17 | 18 | @register_plugin(PluginType.Metric, '1.0') 19 | class WERMetric(OptBaseMetric): 20 | """ 21 | This WERMetric is used for the metric of deepspeech_official/wavenet models in Optimizer. 22 | 23 | Word error rate (WER) is a common metric of the performance of a speech recognition or machine translation system. 
24 | Work Error Rate = 100 * (insertions + substitutions + deletions) / (total words in correct transcript) 25 | """ 26 | 27 | def __init__(self, EOF=''): 28 | self.predictions = [] 29 | self.WER = 0 30 | self.EOF = EOF 31 | 32 | def __call__(self, preds, targets): 33 | ''' 34 | :param preds: 35 | :param targets: list(padding_label, act_label_len), padding_label.shape=(batch_size, padding_len), act_label_len.shape=(batch_size, act_len) 36 | :return: 37 | ''' 38 | preds = preds[0].cpu().numpy() 39 | padded_targets = targets[0].cpu().numpy() 40 | act_len = targets[1].cpu().numpy() 41 | targets = padded_targets 42 | for i in range(targets.shape[0]): 43 | flatten_pred = preds[i].reshape([-1]) 44 | eof_value = int(self.EOF) if len(self.EOF) > 0 else flatten_pred[-1] 45 | flatten_pred = flatten_pred[flatten_pred != eof_value] 46 | flatten_target = targets[i][:act_len[i]].reshape([-1]) 47 | self.predictions.append(editdistance.eval(flatten_pred, flatten_target) / len(flatten_target)) 48 | 49 | def reset(self): 50 | self.predictions = [] 51 | self.WER = 0 52 | 53 | def compute(self): 54 | self.WER = np.average(np.array(self.predictions)) 55 | return self.WER 56 | 57 | def report(self): 58 | return "ASR Word Error Rate(WER) is %f" % (self.compute()) 59 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_metric_delta1.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | 7 | from AIPUBuilder.Optimizer.logger import * 8 | import torch 9 | import math 10 | 11 | 12 | @register_plugin(PluginType.Metric, '1.0') 13 | class delta1Metric(OptBaseMetric): 14 | """ 15 | This delta1Metric is used for the metric of fast_depth_onnx model in Optimizer. 
16 | """ 17 | 18 | def __init__(self): 19 | self.num = 0 20 | self.delta1_sum = 0 21 | 22 | def __call__(self, pred, target): 23 | prediction = pred[0] 24 | batch_size = pred[0].shape[0] 25 | mask = ((target > 0) + (prediction > 0)) > 0 26 | 27 | prediction = prediction[mask] * 1000 28 | target = target[mask] * 1000 29 | 30 | max_ratio = torch.max(prediction / target, target / prediction) 31 | delta1 = float((max_ratio < 1.25).float().mean()) 32 | 33 | self.num += batch_size 34 | self.delta1_sum += batch_size * delta1 35 | 36 | def reset(self): 37 | self.num = 0 38 | self.delta1_sum = 0 39 | 40 | def compute(self): 41 | ret = self.delta1_sum / self.num 42 | return ret 43 | 44 | def report(self): 45 | return "delta1 accuracy is %f" % (self.compute()) 46 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_metric_f1mesure.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | from AIPUBuilder.Optimizer.plugins.aipubt_metric_mIoU import mIoUMetricBase 7 | import torch 8 | import cv2 9 | import numpy as np 10 | 11 | 12 | @register_plugin(PluginType.Metric, '1.0') 13 | class F1scoreMetric(mIoUMetricBase): 14 | """ 15 | This F1scoreMetric is used for the metric of onnx_sne_roadseg models in Optimizer. 16 | This plugin computes the f1-measure metric for kitti dataset. 
17 | """ 18 | 19 | def __init__(self, layout='NHWC'): 20 | super().__init__() 21 | if layout == 'NCHW': 22 | self.channel_axis = 1 23 | 24 | def __call__(self, pred, target): 25 | if isinstance(target, list): 26 | target = target[0] 27 | super().__call__(pred, target) 28 | 29 | def reset(self): 30 | super().reset() 31 | 32 | def compute(self): 33 | conf = self.confusion_matrix 34 | pred = (np.diag(conf) / conf.sum(0).astype(np.float32))[1] 35 | recall = (np.diag(conf) / conf.sum(1).astype(np.float32))[1] 36 | f1score = 2*(recall*pred)/(recall+pred) 37 | return f1score 38 | 39 | def report(self): 40 | return "F1 score is %f" % (self.compute()) 41 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/plugins/aipubt_metric_imdb.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | 5 | from AIPUBuilder.Optimizer.framework import * 6 | from AIPUBuilder.Optimizer.logger import * 7 | import torch 8 | 9 | 10 | @register_plugin(PluginType.Metric, '1.0') 11 | class IMDBMetric(OptBaseMetric): 12 | """ 13 | This IMDBMetric is used for the metric of robert-a models in Optimizer. 14 | accuracy = correct / total. 15 | half sample in total is negative sentiment, half sample is postive sentiment 16 | The IMDb data set is a sentiment analysis data set (two classifications), 17 | test set each have 25000 samples (each sample is a movie review), 18 | The number of samples of the positive/the negative class (ie positive/negative) is the same, 12500/12500. 
@register_plugin(PluginType.Metric, '1.0')
class IMDBMetric(OptBaseMetric):
    """
    Accuracy metric for RoBERTa-style sentiment models on the IMDb dataset.

    accuracy = correct / total.
    IMDb is a two-class sentiment dataset; the test set has 25000 movie
    reviews, split evenly into 12500 positive and 12500 negative samples.
    Correct predictions are counted per class in ``self.correct``
    (index 0 for label 0, index 1 for label 1).
    """

    def __init__(self):
        self.correct = [0, 0]
        self.total = 0

    def __call__(self, pred, target):
        """Accumulate per-class hits for one batch.

        ``pred[0]`` is reduced with argmax over dimension 1 and compared
        element-wise with ``target``.
        """
        predicted = torch.argmax(pred[0], 1).reshape(-1)
        labels = target if isinstance(target, torch.Tensor) else torch.as_tensor(target)
        labels = labels.reshape(-1).to(predicted.device)
        # Compare sample by sample: the previous loop compared the whole
        # tensors on every iteration, which raises for batch sizes > 1.
        hits = predicted == labels
        for cls in (0, 1):
            self.correct[cls] += int((hits & (labels == cls)).sum())

        self.total += predicted.shape[0]

    def reset(self):
        """Clear all accumulated counters."""
        self.correct = [0, 0]
        self.total = 0

    def compute(self):
        """Return the overall accuracy, or -inf (after logging) if nothing was accumulated."""
        try:
            acc = float(self.correct[0]+self.correct[1]) / float(self.total)
            return acc
        except ZeroDivisionError:
            OPT_ERROR('zeroDivisionError: imdb acc total label = 0')
            return float("-inf")

    def report(self):
        """Return the human-readable summary line."""
        return "imdb sentiment acc is %f " % (self.compute())
@register_plugin(PluginType.Metric, '1.0')
class TopKMetric(OptBaseMetric):
    """
    Top-K accuracy metric plugin for image classification models in Optimizer.
    Top-1 is computed by default.
    """

    def __init__(self, K='1', with_argmax=False):
        """Store K (forced to 1 when the model output is already an argmax)."""
        self.correct = 0
        self.total = 0
        self.K = 1 if with_argmax else int(K)
        self.with_argmax = with_argmax

    def __call__(self, pred, target):
        """Count, for one batch, the samples whose label is among the candidates."""
        # One row of candidates per sample.
        flat = pred[0].reshape([pred[0].shape[0], -1])
        if self.with_argmax:
            # The output already holds class indices; use them as-is.
            candidates = flat.cpu().numpy().astype('int32')
        else:
            _, candidates = torch.topk(flat, self.K, dim=-1)  # NHWC
        sample_count = target.numel()
        for idx in range(sample_count):
            self.correct += int(target[idx] in candidates[idx])
        self.total += sample_count

    def reset(self):
        """Clear the accumulated counters."""
        self.correct = 0
        self.total = 0

    def compute(self):
        """Return correct / total, or -inf (after logging) when total is zero."""
        try:
            return float(self.correct) / float(self.total)
        except ZeroDivisionError:
            OPT_ERROR('zeroDivisionError: Topk acc total label = 0')
            return float("-inf")

    def report(self):
        """Return the human-readable summary line."""
        accuracy = self.compute()
        return "top-%d accuracy is %f" % (self.K, accuracy)
# Delete the comment on the next line to enable this plugin
# @op_register(OpType.Tile)


def tile(self, *args):
    """Float forward of the Tile op.

    Reads the repeat counts from ``self.params['multipliers']`` or, failing
    that, ``self.params['reps']`` (a sequence or a comma-separated string),
    repeats the first input tensor accordingly, stores the result on the
    first output and returns it.

    Raises:
        KeyError: if neither 'multipliers' nor 'reps' is present.
    """
    for key in ('multipliers', 'reps'):
        if key in self.params:
            # The lookup key must match the key that was tested; the old code
            # tested 'multipliers' but then read 'multiplier'.
            reps = self.params[key]
            break
    else:
        OPT_ERROR("Tile op needs 'multipliers' or 'reps' param.")
        # Previously execution fell through to an unbound local; fail clearly.
        raise KeyError("Tile op needs 'multipliers' or 'reps' param.")

    if isinstance(reps, str):
        reps = [int(r) for r in reps.split(',')]
    inp_t = self.inputs[0].betensor
    out_t = inp_t.repeat(reps)
    self.outputs[0].betensor = out_t

    return out_t

# Delete the comment on the next line to enable this plugin
# @op_register(OpType.Tile)


def tile_quantize(self, *args):
    """Quantize hook of the Tile op: the output reuses the input's quantization fields."""
    inp = self.inputs[0]
    out = self.outputs[0]
    for field in ('dtype', 'scale', 'zerop', 'qbits', 'qinvariant'):
        setattr(out, field, getattr(inp, field))
def main():
    """Entry point of the QAT tool.

    Loads the optimizer plugins, parses the command line and runs the QAT
    master. When ``arg_parser`` returns a bool instead of parsed arguments,
    that bool is mapped to the exit status (True -> 0, False -> 1).

    Returns:
        int: process exit status (0 on success).
    """
    # Exceptions propagate unchanged; the former `except ...: raise e` wrapper
    # added nothing.
    traverse_opt_plugins()
    args = arg_parser(metric_dict=QUANTIZE_METRIC_DICT,
                      dataset_dict=QUANTIZE_DATASET_DICT)  # pylint: disable=undefined-variable
    if isinstance(args, bool):
        return 0 if args else 1

    qat_master = AIPUQATMaster(args)
    qat_master.run()
    QAT_INFO("running QAT Done.")
    # Explicit success status, consistent with the early-return path.
    return 0


if __name__ == '__main__':
    ret = main()
    sys.exit(ret)
3 | 4 | from .config import QATConfig, get_device, default_device 5 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/qat/src/fuser/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | from ..utils import is_match 5 | from .concat_fuser import ConcatFusion 6 | from .convolution_fuser import ConvBNActFusion 7 | from .eltwise_fuser import MulFusion, AddFusion 8 | from .expand_fuser import ExpandFusion 9 | from .fullyconnected_fuser import LinearBNActFusion 10 | from .gelu_fuser import GeLUFusion 11 | from .hardswish_fuser import HardswishFusion 12 | from .hardsigmoid_fuser import HardsigmoidFusion 13 | from .layernorm_fuser import LayerNormFusion 14 | from .multiheadattention_fuser import MultiheadAttentionFusion 15 | from .mha_fuser import MHAFusion 16 | from .pooling_fuser import AvgPool2dFusion, MaxPool2dFusion 17 | from .reshape_fuser import ReshapeFusion 18 | from .transpose_fuser import TransposeFusion 19 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/qat/src/fuser/concat_fuser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 
@register_fusion_pattern((torch.cat))
@register_fusion_pattern((torch.concat))
class ConcatFusion:
    """Replace a ``torch.cat`` / ``torch.concat`` call node with a QConcat module."""

    def __init__(self, quantizer, node):
        """Record the cat node and the concatenation dimension it uses.

        The dimension is taken from the ``dim`` keyword, then the ``axis``
        keyword, then the second positional argument; if none is given,
        torch.cat's own default of 0 is used.
        """
        self.cat_node = None
        if eq(node.target, torch.concat) or eq(node.target, torch.cat):
            self.cat_node = node
        assert self.cat_node is not None, '[FATAL]: cat_node can not be None!'
        kwargs = self.cat_node.kwargs
        if 'dim' in kwargs:
            self.dim = kwargs['dim']
        elif 'axis' in kwargs:
            # NOTE(review): torch accepts `axis` as an alias of `dim` here - confirm.
            self.dim = kwargs['axis']
        elif len(self.cat_node.args) > 1:
            self.dim = self.cat_node.args[1]
        else:
            # torch.cat concatenates along dim 0 when no dim is given; the
            # previous fallback of -1 changed the model's semantics.
            self.dim = 0
            QAT_WARN("not found the dim parameters in cat node, using torch.cat default dim=0.")

    def fuse(self, graph_module, modules):
        """Insert the QConcat module after the cat node and erase the original node."""
        qname = self.cat_node.name + "_QConcat"
        q_cat = QConcat(dim=self.dim, name=qname)
        fused_graph = graph_module.graph
        with fused_graph.inserting_after(self.cat_node):
            graph_module.add_module(qname, q_cat)
            new_node = fused_graph.call_module(qname, args=self.cat_node.args)
            self.cat_node.replace_all_uses_with(new_node)
        fused_graph.erase_node(self.cat_node)
@register_fusion_pattern((torch.Tensor.expand))  # 0, the highest priority
class ExpandFusion:
    """Replace a ``Tensor.expand`` call node with a QExpand module."""

    def __init__(self, quantizer, node):
        """Remember the expand node; it must not be None."""
        self.expand_node = node
        assert self.expand_node is not None, '[FATAL]: expand_node can not be None!'

    def fuse(self, graph_module, modules):
        """Insert a QExpand module after the expand node and erase the node.

        When every size argument is a plain int, the sizes are baked into the
        module and only the input tensor is passed at call time; otherwise all
        original arguments are forwarded and the module starts with no size.
        """
        QAT_INFO("begin to expand fuse")
        node = self.expand_node
        graph = graph_module.graph
        size_args = node.args[1:]
        if all(isinstance(item, int) for item in size_args):
            call_args = (node.args[0],)
            size = size_args
        else:
            call_args = node.args
            size = []
        module_name = node.name + "_QExpand"
        q_expand = QExpand(size=size)
        with graph.inserting_after(node):
            graph_module.add_module(module_name, q_expand)
            replacement = graph.call_module(module_name, args=call_args)
            node.replace_all_uses_with(replacement)
        graph.erase_node(node)
@register_fusion_pattern((nn.GELU))
class GeLUFusion:
    """Replace an ``nn.GELU`` module node with a QGeLU module."""

    def __init__(self, quantizer, node):
        """Accept the node only if its target module is an nn.GELU."""
        matched = isinstance(quantizer.modules[node.target], (nn.GELU))
        self.gelu_node = node if matched else None
        if self.gelu_node is None:
            QAT_FATAL("gelu_node can not be None!")
        self.gelu_module = quantizer.modules[self.gelu_node.target]

    def fuse(self, graph_module, modules):
        """Swap the module behind the node for a QGeLU with the same approximate mode."""
        replacement = QGeLU(name=self.gelu_node.name,
                            approximate=self.gelu_module.approximate)
        replace_node_module(self.gelu_node, modules, replacement)
@register_fusion_pattern((nn.Hardsigmoid))
class HardsigmoidFusion:
    """Replace an ``nn.Hardsigmoid`` module node with a QHardSigmoid module."""

    def __init__(self, quantizer, node):
        """Accept the node only if its target module is an nn.Hardsigmoid."""
        target_module = quantizer.modules[node.target]
        if isinstance(target_module, (nn.Hardsigmoid)):
            self.hardsigmoid_node = node
        else:
            self.hardsigmoid_node = None
        if self.hardsigmoid_node is None:
            QAT_FATAL("hardsigmoid_node can not be None!")
        self.hardsigmoid_module = quantizer.modules[self.hardsigmoid_node.target]

    def fuse(self, graph_module, modules):
        """Swap the module behind the node for a QHardSigmoid named after the node."""
        replacement = QHardSigmoid(name=self.hardsigmoid_node.name)
        replace_node_module(self.hardsigmoid_node, modules, replacement)
@register_fusion_pattern((nn.Hardswish))
class HardswishFusion:
    """Replace an ``nn.Hardswish`` module node with a QHardSwish module."""

    def __init__(self, quantizer, node):
        """Accept the node only if its target module is an nn.Hardswish."""
        is_hardswish = isinstance(quantizer.modules[node.target], (nn.Hardswish))
        self.hardswish_node = node if is_hardswish else None
        if self.hardswish_node is None:
            QAT_FATAL("hardswish_node can not be None!")
        self.hardswish_module = quantizer.modules[self.hardswish_node.target]

    def fuse(self, graph_module, modules):
        """Swap the module behind the node for a QHardSwish named after the node."""
        replacement = QHardSwish(name=self.hardswish_node.name)
        replace_node_module(self.hardswish_node, modules, replacement)
@register_fusion_pattern((nn.LayerNorm))
class LayerNormFusion:
    """Replace an ``nn.LayerNorm`` module node with a QLayerNorm module."""

    def __init__(self, quantizer, node):
        """Accept the node only if its target module is an nn.LayerNorm."""
        self.ln_node = None
        if isinstance(quantizer.modules[node.target], nn.LayerNorm):
            self.ln_node = node
        # Validate before touching the node's attributes so that a mismatch
        # reports this message rather than an AttributeError on None.
        assert self.ln_node is not None, '[FATAL]: layernorm node can not be None!'
        self.ln_name = self.ln_node.name
        self.ln_module = quantizer.modules[self.ln_node.target]

    def _extract_hyperparams(self, m, name=None):
        """Return the QLayerNorm constructor keyword arguments taken from module ``m``.

        'bias' is True only when ``m.bias`` exists and is a Parameter.
        """
        return {
            'name': name,
            'normalized_shape': m.normalized_shape,
            'eps': m.eps,
            'bias': hasattr(m, 'bias') and isinstance(m.bias, torch.nn.Parameter),
        }

    def fuse(self, graph_module, modules):
        """Build a QLayerNorm carrying the original weight/bias and swap it in."""
        hyper_params = self._extract_hyperparams(self.ln_module, self.ln_name)
        qln = QLayerNorm(**hyper_params)
        qln.weight.data = self.ln_module.weight
        if qln.bias is not None:
            qln.bias.data = self.ln_module.bias

        replace_node_module(self.ln_node, modules, qln)
@register_fusion_pattern((nn.AdaptiveAvgPool2d))
@register_fusion_pattern((nn.AvgPool2d))  # 0, the highest priority
class AvgPool2dFusion:
    """Replace an ``nn.AvgPool2d`` / ``nn.AdaptiveAvgPool2d`` node with QAveragePooling2D."""

    def __init__(self, quantizer, node):
        """Accept the node only if its target module is one of the average pools."""
        self.avg_node = None

        if isinstance(quantizer.modules[node.target], (nn.AvgPool2d, nn.AdaptiveAvgPool2d)):
            self.avg_node = node

        if self.avg_node is None:
            QAT_FATAL("avg_node can not be None!")

        self.avg_module = quantizer.modules[self.avg_node.target]
        self.avg_name = self.avg_node.name

    def fuse(self, graph_module, modules):
        """Wrap a deep copy of the pooling module in QAveragePooling2D and swap it in."""
        avg_module = copy.deepcopy(self.avg_module)
        q_avg = QAveragePooling2D(avg_module, self.avg_name)
        replace_node_module(self.avg_node, modules, q_avg)


@register_fusion_pattern((nn.AdaptiveMaxPool2d))
@register_fusion_pattern((nn.MaxPool2d))  # 0, the highest priority
class MaxPool2dFusion:
    """Replace an ``nn.MaxPool2d`` / ``nn.AdaptiveMaxPool2d`` node with QMaxPooling2D."""

    def __init__(self, quantizer, node):
        """Accept the node only if its target module is one of the max pools."""
        self.max_node = None

        # Must match the registered patterns above: the adaptive variant is
        # AdaptiveMaxPool2d (the old check tested AdaptiveAvgPool2d, so
        # adaptive max pools were rejected and adaptive avg pools accepted).
        if isinstance(quantizer.modules[node.target], (nn.MaxPool2d, nn.AdaptiveMaxPool2d)):
            self.max_node = node

        assert self.max_node is not None, '[FATAL]: max_node can not be None!'

        self.max_module = quantizer.modules[self.max_node.target]
        self.max_name = self.max_node.name

    def fuse(self, graph_module, modules):
        """Wrap a deep copy of the pooling module in QMaxPooling2D and swap it in."""
        max_module = copy.deepcopy(self.max_module)
        q_max = QMaxPooling2D(max_module, self.max_name)
        replace_node_module(self.max_node, modules, q_max)
@register_fusion_pattern((torch.permute))  # 0, the highest priority
@register_fusion_pattern((torch.Tensor.permute))  # 0, the highest priority
class TransposeFusion:
    """Replace a ``permute`` call node with a QTranspose module."""

    def __init__(self, quantizer, node):
        """Remember the permute node; it must not be None."""
        self.transpose_node = node
        assert self.transpose_node is not None, '[FATAL]: transpose_node can not be None!'

    def fuse(self, graph_module, modules):
        """Insert a QTranspose module after the permute node and erase the node.

        When every permutation argument is a plain int, the permutation is
        baked into the module and only the input tensor is passed at call
        time; otherwise all original arguments are forwarded and the module
        starts with an empty permutation.
        """
        node = self.transpose_node
        graph = graph_module.graph
        perm_args = node.args[1:]
        if all(isinstance(item, int) for item in perm_args):
            call_args = (node.args[0],)
            perm = perm_args
        else:
            call_args = node.args
            perm = []
        qname = node.name + "_QTranspose"
        q_transpose = QTranspose(name=qname, perm=perm)
        with graph.inserting_after(node):
            graph_module.add_module(qname, q_transpose)
            replacement = graph.call_module(qname, args=call_args)
            node.replace_all_uses_with(replacement)
        graph.erase_node(node)
class QActivation(QBaseOperator):
    """Placeholder activation operator: stores a name and a method, does no work yet."""

    def __init__(self, name, method, dtype=None) -> None:
        super().__init__(dtype)
        self.name = name
        self.method = method

    def forward(self, inputs):
        """Not implemented; returns None."""
        return None

    def serialize(self, input):
        """Not implemented; returns None."""
        return None
@register_operator()
class QConcat(QBaseOperator):
    """QAT concat operator: ``torch.cat`` followed by activation fake-quantization."""

    def __init__(self, dim=0, dtype=None, name='') -> None:
        super().__init__(dtype, name)
        self.dim = dim
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, inputs, *args):
        """Concatenate ``inputs``; a first extra positional argument overrides ``self.dim``."""
        if len(args):
            self.dim = args[0]
        joined = torch.cat(inputs, dim=self.dim)
        return self.fake_quant(joined, self.activation_qinfo)

    def serialize(self, inputs):
        """Emit the AIPUBuilder concat op with this operator's output quantization."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        return ops.concat(inputs, axis=self.dim, quantization=quantization)
@register_operator()
class QConstant(QBaseOperator):
    """QAT constant operator: holds a tensor as the ``weight`` buffer and fake-quantizes it."""

    def __init__(self, name, data, dtype=None) -> None:
        super().__init__(dtype)
        self.name = name
        if data is None:
            QAT_ERROR("when instances one QConstant, the data arg is None")
        self.register_buffer('weight', data)
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    def forward(self):
        """Return the fake-quantized constant.

        A non-floating-point constant marks the activation qinfo as
        quantization-invariant first.
        """
        if not torch.is_floating_point(self.weight):
            self.activation_qinfo.qinvariant = True
        return self.fake_quant(self.weight, self.activation_qinfo)

    def serialize(self):
        """Emit the AIPUBuilder constant op (float32 payload).

        Outside 'fp' IR mode, ``forward`` is first run once with the quant
        stage temporarily set to QAT before the quantization is read.
        """
        from AIPUBuilder import ops
        from AIPUBuilder.core import Tensor
        payload = self.weight.cpu().numpy().astype('float32')
        weight = Tensor(self.name + "_weight", payload)
        if self.ir_mode == 'fp':
            return ops.constant(weight)

        saved_stage = self.quant_stage
        self.quant_stage = QuantStage.QAT
        self.forward()
        self.quant_stage = saved_stage
        quantization = self.get_quantization(self.activation_qinfo)
        return ops.constant(weight, quantization=quantization)
@register_operator()
class QExpand(QBaseOperator):
    """QAT expand operator: ``Tensor.expand`` followed by activation fake-quantization."""

    def __init__(self, size, dtype=None) -> None:
        super().__init__(dtype)
        self._use_input_QConfig = True
        self.size = size
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, input, *args):
        """Expand ``input``; extra positional arguments replace the stored size."""
        if len(self.size) == 0 and len(args) == 0:
            QAT_ERROR("QExpand meets the len(size) == 0.")
        if len(args):
            self.size = list(args)
        expanded = input.expand(self.size)
        return self.fake_quant(expanded, self.activation_qinfo)

    def serialize(self, inputs):
        """Emit the expand as an AIPUBuilder tile op that reuses the input quantization.

        Each target size becomes a repeat count; -1 and sizes equal to the
        input dimension at the same index become 1.
        """
        from AIPUBuilder import ops
        # NOTE(review): sizes are matched to input dims by index, which
        # presumably assumes expand does not add leading dims - confirm.
        repeats = []
        for axis, wanted in enumerate(self.size):
            count = 1 if wanted == -1 else wanted
            repeats.append(1 if count == inputs.shape[axis] else count)
        out = ops.tile(inputs, repeats)
        out.quantization = inputs.quantization
        return out
@register_operator()
class QGeLU(QBaseOperator):
    """QAT GELU operator: functional GELU followed by activation fake-quantization."""

    def __init__(self, name, approximate, dtype=None) -> None:
        super().__init__(dtype)

        self.name = name
        self.approximate = approximate
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, inputs):
        """Apply GELU with the stored ``approximate`` mode, then fake-quantize."""
        activated = torch.nn.functional.gelu(input=inputs, approximate=self.approximate)
        return self.fake_quant(activated, self.activation_qinfo)

    def serialize(self, input):
        """Emit the AIPUBuilder gelu op with this operator's output quantization."""
        from AIPUBuilder import ops
        # NOTE(review): `approximate` is not forwarded to ops.gelu - confirm intended.
        quantization = self.get_quantization(self.activation_qinfo)
        return ops.gelu(input, quantization=quantization)
@register_operator()
class QHardSigmoid(QBaseOperator):
    """QAT hard-sigmoid operator: hardsigmoid, clip to [clip_min, clip_max], fake-quantize."""

    def __init__(self, name, dtype=None) -> None:
        super().__init__(dtype)

        self.name = name
        self.method = "HARDSIGMOID"
        self.clip_min = 0.0
        self.clip_max = 1.0
        self.activation_qinfo = QATConfig.get('activation_qinfo')
        self.activation_qinfo.cmode = 'extrema'

    def forward(self, inputs):
        """Apply hardsigmoid, clip to the configured range and fake-quantize."""
        activated = torch.nn.functional.hardsigmoid(inputs)
        device = activated.device
        lower = torch.tensor(self.clip_min, device=device)
        upper = torch.tensor(self.clip_max, device=device)
        clipped = torch.maximum(lower, torch.minimum(activated, upper))
        return self.fake_quant(clipped, self.activation_qinfo)

    def serialize(self, input):
        """Emit the AIPUBuilder hard_sigmoid op, named after this operator."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        result = ops.hard_sigmoid(input, quantization=quantization)
        result.op.name = self.name
        return result
@register_operator()
class QHardSwish(QBaseOperator):
    """QAT hard-swish operator: functional hardswish followed by activation fake-quantization."""

    def __init__(self, name, dtype=None) -> None:
        super().__init__(dtype)

        self.name = name
        self.method = "HARDSWISH"
        self.activation_qinfo = QATConfig.get('activation_qinfo')
        self.activation_qinfo.cmode = 'extrema'

    def forward(self, inputs):
        """Apply hardswish, then fake-quantize the result."""
        activated = torch.nn.functional.hardswish(inputs)
        return self.fake_quant(activated, self.activation_qinfo)

    def serialize(self, input):
        """Emit the AIPUBuilder hard_swish op, named after this operator."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        result = ops.hard_swish(input, quantization=quantization)
        result.op.name = self.name
        return result
@register_operator()
class QMatMul(QBaseOperator):
    """QAT matmul operator: optional operand transposes, ``torch.matmul``, fake-quantization."""

    def __init__(self,
                 name,
                 trans_a=False,
                 trans_b=False,
                 dtype=None) -> None:
        super().__init__(dtype, name=name)
        self.trans_a = trans_a
        self.trans_b = trans_b
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @staticmethod
    def _transpose_last_two(t):
        """Swap the last two dims of ``t``, first promoting 0-D/1-D tensors to 2-D."""
        if t.dim() == 0:
            t = t.unsqueeze(0).unsqueeze(0)
        elif t.dim() == 1:
            t = t.unsqueeze(0)
        return t.transpose(-1, -2)

    @check_args
    def forward(self, x, y):
        """Multiply ``x`` by ``y``, transposing either operand first when requested."""
        if self.trans_a:
            x = self._transpose_last_two(x)
        if self.trans_b:
            # Each operand is promoted based on its OWN rank; the previous
            # code tested x.dim() when deciding how to promote y.
            y = self._transpose_last_two(y)
        outputs = torch.matmul(x, y)
        outputs = self.fake_quant(outputs, self.activation_qinfo)
        return outputs

    def serialize(self, input0, input1):
        """Emit the AIPUBuilder matmul op with the stored transpose flags."""
        from AIPUBuilder import ops
        out_q = self.get_quantization(self.activation_qinfo)
        return ops.matmul(input0, input1, self.trans_a, self.trans_b, quantization=out_q)
@register_operator()
class QSoftmax(QBaseOperator):
    """QAT softmax operator: functional softmax followed by activation fake-quantization."""

    def __init__(self, name, dim=-1, dtype=None) -> None:
        super().__init__(dtype, name=name)
        self.dim = dim
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, inputs, *args):
        """Apply softmax; a first extra positional argument overrides ``self.dim``."""
        if len(args):
            self.dim = args[0]
        probabilities = torch.nn.functional.softmax(inputs, self.dim)
        return self.fake_quant(probabilities, self.activation_qinfo)

    def serialize(self, inputs):
        """Emit the AIPUBuilder softmax op with this operator's output quantization."""
        from AIPUBuilder import ops
        quantization = self.get_quantization(self.activation_qinfo)
        return ops.softmax(inputs, axis=self.dim, quantization=quantization)
@register_operator()
class QSplit(QBaseOperator):
    """``torch.split`` whose every output piece is fake-quantized."""

    def __init__(self,
                 name,
                 split_size_or_sections,
                 dim=0,
                 dtype=None) -> None:
        super().__init__(dtype, name=name)
        self._use_input_QConfig = True
        self.split_size_or_sections = split_size_or_sections
        self.dim = dim
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    @check_args
    def forward(self, inputs, *args):
        """Split ``inputs`` and return the fake-quantized pieces as a list.

        Extra positional arguments override, and are stored as, the split
        specification (first) and the axis (second).
        """
        if len(args):
            self.split_size_or_sections = args[0]
        if len(args) > 1:
            self.dim = args[1]
        pieces = torch.split(inputs, self.split_size_or_sections, self.dim)
        return [self.fake_quant(piece, self.activation_qinfo) for piece in pieces]

    def serialize(self, inputs):
        """Emit the ``ops.split`` node for the stored split specification."""
        from AIPUBuilder import ops
        splits = self.split_size_or_sections
        if isinstance(splits, int):
            # NOTE(review): an integer chunk size is turned into
            # extent // chunk_size, presumably the piece count that ops.split
            # expects. Confirm against the ops.split contract.
            splits = inputs.shape[self.dim] // splits

        return ops.split(inputs, splits=splits, axis=self.dim)
@register_operator()
class QTranspose(QBaseOperator):
    """Dimension permutation followed by fake quantization."""

    def __init__(self,
                 name,
                 perm,
                 dtype=None) -> None:
        super().__init__(dtype, name=name)
        self._use_input_QConfig = True
        self.perm = perm
        self.activation_qinfo = QATConfig.get('activation_qinfo')

    def forward(self, inputs, perm=None):
        """Permute ``inputs`` and fake-quantize the result.

        A ``perm`` passed at call time replaces the stored permutation. An
        error is logged when neither a stored nor a call-time one is available.
        """
        no_stored_perm = len(self.perm) == 0
        if no_stored_perm and perm is None:
            QAT_ERROR(f"QTranspose meets the len(perm) == 0.")
        if perm is not None:
            self.perm = perm
        permuted = torch.permute(inputs, self.perm)
        return self.fake_quant(permuted, self.activation_qinfo)

    def serialize(self, inputs):
        """Emit the ``ops.transpose`` node with the permutation as a list."""
        from AIPUBuilder import ops
        return ops.transpose(inputs, list(self.perm))
# ---- qatlogger.py: Optimizer loggers tagged with a '[QAT]' prefix ----

def QAT_INFO(*args, **kwargs):
    """Forward to ``OPT_INFO`` with the '[QAT]' prefix header."""
    OPT_INFO(*args, **kwargs, prefix_header='[QAT]')


def QAT_ERROR(*args, **kwargs):
    """Forward to ``OPT_ERROR`` with the '[QAT]' prefix header."""
    OPT_ERROR(*args, **kwargs, prefix_header='[QAT]')


def QAT_WARN(*args, **kwargs):
    """Forward to ``OPT_WARN`` with the '[QAT]' prefix header."""
    OPT_WARN(*args, **kwargs, prefix_header='[QAT]')


def QAT_DEBUG(*args, **kwargs):
    """Forward to ``OPT_DEBUG`` with the '[QAT]' prefix header."""
    OPT_DEBUG(*args, **kwargs, prefix_header='[QAT]')


def QAT_FATAL(*args, **kwargs):
    """Forward to ``OPT_FATAL`` with the '[QAT]' prefix header."""
    OPT_FATAL(*args, **kwargs, prefix_header='[QAT]')


# ---- qatregister.py: registries for fusion patterns and QAT operators ----

# pattern -> fusion function, in registration order
QAT_FUSION_PATTERNS = OrderedDict()
# operator class -> optype (or True when registered without one)
QAT_COMPASS_OPERATORS = OrderedDict()


def register_fusion_pattern(pattern):
    """Return a decorator that records its target under ``pattern``.

    Registering a pattern twice logs a warning and overwrites the entry.
    """
    def insert(fn):
        if pattern in QAT_FUSION_PATTERNS:
            QAT_WARN(f"QAT Pattern {pattern} has already registered, and will be overwritten")
        QAT_FUSION_PATTERNS[pattern] = fn
        return fn
    return insert


def register_operator(optype=None):
    """Return a decorator that records its target as a QAT operator.

    The stored value is ``optype``, or ``True`` when no optype is given.
    Registering the same object twice logs a warning and overwrites the entry.
    """
    def insert(fn):
        if fn in QAT_COMPASS_OPERATORS:
            QAT_WARN(f"QAT compass operater({fn}) has already registered, and will be overwritten")
        QAT_COMPASS_OPERATORS[fn] = True if optype is None else optype
        return fn
    return insert


def get_default_fusion_patterns():
    """Return the live fusion-pattern registry (not a copy)."""
    return QAT_FUSION_PATTERNS


def get_compass_supported_operators():
    '''tuple of supported operators!'''
    return tuple(QAT_COMPASS_OPERATORS)


class QATBaseTrainLoop(object):
    """Base class for train-loop plugins; subclasses override ``__call__``."""

    def set_stage(self, model, stage='qat'):
        """Set ``quant_stage`` on every registered QAT operator in ``model``.

        ``stage`` is converted with ``QuantStage.str_to_quantstage``.
        """
        from .qinfo import QuantStage
        # Build the isinstance() target once; the registry does not change
        # while the modules are being walked.
        qat_operator_types = tuple(QAT_COMPASS_OPERATORS)
        for m in model.modules():
            if isinstance(m, qat_operator_types):
                m.quant_stage = QuantStage.str_to_quantstage(stage)

    def __call__(self, model, *args, **kwargs):
        """Run the train loop on ``model``; the base implementation is a no-op."""
        pass


# ---- quantizer/basequantizer.py ----

class QATBaseQuantizer(object):
    """Holds the quantizer configuration and runs a model forward pass."""

    def __init__(self, config, *args, **kwargs):
        self.config = config
        self.ir_mode = ""

    def forward(self, model, input):
        """Call ``model(*input)``; a lone tensor result is wrapped in a list."""
        output = model(*input)
        if isinstance(output, torch.Tensor):
            output = [output]
        return output
def check_result(actual, desired):
    """Log the cosine distance between each actual/desired output pair.

    Both sequences must have the same length. The distances are only logged,
    never compared against a threshold, so the function always returns True
    once the length check passes.
    """
    n_actual, n_desired = len(actual), len(desired)
    assert n_actual == n_desired, "actual: %d vs desired %d" % (
        n_actual,
        n_desired,
    )

    for idx, (got, expected) in enumerate(zip(actual, desired)):
        cos = cosine_distance(got, expected)
        QAT_INFO(f"cosine distance of {idx} output: {cos}")

    return True


def convert2tuple(input):
    '''For DNN parameters conversion'''
    # A tuple passes through unchanged; any other value is duplicated into a pair.
    if isinstance(input, tuple):
        return input
    return (input, input)
# The hook exits with non-zero status and stops the commit if extra
# changes are needed.

# Optional clang-format style, taken from the git config key
# hooks.clangformat.style; when unset, no -style argument is passed.
STYLE=$(git config --get hooks.clangformat.style)
if [ -n "${STYLE}" ] ; then
  STYLEARG="-style=${STYLE}"
else
  STYLEARG=""
fi

# Reformat one C/C++ file in place and re-stage it.
format_file() {
  # 'local' keeps this from overwriting the caller's loop variable.
  local file="${1}"
  if [ -f "${file}" ]; then
    # ${VAR:+...} expands to nothing when STYLEARG is empty, so clang-format
    # never receives an empty argument.
    clang-format -i ${STYLEARG:+"${STYLEARG}"} "${file}"
    git add "${file}"
  fi
}

current_workspace=$(pwd)
hook_failed=0
case "${1}" in
  --about )
    echo "Runs clang-format on source files"
    ;;
  * )
    # Only check the files in status A(added), C(copied) and M(modified).
    # Ignore D(deleted), U(unmerged), T(type changed), R(renamed) and X(unknown).
    files_to_check=$(git diff-index --cached --name-only --diff-filter=ACM HEAD)
    # NOTE: the list is split on whitespace, so staged paths containing
    # spaces are still not supported by this loop.
    for file in ${files_to_check} ; do
      # Convert to lower case
      lc_file=${file,,}
      # The patterns are anchored with '$' so that only real extensions
      # match; unanchored, ".h" also matched ".html" and ".py" matched ".pyc".
      if [[ ${lc_file} =~ \.(cpp|cc|h|hpp)$ ]]
      then
        format_file "${file}"
      elif [[ ${lc_file} =~ \.py$ ]]
      then
        # Call format_pyfile to check format of Parser python code
        basedir=$(dirname "$0")
        "${basedir}/pre-commit.d/format_pyfile" "${current_workspace}/${file}"
        exit_status=$?
        if [[ ${exit_status} != 0 ]]
        then
          hook_failed=1
        fi
      fi
    done
    ;;
esac

if [[ ${hook_failed} != 0 ]]
then
  echo "----------- pre-commit hook failed -----------"
  exit 1
fi
# Returns:
#   0 means pass checking and no differences;
#   1 means error exit, which requires checking input file(s);
#   2 means differences exist and file(s) has/have been reformatted.

usage="Usage:\n  $0 file-to-check [file-to-check]\n\n"
failed_checking=0
failed_files=()

if [[ $# -eq 0 ]]
then
  printf 'Error: No input file(s) provided to %s\n\n' "$0"
  # '%b' expands the \n escapes stored in ${usage}. Passing ${usage} unquoted
  # as the format let word-splitting cut the message after its first word.
  printf '%b' "${usage}"
  exit 1
fi

# Iterate over "$@" so that paths containing spaces stay intact.
for file in "$@"
do
  if [[ ! -f ${file} ]]
  then
    printf '%b' "${usage}"
    exit 1
  fi

  # Use autopep8 to check code format for parser's python files
  # File names are passed as %s arguments, never as the format string, so a
  # '%' or backslash in a path cannot corrupt the output.
  printf 'autopep8: Checking %s ' "${file}"
  # Options of autopep8:
  #   --in-place: make changes to files in place
  #   --exit-code: return 2 if differences exist
  autopep8_output=$(autopep8 --verbose --in-place --exit-code --max-line-length=120 "${file}" 2>&1)
  exit_code=$?
  if [[ ${exit_code} = 2 ]]
  then
    printf '[modified]\n'
    failed_files+=("${file}")
    failed_checking=1
  elif [[ ${exit_code} = 0 ]]
  then
    printf '[passed]\n'
  else
    printf '[error]\nAdditional output of autopep8:\n%s\n\n' "${autopep8_output}"
    exit 1
  fi
done

if [[ ${failed_checking} = 1 ]]
then
  printf 'File(s) did not comply with code format.\n'
  printf 'Please add changes made by autopep8 for the following file(s):\n'
  for file in "${failed_files[@]}"
  do
    printf '  %s\n' "${file}"
  done
  printf '\n'
  exit 2
else
  exit 0
fi
# Matches `git status --porcelain` lines whose status column is made of A/M
# and whose path ends in ".py"; the single group captures the path.
# NOTE(review): the group name was lost in this copy of the file ("(?P.*"),
# which is not valid regex syntax. "name" is a reconstruction; any name works
# because findall() returns the contents of the only group.
MODIFIED_PY_PATTERN = re.compile(r'^[AM]+\s+(?P<name>.*\.py)$', re.MULTILINE)


def system(*args, **kwargs):
    """Run ``args`` as a subprocess and return what it wrote to stdout.

    stdout is captured through a pipe unless the caller passes its own
    ``stdout``; in that case the returned value is ``None``.
    """
    kwargs.setdefault('stdout', subprocess.PIPE)
    proc = subprocess.Popen(args, **kwargs)
    out, _ = proc.communicate()
    return out


def main():
    """Run ``pep8`` on the staged version of every added/modified .py file.

    The staged blobs are written into a scratch directory, which is removed
    again even when a step fails. Exits with status 1 when pep8 prints
    anything.
    """
    status = system('git', 'status', '--porcelain').decode("utf-8")
    files = MODIFIED_PY_PATTERN.findall(status)

    tempdir = tempfile.mkdtemp()
    try:
        for name in files:
            filename = os.path.join(tempdir, name)
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, 'w') as f:
                # ':<path>' selects the staged (index) version of the file.
                system('git', 'show', ':' + name, stdout=f)
        output = system('pep8', '.', cwd=tempdir)
    finally:
        shutil.rmtree(tempdir)
    if output:
        print(output.decode("utf-8", errors="replace"))
        sys.exit(1)


if __name__ == '__main__':
    # The check is switched off in the source: main() is not invoked.
    pass
    # main()
-------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/model_test/squeezenet/calibration2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/model_test/squeezenet/calibration2.npy -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/model_test/squeezenet/opt.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | #the paths for this model's IR 3 | graph = ./squeezenet_s.txt 4 | bin = ./squeezenet_s.bin 5 | model_name = squeezenet_caffe 6 | #the name of dataset plugin for this model's input dataset 7 | #if omitted, will use all zeros as input data for executing forward 8 | dataset = numpynhwcrgb2ncbgrhwdataset 9 | #the path of dataset used for calibration during quantization 10 | #if omitted, will use all zeros as input data for executing calibration 11 | calibration_data = ./calibration2.npy 12 | #the batch_size used for calibration during quantization 13 | calibration_batch_size = 1 14 | #the name of metric plugins for computing accuracy metrics for this model 15 | #if omitted, will not compute accuracy metrics 16 | metric = TopKMetric 17 | #the path of dataset (and corresponding labels) used for computing accuracy metrics for this model 18 | #if omitted, will not compute accuracy metrics 19 | data = ./validation10.npy 20 | label = ./vlabel10.npy 21 | #the batch_size used for computing accuracy metrics for this model 22 | metric_batch_size = 2 23 | #the quantization method used for weights, default to 'per_tensor_symmetric_restricted_range' 24 | quantize_method_for_weight = per_channel_symmetric_restricted_range 25 | #the quantization method used for activations, default to 'per_tensor_symmetric_full_range' 26 | quantize_method_for_activation
= per_tensor_asymmetric 27 | #the bits used for quantizing weight tensors, default to 8 28 | weight_bits = 8 29 | #the bits used for quantizing bias tensors, default to 32 30 | bias_bits = 32 31 | #the bits used for quantizing activation tensors, default to 8 32 | activation_bits = 8 33 | #Maximal LUT items (in bits, as only support LUT with 2**N items) amount when representing nonlinear functions in quantization, 34 | #default to 8, suggest to set to 10+ when quantizing activations to 16bit 35 | lut_items_in_bits = 8 36 | #the output directory path, default to pwd 37 | output_dir = ./ 38 | #the dataloader thread numbers for torch dataset, default to 0, 39 | #which means do not using multi-threads to accelerate data loading 40 | dataloader_workers=4 41 | 42 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/model_test/squeezenet/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | export AIPUBUILDER_LOG=2 4 | export PYTHONPATH=../../../../../:$PYTHONPATH 5 | 6 | python3 ../../../tools/optimizer_main.py --cfg ./opt.cfg 7 | 8 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/model_test/squeezenet/squeezenet_s.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/model_test/squeezenet/squeezenet_s.bin -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/model_test/squeezenet/validation10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/model_test/squeezenet/validation10.npy 
-------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/model_test/squeezenet/vlabel10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/model_test/squeezenet/vlabel10.npy -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/op_test/data.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/op_test/data.npy -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/op_test/label.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/op_test/label.npy -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/op_test/opt.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | #the paths for this model's IR 3 | graph = ./single_eltwise_1.txt 4 | bin = ./single_eltwise_1.bin 5 | model_name = single_eltwise_1 6 | #the name of dataset plugin for this model's input dataset 7 | #if omitted, will use all zeros as input data for executing forward 8 | dataset = NumpyMultiInputDataset 9 | #the path of dataset used for calibration during quantization 10 | #if omitted, will use all zeros as input data for executing calibration 11 | calibration_data = ./data.npy 12 | #the batch_size used for calibration during quantization 13 | calibration_batch_size = 1 14 | #the name of metric plugins for computing accuracy metrics for this model 15 | #if 
omitted, will not compute accuracy metrics 16 | metric = CosDistanceMetric,MaxAbsErrorMetric 17 | #the path of dataset (and corresponding labels) used for computing accuracy metrics for this model 18 | #if omitted, will not compute accuracy metrics 19 | data = ./data.npy 20 | label = ./label.npy 21 | #the batch_size used for computing accuracy metrics for this model 22 | metric_batch_size = 1 23 | #the quantization method used for weights, default to 'per_tensor_symmetric_restricted_range' 24 | quantize_method_for_weight = per_channel_symmetric_restricted_range 25 | #the quantization method used for activations, default to 'per_tensor_symmetric_full_range' 26 | quantize_method_for_activation = per_tensor_asymmetric 27 | #the bits used for quantizing weight tensors, default to 8 28 | weight_bits = 8 29 | #the bits used for quantizing bias tensors, default to 32 30 | bias_bits = 32 31 | #the bits used for quantizing activation tensors, default to 8 32 | activation_bits = 8 33 | #Maximal LUT items (in bits, as only support LUT with 2**N items) amount when representing nonlinear functions in quantization, 34 | #default to 8, suggest to set to 10+ when quantizing activations to 16bit 35 | lut_items_in_bits = 8 36 | #the output directory path, default to pwd 37 | output_dir = ./ 38 | #the dataloader thread numbers for torch dataset, default to 0, 39 | #which means do not use multi-threads to accelerate data loading 40 | dataloader_workers=4 41 | 42 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/op_test/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | export AIPUBUILDER_LOG=2 4 | export PYTHONPATH=../../../../:$PYTHONPATH 5 | 6 | python3 ../../tools/optimizer_main.py --cfg ./opt.cfg 7 | 8 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/op_test/single_eltwise_1.bin:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/AIPUBuilder/Optimizer/test/op_test/single_eltwise_1.bin -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/op_test/single_eltwise_1.txt: -------------------------------------------------------------------------------- 1 | model_name=eltwise 2 | layer_number=3 3 | input_tensors=[Placeholder1,Placeholder2] 4 | output_tensors=[eltwise] 5 | 6 | layer_id=0 7 | layer_name=Placeholder1 8 | layer_type=Input 9 | layer_bottom=[] 10 | layer_bottom_shape=[] 11 | layer_bottom_type=[] 12 | layer_top=[Placeholder1] 13 | layer_top_shape=[[1,4,4,16]] 14 | layer_top_type=[float32] 15 | 16 | layer_id=1 17 | layer_name=Placeholder2 18 | layer_type=Input 19 | layer_bottom=[] 20 | layer_bottom_shape=[] 21 | layer_bottom_type=[] 22 | layer_top=[Placeholder2] 23 | layer_top_shape=[[1,4,4,16]] 24 | layer_top_type=[float32] 25 | 26 | layer_id=2 27 | layer_name=eltwise 28 | layer_type=Eltwise 29 | layer_bottom=[Placeholder1,Placeholder2] 30 | layer_bottom_shape=[[1,4,4,16],[1,4,4,16]] 31 | layer_bottom_type=[float32,float32] 32 | layer_top=[eltwise] 33 | layer_top_shape=[[1,4,4,16]] 34 | layer_top_type=[float32] 35 | method=ADD 36 | with_activation=NONE 37 | 38 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/plugin_test/aipubt_dataset_my_numpynhwcrgb2ncbgrhw.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 
# ---- aipubt_dataset_my_numpynhwcrgb2ncbgrhw.py ----

@register_plugin(PluginType.Dataset, '2.0')
class MyNumpyNHWCRGB2NCBGRHWDataset(Dataset):
    """Example dataset plugin backed by ``.npy`` files.

    The loaded array has its last axis reversed and is then transposed with
    the axis order ``[0, 3, 1, 2]``.
    NOTE(review): judging by the class name this is NHWC/RGB -> NCHW/BGR;
    confirm against the data files.
    """

    def __init__(self, data_file, label_file=None):
        '''
        :param data_file: ndarray in npy file.
        :param label_file: ndarray in npy file.
        '''
        OPT_INFO('Customized dataset plugin is enabled.')
        self.data = None
        self.label = None
        try:
            self.data = np.load(data_file, mmap_mode='c')
            self.data = np.flip(self.data, -1).copy()
            self.data = np.transpose(self.data, [0, 3, 1, 2])
        except Exception as e:
            OPT_FATAL('the data of MyNumpyNHWCRGB2NCBGRHWDataset plugin should be Numpy.ndarray and allow_pickle=False.')
        if label_file is None:
            return
        try:
            self.label = np.load(label_file, mmap_mode='c')
        except ValueError:
            # Retry with pickling allowed when the plain load raises ValueError.
            self.label = np.load(label_file, allow_pickle=True)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``[[data[idx]], label]``; label is -inf when no labels exist."""
        features = [self.data[idx]]
        label = float("-inf")
        if self.label is not None:
            label = self.label[idx]
        return [features, label]


# ---- aipubt_metric_my_topk.py ----

@register_plugin(PluginType.Metric, '2.0')
class MyTopKMetric(OptBaseMetric):
    """Example metric plugin: fraction of targets found in the top-K predictions."""

    def __init__(self, K='1'):
        self.correct = 0
        self.total = 0
        self.K = int(K)
        OPT_INFO(f'Customized metric plugin is enabled. k={K}')

    def __call__(self, pred, target):
        """Accumulate hit and sample counts for one batch."""
        scores = pred[0]
        flattened = scores.reshape([scores.shape[0], -1])
        _, topk_indices = torch.topk(flattened, self.K, dim=-1)  # NHWC
        sample_count = target.numel()
        for row in range(sample_count):
            if target[row] in topk_indices[row]:
                self.correct += 1
        self.total += sample_count

    def reset(self):
        """Clear the accumulated counts."""
        self.correct = 0
        self.total = 0

    def compute(self):
        """Return correct/total, or -inf (with an error log) when total is zero."""
        try:
            return float(self.correct) / float(self.total)
        except ZeroDivisionError:
            OPT_ERROR('zeroDivisionError: Topk acc total label = 0')
            return float("-inf")

    def report(self):
        return "top-%d accuracy is %f" % (self.K, self.compute())


# ---- aipubt_op_my_softmax.py ----

# for optype out of IR guide's definition, use register_optype(xxtype_name_string) to register optype firstly
# register_optype('Softmax')


@op_register(OpType.Softmax, version='2.0')
def my_softmax(self, *args):
    """Example forward plugin: log, then delegate to the built-in softmax."""
    OPT_INFO('Customized OP forward function is enabled.')
    return softmax(self, *args)


@quant_register(OpType.Softmax, version='2.0')
def my_softmax_quantize(self, *args):
    """Example quantize plugin: pick a quantize routine by the softmax-axis extent."""
    OPT_INFO('Customized OP quantize function is enabled.')
    inp = self.inputs[0]
    out = self.outputs[0]  # read as in the source; the value is not used
    axis = self.get_param('axis')
    axis_extent = inp.betensor.shape[axis]
    # Extents below 8 go through the customisable hook; the rest use the default.
    quantize = customized_softmax_quantize_func if axis_extent < 8 else softmax_quantize
    quantize(self, *args)


def customized_softmax_quantize_func(self, *args):
    """Hook for a custom routine; currently delegates to the built-in one."""
    softmax_quantize(self, *args)
quantizing activation tensors, default to 8 32 | activation_bits = 8 33 | #Maximal LUT items (in bits, as only support LUT with 2**N items) amount when representing nonlinear functions in quantization, 34 | #default to 8, suggest to set to 10+ when quantizing activations to 16bit 35 | lut_items_in_bits = 8 36 | #the output directory path, default to pwd 37 | output_dir = ./ 38 | #the dataloader thread numbers for torch dataset, default to 0, 39 | #which means do not using multi-threads to accelerate data loading 40 | dataloader_workers=0 41 | 42 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/test/plugin_test/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | export AIPUBUILDER_LOG=2 4 | export PYTHONPATH=../../../../:$PYTHONPATH 5 | export AIPUPLUGIN_PATH=./ 6 | echo $AIPUPLUGIN_PATH 7 | python3 ../../tools/optimizer_main.py --cfg ./opt.cfg 8 | 9 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/tools/optimizer_main.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 
def OPT_WORK(argv):
    """Parse the IR named by ``argv``, run the optimizer on it, return its report."""
    parsed_graph = QuantizeGraph.parse(argv.graph, argv.bin)
    master = OptMaster(parsed_graph, argv)
    return master()


def main():
    """Command-line entry point; returns the process exit status.

    When ``arg_parser`` hands back a bool instead of parsed arguments, no
    optimization is run and the bool is mapped to an exit status.
    """
    try:
        traverse_opt_plugins()
        argv = arg_parser(metric_dict=QUANTIZE_METRIC_DICT, dataset_dict=QUANTIZE_DATASET_DICT)
        if isinstance(argv, bool):
            # return 0/1 value for tvm calling the optimizer
            if argv is True:
                return 0
            return 1

        OPT_START(argv)
        OPT_END(OPT_WORK(argv))
        return 0
    except Exception as e:
        # Close the log session before letting the failure propagate.
        OPT_END()
        raise e


if __name__ == '__main__':

    sys.exit(main())
3 | 4 | from AIPUBuilder.Optimizer.utils.dtype_utils import * 5 | from AIPUBuilder.Optimizer.utils.quant_tool_utils import * 6 | from AIPUBuilder.Optimizer.utils.files_utils import * 7 | from AIPUBuilder.Optimizer.utils.math_utils import * 8 | from AIPUBuilder.Optimizer.utils.string_utils import * 9 | from AIPUBuilder.Optimizer.utils.random_utils import * 10 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/utils/files_utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 3 | 4 | import os 5 | 6 | __all__ = ['make_path', 'make_dir_path'] 7 | 8 | 9 | def make_path(path): 10 | dpath = os.path.dirname(path) 11 | if dpath != '' and not os.path.exists(dpath): 12 | os.makedirs(dpath) 13 | return path 14 | 15 | 16 | def make_dir_path(path): 17 | if not os.path.exists(path): 18 | os.makedirs(path) 19 | return path 20 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/utils/passes_utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2024 Arm Technology (China) Co. Ltd. 3 | 4 | import functools 5 | from AIPUBuilder.Optimizer.framework import PyGraph 6 | from AIPUBuilder.Optimizer.logger import OPT_DEBUG 7 | 8 | __all__ = ['passes_run', 'PASSES', 'ENABLE_PASSES'] 9 | 10 | PASSES = dict() 11 | ENABLE_PASSES = dict() 12 | 13 | 14 | def passes_run(func): 15 | """ 16 | Decorator that enables or disables a pass for all nodes according to the cfg file: the pass runs when the cfg has no 17 | 'enable_pass_' + pass name field or when that field is truthy. The pass is only invoked when called with a PyGraph and a CfgParser (the CfgParser positionally or as the single keyword argument). If a node has its own independent flag, this decorator does not apply. 
18 | """ 19 | @functools.wraps(func) 20 | def wrapper(*args, **kwargs): 21 | from AIPUBuilder.Optimizer.config import CfgParser 22 | flag = len(args) == 2 and isinstance(args[0], PyGraph) and isinstance(args[1], CfgParser) 23 | flag = flag or (len(args) == 1 and len(kwargs) == 1 and isinstance(args[0], PyGraph) 24 | and isinstance(list(kwargs.values())[0], CfgParser)) 25 | if flag: 26 | hparams = args[1] if len(args) == 2 else list(kwargs.values())[0] 27 | prefix = 'enable_pass_' 28 | pass_func_name = f"{prefix}{func.__name__}" 29 | if not hasattr(hparams, pass_func_name): 30 | # the cfg has no enable switch for this pass, so it always runs (e.g. shrink_pow_exponent) 31 | func(*args, **kwargs) 32 | OPT_DEBUG(f"now run pass: {func.__name__}") 33 | elif hasattr(hparams, pass_func_name) and hparams.__getattr__(pass_func_name): 34 | func(*args, **kwargs) 35 | OPT_DEBUG(f"now run pass: {func.__name__}") 36 | if func.__name__ not in ENABLE_PASSES: 37 | ENABLE_PASSES.update({func.__name__: func}) 38 | if func.__name__ not in PASSES: 39 | PASSES.update({func.__name__: wrapper}) 40 | 41 | return wrapper 42 | -------------------------------------------------------------------------------- /AIPUBuilder/Optimizer/utils/string_utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Copyright © 2022-2025 Arm Technology (China) Co. Ltd. 
def list_any_to_str(s):
    """Return a compact string form of *s*.

    A list is rendered as its items' ``str()`` values joined by commas and
    wrapped in square brackets, with no spaces (``[1, 2]`` gives ``'[1,2]'``,
    ``[]`` gives ``'[]'``). Any other value is returned as ``str(s)``.
    """
    if isinstance(s, list):
        return '[' + ','.join(str(x) for x in s) + ']'
    return str(s)


def timestamp_string():
    """Return a string of the form ``_TIMESTAMP_RANDOM_``.

    TIMESTAMP is the current ``datetime`` timestamp and RANDOM is a
    ``random.random()`` value, each converted with ``str()`` and with every
    '.' replaced by '_'.
    """
    from datetime import datetime
    import random
    stamp = str(datetime.timestamp(datetime.now())).replace('.', '_')
    salt = str(random.random()).replace('.', '_')
    return '_' + stamp + '_' + salt + '_'


def string_to_base_type(s: str):
    """Convert the text *s* to a bool, int or float when it looks like one.

    Surrounding whitespace is stripped first. "true"/"false" (any case)
    become ``True``/``False``; an optionally signed run of digits becomes an
    ``int``; an optionally signed run of digits with an optional '.' and
    optional fraction digits becomes a ``float``. Anything else (including
    forms such as ``'.5'`` or ``'1e-3'``) is returned as the stripped string.
    """
    import re
    opt_v = s.strip()
    upper = opt_v.upper()
    if upper == "FALSE":
        return False
    if upper == "TRUE":
        return True
    # Raw strings: '\d' and '\.' are invalid escapes in a normal string literal.
    if re.fullmatch(r'[-+]?\d+', opt_v):
        return int(opt_v)
    if re.fullmatch(r'[-+]?[0-9]+\.?[0-9]*', opt_v):
        return float(opt_v)
    return opt_v


# -------- /AIPUBuilder/Optimizer/version.py --------
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2025 Arm Technology (China) Co. Ltd.

#!/usr/bin/python3
# -*- coding: UTF-8 -*-

__OPT_VERSION__ = '1.3'
__build_number__ = None  # placeholder for build script
if __build_number__ is not None:
    __OPT_VERSION__ = __OPT_VERSION__+"."+str(__build_number__)
__OPT_NAME__ = 'Compass-Optimizer'
3 | -------------------------------------------------------------------------------- /tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arm-China/Compass_Optimizer/12623a9a4cedc435f9f38c5f39f3db8cd5db0563/tutorial.pdf --------------------------------------------------------------------------------