├── .clang-format
├── .flake8
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── .style.yapf
├── Guides
    ├── image.png
    ├── paddle_load_customdevice.png
    ├── pir_plugin_subgraph.md
    └── subgraph_case.png
├── LICENSE
├── README.md
├── README_en.md
├── README_ja.md
├── backends
    ├── biren_gpu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── cmake
    │   │   ├── dummy.c.in
    │   │   ├── external
    │   │   │   ├── gflags.cmake
    │   │   │   ├── glog.cmake
    │   │   │   ├── gtest.cmake
    │   │   │   ├── onednn.cmake
    │   │   │   ├── pybind11.cmake
    │   │   │   └── supa.cmake
    │   │   ├── generic.cmake
    │   │   ├── paddle.cmake
    │   │   ├── third_party.cmake
    │   │   └── version.cmake
    │   ├── kernels
    │   │   ├── abs_kernel.cc
    │   │   └── funcs
    │   │   │   └── br_paddle_supa.h
    │   ├── runtime
    │   │   ├── runtime.cc
    │   │   └── runtime.h
    │   ├── setup.py.in
    │   └── tests
    │   │   ├── CMakeLists.txt
    │   │   └── unittests
    │   │       ├── CMakeLists.txt
    │   │       └── test_abs_op_supa.py
    ├── custom_cpu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── README_ja.md
    │   ├── cmake
    │   │   └── paddle.cmake
    │   ├── kernels
    │   │   ├── argsort_kernel.cc
    │   │   ├── assign_value_kernel.cc
    │   │   ├── cast_kernel.cc
    │   │   ├── compare_kernel.cc
    │   │   ├── concat_kernel.cc
    │   │   ├── contiguous_kernel.cc
    │   │   ├── cross_entropy_with_softmax_kernel.cc
    │   │   ├── elementwise_kernel.cc
    │   │   ├── fill_kernel.cc
    │   │   ├── full_kernel.cc
    │   │   ├── kernels.h
    │   │   ├── matmul_kernel.cc
    │   │   ├── mean_kernel.cc
    │   │   ├── memcpy_kernel.cc
    │   │   ├── phi_funcs.h
    │   │   ├── reduce_kernel.cc
    │   │   ├── reshape_kernel.cc
    │   │   ├── sgd_kernel.cc
    │   │   ├── slice_kernel.cc
    │   │   ├── softmax_kernel.cc
    │   │   ├── strided_copy_kernel.cc
    │   │   ├── transpose_kernel.cc
    │   │   └── uniform_kernel.cc
    │   ├── runtime
    │   │   └── runtime.cc
    │   ├── setup.py.in
    │   ├── tests
    │   │   ├── CMakeLists.txt
    │   │   ├── test_MNIST_model.py
    │   │   └── unittests
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── test_argsort_op.py
    │   │   │   ├── test_cast_op.py
    │   │   │   ├── test_compare_op.py
    │   │   │   ├── test_concat_op.py
    │   │   │   ├── test_elementwise_mul_op.py
    │   │   │   ├── test_fill_constant_op.py
    │   │   │   ├── test_matmul_op.py
    │   │   │   ├── test_matmul_v2_op.py
    │   │   │   ├── test_mean_op.py
    │   │   │   ├── test_reduce_op.py
    │   │   │   ├── test_reshape_op.py
    │   │   │   ├── test_sgd_op.py
    │   │   │   ├── test_slice_op.py
    │   │   │   ├── test_softmax_op.py
    │   │   │   ├── test_softmax_with_cross_entropy_op.py
    │   │   │   ├── test_transpose_op.py
    │   │   │   ├── test_uniform_random_op.py
    │   │   │   └── test_zero_dim_tensor.py
    │   └── tools
    │   │   └── dockerfile
    │   │       ├── Dockerfile.ubuntu20.aarch64.gcc84
    │   │       ├── Dockerfile.ubuntu20.x86_64.gcc84
    │   │       ├── build-image.sh
    │   │       └── root
    │   │           ├── .bashrc
    │   │           ├── .gitconfig
    │   │           ├── .scripts
    │   │               ├── git-completion.sh
    │   │               └── git-prompt.sh
    │   │           └── .vimrc
    ├── gcu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── backend
    │   │   ├── equivalence_trans
    │   │   │   ├── all_ops.h
    │   │   │   ├── insensitive_ops
    │   │   │   │   ├── accuracy.h
    │   │   │   │   ├── activation.h
    │   │   │   │   ├── adam.h
    │   │   │   │   ├── adamw.h
    │   │   │   │   ├── add_n.h
    │   │   │   │   ├── argmax.h
    │   │   │   │   ├── argmin.h
    │   │   │   │   ├── argsort.h
    │   │   │   │   ├── assign.h
    │   │   │   │   ├── assign_value.h
    │   │   │   │   ├── atan.h
    │   │   │   │   ├── batch_norm.h
    │   │   │   │   ├── bilinear_interp_v2.h
    │   │   │   │   ├── bitwise_not.h
    │   │   │   │   ├── bmm.h
    │   │   │   │   ├── cast.h
    │   │   │   │   ├── check_finite_and_unscale.h
    │   │   │   │   ├── clip.h
    │   │   │   │   ├── concat.h
    │   │   │   │   ├── conv2d.h
    │   │   │   │   ├── conv3d.h
    │   │   │   │   ├── cos.h
    │   │   │   │   ├── cross_entropy.h
    │   │   │   │   ├── cumsum.h
    │   │   │   │   ├── dropout.h
    │   │   │   │   ├── elementwise_binary.h
    │   │   │   │   ├── elementwise_unary.h
    │   │   │   │   ├── embedding.h
    │   │   │   │   ├── equal.h
    │   │   │   │   ├── expand.h
    │   │   │   │   ├── expand_as.h
    │   │   │   │   ├── fill_constant.h
    │   │   │   │   ├── fill_zeros_like.h
    │   │   │   │   ├── flatten.h
    │   │   │   │   ├── flip.h
    │   │   │   │   ├── floor.h
    │   │   │   │   ├── full_like.h
    │   │   │   │   ├── fuse
    │   │   │   │   │   ├── conv_add_mul_add_hard_swish_mul_add.h
    │   │   │   │   │   ├── conv_bias_activate.h
    │   │   │   │   │   ├── conv_bn.h
    │   │   │   │   │   ├── conv_bn_hard_swish.h
    │   │   │   │   │   ├── conv_bn_relu.h
    │   │   │   │   │   ├── dot_bias.h
    │   │   │   │   │   ├── mul_add.h
    │   │   │   │   │   └── utility.h
    │   │   │   │   ├── gather.h
    │   │   │   │   ├── gather_nd.h
    │   │   │   │   ├── gelu.h
    │   │   │   │   ├── grid_sampler.h
    │   │   │   │   ├── huber_loss.h
    │   │   │   │   ├── increment.h
    │   │   │   │   ├── index_select.h
    │   │   │   │   ├── instance_norm.h
    │   │   │   │   ├── iou_similarity.h
    │   │   │   │   ├── isinf_v2.h
    │   │   │   │   ├── label_smooth.h
    │   │   │   │   ├── layer_norm.h
    │   │   │   │   ├── log.h
    │   │   │   │   ├── log_loss.h
    │   │   │   │   ├── log_softmax.h
    │   │   │   │   ├── logical_and.h
    │   │   │   │   ├── logical_not.h
    │   │   │   │   ├── masked_select.h
    │   │   │   │   ├── matmul_v2.h
    │   │   │   │   ├── maximum.h
    │   │   │   │   ├── mean.h
    │   │   │   │   ├── meshgrid.h
    │   │   │   │   ├── minimum.h
    │   │   │   │   ├── momentum.h
    │   │   │   │   ├── mul.h
    │   │   │   │   ├── nearest_interp.h
    │   │   │   │   ├── nearest_interp_v2.h
    │   │   │   │   ├── not_equal.h
    │   │   │   │   ├── one_hot.h
    │   │   │   │   ├── pool2d.h
    │   │   │   │   ├── prior_box.h
    │   │   │   │   ├── range.h
    │   │   │   │   ├── reduce_x.h
    │   │   │   │   ├── reshape.h
    │   │   │   │   ├── reverse.h
    │   │   │   │   ├── rmsprop.h
    │   │   │   │   ├── rnn.h
    │   │   │   │   ├── roi_align.h
    │   │   │   │   ├── roll.h
    │   │   │   │   ├── scale.h
    │   │   │   │   ├── scatter.h
    │   │   │   │   ├── set_value.h
    │   │   │   │   ├── shape.h
    │   │   │   │   ├── share_data.h
    │   │   │   │   ├── sigmoid_cross_entropy_with_logits.h
    │   │   │   │   ├── sign.h
    │   │   │   │   ├── size.h
    │   │   │   │   ├── slice.h
    │   │   │   │   ├── softmax.h
    │   │   │   │   ├── softmax_with_cross_entropy.h
    │   │   │   │   ├── split.h
    │   │   │   │   ├── sqrt.h
    │   │   │   │   ├── squared_l2_norm.h
    │   │   │   │   ├── squeeze.h
    │   │   │   │   ├── stack.h
    │   │   │   │   ├── strided_slice.h
    │   │   │   │   ├── tanh.h
    │   │   │   │   ├── tile.h
    │   │   │   │   ├── topk.h
    │   │   │   │   ├── transpose.h
    │   │   │   │   ├── tril_triu.h
    │   │   │   │   ├── unsqueeze.h
    │   │   │   │   ├── unstack.h
    │   │   │   │   ├── where.h
    │   │   │   │   └── yolo_box.h
    │   │   │   └── utils.h
    │   │   ├── executor
    │   │   │   ├── cast_runner.cc
    │   │   │   ├── cast_runner.h
    │   │   │   ├── gcu_node.h
    │   │   │   ├── single_op_executor.cc
    │   │   │   ├── single_op_executor.h
    │   │   │   ├── tops_compiler.cc
    │   │   │   └── tops_compiler.h
    │   │   ├── register
    │   │   │   └── register.h
    │   │   └── utils
    │   │   │   ├── gcu_op_desc.cc
    │   │   │   ├── gcu_op_desc.h
    │   │   │   ├── types.h
    │   │   │   ├── utils.cc
    │   │   │   └── utils.h
    │   ├── ci_test.sh
    │   ├── cmake
    │   │   ├── dummy.c.in
    │   │   ├── external
    │   │   │   ├── gcu.cmake
    │   │   │   ├── gflags.cmake
    │   │   │   ├── glog.cmake
    │   │   │   ├── gtest.cmake
    │   │   │   ├── onednn.cmake
    │   │   │   ├── pybind11.cmake
    │   │   │   └── topscc.cmake
    │   │   ├── generic.cmake
    │   │   ├── paddle.cmake
    │   │   ├── third_party.cmake
    │   │   └── version.cmake
    │   ├── common
    │   │   ├── flags.cc
    │   │   ├── gcu_env_list.h
    │   │   ├── gcu_funcs.h
    │   │   ├── gcu_op_runner.cc
    │   │   ├── gcu_op_runner.h
    │   │   ├── gcu_profiler.h
    │   │   ├── host_pinned_allocator.h
    │   │   ├── utils.cc
    │   │   └── utils.h
    │   ├── custom_engine
    │   │   ├── custom_engine_interface.cc
    │   │   ├── custom_engine_interface.h
    │   │   ├── custom_engine_op.cc
    │   │   ├── custom_engine_op.h
    │   │   ├── gcu_engine.cc
    │   │   ├── gcu_engine.h
    │   │   ├── gcu_engine_compiler.cc
    │   │   ├── gcu_engine_compiler.h
    │   │   ├── gcu_engine_executor.cc
    │   │   ├── gcu_engine_executor.h
    │   │   └── ir_translator
    │   │   │   ├── operators
    │   │   │       ├── activation_ops.cc
    │   │   │       ├── batch_norm.cc
    │   │   │       ├── binary_ops.cc
    │   │   │       ├── builtin_ops.cc
    │   │   │       ├── cast.cc
    │   │   │       ├── concat.cc
    │   │   │       ├── conv2d.cc
    │   │   │       ├── full.cc
    │   │   │       ├── full_int_array.cc
    │   │   │       ├── full_like.cc
    │   │   │       ├── isnan.cc
    │   │   │       ├── matmul.cc
    │   │   │       ├── nearest_interp.cc
    │   │   │       ├── pool2d.cc
    │   │   │       ├── reshape.cc
    │   │   │       ├── scale.cc
    │   │   │       ├── shape.cc
    │   │   │       ├── slice.cc
    │   │   │       ├── softmax.cc
    │   │   │       ├── where.cc
    │   │   │       └── yield.cc
    │   │   │   ├── translator_registry.h
    │   │   │   └── utils
    │   │   │       ├── utils.cc
    │   │   │       └── utils.h
    │   ├── custom_op
    │   │   ├── conv_add_mul_add_hard_swish_mul_add_op.cc
    │   │   ├── conv_bias_activate_op.cc
    │   │   ├── conv_bn_hard_swish_op.cc
    │   │   ├── conv_bn_op.cc
    │   │   ├── conv_bn_relu_op.cc
    │   │   ├── custom_op_common.cc
    │   │   ├── custom_op_common.h
    │   │   ├── dot_bias_op.cc
    │   │   ├── fused_add_rms_norm_op.cc
    │   │   ├── fused_conv2d_add_op.cc
    │   │   ├── fused_conv2d_transpose_bias_act_op.cc
    │   │   ├── fused_fc_elementwise_layernorm_op.cc
    │   │   ├── fused_multi_head_attention.cc
    │   │   ├── fused_rotary_embedding_op.cc
    │   │   ├── fused_sdp_flash_attention_op.cc
    │   │   ├── fused_self_attention.cc
    │   │   ├── gcu_netoutput_op.cc
    │   │   ├── mul_add_op.cc
    │   │   ├── my_add_n_op.cc
    │   │   ├── rms_norm_op.cc
    │   │   └── test_for_custom_engine_op.cc
    │   ├── kernels
    │   │   ├── accuracy_kernel.cc
    │   │   ├── activation_kernels.cc
    │   │   ├── adam_kernel.cc
    │   │   ├── add_n_kernel.cc
    │   │   ├── arange_kernel.cc
    │   │   ├── arg_min_max_kernels.cc
    │   │   ├── argsort_kernel.cc
    │   │   ├── assign_kernel.cc
    │   │   ├── atan2_kernel.cc
    │   │   ├── batch_norm_kernel.cc
    │   │   ├── binary_kernels.cc
    │   │   ├── bitwise_kernel.cc
    │   │   ├── cast_kernel.cc
    │   │   ├── cholesky_kernel.cc
    │   │   ├── clip_kernel.cc
    │   │   ├── compare_kernels.cc
    │   │   ├── concat_kernel.cc
    │   │   ├── contiguous_kernel.cc
    │   │   ├── conv_kernel.cc
    │   │   ├── conv_transpose_kernel.cc
    │   │   ├── copysign_kernel.cc
    │   │   ├── cross_entropy_kernel.cc
    │   │   ├── cross_kernel.cc
    │   │   ├── cumulate_kernel.cc
    │   │   ├── diag_kernel.cc
    │   │   ├── diagonal_kernel.cc
    │   │   ├── dropout_kernel.cc
    │   │   ├── einsum_kernel.cc
    │   │   ├── embedding_kernel.cc
    │   │   ├── expand_as_kernel.cc
    │   │   ├── expand_kernel.cc
    │   │   ├── eye_kernel.cc
    │   │   ├── fc_kernel.cc
    │   │   ├── flatten_kernel.cc
    │   │   ├── flip_kernel.cc
    │   │   ├── full_kernel.cc
    │   │   ├── funcs
    │   │   │   ├── common_ops.cc
    │   │   │   ├── common_ops.h
    │   │   │   ├── gcu_kernel_funcs.cc
    │   │   │   ├── gcu_kernel_funcs.h
    │   │   │   ├── gcu_layout_funcs.cc
    │   │   │   ├── gcu_layout_funcs.h
    │   │   │   ├── op_utils.cc
    │   │   │   ├── op_utils.h
    │   │   │   ├── topsaten_op_launch.h
    │   │   │   ├── topsaten_op_utils.cc
    │   │   │   └── topsaten_op_utils.h
    │   │   ├── fused_conv2d_add_act_kernel.cc
    │   │   ├── fused_fc_elementwise_layernorm_kernel.cc
    │   │   ├── gather_kernel.cc
    │   │   ├── gather_nd_kernel.cc
    │   │   ├── gaussian_kernel.cc
    │   │   ├── grid_sample_kernel.cc
    │   │   ├── huber_loss_kernel.cc
    │   │   ├── increment_kernel.cc
    │   │   ├── index_add_kernel.cc
    │   │   ├── index_put_kernel.cc
    │   │   ├── index_sample_kernel.cc
    │   │   ├── index_select_kernel.cc
    │   │   ├── instance_norm_kernel.cc
    │   │   ├── interpolate_kernels.cc
    │   │   ├── is_empty_kernel.cc
    │   │   ├── isclose_kernel.cc
    │   │   ├── isfinite_kernel.cc
    │   │   ├── isinf_kernel.cc
    │   │   ├── isnan_kernel.cc
    │   │   ├── label_smooth_kernel.cc
    │   │   ├── layer_norm_kernel.cc
    │   │   ├── lerp_kernel.cc
    │   │   ├── llama_stub_kernels.cc
    │   │   ├── log_loss_kernel.cc
    │   │   ├── log_softmax_kernel.cc
    │   │   ├── logcumsumexp_kernel.cc
    │   │   ├── logical_kernels.cc
    │   │   ├── logsumexp_kernel.cc
    │   │   ├── masked_select_kernel.cc
    │   │   ├── matmul_kernel.cc
    │   │   ├── mean_all_kernel.cc
    │   │   ├── memcpy_kernels.cc
    │   │   ├── merged_adam_kernel.cc
    │   │   ├── merged_momentum_kernel.cc
    │   │   ├── meshgrid_kernel.cc
    │   │   ├── momentum_kernel.cc
    │   │   ├── multiclass_nms3_kernel.cc
    │   │   ├── multinomial_kernel.cc
    │   │   ├── nms_kernel.cc
    │   │   ├── numel_kernel.cc
    │   │   ├── one_hot_kernel.cc
    │   │   ├── pool2d_kernel.cc
    │   │   ├── prior_box_kernel.cc
    │   │   ├── randperm_kernel.cc
    │   │   ├── reduce_kernels.cc
    │   │   ├── reshape_kernel.cc
    │   │   ├── rmsprop_kernel.cc
    │   │   ├── rnn_kernel.cc
    │   │   ├── roi_align_kernel.cc
    │   │   ├── roll_kernel.cc
    │   │   ├── scale_kernel.cc
    │   │   ├── scatter_kernel.cc
    │   │   ├── set_value_kernel.cc
    │   │   ├── sigmoid_cross_entropy_with_logits_kernel.cc
    │   │   ├── sign_kernel.cc
    │   │   ├── slice_kernel.cc
    │   │   ├── softmax_kernel.cc
    │   │   ├── split_kernel.cc
    │   │   ├── squared_l2_norm_kernel.cc
    │   │   ├── squeeze_kernel.cc
    │   │   ├── stack_kernel.cc
    │   │   ├── strided_copy_kernel.cc
    │   │   ├── strided_slice_kernel.cc
    │   │   ├── swiglu_kernel.cc
    │   │   ├── take_along_axis.cc
    │   │   ├── temporal_shift_kernel.cc
    │   │   ├── tile_kernel.cc
    │   │   ├── top_p_sampling_kernel.cc
    │   │   ├── topk_kernel.cc
    │   │   ├── transfer_layout_kernel.cc
    │   │   ├── transpose_kernel.cc
    │   │   ├── tril_triu_kernel.cc
    │   │   ├── trunc_kernel.cc
    │   │   ├── truncated_gaussian_random_kernel.cc
    │   │   ├── uniform_kernel.cc
    │   │   ├── unsqueeze_kernel.cc
    │   │   ├── where_kernel.cc
    │   │   └── yolo_box_kernel.cc
    │   ├── paddle_gcu_export.map
    │   ├── passes
    │   │   ├── __init__.py
    │   │   ├── common.py
    │   │   ├── custom_addn_pass.cc
    │   │   ├── gcu_conv2d_add_act_fuse_pass.cc
    │   │   ├── gcu_conv2d_add_fuse_pass.cc
    │   │   ├── gcu_conv2d_bn_fuse_pass.cc
    │   │   ├── gcu_custom_passes.h
    │   │   ├── gcu_fused_conv2d_add_act_append_pass.cc
    │   │   ├── gcu_op_marker_pass.cc
    │   │   ├── gcu_pass_pipeline.cc
    │   │   ├── gcu_pass_pipeline.h
    │   │   ├── gcu_replace_with_engine_op_pass.cc
    │   │   ├── gcu_sub_graph_extract_pass.cc
    │   │   └── legacy_ir_passes
    │   │   │   ├── gcu_conv_add_mul_add_hard_swish_mul_add_fuse.py
    │   │   │   ├── gcu_conv_bias_activate_fuse.py
    │   │   │   ├── gcu_conv_bn_fuse.py
    │   │   │   ├── gcu_conv_bn_hard_swish_fuse.py
    │   │   │   ├── gcu_conv_depthwise_elementwise_add_fuse_pass.py
    │   │   │   ├── gcu_conv_elementwise_add_fuse_pass.py
    │   │   │   ├── gcu_conv_transpose_elementwise_add_act_fuse_pass.py
    │   │   │   ├── gcu_conv_transpose_elementwise_add_fuse_pass.py
    │   │   │   ├── gcu_dot_bias_fuse.py
    │   │   │   ├── gcu_fused_conv2d_add_act_append_pass.py
    │   │   │   ├── gcu_linear_fuse.py
    │   │   │   ├── gcu_mul_add_fuse.py
    │   │   │   ├── gcu_multi_head_attn_fuse.py
    │   │   │   ├── gcu_netoutput_pass.py
    │   │   │   └── gcu_sdp_attn_fuse.py
    │   ├── runtime
    │   │   ├── flags.h
    │   │   ├── runtime.cc
    │   │   └── runtime.h
    │   ├── setup.py.in
    │   ├── tests
    │   │   ├── CMakeLists.txt
    │   │   ├── fuse_pass
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── model
    │   │   │   │   ├── conv_bn.pdiparams
    │   │   │   │   ├── conv_bn.pdmodel
    │   │   │   │   ├── conv_bn_hard_swish.pdiparams
    │   │   │   │   ├── conv_bn_hard_swish.pdmodel
    │   │   │   │   ├── conv_bn_relu.pdiparams
    │   │   │   │   └── conv_bn_relu.pdmodel
    │   │   │   ├── model_graph
    │   │   │   │   ├── test_graph.json
    │   │   │   │   └── test_graph.pdiparams
    │   │   │   ├── test_conv_bn_hard_swish_pass.py
    │   │   │   ├── test_conv_bn_pass.py
    │   │   │   ├── test_conv_bn_relu_pass.py
    │   │   │   ├── test_custom_addn_pass.py
    │   │   │   ├── test_custom_engine.py
    │   │   │   └── test_custom_engine_ppocr_cls.py
    │   │   ├── unittests
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── api_base.py
    │   │   │   ├── test_accuracy.py
    │   │   │   ├── test_add_n.py
    │   │   │   ├── test_arange.py
    │   │   │   ├── test_argmax.py
    │   │   │   ├── test_argmin.py
    │   │   │   ├── test_argsort.py
    │   │   │   ├── test_assign.py
    │   │   │   ├── test_atan2.py
    │   │   │   ├── test_batch_norm.py
    │   │   │   ├── test_binary_ops.py
    │   │   │   ├── test_bitwise_ops.py
    │   │   │   ├── test_cast.py
    │   │   │   ├── test_cholesky.py
    │   │   │   ├── test_clip.py
    │   │   │   ├── test_compare_ops.py
    │   │   │   ├── test_concat.py
    │   │   │   ├── test_contiguous.py
    │   │   │   ├── test_conv_ops.py
    │   │   │   ├── test_conv_transpose_ops.py
    │   │   │   ├── test_copysign.py
    │   │   │   ├── test_cross.py
    │   │   │   ├── test_cumulate_ops.py
    │   │   │   ├── test_diag.py
    │   │   │   ├── test_diagonal.py
    │   │   │   ├── test_dropout.py
    │   │   │   ├── test_einsum.py
    │   │   │   ├── test_embedding.py
    │   │   │   ├── test_expand.py
    │   │   │   ├── test_expand_as.py
    │   │   │   ├── test_eye_kernel.py
    │   │   │   ├── test_fc.py
    │   │   │   ├── test_flatten.py
    │   │   │   ├── test_flip.py
    │   │   │   ├── test_full.py
    │   │   │   ├── test_fused_add_rms_norm.py
    │   │   │   ├── test_fused_conv2d_add.py
    │   │   │   ├── test_fused_conv2d_add_act.py
    │   │   │   ├── test_fused_conv2d_transpose_bias_act.py
    │   │   │   ├── test_fused_rotary_embedding.py
    │   │   │   ├── test_fused_sdp_flash_attention.py
    │   │   │   ├── test_gather_nd.py
    │   │   │   ├── test_gather_op.py
    │   │   │   ├── test_gaussian_random.py
    │   │   │   ├── test_grid_sample.py
    │   │   │   ├── test_hard_sigmoid.py
    │   │   │   ├── test_huber_loss.py
    │   │   │   ├── test_increment.py
    │   │   │   ├── test_index_add.py
    │   │   │   ├── test_index_put.py
    │   │   │   ├── test_index_sample.py
    │   │   │   ├── test_index_select.py
    │   │   │   ├── test_instance_norm.py
    │   │   │   ├── test_interpolate.py
    │   │   │   ├── test_is_empty.py
    │   │   │   ├── test_isclose.py
    │   │   │   ├── test_isfinite.py
    │   │   │   ├── test_isinf.py
    │   │   │   ├── test_isnan.py
    │   │   │   ├── test_layer_norm.py
    │   │   │   ├── test_lerp.py
    │   │   │   ├── test_log_loss.py
    │   │   │   ├── test_log_softmax.py
    │   │   │   ├── test_logcumsumexp.py
    │   │   │   ├── test_logical_ops.py
    │   │   │   ├── test_logsumexp.py
    │   │   │   ├── test_masked_select.py
    │   │   │   ├── test_matmul.py
    │   │   │   ├── test_mean_all.py
    │   │   │   ├── test_meshgrid.py
    │   │   │   ├── test_multinomial.py
    │   │   │   ├── test_nms.py
    │   │   │   ├── test_numel.py
    │   │   │   ├── test_one_hot.py
    │   │   │   ├── test_pool2d.py
    │   │   │   ├── test_reduce_ops.py
    │   │   │   ├── test_rms_norm.py
    │   │   │   ├── test_rnn.py
    │   │   │   ├── test_roll.py
    │   │   │   ├── test_scale.py
    │   │   │   ├── test_scatter.py
    │   │   │   ├── test_set_value.py
    │   │   │   ├── test_sign.py
    │   │   │   ├── test_slice.py
    │   │   │   ├── test_softmax.py
    │   │   │   ├── test_split.py
    │   │   │   ├── test_squeeze.py
    │   │   │   ├── test_stack.py
    │   │   │   ├── test_strided_slice.py
    │   │   │   ├── test_swiglu.py
    │   │   │   ├── test_take_along_axis.py
    │   │   │   ├── test_tile.py
    │   │   │   ├── test_topk.py
    │   │   │   ├── test_transpose.py
    │   │   │   ├── test_tril_triu.py
    │   │   │   ├── test_trunc.py
    │   │   │   ├── test_unary_ops.py
    │   │   │   ├── test_unbind.py
    │   │   │   ├── test_uniform_random.py
    │   │   │   ├── test_unsqueeze.py
    │   │   │   ├── test_unstack.py
    │   │   │   ├── test_where.py
    │   │   │   └── test_x_custom_engine_op.py
    │   │   ├── unittests_jit
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── api_base.py
    │   │   │   ├── ctest.cmake
    │   │   │   ├── mise.cmake
    │   │   │   ├── pytest.ini
    │   │   │   ├── test_abs.py
    │   │   │   ├── test_accuracy.py
    │   │   │   ├── test_adam.py
    │   │   │   ├── test_adamw.py
    │   │   │   ├── test_add_n.py
    │   │   │   ├── test_argmax.py
    │   │   │   ├── test_argmin.py
    │   │   │   ├── test_argsort.py
    │   │   │   ├── test_assign.py
    │   │   │   ├── test_assign_value.py
    │   │   │   ├── test_atan.py
    │   │   │   ├── test_batch_norm.py
    │   │   │   ├── test_bilinear_interp_v2.py
    │   │   │   ├── test_bmm.py
    │   │   │   ├── test_cast.py
    │   │   │   ├── test_check_finite_and_unscale.py
    │   │   │   ├── test_clip.py
    │   │   │   ├── test_collective_gcu.py
    │   │   │   ├── test_concat.py
    │   │   │   ├── test_conv2d.py
    │   │   │   ├── test_conv2d_transpose.py
    │   │   │   ├── test_conv3d.py
    │   │   │   ├── test_conv3d_transpose.py
    │   │   │   ├── test_cos.py
    │   │   │   ├── test_cross_entropy.py
    │   │   │   ├── test_cross_entropy2.py
    │   │   │   ├── test_cumsum.py
    │   │   │   ├── test_depthwise_conv2d.py
    │   │   │   ├── test_dropout.py
    │   │   │   ├── test_eager_collective.py
    │   │   │   ├── test_eager_fp_bp_no_trans.py
    │   │   │   ├── test_elementwise_add.py
    │   │   │   ├── test_elementwise_div.py
    │   │   │   ├── test_elementwise_mul.py
    │   │   │   ├── test_elementwise_pow.py
    │   │   │   ├── test_elementwise_sub.py
    │   │   │   ├── test_embedding.py
    │   │   │   ├── test_equal.py
    │   │   │   ├── test_exp.py
    │   │   │   ├── test_expand.py
    │   │   │   ├── test_expand_as.py
    │   │   │   ├── test_fc.py
    │   │   │   ├── test_fill_constant.py
    │   │   │   ├── test_flatten.py
    │   │   │   ├── test_flip.py
    │   │   │   ├── test_floor.py
    │   │   │   ├── test_full_like.py
    │   │   │   ├── test_gather.py
    │   │   │   ├── test_gather_nd.py
    │   │   │   ├── test_gelu.py
    │   │   │   ├── test_greater_equal.py
    │   │   │   ├── test_greater_than.py
    │   │   │   ├── test_grid_sample.py
    │   │   │   ├── test_hard_sigmoid.py
    │   │   │   ├── test_hard_swish.py
    │   │   │   ├── test_huber_loss.py
    │   │   │   ├── test_increment.py
    │   │   │   ├── test_index_select.py
    │   │   │   ├── test_instance_norm.py
    │   │   │   ├── test_iou_similarity.py
    │   │   │   ├── test_isinf.py
    │   │   │   ├── test_label_smooth.py
    │   │   │   ├── test_layer_norm.py
    │   │   │   ├── test_leaky_relu.py
    │   │   │   ├── test_less_equal.py
    │   │   │   ├── test_less_than.py
    │   │   │   ├── test_log.py
    │   │   │   ├── test_log_softmax.py
    │   │   │   ├── test_logical_and.py
    │   │   │   ├── test_logical_not.py
    │   │   │   ├── test_lstm.py
    │   │   │   ├── test_masked_select.py
    │   │   │   ├── test_matmul_v2.py
    │   │   │   ├── test_maximum.py
    │   │   │   ├── test_mean.py
    │   │   │   ├── test_merged_adam.py
    │   │   │   ├── test_merged_momentum.py
    │   │   │   ├── test_meshgrid.py
    │   │   │   ├── test_minimum.py
    │   │   │   ├── test_momentum.py
    │   │   │   ├── test_nearest_interp.py
    │   │   │   ├── test_nearest_interp_v2.py
    │   │   │   ├── test_not_equal.py
    │   │   │   ├── test_one_hot.py
    │   │   │   ├── test_one_hot_v2.py
    │   │   │   ├── test_pool2d_avg.py
    │   │   │   ├── test_pool2d_max.py
    │   │   │   ├── test_pow.py
    │   │   │   ├── test_prior_box.py
    │   │   │   ├── test_randperm.py
    │   │   │   ├── test_range.py
    │   │   │   ├── test_reciprocal.py
    │   │   │   ├── test_reduce_max.py
    │   │   │   ├── test_reduce_mean.py
    │   │   │   ├── test_reduce_min.py
    │   │   │   ├── test_reduce_prod.py
    │   │   │   ├── test_reduce_sum.py
    │   │   │   ├── test_relu.py
    │   │   │   ├── test_relu6.py
    │   │   │   ├── test_reshape.py
    │   │   │   ├── test_reverse.py
    │   │   │   ├── test_rmsprop.py
    │   │   │   ├── test_roi_align.py
    │   │   │   ├── test_roll.py
    │   │   │   ├── test_scale.py
    │   │   │   ├── test_scatter.py
    │   │   │   ├── test_set_value.py
    │   │   │   ├── test_shape.py
    │   │   │   ├── test_sigmoid.py
    │   │   │   ├── test_sigmoid_cross_entropy_with_logits.py
    │   │   │   ├── test_sign.py
    │   │   │   ├── test_silu.py
    │   │   │   ├── test_size.py
    │   │   │   ├── test_slice.py
    │   │   │   ├── test_softmax.py
    │   │   │   ├── test_softmax_cross_entropy_hardlabel.py
    │   │   │   ├── test_softmax_cross_entropy_softlabel.py
    │   │   │   ├── test_split.py
    │   │   │   ├── test_sqrt.py
    │   │   │   ├── test_square.py
    │   │   │   ├── test_squared_l2_norm.py
    │   │   │   ├── test_squeeze.py
    │   │   │   ├── test_squeeze_v2.py
    │   │   │   ├── test_stack.py
    │   │   │   ├── test_strided_slice.py
    │   │   │   ├── test_swish.py
    │   │   │   ├── test_tanh.py
    │   │   │   ├── test_tile.py
    │   │   │   ├── test_topk.py
    │   │   │   ├── test_transpose.py
    │   │   │   ├── test_tril_triu.py
    │   │   │   ├── test_truncated_gaussian_random.py
    │   │   │   ├── test_unsqueeze.py
    │   │   │   ├── test_unstack.py
    │   │   │   ├── test_where.py
    │   │   │   └── test_yolo_box.py
    │   │   └── unittests_legacy
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── test_abs_op_gcu.py
    │   │   │   ├── test_accuracy_op_gcu.py
    │   │   │   ├── test_activation_op_gcu.py
    │   │   │   ├── test_adam_op_gcu.py
    │   │   │   ├── test_adamw_op_gcu.py
    │   │   │   ├── test_add_n_op_gcu.py
    │   │   │   ├── test_arange_op_gcu.py
    │   │   │   ├── test_arg_max_op_gcu.py
    │   │   │   ├── test_arg_min_op_gcu.py
    │   │   │   ├── test_argsort_op_gcu.py
    │   │   │   ├── test_assign_op_gcu.py
    │   │   │   ├── test_assign_value_op_gcu.py
    │   │   │   ├── test_batch_norm_op_gcu.py
    │   │   │   ├── test_bilinear_interp_op_gcu.py
    │   │   │   ├── test_bitwise_op_gcu.py
    │   │   │   ├── test_cast_op_gcu.py
    │   │   │   ├── test_clip_op_gcu.py
    │   │   │   ├── test_compare_op_gcu.py
    │   │   │   ├── test_concat_op_gcu.py
    │   │   │   ├── test_conv2d_op_gcu.py
    │   │   │   ├── test_conv2d_transposed_op_gcu.py
    │   │   │   ├── test_conv3d_op_gcu.py
    │   │   │   ├── test_conv_bn_relu.py
    │   │   │   ├── test_depthwise_conv2d_op_gcu.py
    │   │   │   ├── test_dropout_op_gcu.py
    │   │   │   ├── test_elementwise_add_op_gcu.py
    │   │   │   ├── test_elementwise_div_op_gcu.py
    │   │   │   ├── test_elementwise_max_op_gcu.py
    │   │   │   ├── test_elementwise_min_op_gcu.py
    │   │   │   ├── test_elementwise_mul_op_gcu.py
    │   │   │   ├── test_elementwise_sub_op_gcu.py
    │   │   │   ├── test_expand_as_v2_op_gcu.py
    │   │   │   ├── test_expand_v2_op_gcu.py
    │   │   │   ├── test_fill_any_like_op_gcu.py
    │   │   │   ├── test_fill_constant_op_gcu.py
    │   │   │   ├── test_flatten_op_gcu.py
    │   │   │   ├── test_gather_nd_op_gcu.py
    │   │   │   ├── test_gather_op_gcu.py
    │   │   │   ├── test_grid_sample_op_gcu.py
    │   │   │   ├── test_huber_loss_op_gcu.py
    │   │   │   ├── test_instance_norm_op_gcu.py
    │   │   │   ├── test_isinf_v2_op_gcu.py
    │   │   │   ├── test_label_smooth_op_gcu.py
    │   │   │   ├── test_layer_norm_op_gcu.py
    │   │   │   ├── test_log_loss_op_gcu.py
    │   │   │   ├── test_log_softmax_op_gcu.py
    │   │   │   ├── test_logical_op_gcu.py
    │   │   │   ├── test_lookup_table_v2_op_gcu.py
    │   │   │   ├── test_matmul_op_gcu.py
    │   │   │   ├── test_mean_op_gcu.py
    │   │   │   ├── test_memcpy_op_gcu.py
    │   │   │   ├── test_merged_adam_op_gcu.py
    │   │   │   ├── test_merged_momentum_op_gcu.py
    │   │   │   ├── test_meshgrid_op_gcu.py
    │   │   │   ├── test_momentum_op_gcu.py
    │   │   │   ├── test_nearest_interp_op_gcu.py
    │   │   │   ├── test_numel_op_gcu.py
    │   │   │   ├── test_one_hot_v2_op_gcu.py
    │   │   │   ├── test_pool2d_op_gcu.py
    │   │   │   ├── test_prior_box_op_gcu.py
    │   │   │   ├── test_randperm_op_gcu.py
    │   │   │   ├── test_reduce_max_op_gcu.py
    │   │   │   ├── test_reduce_mean_op_gcu.py
    │   │   │   ├── test_reduce_min_op_gcu.py
    │   │   │   ├── test_reduce_prod_op_gcu.py
    │   │   │   ├── test_reduce_sum_op_gcu.py
    │   │   │   ├── test_reshape_op_gcu.py
    │   │   │   ├── test_rmsprop_op_gcu.py
    │   │   │   ├── test_roi_align_op_gcu.py
    │   │   │   ├── test_scale_op_gcu.py
    │   │   │   ├── test_scatter_op_gcu.py
    │   │   │   ├── test_set_value_op_gcu.py
    │   │   │   ├── test_sigmoid_cross_entropy_with_logits_op_gcu.py
    │   │   │   ├── test_slice_op_gcu.py
    │   │   │   ├── test_softmax_op_gcu.py
    │   │   │   ├── test_softmax_with_cross_entropy_op_gcu.py
    │   │   │   ├── test_split_op_gcu.py
    │   │   │   ├── test_squared_l2_norm_op_gcu.py
    │   │   │   ├── test_squeeze_op_gcu.py
    │   │   │   ├── test_stack_op_gcu.py
    │   │   │   ├── test_strided_slice_op_gcu.py
    │   │   │   ├── test_tile_op_gcu.py
    │   │   │   ├── test_top_k_op_gcu.py
    │   │   │   ├── test_transpose_op_gcu.py
    │   │   │   ├── test_tril_triu_op_gcu.py
    │   │   │   ├── test_unsqueeze_op_gcu.py
    │   │   │   ├── test_where_op_gcu.py
    │   │   │   └── test_yolo_box_op_gcu.py
    │   ├── tools
    │   │   └── dockerfile
    │   │   │   ├── Dockerfile.gcu.ubuntu20.gcc84
    │   │   │   └── build-image.sh
    │   └── topscc_custom_kernels
    │   │   └── topscc_custom_demo_kernel.cc
    ├── iluvatar_gpu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── clean_paddle.sh
    │   ├── cmake
    │   │   ├── cblas.cmake
    │   │   ├── cuda.cmake
    │   │   ├── dummy.c.in
    │   │   ├── external
    │   │   │   ├── cccl.cmake
    │   │   │   ├── eigen.cmake
    │   │   │   ├── mklml.cmake
    │   │   │   ├── protobuf.cmake
    │   │   │   ├── xxhash.cmake
    │   │   │   └── zlib.cmake
    │   │   ├── generic.cmake
    │   │   ├── paddle.cmake
    │   │   ├── third_party.cmake
    │   │   └── version.cmake
    │   ├── common
    │   │   └── cuda_flags.cc
    │   ├── install_paddle.sh
    │   ├── kernels
    │   │   ├── cuda_kernels
    │   │   │   ├── abs_grad_kernel_register.cc
    │   │   │   ├── abs_kernel_register.cc
    │   │   │   ├── activation_grad_kernel_register.cc
    │   │   │   ├── activation_kernel_register.cc
    │   │   │   ├── adamw_kernel_register.cc
    │   │   │   ├── adamw_kernel_row_register.cc
    │   │   │   ├── addmm_kernel_register.cc
    │   │   │   ├── amp_kernel_register.cc
    │   │   │   ├── arange_kernel_register.cc
    │   │   │   ├── assign_kernel_register.cc
    │   │   │   ├── bitwise_kernel_register.cc
    │   │   │   ├── c_embedding_grad_kernel_register.cc
    │   │   │   ├── c_embedding_kernel_register.cc
    │   │   │   ├── c_identity_kernel_register.cc
    │   │   │   ├── c_softmax_with_cross_entropy_grad_kernel_register.cc
    │   │   │   ├── c_softmax_with_cross_entropy_kernel_register.cc
    │   │   │   ├── cast_kernel_register.cc
    │   │   │   ├── clip_grad_kernel_register.cc
    │   │   │   ├── clip_kernel_register.cc
    │   │   │   ├── compare_kernel_register.cc
    │   │   │   ├── concat_grad_kernel_register.cc
    │   │   │   ├── concat_kernel_register.cc
    │   │   │   ├── contiguous_kernel_register.cc
    │   │   │   ├── cross_entropy_grad_kernel.cu
    │   │   │   ├── cross_entropy_kernel.cu
    │   │   │   ├── cum_grad_kernel_register.cc
    │   │   │   ├── cum_kernel_register.cc
    │   │   │   ├── einsum_kernel_register.cc
    │   │   │   ├── elementwise_grad_kernel_register.cc
    │   │   │   ├── elementwise_kernel_register.cc
    │   │   │   ├── embedding_grad_kernel_register.cc
    │   │   │   ├── embedding_kernel_register.cc
    │   │   │   ├── empty_kernel_register.cc
    │   │   │   ├── expand_kernel_register.cc
    │   │   │   ├── fill_kernel_register.cc
    │   │   │   ├── flatten_grad_kernel_register.cc
    │   │   │   ├── flatten_kernel_register.cc
    │   │   │   ├── full_kernel_register.cc
    │   │   │   ├── fused_rope_grad_kernel.cu
    │   │   │   ├── fused_rope_kernel.cu
    │   │   │   ├── gather_grad_kernel_register.cc
    │   │   │   ├── gather_kernel_register.cc
    │   │   │   ├── gather_nd_grad_kernel_register.cc
    │   │   │   ├── gather_nd_kernel_register.cc
    │   │   │   ├── gaussian_kernel_register.cc
    │   │   │   ├── index_put_kernel_register.cc
    │   │   │   ├── layer_norm_grad_kernel.cu
    │   │   │   ├── layer_norm_kernel.cu
    │   │   │   ├── logical_kernel_register.cc
    │   │   │   ├── logsumexp_kernel_register.cc
    │   │   │   ├── matmul_grad_kernel.cu
    │   │   │   ├── matmul_kernel.cu
    │   │   │   ├── mean_all_grad_kernel_register.cc
    │   │   │   ├── mean_all_kernel_register.cc
    │   │   │   ├── multinomial_kernel_register.cc
    │   │   │   ├── nonzero_kernel_register.cc
    │   │   │   ├── numel_kernel_register.cc
    │   │   │   ├── one_hot_kernel_register.cc
    │   │   │   ├── p_norm_grad_kernel_register.cc
    │   │   │   ├── p_norm_kernel_register.cc
    │   │   │   ├── pad_grad_kernel_register.cc
    │   │   │   ├── pad_kernel_register.cc
    │   │   │   ├── put_along_axis_kernel_register.cc
    │   │   │   ├── randint_kernel_register.cc
    │   │   │   ├── reduce_all_kernel_register.cc
    │   │   │   ├── reduce_any_kernel_register.cc
    │   │   │   ├── reduce_kernel_kps_register.cc
    │   │   │   ├── reduce_kernel_register.cc
    │   │   │   ├── reduce_max_kernel_register.cc
    │   │   │   ├── reduce_mean_kernel_register.cc
    │   │   │   ├── reduce_sum_kernel_register.cc
    │   │   │   ├── reshape_grad_kernel_register.cc
    │   │   │   ├── reshape_kernel_register.cc
    │   │   │   ├── rms_norm_grad_kernel_register.cc
    │   │   │   ├── rms_norm_kernel_register.cc
    │   │   │   ├── scale_kernel_register.cc
    │   │   │   ├── scatter_nd_add_grad_kernel.cu
    │   │   │   ├── scatter_nd_add_kernel.cu
    │   │   │   ├── set_value_grad_kernel_register.cc
    │   │   │   ├── set_value_kernel_register.cc
    │   │   │   ├── shape_kernel_register.cc
    │   │   │   ├── sign_kernel_register.cc
    │   │   │   ├── slice_grad_kernel_register.cc
    │   │   │   ├── slice_kernel_register.cc
    │   │   │   ├── softmax_grad_kernel.cu
    │   │   │   ├── softmax_kernel.cu
    │   │   │   ├── split_kernel_register.cc
    │   │   │   ├── squared_l2_norm_kernel_register.cc
    │   │   │   ├── squeeze_grad_kernel_register.cc
    │   │   │   ├── squeeze_kernel_register.cc
    │   │   │   ├── stack_kernel_register.cc
    │   │   │   ├── strided_copy_kernel_register.cc
    │   │   │   ├── strided_slice_grad_kernel_register.cc
    │   │   │   ├── strided_slice_kernel_register.cc
    │   │   │   ├── swiglu_grad_kernel_register.cc
    │   │   │   ├── swiglu_kernel_register.cc
    │   │   │   ├── take_along_axis_kernel_register.cc
    │   │   │   ├── tile_grad_kernel_register.cc
    │   │   │   ├── tile_kernel_register.cc
    │   │   │   ├── top_k_grad_kernel.cu
    │   │   │   ├── top_k_kernel.cu
    │   │   │   ├── transpose_grad_kernel_register.cc
    │   │   │   ├── transpose_kernel_register.cc
    │   │   │   ├── tril_triu_kernel_register.cc
    │   │   │   ├── unbind_kernel_register.cc
    │   │   │   ├── uniform_kernel_register.cc
    │   │   │   ├── unsqueeze_grad_kernel_register.cc
    │   │   │   ├── unsqueeze_kernel_register.cc
    │   │   │   └── where_kernel_register.cc
    │   │   ├── ernie_core
    │   │   │   ├── cal_aux_loss_grad_kernel_register.cc
    │   │   │   ├── expand_modality_expert_id_kernel_register.cc
    │   │   │   ├── fused_bias_act_kernel_register.cc
    │   │   │   ├── int_bincount_kernel_register.cc
    │   │   │   ├── layer_norm_cuda_kernel_register.cc
    │   │   │   ├── moe_combine_grad_kernel_register.cc
    │   │   │   ├── moe_combine_kernel_register.cc
    │   │   │   ├── moe_gate_dispatch_grad_kernel_register.cc
    │   │   │   ├── moe_gate_dispatch_kernel_register.cc
    │   │   │   ├── moe_gate_dispatch_permute_grad_kernel_register.cc
    │   │   │   ├── moe_gate_dispatch_permute_kernel_register.cc
    │   │   │   ├── moe_ops_partial_nosoftmaxtopk_grad_kernel_register.cc
    │   │   │   ├── moe_ops_partial_nosoftmaxtopk_kernel_register.cc
    │   │   │   ├── register_build_src_rank_and_local_expert_id_kernel.cc
    │   │   │   ├── rms_norm_kernel_register.cc
    │   │   │   └── top_p_sampling_kernel_register.cc
    │   │   ├── funcs
    │   │   │   ├── blas
    │   │   │   │   ├── CMakeLists.txt
    │   │   │   │   ├── blas.cc
    │   │   │   │   ├── blas.h
    │   │   │   │   ├── blas_impl.cu.h
    │   │   │   │   ├── blas_impl.h
    │   │   │   │   ├── blaslt_gemm_search.h
    │   │   │   │   └── blaslt_impl.cu.h
    │   │   │   ├── fake_quantize_functor.h
    │   │   │   ├── layer_norm_impl.cu.h
    │   │   │   ├── layer_norm_util.h
    │   │   │   ├── math_cuda_utils.h
    │   │   │   └── top_k_function_cuda.h
    │   │   ├── gpudnn
    │   │   │   └── softmax_gpudnn.h
    │   │   └── impl
    │   │   │   ├── addmm_kernel_impl.h
    │   │   │   ├── matmul_grad_kernel_impl.h
    │   │   │   └── matmul_kernel_impl.h
    │   ├── patches
    │   │   ├── eigen
    │   │   │   ├── Core
    │   │   │   ├── Tensor
    │   │   │   └── TensorAssign.h
    │   │   └── paddle-corex.patch
    │   ├── runtime
    │   │   └── runtime.cc
    │   ├── setup.py.in
    │   └── tests
    │   │   ├── CMakeLists.txt
    │   │   ├── run_test.sh
    │   │   └── unittests
    │   │       ├── test_abs_iluvatar.py
    │   │       ├── test_addmm_op_iluvatar.py
    │   │       ├── test_arange_iluvatar.py
    │   │       ├── test_assign_op_iluvatar.py
    │   │       ├── test_bfloat16_embedding_iluvatar.py
    │   │       ├── test_bitwise_op_iluvatar.py
    │   │       ├── test_cast_op_iluvatar.py
    │   │       ├── test_concat_op_iluvatar.py
    │   │       ├── test_count_nonzero_api_iluvatar.py
    │   │       ├── test_cumsum_op_iluvatar.py
    │   │       ├── test_einsum_iluvatar.py
    │   │       ├── test_elementwise_div_op_iluvatar.py
    │   │       ├── test_empty_op_iluvatar.py
    │   │       ├── test_flatten2_op_iluvatar.py
    │   │       ├── test_full_like_op_iluvatar.py
    │   │       ├── test_full_op_iluvatar.py
    │   │       ├── test_fused_rope_iluvatar.py
    │   │       ├── test_gather_op_iluvatar.py
    │   │       ├── test_greater_equal_op_iluvatar.py
    │   │       ├── test_layer_norm_op_iluvatar.py
    │   │       ├── test_layer_norm_op_v2_iluvatar.py
    │   │       ├── test_matmul_op_iluvatar.py
    │   │       ├── test_max_op_iluvatar.py
    │   │       ├── test_maximum_op_iluvatar.py
    │   │       ├── test_nonzero_api_iluvatar.py
    │   │       ├── test_one_hot_v2_op_iluvatar.py
    │   │       ├── test_p_norm_op_iluvatar.py
    │   │       ├── test_pad_op_iluvatar.py
    │   │       ├── test_rms_norm_op_iluvatar.py
    │   │       ├── test_scale_op_iluvatar.py
    │   │       ├── test_set_value_op_iluvatar.py
    │   │       ├── test_shape_op_iluvatar.py
    │   │       ├── test_sign_op_iluvatar.py
    │   │       ├── test_softmax_op_iluvatar.py
    │   │       ├── test_squared_l2_norm_op_iluvatar.py
    │   │       ├── test_squeeze2_op_iluvatar.py
    │   │       ├── test_stack_op_iluvatar.py
    │   │       ├── test_strided_slice_op_iluvatar.py
    │   │       ├── test_subtract_op_iluvatar.py
    │   │       ├── test_sum_op_iluvatar.py
    │   │       ├── test_tile_op_iluvatar.py
    │   │       ├── test_top_k_op_iluvatar.py
    │   │       ├── test_transpose_op_iluvatar.py
    │   │       └── test_zeros_like_op_iluvatar.py
    ├── intel_gpu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── cmake
    │   │   ├── dummy.c.in
    │   │   ├── external
    │   │   │   ├── gflags.cmake
    │   │   │   ├── glog.cmake
    │   │   │   ├── gtest.cmake
    │   │   │   ├── onednn.cmake
    │   │   │   ├── onednn_gpu.cmake
    │   │   │   └── pybind11.cmake
    │   │   ├── generic.cmake
    │   │   ├── paddle.cmake
    │   │   └── third_party.cmake
    │   ├── kernels
    │   │   ├── argsort_kernel.cc
    │   │   ├── assign_value_kernel.cc
    │   │   ├── cast_kernel.cc
    │   │   ├── compare_kernel.cc
    │   │   ├── dnn_support.hpp
    │   │   ├── elementwise_kernel.cc
    │   │   ├── full_kernel.cc
    │   │   ├── kernels.h
    │   │   ├── mean_kernel.cc
    │   │   ├── memcpy_kernel.cc
    │   │   ├── phi_funcs.h
    │   │   ├── reduce_kernel.cc
    │   │   ├── reshape_kernel.cc
    │   │   ├── slice_kernel.cc
    │   │   ├── softmax_kernel.cc
    │   │   ├── transpose_kernel.cc
    │   │   └── uniform_random_kernel.cc
    │   ├── load.sh
    │   ├── runtime
    │   │   └── runtime.cc
    │   ├── setup.py.in
    │   └── tests
    │   │   ├── CMakeLists.txt
    │   │   ├── test_MNIST_model.py
    │   │   ├── test_mean_op.py
    │   │   └── unittests
    │   │       ├── CMakeLists.txt
    │   │       ├── test_argsort_op.py
    │   │       ├── test_assign_value_op.py
    │   │       ├── test_cast_op.py
    │   │       ├── test_compare_op.py
    │   │       ├── test_elementwise_mul_op.py
    │   │       ├── test_fill_constant_op.py
    │   │       ├── test_mean_op.py
    │   │       ├── test_memcpy_op.py
    │   │       ├── test_reduce_op.py
    │   │       ├── test_reshape_op.py
    │   │       ├── test_slice_op.py
    │   │       ├── test_softmax_op.py
    │   │       ├── test_transpose_op.py
    │   │       └── test_uniform_random_op.py
    ├── intel_hpu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── cmake
    │   │   ├── dummy.c.in
    │   │   ├── external
    │   │   │   ├── custom_tpc_lib.cmake
    │   │   │   ├── gflags.cmake
    │   │   │   ├── glog.cmake
    │   │   │   ├── gtest.cmake
    │   │   │   ├── onednn.cmake
    │   │   │   ├── pybind11.cmake
    │   │   │   └── synapse.cmake
    │   │   ├── generic.cmake
    │   │   ├── paddle.cmake
    │   │   ├── third_party.cmake
    │   │   └── version.cmake
    │   ├── custom_ops
    │   │   ├── llama_infer
    │   │   │   ├── block_attn_pp_kernels.cc
    │   │   │   ├── fused_block_attention.cc
    │   │   │   ├── fused_flatpa_proj.cc
    │   │   │   ├── fused_fp8_gemm.cc
    │   │   │   ├── fused_fp8_rms_qkv_rope_t.cc
    │   │   │   ├── fused_fp8_sdpa.cc
    │   │   │   ├── fused_get_rotary_embedding.cc
    │   │   │   ├── fused_mlp.cc
    │   │   │   ├── fused_rms_mlp.cc
    │   │   │   ├── fused_rms_mlp_add.cc
    │   │   │   ├── fused_rms_qkv_rope.cc
    │   │   │   ├── fused_rms_qkv_rope_t.cc
    │   │   │   ├── fused_rms_qkv_rope_v2.cc
    │   │   │   ├── fused_rms_qkv_rope_v3.cc
    │   │   │   ├── fused_sdpa_proj.cc
    │   │   │   ├── fused_sdpa_proj_dec.cc
    │   │   │   ├── fused_sdpa_proj_t.cc
    │   │   │   ├── fused_sdpa_proj_v2.cc
    │   │   │   ├── get_output.cc
    │   │   │   ├── get_padding_offset.cc
    │   │   │   ├── pp_kernels.cc
    │   │   │   ├── prepare_block_metadata.cc
    │   │   │   ├── recover_block.cc
    │   │   │   ├── save_with_output_msg.cc
    │   │   │   ├── speculate_get_output.cc
    │   │   │   ├── step.cc
    │   │   │   └── update_inputs_v2.cc
    │   │   ├── python
    │   │   │   └── paddlenlp_ops
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── layers.py
    │   │   │   │   └── llama_block_atten.py
    │   │   ├── setup.py
    │   │   ├── src
    │   │   │   ├── fake_gpu_kernels.cc
    │   │   │   ├── fused_moe.cc
    │   │   │   ├── index_copy.cc
    │   │   │   └── index_reduce.cc
    │   │   └── tests
    │   │   │   ├── test_flatPA_proj.py
    │   │   │   ├── test_fused_block_attention.py
    │   │   │   ├── test_fused_fp8_rms_qkv_rope_t.py
    │   │   │   ├── test_fused_mlp.py
    │   │   │   ├── test_fused_rms_mlp.py
    │   │   │   ├── test_index_copy.py
    │   │   │   ├── test_inplace_cumsum.py
    │   │   │   ├── test_mixture_of_experts.py
    │   │   │   ├── test_rms_qkv_rope.py
    │   │   │   ├── test_sdpa_proj.py
    │   │   │   └── test_sdpa_proj_v2.py
    │   ├── kernels
    │   │   ├── all_kernel.cc
    │   │   ├── any_kernel.cc
    │   │   ├── arange_kernel.cc
    │   │   ├── arg_min_max_kernel.cc
    │   │   ├── assign_kernel.cc
    │   │   ├── bitwise_kernel.cc
    │   │   ├── c_identity_kernel.cc
    │   │   ├── cast_kernel.cc
    │   │   ├── compare_kernel.cc
    │   │   ├── concat_kernel.cc
    │   │   ├── contiguous.cc
    │   │   ├── cum_kernel.cc
    │   │   ├── einsum_kernel.cc
    │   │   ├── elementwise_kernel.cc
    │   │   ├── expand_kernel.cc
    │   │   ├── full_kernel.cc
    │   │   ├── funcs.h
    │   │   ├── gather_kernel.cc
    │   │   ├── gather_nd_kernel.cc
    │   │   ├── gaussian_kernel.cc
    │   │   ├── hpu_funcs.h
    │   │   ├── hpu_operator.cc
    │   │   ├── hpu_operator.h
    │   │   ├── index_put_kernel.cc
    │   │   ├── index_sample_kernel.cc
    │   │   ├── index_select_kernel.cc
    │   │   ├── logical_kernel.cc
    │   │   ├── matmul_kernel.cc
    │   │   ├── memcpy_kernel.cc
    │   │   ├── multiply_kernel.cc
    │   │   ├── one_hot_kernel.cc
    │   │   ├── phi_funcs.h
    │   │   ├── reduce_kernel.cc
    │   │   ├── rms_norm_kernel.cc
    │   │   ├── rope_kernel.cc
    │   │   ├── scale_kernel.cc
    │   │   ├── scatter_kernel.cc
    │   │   ├── sdpa_kernel.cc
    │   │   ├── set_value_kernel.cc
    │   │   ├── slice_kernel.cc
    │   │   ├── softmax_kernel.cc
    │   │   ├── split_kernel.cc
    │   │   ├── squeeze_kernel.cc
    │   │   ├── swiglu_kernel.cc
    │   │   ├── tile_kernel.cc
    │   │   ├── top_k.cc
    │   │   ├── top_p.cc
    │   │   ├── top_p_hpu.cc
    │   │   ├── transpose_kernel.cc
    │   │   ├── tril_triu_kernel.cc
    │   │   ├── unary_kernel.cc
    │   │   ├── uniform_kernel.cc
    │   │   └── where_kernel.cc
    │   ├── runtime
    │   │   ├── flags.h
    │   │   ├── runtime.cc
    │   │   └── runtime.h
    │   ├── setup.py.in
    │   ├── tests
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── benchmark_paddlepaddle_cli.sh
    │   │   ├── ccl
    │   │   │   ├── all2all.py
    │   │   │   ├── allgather.py
    │   │   │   ├── allreduce.py
    │   │   │   └── readme.md
    │   │   ├── config.py
    │   │   ├── inference_hpu.py
    │   │   ├── junitxml.py
    │   │   ├── pr-test-run.py
    │   │   ├── run_distributed_generation.sh
    │   │   ├── run_generation.py
    │   │   ├── test_MNIST_model.py
    │   │   ├── test_addmul_model.py
    │   │   ├── test_hpu_memory_stat.py
    │   │   ├── test_index_reduce.py
    │   │   ├── test_kvcache.py
    │   │   ├── test_llama.py
    │   │   ├── test_llama_2x.sh
    │   │   ├── test_profiler.py
    │   │   ├── test_softmax_model.py
    │   │   ├── test_vllm_kvcache.py
    │   │   └── unittests
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── api_base.py
    │   │   │   ├── test_abs_op.py
    │   │   │   ├── test_abs_op_eager.py
    │   │   │   ├── test_activation_op.py
    │   │   │   ├── test_arg_max_op.py
    │   │   │   ├── test_block_attn_pp_stages.py
    │   │   │   ├── test_cast.py
    │   │   │   ├── test_concat.py
    │   │   │   ├── test_contiguous.py
    │   │   │   ├── test_cumsum_op.py
    │   │   │   ├── test_elementwise_add_op.py
    │   │   │   ├── test_elementwise_div_op.py
    │   │   │   ├── test_elementwise_max_op.py
    │   │   │   ├── test_elementwise_min_op.py
    │   │   │   ├── test_elementwise_mod_op.py
    │   │   │   ├── test_elementwise_mul_op.py
    │   │   │   ├── test_elementwise_pow_op.py
    │   │   │   ├── test_elementwise_sub_op.py
    │   │   │   ├── test_expand.py
    │   │   │   ├── test_floor.py
    │   │   │   ├── test_full_like_op.py
    │   │   │   ├── test_full_op.py
    │   │   │   ├── test_fused_fp8_gemm.py
    │   │   │   ├── test_fused_fp8_sdpa.py
    │   │   │   ├── test_fused_mlp.py
    │   │   │   ├── test_fused_rms_mlp.py
    │   │   │   ├── test_fused_rms_qkv_rope.py
    │   │   │   ├── test_fused_sdpa_proj.py
    │   │   │   ├── test_gather.py
    │   │   │   ├── test_gather_nd.py
    │   │   │   ├── test_get_token_multiscores.py
    │   │   │   ├── test_index_copy.py
    │   │   │   ├── test_index_put_op.py
    │   │   │   ├── test_index_sample.py
    │   │   │   ├── test_index_select.py
    │   │   │   ├── test_logical_op.py
    │   │   │   ├── test_one_hot.py
    │   │   │   ├── test_pow.py
    │   │   │   ├── test_reduce_all.py
    │   │   │   ├── test_reduce_any.py
    │   │   │   ├── test_reduce_max.py
    │   │   │   ├── test_reduce_mean.py
    │   │   │   ├── test_reduce_min.py
    │   │   │   ├── test_reduce_prod.py
    │   │   │   ├── test_reduce_sum.py
    │   │   │   ├── test_relu.py
    │   │   │   ├── test_rms_norm.py
    │   │   │   ├── test_scale.py
    │   │   │   ├── test_scatter_hpu.py
    │   │   │   ├── test_set_value_afbi.py
    │   │   │   ├── test_set_value_flags.py
    │   │   │   ├── test_sigmoid.py
    │   │   │   ├── test_silu.py
    │   │   │   ├── test_sin.py
    │   │   │   ├── test_slice.py
    │   │   │   ├── test_softmax.py
    │   │   │   ├── test_split.py
    │   │   │   ├── test_sqrt.py
    │   │   │   ├── test_squeeze.py
    │   │   │   ├── test_swiglu_op.py
    │   │   │   ├── test_tanh.py
    │   │   │   ├── test_tile.py
    │   │   │   ├── test_top_k.py
    │   │   │   ├── test_top_p_sampling.py
    │   │   │   ├── test_transpose_op_eager.py
    │   │   │   ├── test_tril_triu.py
    │   │   │   ├── test_uniform.py
    │   │   │   ├── test_update_inputs_v2.py
    │   │   │   ├── test_where_op.py
    │   │   │   └── util.py
    │   ├── tools
    │   │   ├── pr_hpu_ci.sh
    │   │   └── testresult_analyse.py
    │   └── utils
    │   │   ├── hlml_shm.h
    │   │   ├── hpu_tracer.cc
    │   │   ├── hpu_tracer.h
    │   │   ├── hpu_utils.h
    │   │   ├── mem_hlml.cc
    │   │   ├── mem_hlml.h
    │   │   ├── utils.cc
    │   │   └── utils.h
    ├── metax_gpu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── build.sh
    │   ├── cmake
    │   │   ├── cblas.cmake
    │   │   ├── dummy.c.in
    │   │   ├── external
    │   │   │   └── mklml.cmake
    │   │   ├── generic.cmake
    │   │   ├── paddle.cmake
    │   │   ├── third_party.cmake
    │   │   └── version.cmake
    │   ├── kernels
    │   │   ├── c_softmax_with_cross_entropy_kernel.cu
    │   │   ├── cross_entropy_grad_kernel_register.cu
    │   │   ├── cross_entropy_kernel_register.cu
    │   │   ├── cuda_kernels
    │   │   │   ├── abs_grad_kernel_register.cu
    │   │   │   ├── abs_kernel_register.cu
    │   │   │   ├── accuracy_check_kernel_register.cu
    │   │   │   ├── accuracy_kernel_register.cu
    │   │   │   ├── activation_grad_kernel_register.cu
    │   │   │   ├── activation_kernel_register.cu
    │   │   │   ├── adadelta_kernel_register.cu
    │   │   │   ├── adagrad_kernel_register.cu
    │   │   │   ├── adam_kernel_register.cu
    │   │   │   ├── adamax_kernel_register.cu
    │   │   │   ├── adamw_kernel_register.cu
    │   │   │   ├── adamw_kernel_row_register.cu
    │   │   │   ├── add_n_kernel_register.cu
    │   │   │   ├── all_gather_kernel_register.cu
    │   │   │   ├── all_reduce_kernel_register.cu
    │   │   │   ├── all_to_all_kernel_register.cu
    │   │   │   ├── allclose_kernel_register.cu
    │   │   │   ├── amp_kernel_register.cu
    │   │   │   ├── angle_grad_kernel_register.cu
    │   │   │   ├── angle_kernel_register.cu
    │   │   │   ├── apply_per_channel_scale_kernel_register.cu
    │   │   │   ├── arange_kernel_register.cu
    │   │   │   ├── arg_min_max_kernel_register.cu
    │   │   │   ├── array_kernel_register.cu
    │   │   │   ├── as_complex_kernel_register.cu
    │   │   │   ├── as_real_kernel_register.cu
    │   │   │   ├── asgd_kernel_register.cu
    │   │   │   ├── assign_kernel_register.cu
    │   │   │   ├── assign_pos_kernel_register.cu
    │   │   │   ├── bincount_kernel_register.cu
    │   │   │   ├── bitwise_kernel_register.cu
    │   │   │   ├── c_embedding_grad_kernel_register.cu
    │   │   │   ├── c_embedding_kernel_register.cu
    │   │   │   ├── c_identity_kernel_register.cu
    │   │   │   ├── c_softmax_with_cross_entropy_grad_kernel_register.cu
    │   │   │   ├── cast_kernel_register.cu
    │   │   │   ├── clip_grad_kernel_register.cu
    │   │   │   ├── clip_kernel_register.cu
    │   │   │   ├── compare_kernel_legacy_register.cu
    │   │   │   ├── compare_kernel_register.cu
    │   │   │   ├── concat_grad_kernel_register.cu
    │   │   │   ├── concat_kernel_register.cu
    │   │   │   ├── contiguous_kernel_register.cu
    │   │   │   ├── cum_kernel_register.cu
    │   │   │   ├── einsum_grad_kernel_register.cu
    │   │   │   ├── einsum_kernel_register.cu
    │   │   │   ├── elementwise_grad_kernel_register.cu
    │   │   │   ├── elementwise_kernel_register.cu
    │   │   │   ├── elementwise_legacy_kernel.cu
    │   │   │   ├── embedding_grad_kernel_register.cu
    │   │   │   ├── embedding_kernel_register.cu
    │   │   │   ├── embedding_with_scaled_gradient_grad_kernel_register.cu
    │   │   │   ├── empty_kernel_register.cu
    │   │   │   ├── expand_as_grad_kernel_register.cu
    │   │   │   ├── expand_as_kernel_register.cu
    │   │   │   ├── expand_grad_kernel_register.cu
    │   │   │   ├── expand_kernel_register.cu
    │   │   │   ├── fill_diagonal_grad_kernel_register.cu
    │   │   │   ├── fill_diagonal_kernel_register.cu
    │   │   │   ├── fill_diagonal_tensor_grad_kernel_register.cu
    │   │   │   ├── fill_diagonal_tensor_kernel_register.cu
    │   │   │   ├── fill_grad_kernel_register.cu
    │   │   │   ├── fill_kernel_register.cu
    │   │   │   ├── flatten_grad_kernel_register.cu
    │   │   │   ├── flatten_kernel_register.cu
    │   │   │   ├── full_kernel_register.cu
    │   │   │   ├── gather_nd_grad_kernel_register.cu
    │   │   │   ├── gather_nd_kernel_register.cu
    │   │   │   ├── gaussian_kernel_register.cu
    │   │   │   ├── gelu_grad_kernel_register.cu
    │   │   │   ├── gelu_kernel_register.cu
    │   │   │   ├── index_add_kernel_register.cu
    │   │   │   ├── index_put_grad_kernel_register.cu
    │   │   │   ├── index_put_kernel_register.cu
    │   │   │   ├── logical_kernel_register.cu
    │   │   │   ├── logsumexp_kernel_register.cu
    │   │   │   ├── matmul_grad_kernel_register.cu
    │   │   │   ├── matmul_kernel_register.cu
    │   │   │   ├── mean_all_grad_kernel_register.cu
    │   │   │   ├── mean_all_kernel_register.cu
    │   │   │   ├── multiplex_grad_kernel_register.cu
    │   │   │   ├── multiplex_kernel_register.cu
    │   │   │   ├── nonzero_kernel_register.cu
    │   │   │   ├── numel_kernel_register.cu
    │   │   │   ├── one_hot_kernel_register.cu
    │   │   │   ├── p_norm_grad_kernel_register.cu
    │   │   │   ├── p_norm_kernel_register.cu
    │   │   │   ├── pad_kernel_register.cu
    │   │   │   ├── pow2_decay_with_linear_warmup_kernel_register.cu
    │   │   │   ├── put_along_axis_kernel_register.cu
    │   │   │   ├── randint_kernel_register.cu
    │   │   │   ├── reduce_all_kernel_register.cc
    │   │   │   ├── reduce_any_kernel_register.cc
    │   │   │   ├── reduce_kernel_kps_register.cu
    │   │   │   ├── reduce_kernel_register.cu
    │   │   │   ├── reduce_max_kernel_register.cu
    │   │   │   ├── reduce_mean_kernel_register.cu
    │   │   │   ├── reduce_sum_kernel_register.cu
    │   │   │   ├── reshape_grad_kernel_register.cu
    │   │   │   ├── reshape_kernel_register.cu
    │   │   │   ├── scale_kernel_register.cu
    │   │   │   ├── scatter_nd_add_kernel_register.cu
    │   │   │   ├── set_value_kernel_register.cu
    │   │   │   ├── shape_kernel_register.cu
    │   │   │   ├── sigmoid_cross_entropy_with_logits_grad_kernel_register.cu
    │   │   │   ├── sign_kernel_register.cu
    │   │   │   ├── slice_grad_kernel_register.cu.cc
    │   │   │   ├── slice_kernel_register.cu.cc
    │   │   │   ├── soft_relu_kernel_register.cu
    │   │   │   ├── split_kernel_register.cu
    │   │   │   ├── squared_l2_norm_grad_kernel_register.cu
    │   │   │   ├── squared_l2_norm_kernel_register.cu
    │   │   │   ├── stack_kernel_register.cu
    │   │   │   ├── strided_copy_kernel_register.cu
    │   │   │   ├── strided_slice_grad_kernel_register.cu
    │   │   │   ├── strided_slice_kernel_register.cu
    │   │   │   ├── swiglu_grad_kernel_register.cu
    │   │   │   ├── swiglu_kernel_register.cu
    │   │   │   ├── take_along_axis_grad_kernel_register.cu
    │   │   │   ├── take_along_axis_kernel_register.cu
    │   │   │   ├── tile_kernel_register.cu
    │   │   │   ├── top_k_grad_kernel_register.cu
    │   │   │   ├── top_k_kernel_register.cu
    │   │   │   ├── transpose_grad_kernel_register.cu
    │   │   │   ├── transpose_kernel_register.cu
    │   │   │   ├── tril_indices_kernel_register.cu
    │   │   │   ├── tril_triu_grad_kernel_register.cu
    │   │   │   ├── tril_triu_kernel_register.cu
    │   │   │   ├── triu_indices_kernel_register.cu
    │   │   │   ├── unbind_kernel_register.cu
    │   │   │   ├── uniform_inplace_grad_kernel_register.cu
    │   │   │   ├── uniform_inplace_kernel_register.cu
    │   │   │   ├── uniform_kernel_register.cu
    │   │   │   ├── uniform_random_batch_size_like_kernel_register.cu
    │   │   │   ├── unsqueeze_grad_kernel_register.cu
    │   │   │   ├── unsqueeze_kernel_register.cu
    │   │   │   ├── where_grad_kernel_register.cu
    │   │   │   └── where_kernel_register.cu
    │   │   ├── dynload
    │   │   │   ├── cupti_lib_path.h
    │   │   │   ├── dynamic_loader.cc
    │   │   │   └── dynamic_loader.h
    │   │   ├── ernie_core
    │   │   │   ├── fused_bias_act_kernel_register.cu
    │   │   │   ├── rms_norm_kernel_register.cu
    │   │   │   └── top_p_sampling_kernel_register.cu
    │   │   ├── flags_declare.cu
    │   │   ├── funcs
    │   │   │   ├── blas
    │   │   │   │   ├── blas.cc
    │   │   │   │   ├── blas.h
    │   │   │   │   ├── blas_impl.cu.h
    │   │   │   │   ├── blas_impl.h
    │   │   │   │   ├── blaslt_gemm_search.h
    │   │   │   │   ├── blaslt_impl.cu.h
    │   │   │   │   ├── cublas.cc
    │   │   │   │   ├── cublas.h
    │   │   │   │   ├── cublaslt.h
    │   │   │   │   ├── port.cc
    │   │   │   │   └── port.h
    │   │   │   ├── layer_norm_util.h
    │   │   │   └── quant_dequant.h
    │   │   ├── gpudnn
    │   │   │   ├── mxdnn_helper.h
    │   │   │   └── softmax_gpudnn.h
    │   │   ├── impl
    │   │   │   ├── addmm_kernel_impl.h
    │   │   │   ├── matmul_grad_kernel_impl.h
    │   │   │   ├── matmul_kernel_impl.h
    │   │   │   └── matmul_kernel_impl_maca.h
    │   │   └── layer_norm_kernel_register.cu
    │   ├── patch
    │   │   ├── mcEigen_3.4.0_paddle_final.zip
    │   │   └── paddle.patch
    │   ├── runtime
    │   │   └── runtime.cc
    │   ├── setup.py.in
    │   └── tests
    │   │   ├── CMakeLists.txt
    │   │   ├── error_pytest
    │   │       ├── test_activation_op.py
    │   │       ├── test_bincount_op_metax.py
    │   │       ├── test_bitwise_op_metax.py
    │   │       ├── test_c_embedding_op_metax.py
    │   │       ├── test_cumsum_op_metax.py
    │   │       ├── test_einsum_op.py
    │   │       ├── test_elementwise_add_op_metax.py
    │   │       ├── test_fill_constant_op_metax.py
    │   │       ├── test_flatten_contiguous_range_op_metax.py
    │   │       ├── test_index_put_op.py
    │   │       ├── test_logical_op_metax.py
    │   │       ├── test_maximum_op_metax.py
    │   │       ├── test_p_norm_op_metax.py
    │   │       ├── test_rms_norm_op_meatx.py
    │   │       ├── test_split_op_metax.py
    │   │       ├── test_stack_op.py
    │   │       └── test_sum_op_metax.py
    │   │   ├── run_test.sh
    │   │   └── unittest
    │   │       ├── test_abs_metax.py
    │   │       ├── test_adamw_op_metax.py
    │   │       ├── test_arange_metax.py
    │   │       ├── test_cast_op_metax.py
    │   │       ├── test_clip_op_metax.py
    │   │       ├── test_compare_op_metax.py
    │   │       ├── test_concat_op_metax.py
    │   │       ├── test_elementwise_floordiv_op_metax.py
    │   │       ├── test_elementwise_mul_op_metax.py
    │   │       ├── test_elementwise_pow_op_metax.py
    │   │       ├── test_empty_op_metax.py
    │   │       ├── test_fill_any_op_metax.py
    │   │       ├── test_full_like_op_metax.py
    │   │       ├── test_fused_bias_act_op.py
    │   │       ├── test_gather_nd_op_metax.py
    │   │       ├── test_index_add_op_metax.py
    │   │       ├── test_one_hot_v2_op.py
    │   │       ├── test_pad_op_metax.py
    │   │       ├── test_reduce_op_metax.py
    │   │       ├── test_reshape_op_metax.py
    │   │       ├── test_sign_op_metax.py
    │   │       ├── test_subtract_op_metax.py
    │   │       ├── test_top_k_v2_op_metax.py
    │   │       ├── test_top_p_sampling.py
    │   │       └── test_uniform_random_op_metax.py
    ├── mlu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── cmake
    │   │   ├── dummy.c.in
    │   │   ├── external
    │   │   │   ├── concurrentqueue.cmake
    │   │   │   ├── gflags.cmake
    │   │   │   ├── glog.cmake
    │   │   │   ├── gtest.cmake
    │   │   │   ├── neuware.cmake
    │   │   │   ├── onednn.cmake
    │   │   │   └── pybind11.cmake
    │   │   ├── generic.cmake
    │   │   ├── paddle.cmake
    │   │   ├── third_party.cmake
    │   │   └── version.cmake
    │   ├── kernels
    │   │   ├── abs_kernel.cc
    │   │   ├── accuracy_kernel.cc
    │   │   ├── activation_kernel.cc
    │   │   ├── adam_kernel.cc
    │   │   ├── add_n_kernel.cc
    │   │   ├── arange_kernel.cc
    │   │   ├── arg_max_kernel.cc
    │   │   ├── argsort_kernel.cc
    │   │   ├── assign_kernel.cc
    │   │   ├── batch_norm_kernel.cc
    │   │   ├── bce_loss_kernel.cc
    │   │   ├── bitwise_kernel.cc
    │   │   ├── cast_kernel.cc
    │   │   ├── check_finite_and_unscale_kernel.cc
    │   │   ├── clip_kernel.cc
    │   │   ├── coalesce_tensor_kernel.cc
    │   │   ├── compare_kernel.cc
    │   │   ├── concat_kernel.cc
    │   │   ├── contiguous_kernel.cc
    │   │   ├── conv_kernel.cc
    │   │   ├── conv_transpose_kernel.cc
    │   │   ├── cross_entropy_kernel.cc
    │   │   ├── cumsum_kernel.cc
    │   │   ├── data_kernel.cc
    │   │   ├── deformable_conv_kernel.cc
    │   │   ├── dropout_kernel.cc
    │   │   ├── elementwise_add_kernel.cc
    │   │   ├── elementwise_div_kernel.cc
    │   │   ├── elementwise_max_kernel.cc
    │   │   ├── elementwise_min_kernel.cc
    │   │   ├── elementwise_mul_kernel.cc
    │   │   ├── elementwise_pow_kernel.cc
    │   │   ├── elementwise_sub_kernel.cc
    │   │   ├── expand_as_kernel.cc
    │   │   ├── expand_kernel.cc
    │   │   ├── fill_kernel.cc
    │   │   ├── flash_attn_kernel.cc
    │   │   ├── flip_kernel.cc
    │   │   ├── full_kernel.cc
    │   │   ├── funcs
    │   │   │   ├── conv_utils.h
    │   │   │   ├── elementwise_utils.h
    │   │   │   ├── logic_op.h
    │   │   │   ├── mlu_baseop.cc
    │   │   │   ├── mlu_baseop.h
    │   │   │   ├── mlu_funcs.h
    │   │   │   ├── range_op.h
    │   │   │   └── reduce_op.h
    │   │   ├── gather_kernel.cc
    │   │   ├── gather_nd_kernel.cc
    │   │   ├── gaussian_kernel.cc
    │   │   ├── generate_proposals_kernel.cc
    │   │   ├── grid_sample_kernel.cc
    │   │   ├── huber_loss_kernel.cc
    │   │   ├── index_sample_kernel.cc
    │   │   ├── index_select_kernel.cc
    │   │   ├── interpolate_kernel.cc
    │   │   ├── kldiv_loss_kernel.cc
    │   │   ├── label_smooth_kernel.cc
    │   │   ├── layer_norm_kernel.cc
    │   │   ├── log_softmax_kernel.cc
    │   │   ├── logical_kernel.cc
    │   │   ├── lookup_table_v2_op_kernel.cc
    │   │   ├── masked_select_kernel.cc
    │   │   ├── matmul_kernel.cc
    │   │   ├── mean_all_kernel.cc
    │   │   ├── memcpy_kernel.cc
    │   │   ├── meshgrid_kernel.cc
    │   │   ├── momentum_kernel.cc
    │   │   ├── multinomial_kernel.cc
    │   │   ├── nonzero_kernel.cc
    │   │   ├── numel_kernel.cc
    │   │   ├── one_hot_kernel.cc
    │   │   ├── p_norm_kernel.cc
    │   │   ├── pool2d_kernel.cc
    │   │   ├── prior_box_kernel.cc
    │   │   ├── prod_kernel.cc
    │   │   ├── randperm_kernel.cc
    │   │   ├── reduce_all_kernel.cc
    │   │   ├── reduce_any_kernel.cc
    │   │   ├── reduce_max_kernel.cc
    │   │   ├── reduce_mean_kernel.cc
    │   │   ├── reduce_min_kernel.cc
    │   │   ├── reduce_sum_kernel.cc
    │   │   ├── rnn_kernel.cc
    │   │   ├── roi_align_kernel.cc
    │   │   ├── roll_kernel.cc
    │   │   ├── scale_kernel.cc
    │   │   ├── scatter_kernel.cc
    │   │   ├── set_value_kernel.cc
    │   │   ├── sgd_kernel.cc
    │   │   ├── sigmoid_cross_with_logits_kernel.cc
    │   │   ├── slice_kernel.cc
    │   │   ├── softmax_kernel.cc
    │   │   ├── split_kernel.cc
    │   │   ├── squared_l2_norm_kernel.cc
    │   │   ├── squeeze_kernel.cc
    │   │   ├── stack_kernel.cc
    │   │   ├── strided_copy_kernel.cc
    │   │   ├── strided_slice_kernel.cc
    │   │   ├── sync_batch_norm_kernel.cc
    │   │   ├── tile_kernel.cc
    │   │   ├── top_k_kernel.cc
    │   │   ├── transpose_kernel.cc
    │   │   ├── tril_triu_op_mlu.cc
    │   │   ├── truncated_gaussian_random_kernel.cc
    │   │   ├── uniform_kernel.cc
    │   │   ├── unsqueeze_kernel.cc
    │   │   ├── unstack_kernel.cc
    │   │   ├── where_kernel.cc
    │   │   └── yolo_box_kernel.cc
    │   ├── plugin_ops
    │   │   ├── fused_rms_norm_mlu.cc
    │   │   ├── fused_rope_mlu.cc
    │   │   └── my_add_n_op.cc
    │   ├── runtime
    │   │   ├── CNRTEvent.h
    │   │   ├── flags.h
    │   │   ├── process_cnpapi_data.cc
    │   │   ├── process_cnpapi_data.h
    │   │   ├── runtime.cc
    │   │   ├── runtime.h
    │   │   └── thread_data_registry.h
    │   ├── setup.py.in
    │   ├── tests
    │   │   ├── CMakeLists.txt
    │   │   ├── test_LeNet_MNIST.py
    │   │   ├── test_MNIST_model.py
    │   │   └── unittests
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── dygraph_group_sharded_stage2.py
    │   │   │   ├── dygraph_group_sharded_stage2_comm_overlap.py
    │   │   │   ├── dygraph_group_sharded_stage2_offload.py
    │   │   │   ├── dygraph_group_sharded_stage3.py
    │   │   │   ├── dygraph_group_sharded_stage3_offload.py
    │   │   │   ├── model_parallel
    │   │   │       ├── hybrid_column_parallel_mp_layers.py
    │   │   │       ├── hybrid_cross_entropy_parallel_mp_layers.py
    │   │   │       ├── hybrid_embedding_parallel_mp_layers.py
    │   │   │       ├── hybrid_parallel_pp_amp.py
    │   │   │       ├── hybrid_parallel_pp_clip_grad.py
    │   │   │       ├── hybrid_parallel_pp_embedding.py
    │   │   │       ├── hybrid_parallel_pp_fp16.py
    │   │   │       ├── hybrid_parallel_pp_layer.py
    │   │   │       ├── hybrid_parallel_pp_recompute.py
    │   │   │       ├── hybrid_parallel_pp_save_load.py
    │   │   │       ├── hybrid_parallel_pp_transformer.py
    │   │   │       ├── hybrid_parallel_shared_weight.py
    │   │   │       └── hybrid_row_parallel_mp_layers.py
    │   │   │   ├── parallel_dygraph_sync_batch_norm.py
    │   │   │   ├── process_group_xccl.py
    │   │   │   ├── test_abs_op_mlu.py
    │   │   │   ├── test_accuracy_op_mlu.py
    │   │   │   ├── test_adam_op_mlu.py
    │   │   │   ├── test_adamw_op_mlu.py
    │   │   │   ├── test_arange_op_mlu.py
    │   │   │   ├── test_arg_max_op_mlu.py
    │   │   │   ├── test_argsort_op_mlu.py
    │   │   │   ├── test_assign_op_mlu.py
    │   │   │   ├── test_assign_value_op_mlu.py
    │   │   │   ├── test_atan_op_mlu.py
    │   │   │   ├── test_batch_norm_op_mlu.py
    │   │   │   ├── test_batch_norm_op_mlu_v2.py
    │   │   │   ├── test_bce_loss_mlu.py
    │   │   │   ├── test_bilinear_interp_v2_op_mlu.py
    │   │   │   ├── test_bitwise_op_mlu.py
    │   │   │   ├── test_c_embedding_op_mlu.py
    │   │   │   ├── test_cast_op_mlu.py
    │   │   │   ├── test_check_finite_and_unscale_op_mlu.py
    │   │   │   ├── test_clip_op_mlu.py
    │   │   │   ├── test_coalesce_tensor_op_mlu.py
    │   │   │   ├── test_collective_api.py
    │   │   │   ├── test_compare_op_mlu.py
    │   │   │   ├── test_concat_op_mlu.py
    │   │   │   ├── test_conv2d_op_depthwise_conv_mlu.py
    │   │   │   ├── test_conv2d_op_mlu.py
    │   │   │   ├── test_conv2d_transposed_op_mlu.py
    │   │   │   ├── test_cos_op_mlu.py
    │   │   │   ├── test_cumsum_op_mlu.py
    │   │   │   ├── test_custom_pass_mlu.py
    │   │   │   ├── test_deformable_conv_op_mlu.py
    │   │   │   ├── test_dist_base.py
    │   │   │   ├── test_dropout_op_mlu.py
    │   │   │   ├── test_dygraph_recompute_for_eager.py
    │   │   │   ├── test_dygraph_sharding_stage_2.py
    │   │   │   ├── test_dygraph_sharding_stage_3.py
    │   │   │   ├── test_elementwise_add_op_mlu.py
    │   │   │   ├── test_elementwise_div_op_mlu.py
    │   │   │   ├── test_elementwise_max_op_mlu.py
    │   │   │   ├── test_elementwise_min_op_mlu.py
    │   │   │   ├── test_elementwise_mul_op_mlu.py
    │   │   │   ├── test_elementwise_pow_op_mlu.py
    │   │   │   ├── test_elementwise_sub_op_mlu.py
    │   │   │   ├── test_embedding_op_mlu.py
    │   │   │   ├── test_exp_op_mlu.py
    │   │   │   ├── test_expand_as_v2_op_mlu.py
    │   │   │   ├── test_expand_v2_op_mlu.py
    │   │   │   ├── test_fill_any_like_op_mlu.py
    │   │   │   ├── test_fill_constant_batch_size_like_op_mlu.py
    │   │   │   ├── test_fill_constant_op_mlu.py
    │   │   │   ├── test_flash_attention_op_mlu.py
    │   │   │   ├── test_flip_op_mlu.py
    │   │   │   ├── test_floor_op_mlu.py
    │   │   │   ├── test_full_with_tensor_op_mlu.py
    │   │   │   ├── test_gather_nd_op_mlu.py
    │   │   │   ├── test_gather_op_mlu.py
    │   │   │   ├── test_gaussian_random_op_mlu.py
    │   │   │   ├── test_gelu_op_mlu.py
    │   │   │   ├── test_generate_proposals_v2_op_mlu.py
    │   │   │   ├── test_grid_sample_op_mlu.py
    │   │   │   ├── test_hard_sigmoid_op_mlu.py
    │   │   │   ├── test_hard_swish_op_mlu.py
    │   │   │   ├── test_huber_loss_op_mlu.py
    │   │   │   ├── test_index_sample_op_mlu.py
    │   │   │   ├── test_index_select_op_mlu.py
    │   │   │   ├── test_kldiv_loss_op_mlu.py
    │   │   │   ├── test_label_smooth_op_mlu.py
    │   │   │   ├── test_layer_norm_op_mlu.py
    │   │   │   ├── test_leaky_relu_op_mlu.py
    │   │   │   ├── test_log_op_mlu.py
    │   │   │   ├── test_log_softmax_op_mlu.py
    │   │   │   ├── test_logical_op_mlu.py
    │   │   │   ├── test_lookup_table_v2_op_mlu.py
    │   │   │   ├── test_masked_select_op_mlu.py
    │   │   │   ├── test_matmul_op_mlu.py
    │   │   │   ├── test_mean_op_mlu.py
    │   │   │   ├── test_merged_adam_op_mlu.py
    │   │   │   ├── test_merged_momentum_op_mlu.py
    │   │   │   ├── test_meshgrid_op_mlu.py
    │   │   │   ├── test_momentum_op_mlu.py
    │   │   │   ├── test_multinomial_op_mlu.py
    │   │   │   ├── test_nearest_interp_v2_op_mlu.py
    │   │   │   ├── test_numel_op_mlu.py
    │   │   │   ├── test_one_hot_v2_op_mlu.py
    │   │   │   ├── test_p_norm_op_mlu.py
    │   │   │   ├── test_parallel_dygraph_mp_layers.py
    │   │   │   ├── test_parallel_dygraph_pipeline_parallel.py
    │   │   │   ├── test_pool2d_op_mlu.py
    │   │   │   ├── test_pow_op_mlu.py
    │   │   │   ├── test_prior_box_op_mlu.py
    │   │   │   ├── test_randperm_op_mlu.py
    │   │   │   ├── test_reciprocal_op_mlu.py
    │   │   │   ├── test_reduce_all_op_mlu.py
    │   │   │   ├── test_reduce_any_op_mlu.py
    │   │   │   ├── test_reduce_max_op_mlu.py
    │   │   │   ├── test_reduce_mean_op_mlu.py
    │   │   │   ├── test_reduce_min_op_mlu.py
    │   │   │   ├── test_reduce_prod_op_mlu.py
    │   │   │   ├── test_reduce_sum_op_mlu.py
    │   │   │   ├── test_relu6_op_mlu.py
    │   │   │   ├── test_relu_op_mlu.py
    │   │   │   ├── test_rms_norm_op_mlu.py
    │   │   │   ├── test_rnn_op_mlu.py
    │   │   │   ├── test_roi_align_op_mlu.py
    │   │   │   ├── test_roll_op_mlu.py
    │   │   │   ├── test_rope_op_mlu.py
    │   │   │   ├── test_round_op_mlu.py
    │   │   │   ├── test_rsqrt_op_mlu.py
    │   │   │   ├── test_scale_op_mlu.py
    │   │   │   ├── test_scatter_nd_op_mlu.py
    │   │   │   ├── test_scatter_op_mlu.py
    │   │   │   ├── test_set_value_op_mlu.py
    │   │   │   ├── test_sgd_op_mlu.py
    │   │   │   ├── test_shape_op_mlu.py
    │   │   │   ├── test_sigmoid_cross_entropy_with_logits_op_mlu.py
    │   │   │   ├── test_silu_op_mlu.py
    │   │   │   ├── test_sin_op_mlu.py
    │   │   │   ├── test_slice_op_mlu.py
    │   │   │   ├── test_softmax_op_mlu.py
    │   │   │   ├── test_softmax_with_cross_entropy_op_mlu.py
    │   │   │   ├── test_split_op_mlu.py
    │   │   │   ├── test_sqrt_op_mlu.py
    │   │   │   ├── test_square_op_mlu.py
    │   │   │   ├── test_squared_l2_norm_op_mlu.py
    │   │   │   ├── test_squeeze_op_mlu.py
    │   │   │   ├── test_stack_op_mlu.py
    │   │   │   ├── test_static_print_mlu.py
    │   │   │   ├── test_stride_mlu.py
    │   │   │   ├── test_strided_slice_op_mlu.py
    │   │   │   ├── test_sum_op_mlu.py
    │   │   │   ├── test_swish_op_mlu.py
    │   │   │   ├── test_sync_batch_norm_op_mlu.py
    │   │   │   ├── test_take_along_axis_op_mlu.py
    │   │   │   ├── test_tanh_op_mlu.py
    │   │   │   ├── test_tile_op_mlu.py
    │   │   │   ├── test_top_k_op_mlu.py
    │   │   │   ├── test_transpose_op_mlu.py
    │   │   │   ├── test_tril_triu_op_mlu.py
    │   │   │   ├── test_truncated_gaussian_random_op_mlu.py
    │   │   │   ├── test_uniform_random_op_mlu.py
    │   │   │   ├── test_unsqueeze_op_mlu.py
    │   │   │   ├── test_unstack_op_mlu.py
    │   │   │   ├── test_where_index_op_mlu.py
    │   │   │   ├── test_where_op_mlu.py
    │   │   │   ├── test_yolo_box_op_mlu.py
    │   │   │   └── test_zero_dim_tensor_mlu.py
    │   └── tools
    │   │   ├── compile.sh
    │   │   ├── disable_ut_mlu
    │   │   ├── dockerfile
    │   │       ├── Dockerfile.mlu.kylinv10.gcc82.py310
    │   │       ├── Dockerfile.mlu.ubuntu20.gcc84.py310
    │   │       └── build-image.sh
    │   │   └── pr_ci_mlu.sh
    ├── mps
    │   ├── .clang-format
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── cmake
    │   │   ├── external
    │   │   │   ├── gflags.cmake
    │   │   │   ├── glog.cmake
    │   │   │   └── gtest.cmake
    │   │   ├── paddle.cmake
    │   │   └── third_party.cmake
    │   ├── kernels
    │   │   ├── activation_impl.h
    │   │   ├── activation_impl.mm
    │   │   ├── activation_kernl.cc
    │   │   ├── elementwise_impl.h
    │   │   ├── elementwise_impl.mm
    │   │   ├── elementwise_kernel.cc
    │   │   ├── matmul_impl.h
    │   │   ├── matmul_impl.mm
    │   │   ├── matmul_kernel.cc
    │   │   ├── op_utils.h
    │   │   ├── op_utils.mm
    │   │   ├── phi_funcs.h
    │   │   ├── reshape_kernel.cc
    │   │   ├── softmax_impl.h
    │   │   ├── softmax_impl.mm
    │   │   ├── softmax_kernel.cc
    │   │   ├── squeeze_kernel.cc
    │   │   └── unsqueeze_kernel.cc
    │   ├── runtime
    │   │   ├── mps_device.h
    │   │   ├── mps_device.mm
    │   │   ├── mps_runtime.h
    │   │   ├── mps_stream.h
    │   │   ├── mps_stream.mm
    │   │   └── runtime.cc
    │   ├── setup.py.in
    │   └── tests
    │   │   ├── CMakeLists.txt
    │   │   └── unittests
    │   │       ├── CMakeLists.txt
    │   │       └── test_softmax_op.py
    ├── npu
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── cmake
    │   │   ├── dummy.c.in
    │   │   ├── external
    │   │   │   ├── ascend.cmake
    │   │   │   ├── gflags.cmake
    │   │   │   ├── glog.cmake
    │   │   │   ├── gtest.cmake
    │   │   │   ├── onednn.cmake
    │   │   │   └── pybind11.cmake
    │   │   ├── generic.cmake
    │   │   ├── paddle.cmake
    │   │   ├── third_party.cmake
    │   │   └── version.cmake
    │   ├── custom_op
    │   │   ├── fused_allgather_mm.cc
    │   │   ├── fused_attention_npu.cc
    │   │   ├── fused_mm_allreduce.cc
    │   │   ├── fused_mm_reduce_scatter.cc
    │   │   ├── fused_rms_norm_npu.cc
    │   │   ├── fused_rope_npu.cc
    │   │   ├── llama_infer
    │   │   │   ├── atb_ops
    │   │   │   │   ├── atb_layers
    │   │   │   │   │   ├── fused_blha_layer.cc
    │   │   │   │   │   ├── fused_blha_layer.h
    │   │   │   │   │   ├── fused_lm_head_layer.cc
    │   │   │   │   │   ├── fused_lm_head_layer.h
    │   │   │   │   │   ├── linear.cc
    │   │   │   │   │   ├── linear.h
    │   │   │   │   │   ├── mixed_gate_up_act.cc
    │   │   │   │   │   ├── mixed_gate_up_act.h
    │   │   │   │   │   ├── qkv_split.cc
    │   │   │   │   │   ├── qkv_split.h
    │   │   │   │   │   ├── runner.cc
    │   │   │   │   │   ├── runner.h
    │   │   │   │   │   ├── smooth_quant.cc
    │   │   │   │   │   └── smooth_quant.h
    │   │   │   │   ├── fused_blha_layer_op.cc
    │   │   │   │   ├── fused_blha_layer_op_utils.cc
    │   │   │   │   ├── fused_blha_layer_op_utils.h
    │   │   │   │   ├── fused_lm_head_op.cc
    │   │   │   │   └── remove_padding_op.cc
    │   │   │   ├── dequant_int8.cc
    │   │   │   ├── encode_rotary_qk.cc
    │   │   │   ├── fused_get_rope.cc
    │   │   │   ├── get_output.cc
    │   │   │   ├── get_padding_offset.cc
    │   │   │   ├── get_padding_offset_v2.cc
    │   │   │   ├── qkv_transpose_split.cc
    │   │   │   ├── quant_int8.cc
    │   │   │   ├── rebuild_padding.cc
    │   │   │   ├── rebuild_padding_v2.cc
    │   │   │   ├── save_with_output.cc
    │   │   │   ├── save_with_output_msg.cc
    │   │   │   ├── set_value_by_flags.cc
    │   │   │   ├── set_value_by_flags_v2.cc
    │   │   │   ├── step.cc
    │   │   │   ├── stop_generation_multi_ends.cc
    │   │   │   ├── stop_generation_multi_ends_v2.cc
    │   │   │   ├── token_penalty_multi_scores.cc
    │   │   │   ├── token_penalty_multi_scores_v2.cc
    │   │   │   ├── transpose_removing_padding.cc
    │   │   │   ├── update_inputs.cc
    │   │   │   ├── write_cache_kv.cc
    │   │   │   └── write_int8_cache_kv.cc
    │   │   └── my_add_n_op.cc
    │   ├── kernels
    │   │   ├── abs_kernel.cc
    │   │   ├── accuracy_kernel.cc
    │   │   ├── activation_kernel.cc
    │   │   ├── adadelta_kernel.cc
    │   │   ├── adagrad_kernel.cc
    │   │   ├── adam_kernel.cc
    │   │   ├── add_n_kernel.cc
    │   │   ├── amp
    │   │   │   ├── check_finite_and_unscale_kernel.cc
    │   │   │   └── update_loss_scaling_kernel.cc
    │   │   ├── arange_kernel.cc
    │   │   ├── arg_min_max_kernel.cc
    │   │   ├── argsort_grad_kernel.cc
    │   │   ├── argsort_kernel.cc
    │   │   ├── assign_kernel.cc
    │   │   ├── batch_norm_kernel.cc
    │   │   ├── bce_loss_kernel.cc
    │   │   ├── bitwise_kernel.cc
    │   │   ├── bmm_kernel.cc
    │   │   ├── box_coder_kernel.cc
    │   │   ├── c_identity_kernel.cc
    │   │   ├── cast_kernel.cc
    │   │   ├── clip_by_norm_kernel.cc
    │   │   ├── clip_kernel.cc
    │   │   ├── coalesce_tensor_kernel.cc
    │   │   ├── compare_kernel.cc
    │   │   ├── concat_kernel.cc
    │   │   ├── contiguous_kernel.cc
    │   │   ├── conv2d_kernel.cc
    │   │   ├── conv_kernel.cc
    │   │   ├── conv_transpose_kernel.cc
    │   │   ├── cross_entropy_kernel.cc
    │   │   ├── cum_kernel.cc
    │   │   ├── cumprod_kernel.cc
    │   │   ├── deformable_conv_kernel.cc
    │   │   ├── diag_kernel.cc
    │   │   ├── diagonal_kernel.cc
    │   │   ├── dropout_kernel.cc
    │   │   ├── einsum_kernel.cc
    │   │   ├── einsum_kernel.h
    │   │   ├── elementwise_add_kernel.cc
    │   │   ├── elementwise_div_kernel.cc
    │   │   ├── elementwise_floordiv_kernel.cc
    │   │   ├── elementwise_max_kernel.cc
    │   │   ├── elementwise_min_kernel.cc
    │   │   ├── elementwise_mod_kernel.cc
    │   │   ├── elementwise_mul_kernel.cc
    │   │   ├── elementwise_pow_kernel.cc
    │   │   ├── elementwise_sub_kernel.cc
    │   │   ├── expand_as_kernel.cc
    │   │   ├── expand_kernel.cc
    │   │   ├── eye_kernel.cc
    │   │   ├── fill_diagonal_kernel.cc
    │   │   ├── fill_diagonal_tensor_kernel.cc
    │   │   ├── fill_kernel.cc
    │   │   ├── flip_kernel.cc
    │   │   ├── full_kernel.cc
    │   │   ├── funcs
    │   │   │   ├── conv_util.h
    │   │   │   ├── format_utils.cc
    │   │   │   ├── format_utils.h
    │   │   │   ├── npu_enforce.h
    │   │   │   ├── npu_funcs.h
    │   │   │   ├── npu_op_prepare.h
    │   │   │   ├── npu_op_runner.cc
    │   │   │   ├── npu_op_runner.h
    │   │   │   ├── slice_utils.h
    │   │   │   ├── string_helper.cc
    │   │   │   └── string_helper.h
    │   │   ├── fused_gemm_epilogue_grad_kernel.cc
    │   │   ├── fused_gemm_epilogue_kernel.cc
    │   │   ├── fused_linear_param_grad_add_kernel.cc
    │   │   ├── fusion
    │   │   │   ├── blha_get_max_len.cc
    │   │   │   ├── block_multihead_attention_kernel.cc
    │   │   │   ├── fused_bias_act_kernel.cc
    │   │   │   ├── fused_bias_residual_layernorm_kernel.cc
    │   │   │   ├── masked_multihead_attention_kernel.cc
    │   │   │   ├── rms_norm_kernel.cc
    │   │   │   ├── topp_sampling_kernel.cc
    │   │   │   └── variable_length_memory_efficient_attention_kernel.cc
    │   │   ├── gather_kernel.cc
    │   │   ├── gather_nd_kernel.cc
    │   │   ├── gaussian_kernel.cc
    │   │   ├── grid_sample_kernel.cc
    │   │   ├── group_norm_kernel.cc
    │   │   ├── histogram_kernel.cc
    │   │   ├── huber_loss_kernel.cc
    │   │   ├── increment_kernel.cc
    │   │   ├── index_put_kernel.cc
    │   │   ├── index_sample_kernel.cc
    │   │   ├── index_select_kernel.cc
    │   │   ├── interpolate_kernel.cc
    │   │   ├── inverse_kernel.cc
    │   │   ├── is_empty_kernel.cc
    │   │   ├── isfinite_kernel.cc
    │   │   ├── kldiv_loss_kernel.cc
    │   │   ├── label_smooth_kernel.cc
    │   │   ├── layer_norm_kernel.cc
    │   │   ├── linspace_kernel.cc
    │   │   ├── log_loss_kernel.cc
    │   │   ├── log_softmax_kernel.cc
    │   │   ├── logical_kernel.cc
    │   │   ├── lookup_table_v2_op_npu_kernel.cc
    │   │   ├── masked_select_kernel.cc
    │   │   ├── matmul_kernel.cc
    │   │   ├── mean_all_kernel.cc
    │   │   ├── memcpy_kernel.cc
    │   │   ├── merged_momentum.cc
    │   │   ├── meshgrid_kernel.cc
    │   │   ├── momentum_kernel.cc
    │   │   ├── multinomial_kernel.cc
    │   │   ├── nll_loss_kernel.cc
    │   │   ├── nonzero_kernel.cc
    │   │   ├── norm_kernel.cc
    │   │   ├── npu_identity_kernel.cc
    │   │   ├── one_hot_kernel.cc
    │   │   ├── p_norm_kernel.cc
    │   │   ├── pad3d_kernel.cc
    │   │   ├── pad_kernel.cc
    │   │   ├── pool2d_kernel.cc
    │   │   ├── prelu_kernel.cc
    │   │   ├── prior_box_kernel.cc
    │   │   ├── randint_kernel.cc
    │   │   ├── randperm_kernel.cc
    │   │   ├── reduce_all_kernel.cc
    │   │   ├── reduce_any_kernel.cc
    │   │   ├── reduce_max_kernel.cc
    │   │   ├── reduce_mean_kernel.cc
    │   │   ├── reduce_min_kernel.cc
    │   │   ├── reduce_prod_kernel.cc
    │   │   ├── reduce_sum_kernel.cc
    │   │   ├── rmsprop_kernel.cc
    │   │   ├── rnn_kernel.cc
    │   │   ├── roi_align_kernel.cc
    │   │   ├── roll_kernel.cc
    │   │   ├── scale_kernel.cc
    │   │   ├── scatter_kernel.cc
    │   │   ├── scatter_nd_add_kernel.cc
    │   │   ├── sequence_mask.cc
    │   │   ├── set_value_kernel.cc
    │   │   ├── sgd_kernel.cc
    │   │   ├── shard_index_kernel.cc
    │   │   ├── sigmoid_cross_entropy_with_logits_kernel.cc
    │   │   ├── sign_kernel.cc
    │   │   ├── slice_kernel.cc
    │   │   ├── softmax_kernel.cc
    │   │   ├── split_kernel.cc
    │   │   ├── squared_l2_norm_kernel.cc
    │   │   ├── squeeze_kernel.cc
    │   │   ├── stack_kernel.cc
    │   │   ├── strided_copy_kernel.cc
    │   │   ├── strided_slice_kernel.cc
    │   │   ├── swiglu_kernel.cc
    │   │   ├── take_along_axis_kernel.cc
    │   │   ├── tile_kernel.cc
    │   │   ├── top_k_kernel.cc
    │   │   ├── transpose_kernel.cc
    │   │   ├── tril_triu_kernel.cc
    │   │   ├── truncated_gaussian_random_kernel.cc
    │   │   ├── unbind_kernel.cc
    │   │   ├── uniform_kernel.cc
    │   │   ├── unsqueeze_kernel.cc
    │   │   ├── unstack_kernel.cc
    │   │   ├── warpctc_kernel.cc
    │   │   └── where_kernel.cc
    │   ├── passes
    │   │   ├── __init__.py
    │   │   ├── chatglm.py
    │   │   ├── common.py
    │   │   └── llama.py
    │   ├── profile
    │   │   ├── __init__.py
    │   │   └── cann_export.py
    │   ├── runtime
    │   │   ├── flags.h
    │   │   ├── runtime.cc
    │   │   └── runtime.h
    │   ├── setup.py.in
    │   ├── tests
    │   │   ├── CMakeLists.txt
    │   │   ├── test_LeNet_MNIST.py
    │   │   └── unittests
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── big_shape_cases.json
    │   │   │   ├── dygraph_group_sharded_stage2.py
    │   │   │   ├── dygraph_group_sharded_stage2_comm_overlap.py
    │   │   │   ├── dygraph_group_sharded_stage2_offload.py
    │   │   │   ├── dygraph_group_sharded_stage3.py
    │   │   │   ├── dygraph_group_sharded_stage3_offload.py
    │   │   │   ├── hybrid_parallel_mp_layers.py
    │   │   │   ├── npu_utils.py
    │   │   │   ├── process_group_xccl.py
    │   │   │   ├── test_abs_op_npu.py
    │   │   │   ├── test_abs_op_npu_eager.py
    │   │   │   ├── test_accuracy_op_npu.py
    │   │   │   ├── test_activation_op.py
    │   │   │   ├── test_activation_op_eager.py
    │   │   │   ├── test_adadelta_op_npu.py
    │   │   │   ├── test_adagrad_op_npu.py
    │   │   │   ├── test_adam_op_npu.py
    │   │   │   ├── test_adam_op_npu_eager.py
    │   │   │   ├── test_adamw_op_npu.py
    │   │   │   ├── test_arg_max_op_npu.py
    │   │   │   ├── test_arg_min_op_npu.py
    │   │   │   ├── test_argsort_op_npu.py
    │   │   │   ├── test_assign_op_npu.py
    │   │   │   ├── test_assign_op_npu_eager.py
    │   │   │   ├── test_assign_value_op_npu.py
    │   │   │   ├── test_batch_norm_op_npu.py
    │   │   │   ├── test_bce_loss_npu.py
    │   │   │   ├── test_big_shape_npu.py
    │   │   │   ├── test_bilinear_interp_v2_op_npu.py
    │   │   │   ├── test_bitwise_op_npu.py
    │   │   │   ├── test_bmm_op_npu.py
    │   │   │   ├── test_box_coder_op_npu.py
    │   │   │   ├── test_cast_op_npu.py
    │   │   │   ├── test_cast_op_npu_eager.py
    │   │   │   ├── test_check_finite_and_unscale_op_npu.py
    │   │   │   ├── test_check_finite_and_unscale_op_npu_eager.py
    │   │   │   ├── test_check_nan_inf_op_npu.py
    │   │   │   ├── test_clip_by_norm_op_npu.py
    │   │   │   ├── test_clip_op_npu.py
    │   │   │   ├── test_coalesce_tensor_op_npu.py
    │   │   │   ├── test_collective_api.py
    │   │   │   ├── test_compare_op_npu.py
    │   │   │   ├── test_concat_op_npu.py
    │   │   │   ├── test_concat_op_npu_eager.py
    │   │   │   ├── test_contiguous_op_npu.py
    │   │   │   ├── test_conv2d_op_depthwise_conv_npu.py
    │   │   │   ├── test_conv2d_op_npu.py
    │   │   │   ├── test_conv2d_transpose_op_npu.py
    │   │   │   ├── test_conv3d_op_npu.py
    │   │   │   ├── test_cumprod_op_npu.py
    │   │   │   ├── test_cumsum_op_npu.py
    │   │   │   ├── test_custom_pass_npu.py
    │   │   │   ├── test_deformable_conv_op_npu.py
    │   │   │   ├── test_diag_op_npu.py
    │   │   │   ├── test_diagonal_op_npu.py
    │   │   │   ├── test_divide_op_npu_eager.py
    │   │   │   ├── test_dropout_op_npu.py
    │   │   │   ├── test_dygraph_recompute_for_eager.py
    │   │   │   ├── test_dygraph_sharding_stage_2.py
    │   │   │   ├── test_dygraph_sharding_stage_3.py
    │   │   │   ├── test_einsum_op_npu.py
    │   │   │   ├── test_elementwise_add_op_npu.py
    │   │   │   ├── test_elementwise_add_op_npu_eager.py
    │   │   │   ├── test_elementwise_div_op_npu.py
    │   │   │   ├── test_elementwise_floordiv_op_npu.py
    │   │   │   ├── test_elementwise_max_op_npu.py
    │   │   │   ├── test_elementwise_max_op_npu_eager.py
    │   │   │   ├── test_elementwise_min_op_npu.py
    │   │   │   ├── test_elementwise_mod_op_npu.py
    │   │   │   ├── test_elementwise_mul_op_npu.py
    │   │   │   ├── test_elementwise_mul_op_npu_eager.py
    │   │   │   ├── test_elementwise_pow_op_npu.py
    │   │   │   ├── test_elementwise_pow_op_npu_eager.py
    │   │   │   ├── test_elementwise_sub_op_npu.py
    │   │   │   ├── test_elu_op_npu.py
    │   │   │   ├── test_expand_as_v2_op_npu.py
    │   │   │   ├── test_expand_as_v2_op_npu_eager.py
    │   │   │   ├── test_expand_v2_op_npu.py
    │   │   │   ├── test_expand_v2_op_npu_eager.py
    │   │   │   ├── test_eye_op_npu.py
    │   │   │   ├── test_fill_any_like_op_npu.py
    │   │   │   ├── test_fill_constant_batch_size_like_op_npu.py
    │   │   │   ├── test_fill_constant_op_npu.py
    │   │   │   ├── test_fill_diagonal_op_npu.py
    │   │   │   ├── test_fill_diagonal_tensor_op_npu.py
    │   │   │   ├── test_fill_op_npu_eager.py
    │   │   │   ├── test_flashattention_npu.py
    │   │   │   ├── test_flip_op_npu.py
    │   │   │   ├── test_full_like_op.py
    │   │   │   ├── test_full_op.py
    │   │   │   ├── test_full_with_tensor_op_on_npu.py
    │   │   │   ├── test_fused_linear_param_grad_add_op_npu.py
    │   │   │   ├── test_fused_matmul_bias_op_npu.py
    │   │   │   ├── test_gather_nd_op_npu.py
    │   │   │   ├── test_gather_nd_op_npu_eager.py
    │   │   │   ├── test_gather_op_npu.py
    │   │   │   ├── test_gaussian_random_op_npu.py
    │   │   │   ├── test_gelu_op_npu.py
    │   │   │   ├── test_grid_sample_op_npu.py
    │   │   │   ├── test_group_norm_op_npu.py
    │   │   │   ├── test_hard_shrink_op_npu.py
    │   │   │   ├── test_hard_sigmoid_op_npu.py
    │   │   │   ├── test_hard_swish_op_npu.py
    │   │   │   ├── test_hard_tanh_op_npu.py
    │   │   │   ├── test_histogram_op_npu.py
    │   │   │   ├── test_huber_loss_op_npu.py
    │   │   │   ├── test_increment_op_npu.py
    │   │   │   ├── test_index_put_op_npu_eager.py
    │   │   │   ├── test_index_sample_op_npu.py
    │   │   │   ├── test_index_select_op_npu.py
    │   │   │   ├── test_inverse_op_npu.py
    │   │   │   ├── test_is_empty_op_npu.py
    │   │   │   ├── test_isfinite_v2_op_npu.py
    │   │   │   ├── test_kldiv_loss_op_npu.py
    │   │   │   ├── test_label_smooth_op_npu.py
    │   │   │   ├── test_layer_norm_op_npu.py
    │   │   │   ├── test_linear_op_npu.py
    │   │   │   ├── test_linspace_op_npu.py
    │   │   │   ├── test_log_loss_op_npu.py
    │   │   │   ├── test_log_softmax_op_npu.py
    │   │   │   ├── test_logical_op_npu.py
    │   │   │   ├── test_lookup_table_v2_op_npu.py
    │   │   │   ├── test_masked_select_op_npu.py
    │   │   │   ├── test_matmul_op_npu_eager.py
    │   │   │   ├── test_matmulv2_op_npu.py
    │   │   │   ├── test_mean_op_npu.py
    │   │   │   ├── test_memcpy_op_npu.py
    │   │   │   ├── test_merged_momentum_op_npu.py
    │   │   │   ├── test_meshgrid_op_npu.py
    │   │   │   ├── test_momentum_op_npu.py
    │   │   │   ├── test_multinomial_op_npu.py
    │   │   │   ├── test_nearest_interp_op_npu.py
    │   │   │   ├── test_nll_loss_op_npu.py
    │   │   │   ├── test_norm_op_npu.py
    │   │   │   ├── test_npu_identity_op.py
    │   │   │   ├── test_one_dim_tensor_npu.py
    │   │   │   ├── test_one_hot_v2_op_npu.py
    │   │   │   ├── test_p_norm_op_npu.py
    │   │   │   ├── test_pad3d_op_npu.py
    │   │   │   ├── test_pad_op_npu.py
    │   │   │   ├── test_pad_op_npu_eager.py
    │   │   │   ├── test_parallel_dygraph_mp_layers.py
    │   │   │   ├── test_pool2d_op_npu.py
    │   │   │   ├── test_prelu_op_npu.py
    │   │   │   ├── test_prior_box_op_npu.py
    │   │   │   ├── test_randint_op_npu.py
    │   │   │   ├── test_randperm_op_npu.py
    │   │   │   ├── test_range_npu.py
    │   │   │   ├── test_reciprocal_op_npu.py
    │   │   │   ├── test_reduce_all_op_npu.py
    │   │   │   ├── test_reduce_any_op_npu.py
    │   │   │   ├── test_reduce_max_op_npu.py
    │   │   │   ├── test_reduce_max_op_npu_eager.py
    │   │   │   ├── test_reduce_mean_op_npu.py
    │   │   │   ├── test_reduce_mean_op_npu_eager.py
    │   │   │   ├── test_reduce_min_op_npu.py
    │   │   │   ├── test_reduce_prod_op_npu.py
    │   │   │   ├── test_reduce_sum_op_npu.py
    │   │   │   ├── test_reduce_sum_op_npu_eager.py
    │   │   │   ├── test_rms_norm_npu.py
    │   │   │   ├── test_rmsprop_op_npu.py
    │   │   │   ├── test_rnn_op_npu.py
    │   │   │   ├── test_roi_align_op_npu.py
    │   │   │   ├── test_roll_op_npu.py
    │   │   │   ├── test_rope_npu.py
    │   │   │   ├── test_rsqrt_op_npu.py
    │   │   │   ├── test_rsqrt_op_npu_eager.py
    │   │   │   ├── test_scale_op_npu.py
    │   │   │   ├── test_scale_op_npu_eager.py
    │   │   │   ├── test_scatter_nd_add_op_npu.py
    │   │   │   ├── test_scatter_op_npu.py
    │   │   │   ├── test_selu_op_npu.py
    │   │   │   ├── test_sequence_mask_op_npu.py
    │   │   │   ├── test_set_value_op_npu.py
    │   │   │   ├── test_sgd_op_npu.py
    │   │   │   ├── test_shard_index_op_npu.py
    │   │   │   ├── test_sigmoid_cross_entropy_with_logits_op_npu.py
    │   │   │   ├── test_sign_op_npu.py
    │   │   │   ├── test_slice_op_npu.py
    │   │   │   ├── test_slice_op_npu_eager.py
    │   │   │   ├── test_softmax_op_npu.py
    │   │   │   ├── test_softmax_with_cross_entropy_op_npu.py
    │   │   │   ├── test_split_op_npu.py
    │   │   │   ├── test_split_op_npu_eager.py
    │   │   │   ├── test_squared_l2_norm_op_npu.py
    │   │   │   ├── test_squared_l2_norm_op_npu_eager.py
    │   │   │   ├── test_squeeze_op_npu.py
    │   │   │   ├── test_squeeze_op_npu_eager.py
    │   │   │   ├── test_stack_op_npu.py
    │   │   │   ├── test_stack_op_npu_eager.py
    │   │   │   ├── test_strided_slice_op_npu.py
    │   │   │   ├── test_strided_slice_op_npu_eager.py
    │   │   │   ├── test_sum_op_npu.py
    │   │   │   ├── test_swiglu_op_npu.py
    │   │   │   ├── test_take_along_axis_op_npu.py
    │   │   │   ├── test_tile_op_npu.py
    │   │   │   ├── test_top_k_v2_op_npu.py
    │   │   │   ├── test_transpose_op_npu.py
    │   │   │   ├── test_transpose_op_npu_eager.py
    │   │   │   ├── test_tril_triu_op_npu.py
    │   │   │   ├── test_truncated_gaussian_random_op_npu.py
    │   │   │   ├── test_unbind_op_npu.py
    │   │   │   ├── test_uniform_random_op_npu.py
    │   │   │   ├── test_unsqueeze_op_npu.py
    │   │   │   ├── test_unsqueeze_op_npu_eager.py
    │   │   │   ├── test_unstack_op_npu.py
    │   │   │   ├── test_update_loss_scaling_op_npu.py
    │   │   │   ├── test_warpctc_op_npu.py
    │   │   │   ├── test_where_index_npu.py
    │   │   │   ├── test_where_op_npu.py
    │   │   │   ├── test_where_op_npu_eager.py
    │   │   │   └── test_zero_dim_tensor_npu.py
    │   └── tools
    │   │   ├── compile.sh
    │   │   ├── coverage
    │   │       ├── coverage_diff.py
    │   │       ├── coverage_process.sh
    │   │       └── pull_request.py
    │   │   ├── disable_ut_npu
    │   │   ├── disable_ut_npu_910b
    │   │   ├── dockerfile
    │   │       ├── Dockerfile.npu.ubuntu20.gcc84
    │   │       └── build-image.sh
    │   │   ├── important_ut_npu
    │   │   ├── pr_ci_llama_npu.sh
    │   │   └── pr_ci_npu.sh
    └── sdaa
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_cn.md
    │   ├── cmake
    │       ├── dummy.c.in
    │       ├── external
    │       │   ├── gflags.cmake
    │       │   ├── glog.cmake
    │       │   ├── gtest.cmake
    │       │   ├── onednn.cmake
    │       │   └── tabulate.cmake
    │       ├── generic.cmake
    │       ├── paddle.cmake
    │       ├── teco.cmake
    │       ├── third_party.cmake
    │       └── version.cmake
    │   ├── compile.sh
    │   ├── dynload
    │       ├── dynamic_loader.cc
    │       ├── dynamic_loader.h
    │       ├── sdpti.cc
    │       └── sdpti.h
    │   ├── external
    │       └── customsdaastream.h
    │   ├── kernels
    │       ├── abs_kernel.cc
    │       ├── accuracy_kernel.cc
    │       ├── activation_kernel.cc
    │       ├── adam_kernel.cc
    │       ├── add_n_kernel.cc
    │       ├── amp
    │       │   ├── amp_funcs.h
    │       │   ├── check_finite_and_scaling_kernel.cc
    │       │   └── update_loss_scaling_kernel.cc
    │       ├── arange_kernel.cc
    │       ├── arg_max_min_kernel.cc
    │       ├── argsort_kernel.cc
    │       ├── assign_kernel.cc
    │       ├── batch_norm_kernel.cc
    │       ├── bce_loss_kernel.cc
    │       ├── bitwise_kernel.cc
    │       ├── bmm_kernel.cc
    │       ├── cast_kernel.cc
    │       ├── clip_kernel.cc
    │       ├── coalesce_tensor_kernel.cc
    │       ├── compare_kernel.cc
    │       ├── concat_kernel.cc
    │       ├── contiguous_kernel.cc
    │       ├── conv2d_kernel.cc
    │       ├── conv_transpose_kernel.cc
    │       ├── cross_entropy_kernel.cc
    │       ├── cum_kernel.cc
    │       ├── distribute_fpn_proposals_kernel.cc
    │       ├── dropout_kernel.cc
    │       ├── element_add_kernel.cc
    │       ├── element_div_kernel.cc
    │       ├── element_mul_kernel.cc
    │       ├── element_sub_kernel.cc
    │       ├── elementwise_floordiv_kernel.cc
    │       ├── elementwise_max_kernel.cc
    │       ├── elementwise_min_kernel.cc
    │       ├── elementwise_mod_kernel.cc
    │       ├── elementwise_pow_kernel.cc
    │       ├── embedding_kernel.cc
    │       ├── expand_as_kernel.cc
    │       ├── expand_kernel.cc
    │       ├── fill_kernel.cc
    │       ├── flash_attenttion_kernel.cc
    │       ├── flip_kernel.cc
    │       ├── full_kernel.cc
    │       ├── funcs
    │       │   ├── contiguous
    │       │   │   ├── contiguous_register.h
    │       │   │   ├── copy_stride_opt.cc
    │       │   │   └── transpose_opt.cc
    │       │   ├── elementwise_functor.h
    │       │   ├── high_precision_op_list.cc
    │       │   ├── high_precision_op_list.h
    │       │   ├── nv_align.h
    │       │   ├── sdaa_baseop.cc
    │       │   ├── sdaa_baseop.h
    │       │   ├── sdaa_funcs.h
    │       │   ├── slice_utils.h
    │       │   ├── strided_copy_utils.cc
    │       │   ├── strided_copy_utils.h
    │       │   ├── tblas_baseop.h
    │       │   └── tecodnn_conv_impl.h
    │       ├── gather_kernel.cc
    │       ├── gather_nd_kernel.cc
    │       ├── gaussian_random_kernel.cc
    │       ├── generate_proposals_kernel.cc
    │       ├── grid_sample_kernel.cc
    │       ├── group_norm_kernel.cc
    │       ├── huber_loss_kernel.cc
    │       ├── identity_kernel.cc
    │       ├── increment_kernel.cc
    │       ├── index_put_kernel.cc
    │       ├── index_sample_kernel.cc
    │       ├── index_select_kernel.cc
    │       ├── instance_norm_kernel.cc
    │       ├── interpolate_kernel.cc
    │       ├── is_empty_kernel.cc
    │       ├── isfinite_kernel.cc
    │       ├── label_smooth_kernel.cc
    │       ├── layer_norm_kernel.cc
    │       ├── linspace_kernel.cc
    │       ├── log_loss_kernel.cc
    │       ├── log_softmax_kernel.cc
    │       ├── logical_kernel.cc
    │       ├── masked_select_kernel.cc
    │       ├── matmul_kernel.cc
    │       ├── memcpy_kernel.cc
    │       ├── merged_adam_kernel.cc
    │       ├── merged_momentum_kernel.cc
    │       ├── meshgrid_kernel.cc
    │       ├── momentum_kernel.cc
    │       ├── multiclass_nms3_kernel.cc
    │       ├── nll_loss_kernel.cc
    │       ├── nonzero_kernel.cc
    │       ├── one_hot_kernel.cc
    │       ├── p_norm_kernel.cc
    │       ├── pool2d_kernel.cc
    │       ├── prelu_kernel.cc
    │       ├── prior_box_kernel.cc
    │       ├── profiler
    │       │   ├── RecordEvent.cc
    │       │   ├── RecordEvent.h
    │       │   ├── os_info.cc
    │       │   ├── os_info.h
    │       │   ├── sdaa_wrapper.h
    │       │   ├── tcpx_util.cc
    │       │   └── tcpx_util.h
    │       ├── randint_kernel.cc
    │       ├── reduce_logic_kernel.cc
    │       ├── reduce_max_kernel.cc
    │       ├── reduce_mean_kernel.cc
    │       ├── reduce_min_kernel.cc
    │       ├── reduce_prod_kernel.cc
    │       ├── reduce_sum_kernel.cc
    │       ├── rnn_kernel.cc
    │       ├── roi_align_kernel.cc
    │       ├── scale_kernel.cc
    │       ├── scatter_kernel.cc
    │       ├── scatter_nd_add_kernel.cc
    │       ├── set_value_kernel.cc
    │       ├── sigmoid_cross_entropy_with_logits_kernel.cc
    │       ├── slice_kernel.cc
    │       ├── softmax_kernel.cc
    │       ├── split_kernel.cc
    │       ├── squared_l2_norm_kernel.cc
    │       ├── squeeze_kernel.cc
    │       ├── stack_kernel.cc
    │       ├── stride_slice_kernel.cc
    │       ├── strided_copy_kernel.cc
    │       ├── sync_batch_norm_kernel.cc
    │       ├── tile_kernel.cc
    │       ├── top_k_kernel.cc
    │       ├── transpose_kernel.cc
    │       ├── tril_triu_kernel.cc
    │       ├── truncated_gaussian_random_kernel.cc
    │       ├── unbind_kernel.cc
    │       ├── uniform_random_kernel.cc
    │       ├── unsqueeze_kernel.cc
    │       ├── unstack_kernel.cc
    │       ├── warpctc_kernel.cc
    │       └── where_kernel.cc
    │   ├── pr_ci_sdaa.sh
    │   ├── runtime
    │       ├── flags.h
    │       ├── runtime.cc
    │       ├── runtime.h
    │       └── sdaaEvent.h
    │   ├── sdaa_ext
    │       ├── python
    │       │   ├── __init__.py
    │       │   ├── custom_parallel
    │       │   │   ├── Adam.py
    │       │   │   ├── AdamW.py
    │       │   │   ├── Momentum.py
    │       │   │   ├── __init__.py
    │       │   │   ├── device_map.py
    │       │   │   ├── distributed_gradscalar.py
    │       │   │   └── distributed_optimizer.py
    │       │   ├── demo
    │       │   │   ├── __init__.py
    │       │   │   └── demo.py
    │       │   ├── ops
    │       │   │   ├── __init__.py
    │       │   │   ├── fused_swiglu.py
    │       │   │   ├── high_performance_op.py
    │       │   │   ├── rms_norm.py
    │       │   │   └── rope.py
    │       │   ├── passes
    │       │   │   ├── __init__.py
    │       │   │   ├── common.py
    │       │   │   ├── conv_bn_fused_pass.py
    │       │   │   └── ir_custom_pass_patch.py
    │       │   ├── patch
    │       │   │   ├── __init__.py
    │       │   │   └── adaptor_mpu.py
    │       │   ├── storage
    │       │   │   ├── __init__.py
    │       │   │   └── storage.py
    │       │   ├── utils
    │       │   │   ├── __init__.py
    │       │   │   ├── extension_utils.py
    │       │   │   ├── install_check.py
    │       │   │   ├── load_balance.py
    │       │   │   ├── sdaa_extension.py
    │       │   │   └── utils.py
    │       │   └── version
    │       │   │   ├── __init__.py
    │       │   │   └── version_query.py
    │       ├── sdaa_add_n_op.cc
    │       ├── sdaa_add_op.cc
    │       ├── sdaa_custom_tan.cc
    │       ├── sdaa_fc_op.cc
    │       ├── sdaa_fused_conv_bn.cc
    │       ├── sdaa_fused_rms_norm_op.cc
    │       ├── sdaa_fused_rotary_position_embedding.cc
    │       ├── sdaa_rankid.cc
    │       ├── sdaa_sgemmex_op.cc
    │       ├── sdaa_swiglu_op.cc
    │       ├── sdaa_version_query_op.cc
    │       ├── setup.py.in
    │       └── tensot_storage.cc
    │   ├── sdaac_ops
    │       ├── CMakeLists.txt
    │       ├── contrib
    │       │   ├── README.md
    │       │   └── sdaacops_contrib.h
    │       ├── custom_sdaacops.h
    │       └── custom_tan.scpp
    │   ├── setup.py.in
    │   ├── tests
    │       ├── CMakeLists.txt
    │       ├── distribution
    │       │   ├── CMakeLists.txt
    │       │   ├── allgather_api_test_case.py
    │       │   ├── allreduce_api_test_case.py
    │       │   ├── broadcast_api_test_case.py
    │       │   ├── communication_api_test_base.py
    │       │   ├── ddp_optimizer.py
    │       │   ├── hybrid_column_parallel_mp_layers.py
    │       │   ├── hybrid_cross_entropy_parallel_mp_layers.py
    │       │   ├── hybrid_embedding_parallel_mp_layers.py
    │       │   ├── hybrid_parallel_mp_amp.py
    │       │   ├── hybrid_parallel_mp_model.py
    │       │   ├── hybrid_parallel_mp_model_with_sequence_parallel.py
    │       │   ├── hybrid_parallel_pp_amp.py
    │       │   ├── hybrid_parallel_pp_embedding.py
    │       │   ├── hybrid_parallel_pp_layer.py
    │       │   ├── hybrid_parallel_sep_model.py
    │       │   ├── hybrid_parallel_shared_weight.py
    │       │   ├── hybrid_row_parallel_mp_layers.py
    │       │   ├── multi_nodes_ddp_test
    │       │   │   ├── multi_nodes_test.sh
    │       │   │   ├── test_all_gather.py
    │       │   │   ├── test_all_reduce.py
    │       │   │   └── test_broadcast.py
    │       │   ├── profiler_api_test_case.py
    │       │   ├── reduce_api_test_case.py
    │       │   ├── reduce_scatter_api_test_case.py
    │       │   ├── sdaa_dygraph_group_sharded_api.py
    │       │   ├── sdaa_dygraph_group_sharded_stage2.py
    │       │   ├── sdaa_dygraph_group_sharded_stage2_loss_stable.py
    │       │   ├── sdaa_dygraph_group_sharded_stage2_offload.py
    │       │   ├── sdaa_dygraph_group_sharded_stage3.py
    │       │   ├── sdaa_dygraph_group_sharded_stage3_offload.py
    │       │   ├── sendrecv_api_test_case.py
    │       │   ├── test_communication_stream_allgather_api.py
    │       │   ├── test_communication_stream_allreduce_api.py
    │       │   ├── test_communication_stream_broadcast_api.py
    │       │   ├── test_communication_stream_profiler_api.py
    │       │   ├── test_communication_stream_reduce_api.py
    │       │   ├── test_communication_stream_reduce_scatter_api.py
    │       │   ├── test_communication_stream_sendrecv_api.py
    │       │   ├── test_ddp_optimizer.py
    │       │   ├── test_dygraph_sharding_stage_2.py
    │       │   ├── test_dygraph_sharding_stage_3.py
    │       │   ├── test_parallel_dygraph_mp_layers.py
    │       │   ├── test_parallel_dygraph_pipeline_parallel.py
    │       │   ├── test_parallel_dygraph_pp_layers.py
    │       │   ├── test_parallel_dygraph_sep_parallel.py
    │       │   └── test_parallel_dygraph_tensor_parallel.py
    │       ├── rnn
    │       │   ├── convert.py
    │       │   └── rnn_numpy.py
    │       ├── runtime
    │       │   ├── CMakeLists.txt
    │       │   ├── test_profiler.py
    │       │   ├── test_profiler_with_kernel.py
    │       │   └── test_runtime.cc
    │       ├── test_MNIST_model.py
    │       └── unittests
    │       │   ├── CMakeLists.txt
    │       │   ├── op_test_dy.py
    │       │   ├── parallel_dygraph_sync_batch_norm.py
    │       │   ├── test_abs_op_sdaa.py
    │       │   ├── test_accuracy_op_sdaa.py
    │       │   ├── test_adam_op_sdaa.py
    │       │   ├── test_add_n_op_sdaa.py
    │       │   ├── test_all_any_op_sdaa.py
    │       │   ├── test_amp_level_sdaa.py
    │       │   ├── test_arange_op_sdaa.py
    │       │   ├── test_arg_max_op_sdaa.py
    │       │   ├── test_arg_min_op_sdaa.py
    │       │   ├── test_argsort_op_sdaa.py
    │       │   ├── test_assign_op_sdaa.py
    │       │   ├── test_assign_value_op_sdaa.py
    │       │   ├── test_atan_op_sdaa.py
    │       │   ├── test_batch_norm_op_sdaa.py
    │       │   ├── test_bce_loss_sdaa.py
    │       │   ├── test_bitwise_op_sdaa.py
    │       │   ├── test_bmm_op_sdaa.py
    │       │   ├── test_cast_op_sdaa.py
    │       │   ├── test_ceil_op_sdaa.py
    │       │   ├── test_check_finite_and_unscale_op_sdaa.py
    │       │   ├── test_clip_op_sdaa.py
    │       │   ├── test_coalesce_tensor_op_sdaa.py
    │       │   ├── test_compare_op_sdaa.py
    │       │   ├── test_concat_op_sdaa.py
    │       │   ├── test_contiguous_op_sdaa.py
    │       │   ├── test_conv2d_op_depthwise_conv_sdaa.py
    │       │   ├── test_conv2d_transposed_op_sdaa.py
    │       │   ├── test_conv_op_sdaa.py
    │       │   ├── test_cos_op_sdaa.py
    │       │   ├── test_cumsum_op_sdaa.py
    │       │   ├── test_custom_api_sdaa.py
    │       │   ├── test_custom_linear_op_sdaa.py
    │       │   ├── test_custom_matmul_op_sdaa.py
    │       │   ├── test_custom_pass_sdaa.py
    │       │   ├── test_custom_tan_op_sdaa.py
    │       │   ├── test_dist_base.py
    │       │   ├── test_distribute_fpn_proposals_op_sdaa.py
    │       │   ├── test_dropout_op_sdaa.py
    │       │   ├── test_dy2static_mlp_sdaa.py
    │       │   ├── test_dy_and_inplace_sdaa.py
    │       │   ├── test_dygraph_recompute_for_eager.py
    │       │   ├── test_elementwise_add_op_sdaa.py
    │       │   ├── test_elementwise_div_op_sdaa.py
    │       │   ├── test_elementwise_floordiv_op_sdaa.py
    │       │   ├── test_elementwise_max_op_sdaa.py
    │       │   ├── test_elementwise_min_op_sdaa.py
    │       │   ├── test_elementwise_mod_op_sdaa.py
    │       │   ├── test_elementwise_mul_op_sdaa.py
    │       │   ├── test_elementwise_pow_op_sdaa.py
    │       │   ├── test_elementwise_sub_op_sdaa.py
    │       │   ├── test_elu_op_sdaa.py
    │       │   ├── test_embedding_op_sdaa.py
    │       │   ├── test_erf_op_sdaa.py
    │       │   ├── test_exp_op_sdaa.py
    │       │   ├── test_expand_as_v2_op_sdaa.py
    │       │   ├── test_expand_v2_op_sdaa.py
    │       │   ├── test_fill_constant_batch_size_like_op_sdaa.py
    │       │   ├── test_fill_op_sdaa.py
    │       │   ├── test_flash_attn_op_sdaa.py
    │       │   ├── test_flip_op_sdaa.py
    │       │   ├── test_floor_op_sdaa.py
    │       │   ├── test_full_like_op_sdaa.py
    │       │   ├── test_full_op_sdaa.py
    │       │   ├── test_gather_nd_op_sdaa.py
    │       │   ├── test_gather_op_sdaa.py
    │       │   ├── test_gaussian_random_sdaa.py
    │       │   ├── test_gelu_op_sdaa.py
    │       │   ├── test_generate_proposals_sdaa.py
    │       │   ├── test_grid_sampler_op_sdaa.py
    │       │   ├── test_group_norm_op_sdaa.py
    │       │   ├── test_hard_sigmoid_op_sdaa.py
    │       │   ├── test_hard_swish_op_sdaa.py
    │       │   ├── test_hard_tanh_op_sdaa.py
    │       │   ├── test_highperformance_conv.py
    │       │   ├── test_highperformance_convtranspose.py
    │       │   ├── test_highperformance_gemm.py
    │       │   ├── test_huber_loss_op_sdaa.py
    │       │   ├── test_identity_op_sdaa.py
    │       │   ├── test_increment_op_sdaa.py
    │       │   ├── test_index_put_op_sdaa.py
    │       │   ├── test_index_sample_op_sdaa.py
    │       │   ├── test_index_select_op_sdaa.py
    │       │   ├── test_instance_norm_op_v2_sdaa.py
    │       │   ├── test_is_empty_op_sdaa.py
    │       │   ├── test_isnan_op_sdaa.py
    │       │   ├── test_label_smooth_op_sdaa.py
    │       │   ├── test_layer_norm_op_sdaa.py
    │       │   ├── test_leaky_relu_op_sdaa.py
    │       │   ├── test_linspace_op_sdaa.py
    │       │   ├── test_log2_op_sdaa.py
    │       │   ├── test_log_loss_op_sdaa.py
    │       │   ├── test_log_op_sdaa.py
    │       │   ├── test_log_softmax_sdaa.py
    │       │   ├── test_logical_op_sdaa.py
    │       │   ├── test_logsigmoid_op_sdaa.py
    │       │   ├── test_masked_select_op_sdaa.py
    │       │   ├── test_matmul_op_sdaa.py
    │       │   ├── test_max_op_sdaa.py
    │       │   ├── test_mean_all_op_sdaa.py
    │       │   ├── test_mean_op_sdaa.py
    │       │   ├── test_memcpy_op_sdaa.py
    │       │   ├── test_merged_adam_op_sdaa.py
    │       │   ├── test_merged_momentum_op_sdaa.py
    │       │   ├── test_meshgrid_op_sdaa.py
    │       │   ├── test_min_op_sdaa.py
    │       │   ├── test_mish_op_sdaa.py
    │       │   ├── test_momentum_op_sdaa.py
    │       │   ├── test_mul_op_sdaa.py
    │       │   ├── test_multiclass_nms_op_sdaa.py
    │       │   ├── test_nearest_interp_v2_op_sdaa.py
    │       │   ├── test_nll_loss_op_sdaa.py
    │       │   ├── test_no_event_pool.py
    │       │   ├── test_nonzero_op_sdaa.py
    │       │   ├── test_one_hot_v2_op_sdaa.py
    │       │   ├── test_p_norm_op_sdaa.py
    │       │   ├── test_pool2d_op_sdaa.py
    │       │   ├── test_pow_op_sdaa.py
    │       │   ├── test_prelu_op_sdaa.py
    │       │   ├── test_prior_box_op_sdaa.py
    │       │   ├── test_randint_op_sdaa.py
    │       │   ├── test_reciprocal_op_sdaa.py
    │       │   ├── test_reduce_prod_op_sdaa.py
    │       │   ├── test_reduce_sum_op_sdaa.py
    │       │   ├── test_relu6_op_sdaa.py
    │       │   ├── test_relu_op_sdaa.py
    │       │   ├── test_rms_norm_op_sdaa.py
    │       │   ├── test_rnn_op_sdaa.py
    │       │   ├── test_roi_align_sdaa.py
    │       │   ├── test_rope_op_sdaa.py
    │       │   ├── test_rsqrt_op_sdaa.py
    │       │   ├── test_scale_op_sdaa.py
    │       │   ├── test_scatter_op_sdaa.py
    │       │   ├── test_sdaa_matmul_scale.py
    │       │   ├── test_set_device.py
    │       │   ├── test_set_value_op_sdaa.py
    │       │   ├── test_sigmoid_cross_entropy_with_logits_op_sdaa.py
    │       │   ├── test_sigmoid_op_sdaa.py
    │       │   ├── test_silu_op_sdaa.py
    │       │   ├── test_sin_op_sdaa.py
    │       │   ├── test_slice_op_sdaa.py
    │       │   ├── test_softmax_sdaa.py
    │       │   ├── test_softmax_with_cross_entropy_op_sdaa.py
    │       │   ├── test_softplus_op_sdaa.py
    │       │   ├── test_softsign_op_sdaa.py
    │       │   ├── test_split_op_sdaa.py
    │       │   ├── test_sqrt_op_sdaa.py
    │       │   ├── test_square_op_sdaa.py
    │       │   ├── test_squared_l2_norm_op_sdaa.py
    │       │   ├── test_squeeze_op_sdaa.py
    │       │   ├── test_stack_op_sdaa.py
    │       │   ├── test_strided_op_sdaa.py
    │       │   ├── test_strided_slice_op_sdaa.py
    │       │   ├── test_swiglu_op_sdaa.py
    │       │   ├── test_swish_op_sdaa.py
    │       │   ├── test_sync_batch_norm_op_sdaa.py
    │       │   ├── test_tanh_op_sdaa.py
    │       │   ├── test_tcpx.py
    │       │   ├── test_tile_op_sdaa.py
    │       │   ├── test_topk_op_sdaa.py
    │       │   ├── test_transpose_op_sdaa.py
    │       │   ├── test_tril_triu_op_sdaa.py
    │       │   ├── test_truncated_gaussian_random_op_sdaa.py
    │       │   ├── test_unbind_op_sdaa.py
    │       │   ├── test_uniform_random_op_sdaa.py
    │       │   ├── test_unsqueeze_op_sdaa.py
    │       │   ├── test_unstack_op_sdaa.py
    │       │   ├── test_update_loss_scaling_op_sdaa.py
    │       │   ├── test_version_sdaa.py
    │       │   ├── test_warpctc_op_sdaa.py
    │       │   ├── test_where_op_sdaa.py
    │       │   └── test_zero_dim_tensor_sdaa.py
    │   └── tools
    │       └── version
    │           ├── dump.cc
    │           ├── minimum_supported_version.h.in
    │           ├── query.cc
    │           └── query.h
├── cmake
    ├── dummy.c.in
    ├── external
    │   ├── gflags.cmake
    │   ├── glog.cmake
    │   ├── gtest.cmake
    │   ├── onednn.cmake
    │   └── pybind11.cmake
    ├── generic.cmake
    ├── paddle.cmake
    ├── third_party.cmake
    └── version.cmake
├── python
    ├── __init__.py
    ├── tests
    │   ├── auto_parallel_op_test.py
    │   ├── config.py
    │   ├── convert.py
    │   ├── op.py
    │   ├── op_test.py
    │   ├── prim_op_test.py
    │   ├── rnn_numpy.py
    │   ├── testsuite.py
    │   ├── utils.py
    │   └── white_list
    │   │   ├── __init__.py
    │   │   ├── check_op_sequence_batch_1_input_white_list.py
    │   │   ├── check_op_sequence_instance_0_input_white_list.py
    │   │   ├── check_shape_white_list.py
    │   │   ├── compile_vs_runtime_white_list.py
    │   │   ├── new_ir_python_api_grad_white_list.py
    │   │   ├── no_check_set_white_list.py
    │   │   ├── no_grad_set_white_list.py
    │   │   ├── op_accuracy_white_list.py
    │   │   └── op_threshold_white_list.py
    └── tools
    │   ├── __init__.py
    │   ├── static_mode_white_list.py
    │   └── test_runner.py
└── tools
    └── codestyle
        ├── .cmakelintrc
        ├── .gitignore
        ├── clang_format.hook
        ├── copyright.hook
        ├── cpplint_pre_commit.hook
        ├── pre_commit.sh
        └── pylint_pre_commit.hook


/.clang-format:
--------------------------------------------------------------------------------
 1 | # This file is used by clang-format to autoformat paddle source code
 2 | #
 3 | # The clang-format is part of llvm toolchain.
 4 | # You need to install llvm and clang to format the source code.
 5 | #
 6 | # The basic usage is,
 7 | #   clang-format -i -style=file PATH/TO/SOURCE/CODE
 8 | #
 9 | # The -style=file option implicitly uses the ".clang-format" file located in
10 | # one of the parent directories.
11 | # The -i option means the files are changed in place.
12 | #
13 | # The documentation of clang-format is at
14 | #   http://clang.llvm.org/docs/ClangFormat.html
15 | #   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
16 | ---
17 | Language:        Cpp
18 | BasedOnStyle:  Google
19 | IndentWidth:     2
20 | TabWidth:        2
21 | ContinuationIndentWidth: 4
22 | AccessModifierOffset: -1  # private/protected/public are indented 1 space in a class (IndentWidth 2, offset -1)
23 | Standard:  Cpp11 
24 | AllowAllParametersOfDeclarationOnNextLine: true
25 | BinPackParameters: false
26 | BinPackArguments: false
27 | ...
28 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | select = C,E,F,W
 3 | exclude =
 4 |     ./build,
 5 |     ./backends/npu/build,
 6 |     ./backends/mlu/build,
 7 |     ./backends/custom_cpu/build
 8 | ignore =
 9 |     # E, see https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes
10 |     E203,
11 |     E401,E402,
12 |     E501,
13 |     E721,E722,E731,E741,
14 | 
15 |     # F, see https://flake8.pycqa.org/en/latest/user/error-codes.html
16 |     F405,
17 |     F811,F841,
18 | 
19 |     # W, see https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes
20 |     W503
21 | per-file-ignores =
22 |     # Ignore unused imports in __init__.py
23 |     __init__.py: F401
24 |     # Ignore undefined variables in CMake config and some dygraph_to_static tests
25 |     .cmake-format.py: F821
26 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.DS_Store
 2 | *.vs
 3 | build/
 4 | *.user
 5 | *.tmp
 6 | *.pyc
 7 | *.swp
 8 | *.diff
 9 | *.run
10 | 
11 | .vscode
12 | .idea
13 | .project
14 | .cproject
15 | .pydevproject
16 | .settings/
17 | CMakeSettings.json
18 | Makefile
19 | .test_env/
20 | 
21 | *~
22 | bazel-*
23 | 
24 | build_*
25 | # clion workspace.
26 | cmake-build-*
27 | 
28 | Testing
29 | tools/__pycache__
30 | 
31 | # ignore npu cache
32 | kernel_meta*/
33 | ascend_install.info
34 | version.info
35 | 
36 | # ignore Paddle change
37 | Paddle/
38 | 
39 | # ignore clangd cache
40 | .cache/
41 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "Paddle"]
2 | 	path = Paddle
3 | 	url = https://github.com/PaddlePaddle/Paddle.git
4 | 	branch = develop
5 | 


--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
1 | [style]
2 | based_on_style = pep8
3 | column_limit = 80
4 | 


--------------------------------------------------------------------------------
/Guides/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/Guides/image.png


--------------------------------------------------------------------------------
/Guides/paddle_load_customdevice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/Guides/paddle_load_customdevice.png


--------------------------------------------------------------------------------
/Guides/subgraph_case.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/Guides/subgraph_case.png


--------------------------------------------------------------------------------
/README_ja.md:
--------------------------------------------------------------------------------
 1 | # PaddleCustomDevice
 2 | 
 3 | [English](./README_en.md) | [简体中文](./README.md) | 日本語
 4 | 
 5 | PaddlePaddle カスタムデバイスの実装。
 6 | 
 7 | ## ユーザーガイド
 8 | 
 9 | プログラム設計ドキュメントは[カスタムデバイスの概要](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/dev_guides/custom_device_docs/custom_device_overview_cn.html)を、開発ガイドラインについては、[新しいハードウェアのアクセス例](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/dev_guides/custom_device_docs/custom_device_example_cn.html)を、デモコードは [CustomCPU](backends/custom_cpu/README_ja.md) を参照してください。
10 | 
11 | ## ハードウェアバックエンド
12 | 
13 | PaddleCustomDevice は以下のバックエンドをサポートしています:
14 | 
15 | - [Ascend NPU 用 PaddlePaddle カスタムデバイス実装](backends/npu/README.md)
16 | - [Cambricon MLU 用 PaddlePaddle カスタムデバイス実装](backends/mlu/README.md)
17 | - [Intel GPU 用 PaddlePaddle カスタムデバイス実装](backends/intel_gpu/README.md)
18 | - [Apple MPS 用 PaddlePaddle カスタムデバイス実装](backends/mps/README.md)
19 | - [Biren GPU 用 PaddlePaddle カスタムデバイス実装](backends/biren_gpu/README.md)
20 | - [Enflame GCU 用 PaddlePaddle カスタムデバイス実装](backends/gcu/README.md)
21 | - [Tecorigin SDAA 用 PaddlePaddle カスタムデバイス実装](backends/sdaa/README.md)
22 | 
23 | ## 著作権とライセンス
24 | 
25 | PaddleCustomDevice は [Apache-2.0 license](LICENSE) の下で提供されています。
26 | 


--------------------------------------------------------------------------------
/backends/biren_gpu/README.md:
--------------------------------------------------------------------------------
 1 | # PaddlePaddle Custom Device Implementation for Biren GPU
 2 | 
 3 | English | [简体中文](./README_cn.md)
 4 | 
 5 | Please refer to the following steps to compile, install and verify the custom device implementation for Biren GPU.
 6 | 
 7 | ## Compile and Install
 8 | 
 9 | ```bash
10 | # Acquire Biren PaddlePaddle Docker Image
11 | 
12 | # Clone PaddleCustomDevice source code
13 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice
14 | 
15 | # Compile Source Code and Install
16 | cd PaddleCustomDevice/backends/biren_gpu
17 | mkdir -p build
18 | pushd build
19 | cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug ..
20 | ninja
21 | pip3 install --no-index --find-links=offline dist/paddle_custom_supa-*.whl --force-reinstall
22 | ```
23 | 
24 | ## Verification
25 | 
26 | ```bash
27 | # build with -DWITH_TESTING=ON
28 | cmake -G Ninja -DWITH_TESTING=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug ..
29 | 
30 | # ctest
31 | cd build
32 | ninja test
33 | ```
34 | 


--------------------------------------------------------------------------------
/backends/biren_gpu/README_cn.md:
--------------------------------------------------------------------------------
 1 | # 飞桨自定义接入硬件后端(壁仞GPU)
 2 | 
 3 | 简体中文 | [English](./README.md)
 4 | 
 5 | 请参考以下步骤进行编译安装与验证
 6 | 
 7 | ## 编译安装
 8 | 
 9 | ```bash
10 | # 获取壁仞PaddlePaddle Docker镜像
11 | 
12 | # 克隆PaddleCustomDevice源码
13 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice
14 | 
15 | # 编译安装
16 | cd PaddleCustomDevice/backends/biren_gpu
17 | mkdir -p build
18 | pushd build
19 | cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug ..
20 | ninja
21 | pip3 install --no-index --find-links=offline dist/paddle_custom_supa-*.whl --force-reinstall
22 | ```
23 | 
24 | ## 验证
25 | 
26 | ```bash
27 | # -DWITH_TESTING=ON
28 | cmake -G Ninja -DWITH_TESTING=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug ..
29 | 
30 | # ctest
31 | cd build
32 | ninja test
33 | ```
34 | 


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../../../cmake/dummy.c.in


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/glog.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/external/onednn.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/onednn.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/external/pybind11.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/pybind11.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/external/supa.cmake:
--------------------------------------------------------------------------------
 1 | if(DEFINED ENV{SUPA_CUSTOM_PATH})
 2 |   set(SUPA_DIR $ENV{SUPA_CUSTOM_PATH})
 3 | else()
 4 |   set(SUPA_DIR /usr/local/supa)
 5 | endif()
 6 | 
 7 | set(SUPA_CL_DIR ${SUPA_DIR})
 8 | set(supa_cl_lib ${SUPA_CL_DIR}/lib/sucl/libsupa_cl.so)
 9 | 
10 | set(SUPA_CL_INC_DIR ${SUPA_CL_DIR}/include/ ${SUPA_CL_DIR}/include/sucl/)
11 | 
12 | message(STATUS "SUPA_CL_INC_DIR ${SUPA_CL_INC_DIR}")
13 | message(STATUS "SUPA_CL_DIR ${SUPA_CL_DIR}")
14 | 
15 | include_directories(${SUPA_CL_INC_DIR})
16 | 


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/generic.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/paddle.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/paddle.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/third_party.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/third_party.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/cmake/version.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/version.cmake


--------------------------------------------------------------------------------
/backends/biren_gpu/setup.py.in:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, Distribution
 2 | 
 3 | packages = []
 4 | package_data = {}
 5 | 
 6 | class BinaryDistribution(Distribution):
 7 |     def has_ext_modules(self):
 8 |         return True
 9 | 
10 | setup(
11 |     name = '@CMAKE_PROJECT_NAME@',
12 |     version='@PADDLE_VERSION@',
13 |     description='Paddle SUPA plugin',
14 |     long_description='',
15 |     long_description_content_type="text/markdown",
16 |     author_email="Paddle-better@baidu.com",
17 |     maintainer="PaddlePaddle",
18 |     maintainer_email="Paddle-better@baidu.com",
19 |     project_urls={},
20 |     license='Apache Software License',
21 |     packages= [
22 |         'paddle_custom_device',
23 |     ],
24 |     include_package_data=True,
25 |     package_data = {
26 |         '': ['*.so', '*.h', '*.py', '*.hpp'],
27 |     },
28 |     package_dir = {
29 |         '': 'python',
30 |     },
31 |     zip_safe=False,
32 |     distclass=BinaryDistribution,
33 |     entry_points={
34 |         'console_scripts': [
35 |         ]
36 |     },
37 |     classifiers=[
38 |     ],
39 |     keywords='Paddle SUPA plugin',
40 | )
41 | 


--------------------------------------------------------------------------------
/backends/biren_gpu/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # py_test_modules(<name> [SERIAL] [MODULES ...] [DEPS ...] [ENVS ...])
 2 | #
 3 | # Adds a CTest test <name> that runs ${PYTHON_SOURCE_DIR}/tools/test_runner.py
 4 | # with the MODULES values as arguments, from CMAKE_CURRENT_BINARY_DIR. ENVS
 5 | # entries are handed to `cmake -E env` next to CUSTOM_DEVICE_ROOT and
 6 | # PYTHONPATH. DEPS is parsed but not used here. SERIAL sets RUN_SERIAL.
 7 | function(py_test_modules TARGET_NAME)
 8 |   cmake_parse_arguments(_ptm "SERIAL" "" "MODULES;DEPS;ENVS" ${ARGN})
 9 | 
10 |   # Environment assignments placed in front of the python command.
11 |   set(_ptm_env
12 |       CUSTOM_DEVICE_ROOT=${CMAKE_BINARY_DIR}/python/paddle_custom_device/
13 |       PYTHONPATH=${PYTHON_SOURCE_DIR}:${PYTHON_SOURCE_DIR}/tests:$ENV{PYTHONPATH}
14 |       ${_ptm_ENVS})
15 | 
16 |   add_test(
17 |     NAME ${TARGET_NAME}
18 |     COMMAND ${CMAKE_COMMAND} -E env ${_ptm_env} python
19 |             ${PYTHON_SOURCE_DIR}/tools/test_runner.py ${_ptm_MODULES}
20 |     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
21 | 
22 |   if(_ptm_SERIAL)
23 |     set_tests_properties(${TARGET_NAME} PROPERTIES RUN_SERIAL 1)
24 |   endif()
25 | endfunction()
26 | 
27 | add_subdirectory(unittests)
28 | 


--------------------------------------------------------------------------------
/backends/biren_gpu/tests/unittests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Register every test_*.py in this directory as a CTest test named after the
 2 | # file without its .py extension, then give all of them a timeout of 1000.
 3 | file(
 4 |   GLOB TEST_FILES
 5 |   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
 6 |   "test_*.py")
 7 | 
 8 | # Strip only the trailing ".py". A plain string(REPLACE ".py" "") also removes
 9 | # ".py" from the middle of a name (e.g. "test_a.pyx_b.py" -> "test_ax_b").
10 | set(TEST_OPS "")
11 | foreach(TEST_FILE ${TEST_FILES})
12 |   string(REGEX REPLACE "\\.py$" "" TEST_OP "${TEST_FILE}")
13 |   py_test_modules(${TEST_OP} MODULES ${TEST_OP})
14 |   list(APPEND TEST_OPS ${TEST_OP})
15 | endforeach()
16 | 
17 | set_tests_properties(${TEST_OPS} PROPERTIES TIMEOUT 1000)
18 | 


--------------------------------------------------------------------------------
/backends/custom_cpu/cmake/paddle.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/paddle.cmake


--------------------------------------------------------------------------------
/backends/custom_cpu/setup.py.in:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, Distribution
 2 | 
 3 | packages = []
 4 | package_data = {}
 5 | 
 6 | class BinaryDistribution(Distribution):
 7 |     def has_ext_modules(self):
 8 |         return True
 9 | 
10 | setup(
11 |     name = '@CMAKE_PROJECT_NAME@',
12 |     version='@PLUGIN_VERSION@',
13 |     description='Paddle CustomCPU plugin',
14 |     long_description='',
15 |     long_description_content_type="text/markdown",
16 |     author_email="Paddle-better@baidu.com",
17 |     maintainer="PaddlePaddle",
18 |     maintainer_email="Paddle-better@baidu.com",
19 |     project_urls={},
20 |     license='Apache Software License',
21 |     packages= [
22 |         'paddle_custom_device',
23 |     ],
24 |     include_package_data=True,
25 |     package_data = {
26 |         '': ['*.so', '*.h', '*.py', '*.hpp'],
27 |     },
28 |     package_dir = {
29 |         '': 'python',
30 |     },
31 |     zip_safe=False,
32 |     distclass=BinaryDistribution,
33 |     entry_points={
34 |         'console_scripts': [
35 |         ]
36 |     },
37 |     classifiers=[
38 |     ],
39 |     keywords='Paddle CustomCPU plugin',
40 | )
41 | 


--------------------------------------------------------------------------------
/backends/custom_cpu/tests/unittests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | # Register every test_*.py in this directory as a CTest test named after the
16 | # file without its .py extension.
17 | file(
18 |   GLOB TEST_FILES
19 |   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
20 |   "test_*.py")
21 | 
22 | # Strip only the trailing ".py". A plain string(REPLACE ".py" "") also removes
23 | # ".py" from the middle of a name (e.g. "test_a.pyx_b.py" -> "test_ax_b").
24 | set(TEST_OPS "")
25 | foreach(TEST_FILE ${TEST_FILES})
26 |   string(REGEX REPLACE "\\.py$" "" TEST_OP "${TEST_FILE}")
27 |   py_test_modules(${TEST_OP} MODULES ${TEST_OP})
28 |   list(APPEND TEST_OPS ${TEST_OP})
29 | endforeach()
30 | 


--------------------------------------------------------------------------------
/backends/custom_cpu/tools/dockerfile/root/.bashrc:
--------------------------------------------------------------------------------
 1 | # .bashrc
 2 | 
 3 | # Source global definitions
 4 | if [ -f /etc/bashrc ]; then
 5 |     . /etc/bashrc
 6 | fi
 7 | 
 8 | # no auto logout
 9 | export TMOUT=0
10 | 
11 | # Locales
12 | export LC_ALL=en_US.UTF-8
13 | export LANG=en_US.UTF-8
14 | export LANGUAGE=en_US.UTF-8
15 | 
16 | # Aliases
17 | 
18 | # Ask for confirmation before destructive file operations.
19 | alias rm='rm -i'
20 | alias cp='cp -i'
21 | alias mv='mv -i'
22 | 
23 | alias l='ls -lF'
24 | alias ll='ls -alF'
25 | alias lt='ls -ltrF'
26 | alias lls='ls -alSrF'
27 | alias llt='ls -altrF'
28 | 
29 | # Colorize directory listing
30 | alias ls="ls -p --color=auto"
31 | alias pstall='pip install -U --no-deps --force-reinstall'
32 | 
33 | # Colorize grep
34 | # Use --color=auto rather than --color=always: "always" also writes escape
35 | # sequences into pipes and redirected files, corrupting downstream input.
36 | if echo hello | grep --color=auto l >/dev/null 2>&1; then
37 |   alias grep='grep --color=auto'
38 |   # GREP_COLOR is obsolescent in GNU grep; GREP_COLORS "mt=" replaces it.
39 |   export GREP_COLORS="mt=1;31"
40 | fi
41 | 
42 | # Shell
43 | export CLICOLOR="1"
44 | 
45 | # Git prompt helper; guarded so a missing script does not print an error on
46 | # every shell start.
47 | if [ -f ~/.scripts/git-prompt.sh ]; then
48 |   source ~/.scripts/git-prompt.sh
49 | fi
50 | # PS1 below calls __git_ps1 on every prompt; fall back to a no-op when the
51 | # helper was not loaded so the prompt stays usable.
52 | if ! declare -F __git_ps1 >/dev/null; then
53 |   __git_ps1() { :; }
54 | fi
55 | export PS1="\[\e[1;33m\]λ\[\e[0m\] \h \[\e[1;32m\]\w\[\e[1;33m\]\$(__git_ps1 \" \[\e[35m\]{\[\e[36m\]%s\[\e[35m\]}\") \[\e[0m\]"
56 | if [ -f ~/.scripts/git-completion.sh ]; then
57 |   source ~/.scripts/git-completion.sh
58 | fi
59 | 


--------------------------------------------------------------------------------
/backends/custom_cpu/tools/dockerfile/root/.gitconfig:
--------------------------------------------------------------------------------
 1 | # Git configuration kept under tools/dockerfile/root/ of the custom_cpu backend.
 2 | [user]
 3 |   # Left empty in this file.
 4 |   name =
 5 |   email =
 6 | 
 7 | # Short command aliases.
 8 | [alias]
 9 |   st = status --branch --short
10 |   ci = commit
11 |   br = branch
12 |   co = checkout
13 |   df = diff
14 |   l = log --pretty=format:\"%h %ad | %s%d [%an]\" --graph --date=short
15 |   ll = log --stat
16 |   lg = log --oneline -10
17 | 
18 | [merge]
19 |   tool = vimdiff
20 | 
21 | [core]
22 |   excludesfile = ~/.gitignore
23 |   editor = vim
24 | 
25 | # Let git decide when to colorize branch, diff and status output.
26 | [color]
27 |   branch = auto
28 |   diff = auto
29 |   status = auto
30 | 
31 | [color "branch"]
32 |   current = yellow reverse
33 |   local = yellow
34 |   remote = green
35 | 
36 | [color "diff"]
37 |   meta = yellow bold
38 |   frag = magenta bold
39 |   old = red bold
40 |   new = green bold
41 | 
42 | [color "status"]
43 |   added = yellow
44 |   changed = green
45 |   untracked = cyan
46 | 
47 | [push]
48 |   # NOTE(review): per the git-config documentation, "matching" pushes every
49 |   # local branch that has a same-named branch on the remote; confirm this is
50 |   # wanted rather than "simple".
51 |   default = matching
52 | [credential]
53 | 	# NOTE(review): per the git documentation, the "store" helper keeps
54 | 	# credentials unencrypted on disk; confirm this is acceptable here.
55 | 	helper = store
56 | 


--------------------------------------------------------------------------------
/backends/custom_cpu/tools/dockerfile/root/.vimrc:
--------------------------------------------------------------------------------
 1 | " Vim settings kept under tools/dockerfile/root/ of the custom_cpu backend.
 2 | set nocompatible
 3 | filetype plugin indent on
 4 | 
 5 | " Line numbers, syntax highlighting and search highlighting.
 6 | set nu
 7 | syntax enable
 8 | syntax on
 9 | set hlsearch
10 | set incsearch
11 | " Encodings tried when reading a file, then terminal and internal encoding.
12 | set fileencodings=utf-8,ucs-bom,gb18030,gbk,gb2312,cp936
13 | set termencoding=utf-8
14 | set encoding=utf-8
15 | set cursorline
16 | " NOTE(review): Vim documents that 'paste' overrides several editing options
17 | " (e.g. autoindent, expandtab, softtabstop); confirm it should be always on.
18 | set paste
19 | set mouse=a
20 | set showmode
21 | set showcmd
22 | 
23 | " expand tab to space
24 | set expandtab
25 | " The width of a hard tabstop measured in "spaces"
26 | set tabstop=4
27 | " The size of an "indent"
28 | set shiftwidth=4
29 | " insert a combination of spaces to simulate tab stops
30 | set softtabstop=4
31 | 
32 | " remember last update or view position
33 |  " Only do this part when compiled with support for autocommands
34 |  if has("autocmd")
35 |  " In text files, always limit the width of text to 78 characters
36 |  autocmd BufRead *.txt set tw=78
37 |  " When editing a file, always jump to the last cursor position
38 |  autocmd BufReadPost *
39 |  \ if line("'\"") > 0 && line ("'\"") <= line("$") |
40 |  \ exe "normal g'\"" |
41 |  \ endif
42 |  endif
43 | 


--------------------------------------------------------------------------------
/backends/gcu/backend/equivalence_trans/insensitive_ops/assign.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #pragma once
16 | 
17 | #include <memory>
18 | 
19 | #include "backend/register/register.h"
20 | 
21 | namespace backend {
22 | const char* const kAssign = "assign";
23 | 
24 | // Translation for the "assign" op name: takes the first "X" input and emits
25 | // a builder::Reshape to that input's own type, returned as a shared GcuOp.
26 | IMPLEMT_EQUIVALENCE_TRANS_FUNC(
27 |     gcu_builder, op, map_inputs, running_mode, AssignEquivalenceTrans) {
28 |   auto src = *(map_inputs["X"].at(0));
29 |   return std::make_shared<GcuOp>(builder::Reshape(src, src.GetType()));
30 | }
31 | 
32 | EQUIVALENCE_TRANS_FUNC_REG(kAssign, INSENSITIVE, AssignEquivalenceTrans);
33 | 
34 | }  // namespace backend
35 | 


--------------------------------------------------------------------------------
/backends/gcu/backend/equivalence_trans/insensitive_ops/equal.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #pragma once
16 | 
17 | #include <memory>
18 | 
19 | #include "backend/register/register.h"
20 | 
21 | namespace backend {
22 | const char *const kEqual = "equal";
23 | 
24 | // Translation for the "equal" op name: passes the first "X" input and the
25 | // first "Y" input to builder::Equal and returns the result as a shared GcuOp.
26 | IMPLEMT_EQUIVALENCE_TRANS_FUNC(
27 |     gcu_builder, op, map_inputs, running_mode, EqualEquivalenceTrans) {
28 |   GcuOp lhs = *(map_inputs["X"].at(0));
29 |   GcuOp rhs = *(map_inputs["Y"].at(0));
30 |   return std::make_shared<GcuOp>(builder::Equal(lhs, rhs));
31 | }
32 | 
33 | EQUIVALENCE_TRANS_FUNC_REG(kEqual, INSENSITIVE, EqualEquivalenceTrans);
34 | 
35 | }  // namespace backend
36 | 


--------------------------------------------------------------------------------
/backends/gcu/backend/equivalence_trans/insensitive_ops/log.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #pragma once
16 | 
17 | #include <memory>
18 | #include <string>
19 | #include <vector>
20 | 
21 | #include "backend/register/register.h"
22 | 
23 | namespace backend {
24 | const char *const kLog = "log";
25 | 
26 | // Translation for the "log" op name: passes the first "X" input to
27 | // builder::Log and returns the result as a shared GcuOp.
28 | IMPLEMT_EQUIVALENCE_TRANS_FUNC(
29 |     gcu_builder, op, map_inputs, running_mode, LogEquivalenceTrans) {
30 |   GcuOp operand = *(map_inputs["X"].at(0));
31 |   return std::make_shared<GcuOp>(builder::Log(operand));
32 | }
33 | 
34 | EQUIVALENCE_TRANS_FUNC_REG(kLog, INSENSITIVE, LogEquivalenceTrans);
35 | 
36 | }  // namespace backend
37 | 


--------------------------------------------------------------------------------
/backends/gcu/backend/equivalence_trans/insensitive_ops/sign.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #pragma once
16 | 
17 | #include <memory>
18 | 
19 | #include "backend/register/register.h"
20 | 
21 | namespace backend {
22 | const char *const kSign = "sign";
23 | 
24 | // Translation for the "sign" op name: passes the first "X" input to
25 | // builder::Sign and returns the result as a shared GcuOp.
26 | IMPLEMT_EQUIVALENCE_TRANS_FUNC(
27 |     gcu_builder, op, map_inputs, running_mode, SignEquivalenceTrans) {
28 |   builder::Op operand = *(map_inputs["X"].at(0));
29 |   return std::make_shared<GcuOp>(builder::Sign(operand));
30 | }
31 | 
32 | EQUIVALENCE_TRANS_FUNC_REG(kSign, INSENSITIVE, SignEquivalenceTrans);
33 | 
34 | }  // namespace backend
35 | 


--------------------------------------------------------------------------------
/backends/gcu/backend/executor/cast_runner.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | #include <tops/tops_ext.h>
17 | 
18 | #include <vector>
19 | 
20 | #include "backend/executor/gcu_node.h"
21 | #include "backend/utils/utils.h"
22 | 
23 | namespace backend {
24 | 
25 | // Declaration only; the definition is not in this header.
26 | //
27 | // Parameters, as far as the signature shows:
28 | //   stream        - tops stream handle.
29 | //   dims          - dimension list, passed by value.
30 | //   src_data_type - phi data type associated with src_buf.
31 | //   dst_data_type - phi data type associated with dst_buf.
32 | //   src_buf       - read-only source buffer.
33 | //   dst_buf       - writable destination buffer.
34 | //
35 | // NOTE(review): presumably converts the elements of src_buf from
36 | // src_data_type to dst_data_type into dst_buf on the given stream -- confirm
37 | // against the implementation, including whether the call is asynchronous.
38 | void CastRunner(const topsStream_t stream,
39 |                 const std::vector<int64_t> dims,
40 |                 const phi::DataType src_data_type,
41 |                 const phi::DataType dst_data_type,
42 |                 const void* src_buf,
43 |                 void* dst_buf);
44 | 
45 | }  // namespace backend
46 | 


--------------------------------------------------------------------------------
/backends/gcu/backend/executor/tops_compiler.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #pragma once
16 | 
17 | #include <tops/tops_ext.h>
18 | 
19 | #include <memory>
20 | 
21 | // Forward declaration: only the name hlir::Module is needed below.
22 | namespace hlir {
23 | class Module;
24 | }
25 | 
26 | namespace backend {
27 | // Declaration only; the definition is not in this header. Takes a shared
28 | // hlir::Module and returns a topsExecutable_t handle.
29 | // NOTE(review): presumably compiles the module into a device executable;
30 | // ownership of the returned handle is not visible here -- confirm.
31 | topsExecutable_t CompileTopsExecutable(
32 |     const std::shared_ptr<hlir::Module> &module);
33 | 
34 | }  // namespace backend
35 | 


--------------------------------------------------------------------------------
/backends/gcu/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../../../cmake/dummy.c.in


--------------------------------------------------------------------------------
/backends/gcu/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/backends/gcu/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/glog.cmake


--------------------------------------------------------------------------------
/backends/gcu/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/backends/gcu/cmake/external/onednn.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/onednn.cmake


--------------------------------------------------------------------------------
/backends/gcu/cmake/external/pybind11.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/pybind11.cmake


--------------------------------------------------------------------------------
/backends/gcu/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/generic.cmake


--------------------------------------------------------------------------------
/backends/gcu/cmake/paddle.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/paddle.cmake


--------------------------------------------------------------------------------
/backends/gcu/cmake/third_party.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/third_party.cmake


--------------------------------------------------------------------------------
/backends/gcu/cmake/version.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/version.cmake


--------------------------------------------------------------------------------
/backends/gcu/common/flags.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/common/flags.h"
16 | 
17 | #include <string>
18 | 
19 | #include "runtime/flags.h"
20 | 
21 | // Defines the int32 flag `custom_engine_min_group_size` with default value 3.
22 | // The string below is the flag's help text and states its intended effect.
23 | FLAGS_DEFINE_int32(custom_engine_min_group_size,
24 |                    3,
25 |                    "when the custom device subgraph size is not larger than "
26 |                    "`custom_engine_min_group_size`, the group will fallback to "
27 |                    "original graph.");
28 | 


--------------------------------------------------------------------------------
/backends/gcu/common/gcu_env_list.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include <string>
18 | // #include "runtime/flags.h"
19 | 
20 | // String constants holding environment-variable names. How each variable is
21 | // interpreted is decided by the code that reads it, not by this header.
22 | namespace env {
23 | const char *const kUseJitKernels = "PADDLE_GCU_USE_JIT_KERNELS_ONLY";
24 | const char *const kProfiler = "PADDLE_GCU_PROFILE";
25 | const char *const kStreamAsync = "PADDLE_RUN_ASYNC";
26 | const char *const kEnableTransOpt = "PADDLE_ENABLE_TRANSPOSE_OPT";
27 | }  // namespace env
28 | 


--------------------------------------------------------------------------------
/backends/gcu/passes/gcu_custom_passes.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "paddle/pir/include/pass/pass_registry.h"
18 | 
19 | // Reference the four custom PIR passes below through USE_PIR_PASS, which
20 | // comes from pass_registry.h.
21 | // NOTE(review): presumably this forces each pass's registration object to be
22 | // linked in -- confirm against the macro definition.
23 | USE_PIR_PASS(addn_replace_pass);
24 | USE_PIR_PASS(gcu_op_marker_pass);
25 | USE_PIR_PASS(gcu_sub_graph_extract_pass);
26 | USE_PIR_PASS(gcu_replace_with_engine_op_pass);
27 | 


--------------------------------------------------------------------------------
/backends/gcu/passes/gcu_pass_pipeline.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include <string>
18 | #include <vector>
19 | 
20 | // Declaration only. Returns a pointer to a read-only vector of strings.
21 | // NOTE(review): presumably the names of the PIR passes to run for GCU;
22 | // lifetime and ownership of the vector are not visible here -- confirm.
23 | const std::vector<std::string>* GetPirGcuPasses();
24 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/fuse_pass/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | # Register every test_*.py in this directory as a CTest test named after the
16 | # file without its .py extension, then give all of them a timeout of 1000.
17 | file(
18 |   GLOB TEST_FUSE_PASS_FILES
19 |   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
20 |   "test_*.py")
21 | 
22 | # Strip only the trailing ".py". A plain string(REPLACE ".py" "") also removes
23 | # ".py" from the middle of a name, and because ".py" is appended again for the
24 | # module path below, such a name would point at a file that does not exist.
25 | set(TEST_FUSE_PASSES "")
26 | foreach(TEST_PASS_FILE ${TEST_FUSE_PASS_FILES})
27 |   string(REGEX REPLACE "\\.py$" "" TEST_PASS "${TEST_PASS_FILE}")
28 |   py_test_modules(${TEST_PASS} false MODULES fuse_pass/${TEST_PASS}.py)
29 |   message(STATUS "with fuse pass: ${TEST_PASS}")
30 |   list(APPEND TEST_FUSE_PASSES ${TEST_PASS})
31 | endforeach()
32 | 
33 | set_tests_properties(${TEST_FUSE_PASSES} PROPERTIES TIMEOUT 1000)
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/fuse_pass/model/conv_bn.pdiparams:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn.pdiparams


--------------------------------------------------------------------------------
/backends/gcu/tests/fuse_pass/model/conv_bn.pdmodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn.pdmodel


--------------------------------------------------------------------------------
/backends/gcu/tests/fuse_pass/model/conv_bn_hard_swish.pdiparams:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn_hard_swish.pdiparams


--------------------------------------------------------------------------------
/backends/gcu/tests/fuse_pass/model/conv_bn_hard_swish.pdmodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn_hard_swish.pdmodel


--------------------------------------------------------------------------------
/backends/gcu/tests/fuse_pass/model/conv_bn_relu.pdiparams:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn_relu.pdiparams


--------------------------------------------------------------------------------
/backends/gcu/tests/fuse_pass/model/conv_bn_relu.pdmodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn_relu.pdmodel


--------------------------------------------------------------------------------
/backends/gcu/tests/fuse_pass/model_graph/test_graph.pdiparams:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model_graph/test_graph.pdiparams


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | # Register every test_*.py in this directory as a CTest test named after the
16 | # file without its .py extension, then give all of them a timeout of 1000.
17 | file(
18 |   GLOB TEST_OP_FILES
19 |   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
20 |   "test_*.py")
21 | 
22 | # Strip only the trailing ".py". A plain string(REPLACE ".py" "") also removes
23 | # ".py" from the middle of a name, and because ".py" is appended again for the
24 | # module path below, such a name would point at a file that does not exist.
25 | set(TEST_OPS "")
26 | foreach(TEST_OP_FILE ${TEST_OP_FILES})
27 |   string(REGEX REPLACE "\\.py$" "" TEST_OP "${TEST_OP_FILE}")
28 |   py_test_modules(${TEST_OP} false MODULES unittests/${TEST_OP}.py)
29 |   message(STATUS "with op unittest: ${TEST_OP}")
30 |   list(APPEND TEST_OPS ${TEST_OP})
31 | endforeach()
32 | 
33 | set_tests_properties(${TEST_OPS} PROPERTIES TIMEOUT 1000)
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | # Load and run ctest.cmake (its contents are not visible from this file).
16 | include(ctest.cmake)
17 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_atan.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(func=paddle.atan, feed_names=["data"], feed_shapes=[[4]])
21 | 
22 | 
23 | @pytest.mark.atan
24 | @pytest.mark.filterwarnings("ignore::UserWarning")
25 | def test_atan():
26 |     data = np.array([-0.4, -0.2, 0.1, 0.3], dtype=np.float32)
27 |     test.run(feed=[data])
28 | 
29 | 
30 | test_atan()
31 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_bmm.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.bmm,
22 |     feed_names=["lhs", "rhs"],
23 |     feed_shapes=[[10, 3, 4], [10, 4, 5]],
24 |     is_train=True,
25 | )
26 | 
27 | 
28 | @pytest.mark.bmm
29 | @pytest.mark.filterwarnings("ignore::UserWarning")
30 | def test_bmm():
31 |     input1 = np.random.random([10, 3, 4]).astype("float32")
32 |     input2 = np.random.random([10, 4, 5]).astype("float32")
33 |     test.run(feed=[input1, input2])
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_clip.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(func=paddle.clip, feed_names=["data"], feed_shapes=[[2, 2]])
21 | 
22 | 
23 | @pytest.mark.clip
24 | @pytest.mark.filterwarning("ignore::UserWarning")
25 | def test_clip():
26 |     data = np.array([[1.2, 3.5], [4.5, 6.4]]).astype("float32")
27 |     test.run(feed=[data], min=3.5, max=5.0)
28 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_cos.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(func=paddle.cos, feed_names=["data"], feed_shapes=[[4]])
21 | 
22 | 
23 | @pytest.mark.cos
24 | @pytest.mark.filterwarnings("ignore::UserWarning")
25 | def test_cos():
26 |     data = np.array([-0.4, -0.2, 0.1, 0.3], dtype=np.float32)
27 |     test.run(feed=[data])
28 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_equal.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.equal, feed_names=["X", "Y"], feed_shapes=[[3], [3]], is_train=False
22 | )
23 | 
24 | 
25 | @pytest.mark.equal
26 | @pytest.mark.filterwarnings("ignore::UserWarning")
27 | def test_equal():
28 |     x = np.array([-0.4, -0.2, 1], dtype=np.float32)
29 |     y = np.array([-0.4, -0.2, 1], dtype=np.float32)
30 |     test.run(feed=[x, y])
31 | 
32 | 
33 | test_equal()
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_expand_as.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.expand_as,
22 |     feed_names=["data", "target"],
23 |     feed_shapes=[[3], [2, 3]],
24 |     is_train=False,
25 | )
26 | 
27 | 
28 | @pytest.mark.expand_as_v2
29 | @pytest.mark.filterwarnings("ignore::UserWarning")
30 | def test_expand_as_v2():
31 |     np.random.seed(1)
32 |     data = np.random.uniform(0, 1, (3,)).astype("float32")
33 |     target = np.random.randn(2, 3).astype("float32")
34 |     test.run(feed=[data, target])
35 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_fc.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.static.nn.fc,
22 |     feed_names=["data"],
23 |     feed_shapes=[[2, 3, 4, 5]],
24 |     input_is_list=False,
25 |     is_train=True,
26 | )
27 | 
28 | 
29 | @pytest.mark.fc
30 | @pytest.mark.filterwarnings("ignore::UserWarning")
31 | def test_fc():
32 |     np.random.seed(1)
33 |     data = np.random.uniform(0, 1, (2, 3, 4, 5)).astype("float32")
34 |     test.run(feed=[data], size=10)
35 | 
36 | 
37 | test_fc()
38 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_full_like.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.full_like,
22 |     feed_names=["data"],
23 |     is_train=False,
24 |     feed_shapes=[[2, 3, 4, 5]],
25 | )
26 | 
27 | 
28 | @pytest.mark.layer_norm
29 | @pytest.mark.filterwarnings("ignore::UserWarning")
30 | def test_full_like():
31 |     data = np.random.random(size=[2, 3, 4, 5]).astype("float32")
32 |     test.run(feed=[data], fill_value=3.0, dtype=np.int64)
33 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_gelu.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.nn.functional.gelu, feed_names=["data"], feed_shapes=[[2, 3]]
22 | )
23 | 
24 | 
25 | @pytest.mark.gelu
26 | @pytest.mark.filterwarnings("ignore::UserWarning")
27 | def test_gelu():
28 |     data = np.random.randn(2, 3).astype("float32")
29 |     test.run(feed=[data])
30 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_greater_equal.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.greater_equal,
22 |     feed_names=["lhs", "rhs"],
23 |     feed_shapes=[[3], [3]],
24 |     is_train=False,
25 | )
26 | 
27 | 
28 | @pytest.mark.greater_equal
29 | @pytest.mark.filterwarnings("ignore::UserWarning")
30 | def test_greater_equal():
31 |     lhs = np.array([-1, 0, 1], dtype=np.float32)
32 |     rhs = np.array([1, 0, -1], dtype=np.float32)
33 |     test.run(feed=[lhs, rhs])
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_greater_than.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.greater_than,
22 |     feed_names=["lhs", "rhs"],
23 |     feed_shapes=[[3], [3]],
24 |     is_train=False,
25 | )
26 | 
27 | 
28 | @pytest.mark.greater_than
29 | @pytest.mark.filterwarnings("ignore::UserWarning")
30 | def test_greater_than():
31 |     lhs = np.array([-1, 0, 1], dtype=np.float32)
32 |     rhs = np.array([1, 0, -1], dtype=np.float32)
33 |     test.run(feed=[lhs, rhs])
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_isinf.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.isinf,
22 |     feed_names=["data"],
23 |     is_train=False,
24 |     feed_shapes=[[7]],
25 |     threshold=1.0e-5,
26 | )
27 | 
28 | 
29 | @pytest.mark.isinf_v2
30 | @pytest.mark.filterwarnings("ignore::UserWarning")
31 | def test_isinfv2():
32 |     np.random.seed(1)
33 |     data = np.array(
34 |         [float("-inf"), -2, 3.6, float("inf"), 0, float("-nan"), float("nan")]
35 |     ).astype("float32")
36 |     test.run(feed=[data])
37 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_label_smooth.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.nn.functional.label_smooth,
22 |     feed_names=["data"],
23 |     feed_shapes=[[1, 2, 3]],
24 |     is_train=False,
25 | )
26 | 
27 | 
28 | @pytest.mark.label_smooth
29 | @pytest.mark.filterwarnings("ignore::UserWarning")
30 | def test_label_smooth():
31 |     data = np.array([[[0, 1, 0], [1, 0, 1]]], dtype=np.float32)
32 |     test.run(feed=[data])
33 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_leaky_relu.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.nn.functional.leaky_relu,
22 |     feed_names=["data"],
23 |     feed_shapes=[[2, 3, 224, 224]],
24 | )
25 | 
26 | 
27 | @pytest.mark.leaky_relu
28 | @pytest.mark.filterwarnings("ignore::UserWarning")
29 | def test_leaky_relu():
30 |     data = np.random.randn(2, 3, 224, 224).astype("float32")
31 |     test.run(feed=[data], negative_slope=0.2)
32 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_log.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test1 = ApiBase(
21 |     func=paddle.log,
22 |     feed_names=["data"],
23 |     is_train=False,
24 |     feed_shapes=[[2, 3]],
25 |     threshold=1.0e-5,
26 | )
27 | 
28 | 
29 | @pytest.mark.log
30 | @pytest.mark.filterwarnings("ignore::UserWarning")
31 | def test_log():
32 |     np.random.seed(1)
33 |     data = np.array([[10, 20, 30], [5, 78, 96]]).astype("float32")
34 |     test1.run(feed=[data])
35 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_maximum.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.maximum,
22 |     feed_names=["x", "y"],
23 |     # is_train=False,
24 |     feed_shapes=[[2, 2, 2], [2, 2]],
25 | )
26 | 
27 | 
28 | @pytest.mark.maximum
29 | @pytest.mark.filterwarning("ignore::UserWarning")
30 | def test_maximum():
31 |     x = np.array([[[2, 3], [4, 6]], [[7, 8], [5, 9]]]).astype("float32")
32 |     y = np.array([[2, 3.1], [1, 8.1]]).astype("float32")
33 |     test.run(feed=[x, y])
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_mean.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.mean, feed_names=["data"], feed_shapes=[[2, 3]], is_train=True
22 | )
23 | 
24 | 
25 | @pytest.mark.mean
26 | @pytest.mark.filterwarnings("ignore::UserWarning")
27 | def test_mean():
28 |     np.random.seed(1)
29 |     data = np.random.random(size=[2, 3]).astype("float32")
30 |     test.run(feed=[data])
31 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_minimum.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.minimum,
22 |     feed_names=["data1", "data2"],
23 |     feed_shapes=[[1, 2, 3], [3]],
24 |     is_train=True,
25 | )
26 | 
27 | 
28 | @pytest.mark.minimum
29 | @pytest.mark.filterwarnings("ignore::UserWarning")
30 | def test_minimum():
31 |     data1 = np.array([[[-0.4, 1, -0.2], [0.1, 2, 0.3]]], dtype=np.float32)
32 |     data2 = np.array([1, -1, 0], dtype=np.float32)
33 |     test.run(feed=[data1, data2])
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_one_hot.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.nn.functional.one_hot,
22 |     feed_names=["data"],
23 |     #    is_train=False,
24 |     feed_shapes=[[4, 1]],
25 |     feed_dtypes=["int64"],
26 | )
27 | 
28 | 
29 | @pytest.mark.one_hot
30 | @pytest.mark.filterwarnings("ignore::UserWarning")
31 | def test_one_hot():
32 |     data = np.array([[1], [1], [3], [5]]).astype("int64")
33 |     test.run(feed=[data], num_classes=6)
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_reverse.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | 
21 | test = ApiBase(func=paddle.reverse, feed_names=["data"], feed_shapes=[[7, 8, 9, 10]])
22 | 
23 | 
24 | @pytest.mark.reverse
25 | @pytest.mark.filterwarnings("ignore::UserWarning")
26 | def test_reverse():
27 |     data = np.random.random(size=[7, 8, 9, 10]).astype("float32")
28 |     test.run(feed=[data], axis=[-1, 0, 2])
29 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_shape.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.shape, feed_names=["data"], feed_shapes=[[2, 4]], is_train=False
22 | )
23 | np.random.seed(1)
24 | 
25 | 
26 | @pytest.mark.shape
27 | @pytest.mark.filterwarnings("ignore::UserWarning")
28 | def test_shape():
29 |     data = np.random.uniform(1, 10, (2, 4)).astype("float32")
30 |     test.run(feed=[data])
31 | 
32 | 
33 | test_shape()
34 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_sign.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | 
21 | test = ApiBase(func=paddle.sign, feed_names=["data"], feed_shapes=[[2, 3]])
22 | 
23 | 
24 | @pytest.mark.sign
25 | @pytest.mark.filterwarnings("ignore::UserWarning")
26 | def test_sign():
27 |     data = np.array([[-45.432, -0.23, 55.55], [3.3, 0.0, -43.6]]).astype("float32")
28 |     test.run(feed=[data])
29 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_silu.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.nn.functional.silu, feed_names=["data"], feed_shapes=[[2, 3]]
22 | )
23 | 
24 | 
25 | @pytest.mark.silu
26 | @pytest.mark.filterwarnings("ignore::UserWarning")
27 | def test_silu():
28 |     data = np.random.randn(2, 3).astype("float32")
29 |     test.run(feed=[data])
30 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_squeeze_v2.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(
21 |     func=paddle.squeeze,
22 |     feed_names=["data"],
23 |     feed_shapes=[[2, 1, 4]],
24 |     input_is_list=False,
25 |     is_train=True,
26 | )
27 | 
28 | 
29 | @pytest.mark.squeeze2
30 | @pytest.mark.filterwarnings("ignore::UserWarning")
31 | def test_squeeze():
32 |     np.random.seed(1)
33 |     data = np.random.uniform(0, 1, (2, 1, 4)).astype("float32")
34 |     test.run(feed=[data], axis=[1])
35 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_jit/test_tanh.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from api_base import ApiBase
16 | import paddle
17 | import pytest
18 | import numpy as np
19 | 
20 | test = ApiBase(func=paddle.tanh, feed_names=["data"], feed_shapes=[[2, 3]])
21 | 
22 | 
23 | @pytest.mark.tanh
24 | @pytest.mark.filterwarnings("ignore::UserWarning")
25 | def test_tanh():
26 |     data = np.array([[-0.4, 0.6, 2.3], [1.0, 5.0, -3.2]], dtype=np.float32)
27 |     test.run(feed=[data])
28 | 


--------------------------------------------------------------------------------
/backends/gcu/tests/unittests_legacy/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | file(
16 |   GLOB TEST_OPS  # gather every file matching test_*.py into TEST_OPS
17 |   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"  # results are relative to this dir
18 |   "test_*.py")
19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")  # strip ".py" from each entry
20 | 
21 | foreach(TEST_OP ${TEST_OPS})  # py_test_modules is defined outside this file
22 |   py_test_modules(${TEST_OP} false MODULES unittests_legacy/${TEST_OP}.py)
23 |   message(STATUS "with op unittest: ${TEST_OP}")  # log each entry at configure
24 | endforeach()
25 | 
26 | set_tests_properties(${TEST_OPS} PROPERTIES TIMEOUT 1000)  # TIMEOUT in seconds
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/README.md:
--------------------------------------------------------------------------------
 1 | # PaddlePaddle Custom Device Implementation for Iluvatar GPU
 2 | 
 3 | English | [简体中文](./README_cn.md)
 4 | 
 5 | Please refer to the following steps to compile, install and verify the custom device implementation for Iluvatar GPU.
 6 | 
 7 | ## Compilation and Installation
 8 | 
 9 | ```bash
10 | # Please contact Iluvatar customer support (services@iluvatar.com) to obtain the SDK image
11 | 
12 | # Clone PaddleCustomDevice source code
13 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice
14 | 
15 | # Set environment variables
16 | export PATH=/usr/local/corex-4.3.0/bin:$PATH
17 | export LD_LIBRARY_PATH=/usr/local/corex-4.3.0/lib
18 | export LIBRARY_PATH=/usr/local/corex-4.3.0/lib
19 | 
20 | # Compile Paddle Custom Device
21 | cd backends/iluvatar_gpu
22 | bash build_paddle.sh
23 | 
24 | # Install
25 | bash install_paddle.sh
26 | ```
27 | 
28 | ## Verification
29 | 
30 | ```bash
31 | # Run tests
32 | cd tests
33 | bash run_test.sh
34 | ```
35 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/README_cn.md:
--------------------------------------------------------------------------------
 1 | # 飞桨自定义接入硬件后端(天数GPU)
 2 | 
 3 | 简体中文 | [English](./README.md)
 4 | 
 5 | 请参考以下步骤进行编译安装与验证
 6 | 
 7 | ## 编译安装
 8 | 
 9 | ```bash
10 | # 请联系天数智芯客户支持(services@iluvatar.com)获取SDK镜像
11 | 
12 | # 克隆PaddleCustomDevice源码
13 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice
14 | 
15 | # 设置环境变量
16 | export PATH=/usr/local/corex-4.3.0/bin:$PATH
17 | export LD_LIBRARY_PATH=/usr/local/corex-4.3.0/lib
18 | export LIBRARY_PATH=/usr/local/corex-4.3.0/lib
19 | 
20 | # 编译 Paddle Custom Device
21 | cd backends/iluvatar_gpu
22 | bash build_paddle.sh
23 | 
24 | # 安装
25 | bash install_paddle.sh
26 | ```
27 | 
28 | ## 验证
29 | 
30 | ```bash
31 | # 运行测试
32 | cd tests
33 | bash run_test.sh
34 | ```
35 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/clean_paddle.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 4 | # 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | # 
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | # 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | rm -rf build
18 | rm -rf build_pip
19 | rm -rf build_lib
20 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/cblas.cmake:
--------------------------------------------------------------------------------
1 | ../../../Paddle/cmake/cblas.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/cuda.cmake:
--------------------------------------------------------------------------------
1 | ../../../Paddle/cmake/cuda.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../../../Paddle/cmake/dummy.c.in


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/external/cccl.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/cccl.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/external/mklml.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/mklml.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/external/xxhash.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/xxhash.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/external/zlib.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/zlib.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../../../Paddle/cmake/generic.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/paddle.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/paddle.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/third_party.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/third_party.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/cmake/version.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/version.cmake


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/addmm_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #include "../impl/addmm_kernel_impl.h"
16 | #include "paddle/phi/backends/gpu/gpu_context.h"
17 | #include "paddle/phi/core/kernel_registry.h"
18 | #include "paddle/phi/kernels/addmm_kernel.h"
19 | 
20 | PD_CUSTOM_KERNEL_REGISTER(addmm,  // name this entry is filed under
21 |                           iluvatar_gpu,  // backend receiving the kernel
22 |                           ALL_LAYOUT,
23 |                           phi::AddmmKernel,  // kernel function being exposed
24 |                           float,  // element types start here
25 |                           phi::dtype::float16,
26 |                           phi::dtype::bfloat16) {}  // no per-kernel setup
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/c_embedding_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/c_embedding_grad_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(c_embedding_grad,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::CEmbeddingGradKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           phi::dtype::bfloat16,
24 |                           phi::dtype::float16,
25 |                           phi::dtype::complex<float>) {}  // no extra setup
26 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/c_embedding_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/c_embedding_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(c_embedding,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::CEmbeddingKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           phi::dtype::bfloat16,
24 |                           phi::dtype::float16,
25 |                           phi::dtype::complex<float>) {}  // no extra setup
26 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/c_identity_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/c_identity_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(c_identity,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::CIdentityKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           int,
24 |                           int64_t,
25 |                           phi::dtype::bfloat16,
26 |                           phi::dtype::float16) {}  // no per-kernel setup
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/c_softmax_with_cross_entropy_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu"  // NOLINT
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(c_softmax_with_cross_entropy_grad,
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::CSoftmaxWithCrossEntropyGradKernel,
22 |                           float,  // presumably defined in the included .cu
23 |                           phi::dtype::float16) {}  // no per-kernel setup
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/c_softmax_with_cross_entropy_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu"  // NOLINT
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(c_softmax_with_cross_entropy,
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::CSoftmaxWithCrossEntropyKernel,
22 |                           float,  // presumably defined in the included .cu
23 |                           phi::dtype::float16) {}  // no per-kernel setup
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/clip_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/clip_grad_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(clip_grad,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::ClipGradKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           int,
24 |                           int64_t,
25 |                           phi::dtype::bfloat16,
26 |                           phi::dtype::float16) {}  // no per-kernel setup
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/clip_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/clip_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(clip,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::ClipKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           int,
24 |                           int64_t,
25 |                           phi::dtype::float16,
26 |                           phi::dtype::bfloat16) {}  // no per-kernel setup
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/embedding_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/embedding_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(embedding,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::EmbeddingKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           int8_t,
24 |                           phi::dtype::float16,
25 |                           phi::dtype::bfloat16,
26 |                           phi::dtype::complex<float>) {}  // no extra setup
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/logsumexp_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/logsumexp_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(logsumexp,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::LogsumexpKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}  // no per-kernel setup
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/mean_all_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/mean_all_grad_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(mean_all_grad,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::MeanAllGradKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           phi::dtype::float16,
24 |                           phi::dtype::complex<float>) {}  // no extra setup
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/mean_all_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/mean_all_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(mean_all,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::MeanAllKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           phi::dtype::float16,
24 |                           phi::dtype::complex<float>) {}  // no extra setup
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/multinomial_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/multinomial_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(multinomial,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::MultinomialKernel,  // kernel being exposed
22 |                           phi::dtype::float16,  // element types start here
23 |                           phi::dtype::bfloat16,
24 |                           float) {  // body customizes the `kernel` entry
25 |   kernel->OutputAt(0).SetDataType(phi::DataType::INT64);  // output 0: INT64
26 | }
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/one_hot_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/one_hot_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(  // one_hot entry for the iluvatar_gpu backend
19 |     one_hot, iluvatar_gpu, ALL_LAYOUT, phi::OneHotKernel, int, int64_t) {
20 |   kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);  // out 0: FLOAT32
21 | }
22 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/p_norm_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/p_norm_grad_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(p_norm_grad,  // name of this entry
19 |                           iluvatar_gpu,  // backend receiving the kernel
20 |                           ALL_LAYOUT,
21 |                           phi::PNormGradKernel,  // kernel being exposed
22 |                           float,  // element types start here
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}  // no per-kernel setup
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/p_norm_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/p_norm_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(p_norm,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::PNormKernel,  // kernel function; presumably declared in p_norm_kernel.h
22 |                           float,  // dtype list: float, float16, bfloat16
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}  // trailing body is empty
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/pad_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/pad_grad_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(pad_grad,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::PadGradKernel,  // kernel function; presumably declared in pad_grad_kernel.h
22 |                           float,  // dtype list: float, float16, bfloat16, complex<float>
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16,
25 |                           phi::dtype::complex<float>) {}  // no integer dtypes are listed here; trailing body is empty
26 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/pad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/pad_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(pad,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::PadKernel,  // kernel function; presumably declared in pad_kernel.h
22 |                           float,  // dtype list: float, int, int64_t, float16, bfloat16, complex<float>
23 |                           int,
24 |                           int64_t,
25 |                           phi::dtype::float16,
26 |                           phi::dtype::bfloat16,
27 |                           phi::dtype::complex<float>) {}  // trailing body is empty
28 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/put_along_axis_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/put_along_axis_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(put_along_axis,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::PutAlongAxisKernel,  // kernel function; presumably declared in put_along_axis_kernel.h
22 |                           float,  // dtype list: float, int64_t, int, float16, bfloat16
23 |                           int64_t,
24 |                           int,
25 |                           phi::dtype::float16,
26 |                           phi::dtype::bfloat16) {}  // trailing body is empty
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/randint_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/randint_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(  // registers kernel name randint for the iluvatar_gpu backend
19 |     randint, iluvatar_gpu, ALL_LAYOUT, phi::RandintKernel, int, int64_t) {}  // dtype list: int, int64_t; trailing body is empty
20 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/reshape_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | //   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/reshape_grad_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER_FOR_ALL_DTYPE(reshape_grad,  // kernel name being registered
19 |                                         iluvatar_gpu,  // backend identifier passed to the macro
20 |                                         ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                                         phi::ReshapeGradKernel) {}  // no dtype list is passed to this macro variant; going by its name it covers every dtype, confirm in kernel_registry.h
22 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/reshape_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | //   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/reshape_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER_FOR_ALL_DTYPE(reshape,  // kernel name being registered
19 |                                         iluvatar_gpu,  // backend identifier passed to the macro
20 |                                         ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                                         phi::ReshapeKernel) {}  // no dtype list is passed to this macro variant; going by its name it covers every dtype, confirm in kernel_registry.h
22 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/squared_l2_norm_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/squared_l2_norm_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(squared_l2_norm,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::SquaredL2NormKernel,  // kernel function; presumably declared in squared_l2_norm_kernel.h
22 |                           float,  // dtype list: float, float16, bfloat16
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}  // trailing body is empty
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/swiglu_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/swiglu_grad_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(swiglu_grad,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::SwiGLUGradKernel,  // kernel function; presumably declared in swiglu_grad_kernel.h
22 |                           float,  // dtype list: float, double, float16, bfloat16
23 |                           double,  // NOTE(review): the forward swiglu registration in this backend lists no double; confirm the asymmetry is intended
24 |                           phi::dtype::float16,
25 |                           phi::dtype::bfloat16) {}  // trailing body is empty
26 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/swiglu_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/swiglu_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(swiglu,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::SwiGLUKernel,  // kernel function; presumably declared in swiglu_kernel.h
22 |                           float,  // dtype list: float, float16, bfloat16
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}  // trailing body is empty
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/take_along_axis_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/take_along_axis_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(take_along_axis,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::TakeAlongAxisKernel,  // kernel function; presumably declared in take_along_axis_kernel.h
22 |                           float,  // dtype list: float, int64_t, int, float16, bfloat16
23 |                           int64_t,
24 |                           int,
25 |                           phi::dtype::float16,
26 |                           phi::dtype::bfloat16) {}  // trailing body is empty
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/uniform_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/uniform_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(uniform,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::UniformKernel,  // kernel function; presumably declared in uniform_kernel.h
22 |                           float,  // dtype list: float, float16, bfloat16
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}  // trailing body is empty
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/cuda_kernels/where_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/where_kernel.h"
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(where,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::WhereKernel,  // kernel function; presumably declared in where_kernel.h
22 |                           float,  // dtype list: float, int, bool, int64_t, float16, bfloat16
23 |                           int,
24 |                           bool,
25 |                           int64_t,
26 |                           phi::dtype::float16,
27 |                           phi::dtype::bfloat16) {}  // trailing body is empty
28 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/cal_aux_loss_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/cal_aux_loss_grad_kernel.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(cal_aux_loss_grad,  // kernel name being registered
18 |                           iluvatar_gpu,  // backend identifier passed to the macro
19 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
20 |                           phi::CalAuxLossGradKernel,  // kernel function; presumably declared in cal_aux_loss_grad_kernel.h
21 |                           float) {}  // float is the only dtype listed; trailing body is empty
22 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/expand_modality_expert_id_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/expand_modality_expert_id_kernel.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(expand_modality_expert_id,  // kernel name being registered
18 |                           iluvatar_gpu,  // backend identifier passed to the macro
19 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
20 |                           phi::ExpandModalityExpertIDKernel,  // kernel function; presumably declared in expand_modality_expert_id_kernel.h
21 |                           int,  // dtype list: int, int64_t (integer types only)
22 |                           int64_t) {}  // trailing body is empty
23 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/fused_bias_act_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu"  //NOLINT
17 | 
18 | PD_CUSTOM_KERNEL_REGISTER(fused_bias_act,  // kernel name being registered
19 |                           iluvatar_gpu,  // backend identifier passed to the macro
20 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
21 |                           phi::fusion::FusedBiasActKernel,  // kernel function in phi::fusion; presumably defined in the fused_bias_act_kernel.cu file this translation unit includes directly
22 |                           float,  // dtype list: float, bfloat16, float16, int32_t
23 |                           phi::dtype::bfloat16,
24 |                           phi::dtype::float16,
25 |                           int32_t) {}  // trailing body is empty
26 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/int_bincount_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/int_bincount.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(  // registers kernel name int_bincount for the iluvatar_gpu backend
18 |     int_bincount, iluvatar_gpu, ALL_LAYOUT, phi::IntBincount, int64_t, int) {}  // bound function is phi::IntBincount (no Kernel suffix); dtype list: int64_t, int; trailing body is empty
19 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/layer_norm_cuda_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/gpu/layer_norm_cuda_kernel.cu"  //NOLINT
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(  // registers kernel name fused_rms_norm for the iluvatar_gpu backend
18 |     fused_rms_norm, iluvatar_gpu, ALL_LAYOUT, phi::RMSLnFwd, float) {}  // bound function is phi::RMSLnFwd, presumably from the layer_norm_cuda_kernel.cu file included directly; float is the only dtype listed; trailing body is empty
19 | 
20 | PD_CUSTOM_KERNEL_REGISTER(  // registers kernel name fused_rms_norm_grad for the iluvatar_gpu backend
21 |     fused_rms_norm_grad, iluvatar_gpu, ALL_LAYOUT, phi::RMSLnBwd, float) {}  // bound function is phi::RMSLnBwd, presumably from the layer_norm_cuda_kernel.cu file included directly; float is the only dtype listed; trailing body is empty
22 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/moe_combine_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/moe_combine_grad_kernel.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(moe_combine_grad,  // kernel name being registered
18 |                           iluvatar_gpu,  // backend identifier passed to the macro
19 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
20 |                           phi::MoeCombineGradKernel,  // kernel function; presumably declared in moe_combine_grad_kernel.h
21 |                           float,  // dtype list: float, bfloat16, float16
22 |                           phi::dtype::bfloat16,
23 |                           phi::dtype::float16) {}  // trailing body is empty
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/moe_combine_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/moe_combine_kernel.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(moe_combine,  // kernel name being registered
18 |                           iluvatar_gpu,  // backend identifier passed to the macro
19 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
20 |                           phi::MoeCombineKernel,  // kernel function; presumably declared in moe_combine_kernel.h
21 |                           float,  // dtype list: float, bfloat16, float16
22 |                           phi::dtype::bfloat16,
23 |                           phi::dtype::float16) {}  // trailing body is empty
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/moe_gate_dispatch_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/moe_gate_dispatch_grad_kernel.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_grad,  // kernel name being registered
18 |                           iluvatar_gpu,  // backend identifier passed to the macro
19 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
20 |                           phi::MoeGateDispatchGradKernel,  // kernel function; presumably declared in moe_gate_dispatch_grad_kernel.h
21 |                           float,  // dtype list: float, float16, bfloat16
22 |                           phi::dtype::float16,
23 |                           phi::dtype::bfloat16) {}  // trailing body is empty
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/moe_gate_dispatch_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/moe_gate_dispatch_kernel.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch,  // kernel name being registered (forward op, no _grad suffix)
18 |                           iluvatar_gpu,  // backend identifier passed to the macro
19 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
20 |                           phi::MoeGradDispatchKernel,  // NOTE(review): symbol is spelled MoeGrad..., not MoeGate..., although the op is the forward dispatch; confirm it matches the declaration in moe_gate_dispatch_kernel.h
21 |                           float,  // dtype list: float, float16, bfloat16
22 |                           phi::dtype::float16,
23 |                           phi::dtype::bfloat16) {}  // trailing body is empty
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/moe_gate_dispatch_permute_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/moe_gate_dispatch_permute_grad_kernel.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_permute_grad,  // kernel name being registered
18 |                           iluvatar_gpu,  // backend identifier passed to the macro
19 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
20 |                           phi::MoeGateDispatchGradKernel,  // NOTE(review): same symbol name as the one bound for moe_gate_dispatch_grad, with no Permute in it; confirm moe_gate_dispatch_permute_grad_kernel.h declares the intended function under this name
21 |                           float,  // dtype list: float, float16, bfloat16
22 |                           phi::dtype::float16,
23 |                           phi::dtype::bfloat16) {}  // trailing body is empty
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/moe_gate_dispatch_permute_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/moe_gate_dispatch_permute_kernel.h"
16 | 
17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_permute,  // kernel name being registered
18 |                           iluvatar_gpu,  // backend identifier passed to the macro
19 |                           ALL_LAYOUT,  // layout slot; presumably means any layout, confirm in kernel_registry.h
20 |                           phi::MoEDispatchPermuteKernel,  // kernel function; presumably declared in moe_gate_dispatch_permute_kernel.h
21 |                           float,  // dtype list: float, float16, bfloat16
22 |                           phi::dtype::float16,
23 |                           phi::dtype::bfloat16) {}  // trailing body is empty
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/moe_ops_partial_nosoftmaxtopk_grad_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/moe_ops_partial_nosoftmaxtopk_grad_kernel.h"
16 | // Registers op `moe_gate_dispatch_partial_nosoftmaxtopk_grad` for iluvatar_gpu.
17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_partial_nosoftmaxtopk_grad,
18 |                           iluvatar_gpu,
19 |                           ALL_LAYOUT,
20 |                           phi::MoeGateDispatchPartialNoSoftMaxTopkGradKernel,  // kernel function bound to the op
21 |                           float,  // registered data types: float, float16, bfloat16
22 |                           phi::dtype::float16,
23 |                           phi::dtype::bfloat16) {}
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/moe_ops_partial_nosoftmaxtopk_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/moe_ops_partial_nosoftmaxtopk_kernel.h"
16 | // Registers op `moe_gate_dispatch_partial_nosoftmaxtopk` for iluvatar_gpu.
17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_partial_nosoftmaxtopk,
18 |                           iluvatar_gpu,
19 |                           ALL_LAYOUT,
20 |                           phi::MoeGateDispatchPartialNoSoftMaxTopkKernel,  // kernel function bound to the op
21 |                           float,  // registered data types: float, bfloat16, float16
22 |                           phi::dtype::bfloat16,
23 |                           phi::dtype::float16) {}
24 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/register_build_src_rank_and_local_expert_id_kernel.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/build_src_rank_and_local_expert_id_kernel.h"
16 | // Registers op `build_src_rank_and_local_expert_id` for the iluvatar_gpu backend.
17 | PD_CUSTOM_KERNEL_REGISTER(build_src_rank_and_local_expert_id,
18 |                           iluvatar_gpu,
19 |                           ALL_LAYOUT,
20 |                           phi::BuildSrcRankAndLocalExpertIdKernel,  // kernel function bound to the op
21 |                           int32_t,  // registered data types: int32_t, int64_t
22 |                           int64_t) {}
23 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/rms_norm_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/rms_norm_kernel.h"
17 | // Registers op `rms_norm` for the iluvatar_gpu backend.
18 | PD_CUSTOM_KERNEL_REGISTER(rms_norm,
19 |                           iluvatar_gpu,
20 |                           ALL_LAYOUT,
21 |                           phi::RmsNormKernel,  // kernel function bound to the op
22 |                           float,  // registered data types: float, float16, bfloat16
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}
25 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/ernie_core/top_p_sampling_kernel_register.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/top_p_sampling_kernel.h"
17 | // Registers op `top_p_sampling` for the iluvatar_gpu backend.
18 | PD_CUSTOM_KERNEL_REGISTER(top_p_sampling,
19 |                           iluvatar_gpu,
20 |                           ALL_LAYOUT,
21 |                           phi::TopPSamplingKernel,  // kernel function bound to the op
22 |                           float,  // registered data types: float, int, int64_t, float16, bfloat16
23 |                           int,
24 |                           int64_t,
25 |                           phi::dtype::float16,
26 |                           phi::dtype::bfloat16) {}
27 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/kernels/funcs/blas/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | collect_srcs(kernels_srcs SRCS blas.cc)  # presumably adds blas.cc to kernels_srcs; collect_srcs is defined outside this file
2 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/setup.py.in:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, Distribution
 2 | # NOTE(review): `packages` and `package_data` below are never passed to setup(); it uses its own literals.
 3 | packages = []
 4 | package_data = {}
 5 | # Distribution whose has_ext_modules() is always True; presumably so the wheel is built as platform-specific (it ships *.so files) — confirm.
 6 | class BinaryDistribution(Distribution):
 7 |     def has_ext_modules(self):
 8 |         return True
 9 | # The @NAME@ placeholders are presumably substituted when this .in template is configured by the build — confirm.
10 | setup(
11 |     name = '@CMAKE_PROJECT_NAME@',
12 |     version='@PLUGIN_VERSION@',
13 |     description='Paddle iluvatar_gpu plugin',
14 |     long_description='',
15 |     long_description_content_type="text/markdown",
16 |     author_email="Paddle-better@baidu.com",
17 |     maintainer="PaddlePaddle",
18 |     maintainer_email="Paddle-better@baidu.com",
19 |     project_urls={},
20 |     license='Apache Software License',
21 |     packages= [
22 |         'paddle_custom_device',
23 |     ],
24 |     include_package_data=True,
25 |     package_data = {
26 |         '': ['*.so', '*.h', '*.py', '*.hpp'],  # file patterns bundled with every package
27 |     },
28 |     package_dir = {
29 |         '': 'python',  # packages are looked up under the python/ directory
30 |     },
31 |     zip_safe=False,
32 |     distclass=BinaryDistribution,
33 |     entry_points={
34 |         'console_scripts': [
35 |         ]
36 |     },
37 |     classifiers=[
38 |     ],
39 |     keywords='Paddle iluvatar_gpu plugin',
40 | )
41 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | project(MyProject)
 3 | # NOTE(review): the interpreter located here is not referenced below; the target runs `pytest` as found on PATH.
 4 | find_package(Python REQUIRED COMPONENTS Interpreter)
 5 | # Custom target `run_test`: runs pytest on ${CMAKE_SOURCE_DIR}/unittests/ from ${CMAKE_SOURCE_DIR}.
 6 | add_custom_target(
 7 |   run_test
 8 |   COMMAND pytest ${CMAKE_SOURCE_DIR}/unittests/
 9 |   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
10 |   COMMENT "Running unit tests with pytest")
11 | # NOTE(review): CMAKE_DEFAULT_MAKE_TARGET does not appear to be a variable CMake acts on — confirm this line has any effect.
12 | set(CMAKE_DEFAULT_MAKE_TARGET run_test)
13 | 


--------------------------------------------------------------------------------
/backends/iluvatar_gpu/tests/run_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 4 | # 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | # 
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | # 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | export PYTHONPATH=${PYTHONPATH}:/home/tianyu.zhou/PaddleCustomDevice/Paddle/test/legacy_test
18 | mkdir -p build && cd build && cmake ..
19 | make run_test
20 | cd -
21 | rm -rf build
22 | 


--------------------------------------------------------------------------------
/backends/intel_gpu/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../../../Paddle/cmake/dummy.c.in


--------------------------------------------------------------------------------
/backends/intel_gpu/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/backends/intel_gpu/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/glog.cmake


--------------------------------------------------------------------------------
/backends/intel_gpu/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/backends/intel_gpu/cmake/external/onednn.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/onednn.cmake


--------------------------------------------------------------------------------
/backends/intel_gpu/cmake/external/pybind11.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/pybind11.cmake


--------------------------------------------------------------------------------
/backends/intel_gpu/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../../../Paddle/cmake/generic.cmake


--------------------------------------------------------------------------------
/backends/intel_gpu/load.sh:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | # 
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | # 
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | # 
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | 
17 | PaddleDev=$(dirname $(dirname `pwd`))
18 | 
19 | echo $d
20 | export PYTHONPATH=$PYTHONPATH:${PaddleDev}/python/tests/
21 | 
22 | 
23 | comp="dnnl tbb compiler"
24 | 
25 | for item in $comp;
26 | do
27 | 
28 | 	P="${HOME}/intel/oneapi/$item/latest/env/vars.sh"
29 |         echo "$P"
30 |         source $P
31 | done
32 | 


--------------------------------------------------------------------------------
/backends/intel_gpu/setup.py.in:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, Distribution
 2 | 
 3 | packages = []
 4 | package_data = {}
 5 | 
 6 | class BinaryDistribution(Distribution):
 7 |     def has_ext_modules(self):
 8 |         return True
 9 | 
10 | setup(
11 |     name = '@CMAKE_PROJECT_NAME@',
12 |     version='@PLUGIN_VERSION@',
13 |     description='Paddle CustomCPU plugin',
14 |     long_description='',
15 |     long_description_content_type="text/markdown",
16 |     author_email="Paddle-better@baidu.com",
17 |     maintainer="PaddlePaddle",
18 |     maintainer_email="Paddle-better@baidu.com",
19 |     project_urls={},
20 |     license='Apache Software License',
21 |     packages= [
22 |         'paddle_custom_device',
23 |     ],
24 |     include_package_data=True,
25 |     package_data = {
26 |         '': ['*.so', '*.h', '*.py', '*.hpp'],
27 |     },
28 |     package_dir = {
29 |         '': 'python',
30 |     },
31 |     zip_safe=False,
32 |     distclass=BinaryDistribution,
33 |     entry_points={
34 |         'console_scripts': [
35 |         ]
36 |     },
37 |     classifiers=[
38 |     ],
39 |     keywords='Paddle CustomCPU plugin',
40 | )
41 | 


--------------------------------------------------------------------------------
/backends/intel_gpu/tests/unittests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | file(
16 |   GLOB TEST_OPS
17 |   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
18 |   "test_*.py")  # every test_*.py in this directory, as paths relative to it
19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")  # removes every ".py" occurrence, leaving bare names
20 | # Call py_test_modules once per name; py_test_modules is defined outside this file.
21 | foreach(TEST_OP ${TEST_OPS})
22 |   py_test_modules(${TEST_OP} MODULES ${TEST_OP})
23 | endforeach()
24 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../../../cmake/dummy.c.in


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/external/custom_tpc_lib.cmake:
--------------------------------------------------------------------------------
 1 | # Download libcustom_tpc_perf_lib.so at configure time into the intel_hpu
 2 | # Python package directory of the build tree.
 3 | set(DOWNLOAD_URL "https://paddle-ci.cdn.bcebos.com/libcustom_tpc_perf_lib.so")
 4 | set(TARGET_DIR "${CMAKE_BINARY_DIR}/python/paddle_custom_device/intel_hpu")
 5 | set(TARGET_PATH "${TARGET_DIR}/libcustom_tpc_perf_lib.so")
 6 | 
 7 | # Quote the paths so a build directory containing spaces or ';' stays one argument.
 8 | file(MAKE_DIRECTORY "${TARGET_DIR}")
 9 | file(DOWNLOAD "${DOWNLOAD_URL}" "${TARGET_PATH}" STATUS download_status)
10 | 
11 | # STATUS is a two-element list: numeric code (0 = success) and a message.
12 | list(GET download_status 0 download_success)
13 | if(NOT (download_success EQUAL 0))
14 |   list(GET download_status 1 download_error)
15 |   # Do not leave a partial or empty file behind for a later step to pick up.
16 |   file(REMOVE "${TARGET_PATH}")
17 |   message(
18 |     FATAL_ERROR
19 |       "Failed to download ${DOWNLOAD_URL} to ${TARGET_PATH}: ${download_error}")
20 | endif()
21 | 
22 | message(STATUS "Downloaded ${DOWNLOAD_URL} to ${TARGET_PATH}")
23 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/glog.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/external/onednn.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/onednn.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/external/pybind11.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/pybind11.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/external/synapse.cmake:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License.
14 | execute_process(  # ask dpkg-query for the habanalabs-graph package version
15 |   COMMAND dpkg-query --show --showformat='\${Version}' habanalabs-graph
16 |   OUTPUT_VARIABLE SYNAPSE_VERSION
17 |   OUTPUT_STRIP_TRAILING_WHITESPACE)  # NOTE(review): no shell is involved, so the single quotes presumably reach dpkg-query and end up in SYNAPSE_VERSION — confirm
18 | # Empty output is treated as "Synapse not installed" and aborts configuration.
19 | if(SYNAPSE_VERSION)
20 |   message(STATUS "Synapse version: ${SYNAPSE_VERSION}")
21 | else()
22 |   message(FATAL_ERROR "Synapse is not installed.")
23 | endif()
24 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/generic.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/paddle.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/paddle.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/third_party.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/third_party.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/cmake/version.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/version.cmake


--------------------------------------------------------------------------------
/backends/intel_hpu/custom_ops/python/paddlenlp_ops/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from paddle_custom_device.intel_hpu.ops import *  # noqa
16 | from .layers import *  # noqa
17 | from .llama_block_atten import *  # noqa
18 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/tests/ccl/allgather.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import paddle
16 | import paddle.distributed as dist
17 | # all_gather smoke script: every rank contributes one 2x3 float32 tensor and prints the gathered list.
18 | dist.init_parallel_env()  # NOTE(review): no paddle.set_device("intel_hpu") here, unlike allreduce.py — confirm that is intended
19 | tensor_list = []
20 | if dist.get_rank() == 0:
21 |     data = paddle.to_tensor([[4, 5, 6], [4, 5, 6]], dtype="float32")
22 | else:
23 |     data = paddle.to_tensor([[1, 2, 3], [1, 2, 3]], dtype="float32")
24 | dist.all_gather(tensor_list, data)
25 | print(tensor_list)
26 | # Expected with 2 ranks: [[[4, 5, 6], [4, 5, 6]], [[1, 2, 3], [1, 2, 3]]]
27 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/tests/ccl/allreduce.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import paddle
16 | import paddle.distributed as dist
17 | # all_reduce smoke script: runs on the intel_hpu device and prints the reduced tensor.
18 | paddle.set_device("intel_hpu")
19 | # Rank 0 contributes rows of 4/5/6; every other rank contributes rows of 1/2/3.
20 | dist.init_parallel_env()
21 | if dist.get_rank() == 0:
22 |     data = paddle.to_tensor([[4, 5, 6], [4, 5, 6]], dtype="float32")
23 | else:
24 |     data = paddle.to_tensor([[1, 2, 3], [1, 2, 3]], dtype="float32")
25 | dist.all_reduce(data)  # no op argument given; presumably a sum by default — confirm
26 | print(data)
27 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/tests/ccl/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | # cmd
3 | 
4 | 
5 | `INTEL_HPU_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PADDLE_DISTRI_BACKEND=xccl PADDLE_XCCL_BACKEND=intel_hpu  python -m paddle.distributed.launch --devices "6,7" --log_level=DEBUG allreduce.py`
6 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/tests/config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License"); you may
 6 | # not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | skip_case_lst = {}  # NOTE(review): rebound to the list below, so this dict is never observed
18 | 
19 | # This list is loaded when the 'stable' filter is passed down.
20 | # It names the unstable test cases that should be skipped.
21 | skip_case_lst = [
22 |     "test_cast.py",
23 | ]
24 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/tests/test_softmax_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may
 4 | # not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import paddle
16 | # Smoke script: applies Softmax three times to a fixed tensor on intel_hpu and prints the results.
17 | paddle.set_device("intel_hpu")
18 | # paddle.set_device("custom_cpu")
19 | # Fixed 2x3x4 float32 input; the commented-out line is a random bfloat16 alternative.
20 | # x = paddle.randn([2, 3, 4], dtype="bfloat16")
21 | x = paddle.to_tensor(
22 |     [
23 |         [[2.0, 3.0, 4.0, 5.0], [3.0, 4.0, 5.0, 6.0], [7.0, 8.0, 8.0, 9.0]],
24 |         [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [6.0, 7.0, 8.0, 9.0]],
25 |     ],
26 |     dtype="float32",
27 | )
28 | 
29 | m = paddle.nn.Softmax()  # no axis given, so the layer's default axis is used
30 | # The output is fed back into the same layer twice (three applications in total).
31 | out = m(x)
32 | out = m(out)
33 | out = m(out)
34 | # Print shapes first, then the input and the final output.
35 | print(x.shape)
36 | print(out.shape)
37 | print(x)
38 | print(out)
39 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/tests/unittests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | file(
16 |   GLOB TEST_OPS
17 |   RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
18 |   "test_*.py")  # every test_*.py in this directory, as paths relative to it
19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")  # removes every ".py" occurrence, leaving bare names
20 | # Call py_test_modules once per name; py_test_modules is defined outside this file.
21 | foreach(TEST_OP ${TEST_OPS})
22 |   py_test_modules(${TEST_OP} MODULES ${TEST_OP})
23 | endforeach()
24 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/tests/unittests/util.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may
 4 | # not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from __future__ import print_function, division
16 | 
17 | import paddle
18 | 
19 | 
20 | # Select Paddle's execution mode: int(flag) == 0 -> dynamic, anything else -> static.
21 | def enable_paddle_static_mode(intel_hpus_static_mode):
22 |     if int(intel_hpus_static_mode) == 0:  # dynamic mode
23 |         paddle.disable_static()
24 |     else:  # static mode
25 |         paddle.enable_static()
26 | 


--------------------------------------------------------------------------------
/backends/intel_hpu/utils/hpu_utils.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you may
 4 | // not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/README.md:
--------------------------------------------------------------------------------
 1 | # PaddlePaddle Custom Device Implementation for METAX GPU
 2 | 
 3 | English | [简体中文](./README_cn.md)
 4 | 
 5 | Please refer to the following steps to compile, install and verify the custom device implementation for METAX GPU.
 6 | 
 7 | ## Install Paddle
 8 | 
 9 | python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
10 | 
11 | ## Compile and Install
12 | 
13 | ```bash
14 | # Acquire Metax PaddlePaddle Docker Image
15 | 
16 | # Clone PaddleCustomDevice source code
17 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice
18 | 
19 | # Compile Source Code
20 | cd backends/metax_gpu
21 | bash build.sh
22 | 
23 | # Install PaddlePaddle
24 | # bash install_paddle.sh
25 | ```
26 | 
27 | ## Verification
28 | 
29 | ```bash
30 | # build with BUILD_TEST=1
31 | 
32 | # run_test
33 | cd tests
34 | bash run_test.sh
35 | ```
36 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/README_cn.md:
--------------------------------------------------------------------------------
 1 | # 飞桨自定义接入硬件后端(沐曦GPU)
 2 | 
 3 | 简体中文 | [English](./README.md)
 4 | 
 5 | 请参考以下步骤进行编译安装与验证
 6 | 
 7 | ## 安装paddle-cpu
 8 | python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
 9 | 
10 | ## 编译安装
11 | 
12 | ```bash
13 | # 获取沐曦PaddlePaddle Docker镜像
14 | 
15 | # 克隆PaddleCustomDevice源码
16 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice
17 | 
18 | # 编译安装
19 | cd backends/metax_gpu
20 | bash build.sh
21 | ```
22 | 
23 | ## 验证
24 | 
25 | ```bash
26 | 
27 | # 运行测试
28 | cd tests
29 | bash run_test.sh
30 | ```
31 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | // Generated by @dummy_GENERATOR@. DO NOT EDIT!!!
2 | // Template for a one-symbol C source; the @...@ tokens are placeholders (presumably substituted by the build system when the file is generated - confirm).
3 | const char *dummy = "@dummy_CONTENT@";
4 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/as_complex_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights
 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | #include "paddle/phi/common/type_traits.h"
16 | #include "paddle/phi/core/kernel_registry.h"
17 | #include "paddle/phi/kernels/as_complex_kernel.h"
18 | #include "paddle/phi/kernels/impl/as_complex_impl.h"
19 | // Registers phi::AsComplexKernel as the "as_complex" kernel of the metax_gpu backend (ALL_LAYOUT) for float and double.
20 | PD_CUSTOM_KERNEL_REGISTER(
21 |     as_complex, metax_gpu, ALL_LAYOUT, phi::AsComplexKernel, float, double) {
22 |   kernel->OutputAt(0).SetDataType(phi::dtype::ToComplex(kernel_key.dtype()));  // output 0 dtype = ToComplex(dtype of the kernel key)
23 | }
24 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/asgd_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights
 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/asgd_kernel.h"
17 | // Registers phi::ASGDKernel as the "asgd" kernel of the metax_gpu backend (ALL_LAYOUT) for float16, bfloat16, float and double.
18 | PD_CUSTOM_KERNEL_REGISTER(asgd,
19 |                           metax_gpu,
20 |                           ALL_LAYOUT,
21 |                           phi::ASGDKernel,
22 |                           phi::dtype::float16,
23 |                           phi::dtype::bfloat16,
24 |                           float,
25 |                           double) {}  // empty body: no extra per-kernel configuration
26 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/assign_pos_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights
 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/assign_pos_kernel.h"
17 | // Registers phi::AssignPosKernel as the "assign_pos" kernel of the metax_gpu backend (ALL_LAYOUT); int64_t is the only registered dtype.
18 | PD_CUSTOM_KERNEL_REGISTER(
19 |     assign_pos, metax_gpu, ALL_LAYOUT, phi::AssignPosKernel, int64_t) {}
20 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/bincount_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/bincount_kernel.h"
16 | // Registers phi::BincountKernel as the "bincount" kernel of the metax_gpu backend (ALL_LAYOUT) for double, float, int and int64_t.
17 | PD_CUSTOM_KERNEL_REGISTER(bincount,
18 |                           metax_gpu,
19 |                           ALL_LAYOUT,
20 |                           phi::BincountKernel,
21 |                           double,
22 |                           float,
23 |                           int,
24 |                           int64_t) {
25 |   kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED);  // output 0 dtype is left UNDEFINED at registration time
26 | }
27 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/c_embedding_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/c_embedding_kernel.h"
16 | // Registers phi::CEmbeddingKernel as the "c_embedding" kernel of the metax_gpu backend (ALL_LAYOUT) for float, double, float16 and both complex types.
17 | PD_CUSTOM_KERNEL_REGISTER(c_embedding,
18 |                           metax_gpu,
19 |                           ALL_LAYOUT,
20 |                           phi::CEmbeddingKernel,
21 |                           float,
22 |                           double,
23 |                           phi::dtype::float16,
24 |                           phi::dtype::complex<float>,
25 |                           phi::dtype::complex<double>) {}  // empty body: no extra per-kernel configuration
26 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/clip_grad_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/clip_grad_kernel.h"
17 | // Registers phi::ClipGradKernel as the "clip_grad" kernel of the metax_gpu backend (ALL_LAYOUT) for float, double, int, int64_t, bfloat16 and float16.
18 | PD_CUSTOM_KERNEL_REGISTER(clip_grad,
19 |                           metax_gpu,
20 |                           ALL_LAYOUT,
21 |                           phi::ClipGradKernel,
22 |                           float,
23 |                           double,
24 |                           int,
25 |                           int64_t,
26 |                           phi::dtype::bfloat16,
27 |                           phi::dtype::float16) {}  // empty body: no extra per-kernel configuration
28 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/clip_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/clip_kernel.h"
16 | // Registers phi::ClipKernel as the "clip" kernel of the metax_gpu backend (ALL_LAYOUT) for float, double, int, int64_t, float16 and bfloat16.
17 | PD_CUSTOM_KERNEL_REGISTER(clip,
18 |                           metax_gpu,
19 |                           ALL_LAYOUT,
20 |                           phi::ClipKernel,
21 |                           float,
22 |                           double,
23 |                           int,
24 |                           int64_t,
25 |                           phi::dtype::float16,
26 |                           phi::dtype::bfloat16) {}  // empty body: no extra per-kernel configuration
27 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/index_add_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/index_add_kernel.h"
16 | // Registers phi::IndexAddKernel as the "index_add" kernel of the metax_gpu backend (ALL_LAYOUT) for float, double, float16, bfloat16, int and int64_t.
17 | PD_CUSTOM_KERNEL_REGISTER(index_add,
18 |                           metax_gpu,
19 |                           ALL_LAYOUT,
20 |                           phi::IndexAddKernel,
21 |                           float,
22 |                           double,
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16,
25 |                           int,
26 |                           int64_t) {}  // empty body: no extra per-kernel configuration
27 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/one_hot_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/one_hot_kernel.h"
17 | // Registers phi::OneHotKernel as the "one_hot" kernel of the metax_gpu backend (ALL_LAYOUT) for int and int64_t.
18 | PD_CUSTOM_KERNEL_REGISTER(
19 |     one_hot, metax_gpu, ALL_LAYOUT, phi::OneHotKernel, int, int64_t) {
20 |   kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);  // output 0 is declared FLOAT32 regardless of the registered dtype
21 | }
22 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/p_norm_grad_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/p_norm_grad_kernel.h"
17 | // Registers phi::PNormGradKernel as the "p_norm_grad" kernel of the metax_gpu backend (ALL_LAYOUT) for float, double, float16 and bfloat16.
18 | PD_CUSTOM_KERNEL_REGISTER(p_norm_grad,
19 |                           metax_gpu,
20 |                           ALL_LAYOUT,
21 |                           phi::PNormGradKernel,
22 |                           float,
23 |                           double,
24 |                           phi::dtype::float16,
25 |                           phi::dtype::bfloat16) {}  // empty body: no extra per-kernel configuration
26 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/p_norm_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/p_norm_kernel.h"
16 | // Registers phi::PNormKernel as the "p_norm" kernel of the metax_gpu backend (ALL_LAYOUT) for float, double, float16 and bfloat16.
17 | PD_CUSTOM_KERNEL_REGISTER(p_norm,
18 |                           metax_gpu,
19 |                           ALL_LAYOUT,
20 |                           phi::PNormKernel,
21 |                           float,
22 |                           double,
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}  // empty body: no extra per-kernel configuration
25 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/randint_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/randint_kernel.h"
17 | // Registers phi::RandintKernel as the "randint" kernel of the metax_gpu backend (ALL_LAYOUT) for int and int64_t; the empty body adds no extra configuration.
18 | PD_CUSTOM_KERNEL_REGISTER(
19 |     randint, metax_gpu, ALL_LAYOUT, phi::RandintKernel, int, int64_t) {}
20 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/reshape_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | #include "paddle/phi/core/kernel_registry.h"
15 | #include "paddle/phi/kernels/reshape_kernel.h"
16 | // Registers phi::ReshapeKernel as the "reshape" kernel of the metax_gpu backend (ALL_LAYOUT) via the FOR_ALL_DTYPE macro variant, so no dtype list is given here.
17 | PD_CUSTOM_KERNEL_REGISTER_FOR_ALL_DTYPE(reshape,
18 |                                         metax_gpu,
19 |                                         ALL_LAYOUT,
20 |                                         phi::ReshapeKernel) {}
21 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/tril_indices_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights
 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/tril_indices_kernel.h"
17 | // Registers phi::TrilIndicesKernel as the "tril_indices" kernel of the metax_gpu backend (ALL_LAYOUT) for int and int64_t; the empty body adds no extra configuration.
18 | PD_CUSTOM_KERNEL_REGISTER(
19 |     tril_indices, metax_gpu, ALL_LAYOUT, phi::TrilIndicesKernel, int, int64_t) {
20 | }
21 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/cuda_kernels/triu_indices_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights
 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 3 | //
 4 | // Licensed under the Apache License, Version 2.0 (the "License");
 5 | // you may not use this file except in compliance with the License.
 6 | // You may obtain a copy of the License at
 7 | //
 8 | //     http://www.apache.org/licenses/LICENSE-2.0
 9 | //
10 | // Unless required by applicable law or agreed to in writing, software
11 | // distributed under the License is distributed on an "AS IS" BASIS,
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and
14 | // limitations under the License.
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/triu_indices_kernel.h"
17 | // Registers phi::TriuIndicesKernel as the "triu_indices" kernel of the metax_gpu backend (ALL_LAYOUT) for int and int64_t; the empty body adds no extra configuration.
18 | PD_CUSTOM_KERNEL_REGISTER(
19 |     triu_indices, metax_gpu, ALL_LAYOUT, phi::TriuIndicesKernel, int, int64_t) {
20 | }
21 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/dynload/cupti_lib_path.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #pragma once
16 | // Presumably the directory searched for the CUPTI library - confirm. NOTE(review): this is a hard-coded absolute path under /root/cu-bridge, so it only resolves where that layout exists.
17 | #define CUPTI_LIB_PATH "/root/cu-bridge/CUDA_DIR/extras/CUPTI/lib64"
18 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/ernie_core/fused_bias_act_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu"  //NOLINT
17 | // Registers phi::fusion::FusedBiasActKernel as the "fused_bias_act" kernel of the metax_gpu backend (ALL_LAYOUT) for float, bfloat16, float16 and int32_t; the kernel comes from the .cu file included above.
18 | PD_CUSTOM_KERNEL_REGISTER(fused_bias_act,
19 |                           metax_gpu,
20 |                           ALL_LAYOUT,
21 |                           phi::fusion::FusedBiasActKernel,
22 |                           float,
23 |                           phi::dtype::bfloat16,
24 |                           phi::dtype::float16,
25 |                           int32_t) {}  // empty body: no extra per-kernel configuration
26 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/ernie_core/rms_norm_kernel_register.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/phi/core/kernel_registry.h"
16 | #include "paddle/phi/kernels/rms_norm_kernel.h"
17 | // Registers phi::RmsNormKernel as the "rms_norm" kernel of the metax_gpu backend (ALL_LAYOUT) for float, float16 and bfloat16.
18 | PD_CUSTOM_KERNEL_REGISTER(rms_norm,
19 |                           metax_gpu,
20 |                           ALL_LAYOUT,
21 |                           phi::RmsNormKernel,
22 |                           float,
23 |                           phi::dtype::float16,
24 |                           phi::dtype::bfloat16) {}  // empty body: no extra per-kernel configuration
25 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/kernels/flags_declare.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // Defines five boolean FLAGS_* globals in namespace paddle_flags, each initialised to false. Presumably these satisfy flag symbols referenced by reused Paddle kernels - confirm.
15 | namespace paddle_flags {
16 | bool FLAGS_cudnn_deterministic = false;
17 | bool FLAGS_embedding_deterministic = false;
18 | bool FLAGS_enable_cublas_tensor_op_math = false;
19 | bool FLAGS_gemm_use_half_precision_compute_type = false;
20 | bool FLAGS_use_fast_math = false;
21 | }  // namespace paddle_flags
22 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/patch/mcEigen_3.4.0_paddle_final.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/metax_gpu/patch/mcEigen_3.4.0_paddle_final.zip


--------------------------------------------------------------------------------
/backends/metax_gpu/setup.py.in:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, Distribution
 2 | # NOTE(review): these two module-level names are not referenced by the setup() call in this file, which passes its own literals for packages/package_data.
 3 | packages = []
 4 | package_data = {}
 5 | # Distribution subclass that always reports extension modules; presumably used so the built wheel is treated as platform-specific - confirm.
 6 | class BinaryDistribution(Distribution):
 7 |     def has_ext_modules(self):
 8 |         return True
 9 | # Package definition for the metax_gpu plugin. The @...@ tokens are template placeholders (presumably substituted by CMake when setup.py is generated - confirm).
10 | setup(
11 |     name = '@CMAKE_PROJECT_NAME@',
12 |     version='@PLUGIN_VERSION@',
13 |     description='Paddle metax_gpu plugin',
14 |     long_description='',
15 |     long_description_content_type="text/markdown",
16 |     author_email="Paddle-better@baidu.com",
17 |     maintainer="PaddlePaddle",
18 |     maintainer_email="Paddle-better@baidu.com",
19 |     project_urls={},
20 |     license='Apache Software License',
21 |     packages= [
22 |         'paddle_custom_device',
23 |     ],
24 |     include_package_data=True,
25 |     package_data = {
26 |         '': ['*.so', '*.h', '*.py', '*.hpp'],  # file patterns shipped with every package
27 |     },
28 |     package_dir = {
29 |         '': 'python',  # packages are looked up under the ./python directory
30 |     },
31 |     zip_safe=False,
32 |     distclass=BinaryDistribution,  # use the Distribution subclass defined in this file
33 |     entry_points={
34 |         'console_scripts': [
35 |         ]
36 |     },
37 |     classifiers=[
38 |     ],
39 |     keywords='Paddle metax_gpu plugin',
40 | )
41 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | project(MyProject)
 3 | # Require a Python interpreter. NOTE(review): the run_tests command below invokes `pytest` from PATH rather than ${Python_EXECUTABLE} - confirm that is intended.
 4 | find_package(Python REQUIRED COMPONENTS Interpreter)
 5 | # run_tests: custom target that runs pytest on the unittest/ directory, with the top-level source directory as working directory.
 6 | add_custom_target(
 7 |   run_tests
 8 |   COMMAND pytest ${CMAKE_SOURCE_DIR}/unittest/
 9 |   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
10 |   COMMENT "Running unit tests with pytest")
11 | # NOTE(review): CMAKE_DEFAULT_MAKE_TARGET does not appear to be a variable CMake acts on, so run_tests presumably still has to be requested explicitly (e.g. `make run_tests`) - confirm.
12 | set(CMAKE_DEFAULT_MAKE_TARGET run_tests)
13 | 


--------------------------------------------------------------------------------
/backends/metax_gpu/tests/run_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 4 | # 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | # 
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | # 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # export PATH=/usr/local/corex-4.3.0/bin:$PATH
18 | # export LD_LIBRARY_PATH=/usr/local/corex-4.3.0/lib
19 | # export LIBRARY_PATH=/usr/local/corex-4.3.0/lib
20 | export PYTHONPATH=${PYTHONPATH}:${PADDLE_SOURCE_DIR}/test/legacy_test
21 | 
22 | mkdir -p build && cd build && cmake ..
23 | make run_tests
24 | 


--------------------------------------------------------------------------------
/backends/mlu/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../../../cmake/dummy.c.in


--------------------------------------------------------------------------------
/backends/mlu/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/backends/mlu/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/glog.cmake


--------------------------------------------------------------------------------
/backends/mlu/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/backends/mlu/cmake/external/onednn.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/onednn.cmake


--------------------------------------------------------------------------------
/backends/mlu/cmake/external/pybind11.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/pybind11.cmake


--------------------------------------------------------------------------------
/backends/mlu/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/generic.cmake


--------------------------------------------------------------------------------
/backends/mlu/cmake/paddle.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/paddle.cmake


--------------------------------------------------------------------------------
/backends/mlu/cmake/third_party.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/third_party.cmake


--------------------------------------------------------------------------------
/backends/mlu/cmake/version.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/version.cmake


--------------------------------------------------------------------------------
/backends/mlu/tests/unittests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | # Register each test_*.py file in this directory as a Python test module whose
16 | # name is the file name with ".py" removed.
17 | file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
18 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
19 | 
20 | foreach(test_module IN LISTS TEST_OPS)
21 |   py_test_modules(${test_module} MODULES ${test_module})
22 | endforeach()
23 | 


--------------------------------------------------------------------------------
/backends/mlu/tests/unittests/test_collective_api.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from test_parallel_dygraph_mp_layers import TestMultipleCustomDevices
18 | 
19 | 
20 | class TestProcessGroup(TestMultipleCustomDevices):
21 |     """Runs process_group_xccl.py through the inherited multi-device helper."""
22 | 
23 |     def test_process_group_xccl(self):
24 |         # run_mnist_2_custom_devices is inherited from the base class; "mlu" is
25 |         # presumably the custom device type to launch on — confirm.
26 |         self.run_mnist_2_custom_devices("process_group_xccl.py", "mlu")
27 | 
28 | 
29 | # Allow running this file directly as a unittest script.
30 | if __name__ == "__main__":
31 |     unittest.main()
32 | 


--------------------------------------------------------------------------------
/backends/mlu/tests/unittests/test_dygraph_sharding_stage_2.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/backends/mlu/tools/disable_ut_mlu:
--------------------------------------------------------------------------------
 1 | disable_ut_mlu
 2 | test_dygraph_recompute_for_eager
 3 | test_parallel_dygraph_mp_layers
 4 | test_zero_dim_tensor_mlu
 5 | test_collective_api
 6 | test_parallel_dygraph_pipeline_parallel
 7 | test_set_value_op_mlu
 8 | test_dygraph_sharding_stage_3
 9 | test_compare_op_mlu
10 | test_kldiv_loss_op_mlu
11 | test_flash_attention_op_mlu
12 | test_adamw_op_mlu
13 | test_rms_norm_op_mlu
14 | test_sync_batch_norm_op_mlu
15 | test_unsqueeze_op_mlu
16 | test_LeNet_MNIST
17 | 


--------------------------------------------------------------------------------
/backends/mps/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | Language:        ObjC
 3 | BasedOnStyle:  Google
 4 | IndentWidth:     2
 5 | TabWidth:        2
 6 | ContinuationIndentWidth: 4
 7 | AccessModifierOffset: -1  # private/protected/public are outdented by 1 column relative to class members
 8 | Standard:  Cpp11 
 9 | AllowAllParametersOfDeclarationOnNextLine: true
10 | BinPackParameters: false
11 | BinPackArguments: false
12 | ...
13 | 


--------------------------------------------------------------------------------
/backends/mps/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/backends/mps/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/glog.cmake


--------------------------------------------------------------------------------
/backends/mps/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../../../Paddle/cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/backends/mps/kernels/activation_impl.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include <cstdint>  // int64_t
18 | #include <vector>
19 | 
20 | namespace mps_kernel {
21 | 
22 | // Operation selector passed to Activation().
23 | enum ActivationOP { EXP, SIGMOID, SIN, COS };
24 | 
25 | // Declaration only; the definition is not part of this header.
26 | // NOTE(review): presumably applies `op` element-wise from `x` into `out`, with
27 | // `dims` giving the tensor shape — confirm against the implementation.
28 | void Activation(const float *x,
29 |                 float *out,
30 |                 const std::vector<int64_t> &dims,
31 |                 ActivationOP op);
32 | 
33 | // Declaration only; the definition is not part of this header.
34 | // NOTE(review): presumably raises each element of `x` to `factor` — confirm.
35 | void Pow(const float *x,
36 |          float *out,
37 |          const std::vector<int64_t> &dims,
38 |          float factor);
39 | 
40 | }  // namespace mps_kernel
41 | 


--------------------------------------------------------------------------------
/backends/mps/kernels/elementwise_impl.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include <cstdint>  // int64_t
18 | #include <vector>
19 | 
20 | namespace mps_kernel {
21 | 
22 | // Operation selector passed to Elementwise().
23 | enum ElementwiseOP {
24 |   ADD,
25 |   SUB,
26 |   MUL,
27 |   DIV,
28 | };
29 | 
30 | // Declaration only; the definition is not part of this header.
31 | // NOTE(review): presumably combines `x` and `y` element-wise into `out` using
32 | // `op`, with `dims` as the shared shape — confirm against the implementation.
33 | void Elementwise(const float *x,
34 |                  const float *y,
35 |                  float *out,
36 |                  const std::vector<int64_t> &dims,
37 |                  ElementwiseOP op);
38 | 
39 | }  // namespace mps_kernel
40 | 


--------------------------------------------------------------------------------
/backends/mps/kernels/matmul_impl.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include <cstdint>  // int64_t
18 | #include <vector>
19 | 
20 | namespace mps_kernel {
21 | 
22 | // Declaration only; the definition is not part of this header.
23 | // NOTE(review): presumably multiplies `x` by `y` into `out`, treating an
24 | // operand as transposed when its transpose_* flag is set — confirm.
25 | void Matmul(const float* x,
26 |             const float* y,
27 |             float* out,
28 |             const std::vector<int64_t>& x_dims,
29 |             const std::vector<int64_t>& y_dims,
30 |             bool transpose_x,
31 |             bool transpose_y);
32 | 
33 | }  // namespace mps_kernel
34 | 


--------------------------------------------------------------------------------
/backends/mps/kernels/softmax_impl.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include <cstdint>  // int64_t
18 | #include <vector>
19 | 
20 | namespace mps_kernel {
21 | 
22 | // Declaration only; the definition is not part of this header.
23 | // NOTE(review): presumably a softmax of `in` along `axis` — confirm.
24 | void Softmax(const float *in,
25 |              float *out,
26 |              const std::vector<int64_t> &dims,
27 |              int axis);
28 | 
29 | // Declaration only; the definition is not part of this header.
30 | // NOTE(review): presumably the backward pass, writing into `in_grad` — confirm.
31 | void SoftmaxGrad(const float *out,
32 |                  const float *out_grad,
33 |                  const std::vector<int64_t> &dims,
34 |                  int axis,
35 |                  float *in_grad);
36 | 
37 | }  // namespace mps_kernel
38 | 


--------------------------------------------------------------------------------
/backends/mps/runtime/mps_runtime.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include <cstddef>  // size_t
18 | 
19 | namespace mps {
20 | 
21 | // Every function below is a declaration only and returns bool.
22 | // NOTE(review): the bool presumably signals success — confirm against the
23 | // implementation.
24 | 
25 | bool init_device(void);
26 | 
27 | bool alloc_memory(void** ptr, size_t size);
28 | 
29 | bool dealloc_memory(void* ptr);
30 | 
31 | bool memcpy_d2d(void* dst, const void* src, size_t size);
32 | 
33 | bool memcpy_d2h(void* dst, const void* src, size_t size);
34 | 
35 | bool memcpy_h2d(void* dst, const void* src, size_t size);
36 | 
37 | }  // namespace mps
38 | 


--------------------------------------------------------------------------------
/backends/mps/setup.py.in:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, Distribution
 2 | 
 3 | # NOTE(review): these two names are not referenced anywhere below; setup()
 4 | # receives literal values for both keywords instead.
 5 | packages = []
 6 | package_data = {}
 7 | 
 8 | # Distribution subclass whose has_ext_modules() always returns True.
 9 | # NOTE(review): presumably so the wheel is tagged platform-specific — confirm.
10 | class BinaryDistribution(Distribution):
11 |     def has_ext_modules(self):
12 |         return True
13 | 
14 | # The @...@ placeholders are substituted when this .in template is configured
15 | # (presumably by CMake configure_file — confirm).
16 | setup(
17 |     name = '@CMAKE_PROJECT_NAME@',
18 |     version='@PLUGIN_VERSION@',
19 |     description='Paddle MPS plugin',
20 |     long_description='',
21 |     long_description_content_type="text/markdown",
22 |     author_email="Paddle-better@baidu.com",
23 |     maintainer="PaddlePaddle",
24 |     maintainer_email="Paddle-better@baidu.com",
25 |     project_urls={},
26 |     license='Apache Software License',
27 |     packages= [
28 |         'paddle_custom_device',
29 |     ],
30 |     include_package_data=True,
31 |     package_data = {
32 |         '': ['*.dylib', '*.h', '*.py', '*.hpp'],
33 |     },
34 |     package_dir = {
35 |         '': 'python',
36 |     },
37 |     zip_safe=False,
38 |     distclass=BinaryDistribution,
39 |     entry_points={
40 |         'console_scripts': [
41 |         ]
42 |     },
43 |     classifiers=[
44 |     ],
45 |     keywords='Paddle MPS plugin',
46 | )
47 | 


--------------------------------------------------------------------------------
/backends/mps/tests/unittests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not
 4 | # use this file except in compliance with the License. You may obtain a copy of
 5 | # the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 | # License for the specific language governing permissions and limitations under
13 | # the License
14 | 
15 | # Register each test_*.py file in this directory as a Python test module whose
16 | # name is the file name with ".py" removed.
17 | file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
18 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
19 | 
20 | foreach(test_module IN LISTS TEST_OPS)
21 |   py_test_modules(${test_module} MODULES ${test_module})
22 | endforeach()
23 | 


--------------------------------------------------------------------------------
/backends/npu/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../../../cmake/dummy.c.in


--------------------------------------------------------------------------------
/backends/npu/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/backends/npu/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/glog.cmake


--------------------------------------------------------------------------------
/backends/npu/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/backends/npu/cmake/external/onednn.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/onednn.cmake


--------------------------------------------------------------------------------
/backends/npu/cmake/external/pybind11.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/pybind11.cmake


--------------------------------------------------------------------------------
/backends/npu/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/generic.cmake


--------------------------------------------------------------------------------
/backends/npu/cmake/paddle.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/paddle.cmake


--------------------------------------------------------------------------------
/backends/npu/cmake/third_party.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/third_party.cmake


--------------------------------------------------------------------------------
/backends/npu/cmake/version.cmake:
--------------------------------------------------------------------------------
1 | ../../../cmake/version.cmake


--------------------------------------------------------------------------------
/backends/npu/custom_op/llama_infer/write_cache_kv.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "paddle/extension.h"
16 | 
17 | // Kernel function registered below for the write_cache_kv op. Its body is
18 | // empty, so a call does nothing with any of the four tensors.
19 | // NOTE(review): presumably a placeholder that is replaced or bypassed
20 | // elsewhere — confirm.
21 | void WriteCacheKV(const paddle::Tensor& input_k,
22 |                   const paddle::Tensor& input_v,
23 |                   const paddle::Tensor& cache_kv,
24 |                   const paddle::Tensor& sequence_lengths_shape) {}
25 | 
26 | // Declares four inputs and one output, with cache_kv mapped in place onto
27 | // cache_kv_out.
28 | PD_BUILD_OP(write_cache_kv)
29 |     .Inputs({"input_k", "input_v", "cache_kv", "sequence_lengths"})
30 |     .Outputs({"cache_kv_out"})
31 |     .SetInplaceMap({{"cache_kv", "cache_kv_out"}})
32 |     .SetKernelFn(PD_KERNEL(WriteCacheKV));
33 | 


--------------------------------------------------------------------------------
/backends/npu/passes/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .common import setUp
16 | from .common import addPasses
17 | 


--------------------------------------------------------------------------------
/backends/npu/profile/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .cann_export import cann_parse_enabled as cann_parse_enabled
16 | 


--------------------------------------------------------------------------------
/backends/npu/tests/unittests/test_collective_api.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from test_parallel_dygraph_mp_layers import TestMultipleCustomDevices
18 | 
19 | 
20 | class TestProcessGroup(TestMultipleCustomDevices):
21 |     """Runs process_group_xccl.py through the inherited multi-device helper."""
22 | 
23 |     def test_process_group_xccl(self):
24 |         # run_mnist_2_custom_devices is inherited from the base class; "npu" is
25 |         # presumably the custom device type to launch on — confirm.
26 |         self.run_mnist_2_custom_devices("process_group_xccl.py", "npu")
27 | 
28 | 
29 | # Allow running this file directly as a unittest script.
30 | if __name__ == "__main__":
31 |     unittest.main()
32 | 


--------------------------------------------------------------------------------
/backends/npu/tests/unittests/test_dygraph_sharding_stage_2.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/backends/npu/tools/disable_ut_npu:
--------------------------------------------------------------------------------
 1 | disable_ut_npu
 2 | test_softmax_with_cross_entropy_op_npu
 3 | test_parallel_dygraph_mp_layers
 4 | test_dygraph_recompute_for_eager
 5 | test_slice_op_npu
 6 | test_set_value_op_npu
 7 | test_dygraph_sharding_stage_3
 8 | test_zero_dim_tensor_npu
 9 | test_momentum_op_npu
10 | test_elementwise_sub_op_npu
11 | test_index_sample_op_npu
12 | test_flashattention_npu
13 | test_rmsprop_op_npu
14 | test_einsum_op_npu
15 | test_fused_matmul_bias_op_npu
16 | test_set_value_op_npu.py
17 | 


--------------------------------------------------------------------------------
/backends/npu/tools/disable_ut_npu_910b:
--------------------------------------------------------------------------------
1 | disable_ut_npu
2 | test_check_nan_inf_op_npu
3 | test_conv3d_op_npu
4 | test_elementwise_mod_op_npu
5 | test_matmulv2_op_npu
6 | test_zero_dim_tensor_npu
7 | test_group_norm_op_npu
8 | 


--------------------------------------------------------------------------------
/backends/npu/tools/important_ut_npu:
--------------------------------------------------------------------------------
 1 | test_assign_op_npu_eager
 2 | test_bitwise_op_npu
 3 | test_concat_op_npu_eager
 4 | test_clip_op_npu
 5 | test_elementwise_add_op_npu_eager
 6 | test_elementwise_div_op_npu_eager
 7 | test_elementwise_max_op_npu_eager
 8 | test_elementwise_mul_op_npu_eager
 9 | test_elementwise_sub_op_npu_eager
10 | test_is_empty_op_npu
11 | test_fill_any_like_op_npu
12 | test_fill_constant_op_npu
13 | test_full_op
14 | test_logical_op_npu
15 | test_lookup_table_v2_op_npu
16 | test_matmulv2_op_npu
17 | test_reduce_max_op_npu_eager
18 | test_reduce_sum_op_npu_eager
19 | test_scale_op_npu_eager
20 | test_slice_op_npu_eager
21 | test_split_op_npu_eager
22 | test_squared_l2_norm_op_npu_eager
23 | test_stack_op_npu_eager
24 | test_tril_triu_op_npu
25 | 


--------------------------------------------------------------------------------
/backends/sdaa/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../../../Paddle/cmake/dummy.c.in


--------------------------------------------------------------------------------
/backends/sdaa/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/backends/sdaa/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/glog.cmake


--------------------------------------------------------------------------------
/backends/sdaa/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/backends/sdaa/cmake/external/onednn.cmake:
--------------------------------------------------------------------------------
1 | ../../../../cmake/external/onednn.cmake


--------------------------------------------------------------------------------
/backends/sdaa/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../../../Paddle/cmake/generic.cmake


--------------------------------------------------------------------------------
/backends/sdaa/dynload/dynamic_loader.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License. */
14 | 
15 | #pragma once
16 | 
17 | namespace custom_dynload {
18 | // DECLARE_TYPE(name, args...) names the type of the call expression
19 | // name(args...) on non-Windows builds; on Windows it is decltype(auto).
20 | #ifndef _WIN32
21 | #define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
22 | #else
23 | #define DECLARE_TYPE(__name, ...) decltype(auto)
24 | #endif
25 | 
26 | // Declaration only; the definition is not part of this header.
27 | // NOTE(review): presumably returns the handle of the loaded SDPTI shared
28 | // library — confirm against the implementation.
29 | void* GetSDPTIDsoHandle();
30 | 
31 | }  // namespace custom_dynload
32 | 


--------------------------------------------------------------------------------
/backends/sdaa/pr_ci_sdaa.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # PR CI script: installs the nightly CPU Paddle wheel, builds and installs the
17 | # plugin wheel, then runs the unit tests with ctest.
18 | # All paths are relative, so run it from the directory containing compile.sh.
19 | 
20 | # Stop at the first failing step; without this a failed install or build was
21 | # ignored and only ctest's exit status reached the CI.
22 | set -euo pipefail
23 | 
24 | # Install paddle whl
25 | pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
26 | 
27 | # make PaddleCustomDevice
28 | bash compile.sh
29 | pip install build/dist/*.whl --force-reinstall
30 | 
31 | # Test
32 | cd build/tests/unittests
33 | # -E excludes tests whose name matches the given regex.
34 | ctest --output-on-failure -j 1 -E "test_highperformance_conv"
35 | 


--------------------------------------------------------------------------------
/backends/sdaa/sdaac_ops/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Builds the custom SDAA C ops: every *.scpp file under this directory is
 2 | # compiled into an object library and then linked by a custom tecocc command.
 3 | # SDAA_INC, SDAA_LIB and CUSTOM_SDAA_C_OPS_LIB are not set here and must be
 4 | # provided by the including project or on the command line.
 5 | cmake_minimum_required(VERSION 3.10.2)
 6 | project(customsdaacops)
 7 | 
 8 | # Both C and C++ sources go through tecocc from $SDAA_ROOT.
 9 | # NOTE(review): the compilers are assigned after project(); CMake normally
10 | # expects them to be chosen before the first project() call — confirm this
11 | # ordering works as intended.
12 | set(CMAKE_C_COMPILER "$ENV{SDAA_ROOT}/bin/tecocc")
13 | set(CMAKE_CXX_COMPILER "$ENV{SDAA_ROOT}/bin/tecocc")
14 | 
15 | set(CMAKE_C_FLAGS "-std=c99")
16 | set(CMAKE_CXX_FLAGS "-std=c++17")
17 | 
18 | set(SDAAC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ${SDAA_INC})
19 | 
20 | file(
21 |   GLOB_RECURSE SDAA_C_OPS
22 |   RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
23 |   *.scpp)
24 | 
25 | # NOTE(review): "complie" looks like a typo for "compile"; the same spelling is
26 | # used where the variable is read below, so the options are still applied.
27 | set(complie_options -flto -fPIC)
28 | 
29 | # The .scpp sources are marked as CXX so that they are compiled as C++.
30 | add_library(sdaacops_objs OBJECT ${SDAA_C_OPS})
31 | set_source_files_properties(${SDAA_C_OPS} PROPERTIES LANGUAGE CXX)
32 | set_target_properties(sdaacops_objs PROPERTIES LINKER_LANGUAGE CXX)
33 | target_include_directories(sdaacops_objs PUBLIC ${SDAAC_INCLUDE_DIR})
34 | target_compile_options(sdaacops_objs PRIVATE ${complie_options})
35 | # Always-built target that runs tecocc over the object files and writes
36 | # ${CUSTOM_SDAA_C_OPS_LIB}/libcustomsdaacops.a.
37 | add_custom_target(
38 |   customsdaacops ALL
39 |   COMMAND
40 |     tecocc $<TARGET_OBJECTS:sdaacops_objs> -flto -fPIC -shared --sdaa-link
41 |     -sdaa-static-lib -fuse-ld=lld -lm -L ${SDAA_LIB} -o
42 |     ${CUSTOM_SDAA_C_OPS_LIB}/libcustomsdaacops.a
43 |   COMMAND_EXPAND_LISTS)
44 | add_dependencies(customsdaacops sdaacops_objs)
45 | 


--------------------------------------------------------------------------------
/backends/sdaa/tests/distribution/test_parallel_dygraph_pp_layers.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import unittest
17 | 
18 | from test_parallel_dygraph_mp_layers import TestMultipleCustomDevices
19 | 
20 | 
21 | class TestHybridPipeParallel(TestMultipleCustomDevices):
22 |     """Runs hybrid_parallel_pp_layer.py through the inherited helper."""
23 | 
24 |     def test_hybrid_parallel_pp_layer(self):
25 |         # abspath() resolves the script name against the current working
26 |         # directory; "sdaa" is presumably the custom device type — confirm.
27 |         self.run_mnist_custom_devices(
28 |             os.path.abspath("hybrid_parallel_pp_layer.py"), "sdaa"
29 |         )
30 | 
31 | 
32 | # Allow running this file directly as a unittest script.
33 | if __name__ == "__main__":
34 |     unittest.main()
35 | 


--------------------------------------------------------------------------------
/backends/sdaa/tests/distribution/test_parallel_dygraph_sep_parallel.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import unittest
16 | 
17 | from test_parallel_dygraph_mp_layers import TestMultipleCustomDevices
18 | 
19 | 
20 | class TestHybridParallel(TestMultipleCustomDevices):
21 |     def test_hybrid_parallel_sep_model(self):
22 |         self.run_mnist_custom_devices(
23 |             "hybrid_parallel_sep_model.py", "sdaa", need_envs={}
24 |         )
25 | 
26 | 
27 | if __name__ == "__main__":  # only when this file is executed directly
28 |     unittest.main()  # hand control to the unittest command-line runner
29 | 


--------------------------------------------------------------------------------
/backends/sdaa/tests/runtime/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if("${TEST_CUSTOM_DEVICE_ROOT}" STREQUAL "") # variable empty or unset
 2 |   return() # stop here: nothing below is declared in that case
 3 | endif()
 4 | # Build the test_runtime executable from test_runtime.cc and register it.
 5 | add_executable(test_runtime test_runtime.cc)
 6 | add_dependencies(test_runtime third_party) # build third_party first
 7 | target_link_libraries(test_runtime gtest gtest_main ${SDAA_LIB} ${TECODNN_LIB})
 8 | add_test(test_runtime test_runtime) # test name, then the command to run
 9 | # Python test modules, each declared with its own ENVS list.
10 | py_test_modules(test_profiler MODULES test_profiler ENVS ENABLE_SDPTI=0)
11 | py_test_modules(
12 |   test_profiler_with_kernel MODULES test_profiler_with_kernel ENVS
13 |   FLAGS_allocator_strategy=naive_best_fit FLAGS_init_allocated_mem=True)
14 | py_test_modules( # same module as the previous test, with two more ENVS
15 |   test_profiler_with_kernel_with_env
16 |   MODULES
17 |   test_profiler_with_kernel
18 |   ENVS
19 |   FLAGS_allocator_strategy=naive_best_fit
20 |   FLAGS_init_allocated_mem=True
21 |   FLAGS_sdaa_error_check=True
22 |   ENABLE_SDPTI=1)
23 | 


--------------------------------------------------------------------------------
/cmake/dummy.c.in:
--------------------------------------------------------------------------------
1 | ../Paddle/cmake/dummy.c.in


--------------------------------------------------------------------------------
/cmake/external/gflags.cmake:
--------------------------------------------------------------------------------
1 | ../../Paddle/cmake/external/gflags.cmake


--------------------------------------------------------------------------------
/cmake/external/glog.cmake:
--------------------------------------------------------------------------------
1 | ../../Paddle/cmake/external/glog.cmake


--------------------------------------------------------------------------------
/cmake/external/gtest.cmake:
--------------------------------------------------------------------------------
1 | ../../Paddle/cmake/external/gtest.cmake


--------------------------------------------------------------------------------
/cmake/external/onednn.cmake:
--------------------------------------------------------------------------------
1 | ../../Paddle/cmake/external/onednn.cmake


--------------------------------------------------------------------------------
/cmake/external/pybind11.cmake:
--------------------------------------------------------------------------------
1 | ../../Paddle/cmake/external/pybind11.cmake


--------------------------------------------------------------------------------
/cmake/generic.cmake:
--------------------------------------------------------------------------------
1 | ../Paddle/cmake/generic.cmake


--------------------------------------------------------------------------------
/cmake/version.cmake:
--------------------------------------------------------------------------------
1 | ../Paddle/cmake/version.cmake


--------------------------------------------------------------------------------
/python/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/python/tests/auto_parallel_op_test.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/legacy_test/auto_parallel_op_test.py


--------------------------------------------------------------------------------
/python/tests/config.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/legacy_test/config.py


--------------------------------------------------------------------------------
/python/tests/convert.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/rnn/convert.py


--------------------------------------------------------------------------------
/python/tests/op.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/legacy_test/op.py


--------------------------------------------------------------------------------
/python/tests/op_test.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/legacy_test/op_test.py


--------------------------------------------------------------------------------
/python/tests/prim_op_test.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/legacy_test/prim_op_test.py


--------------------------------------------------------------------------------
/python/tests/rnn_numpy.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/rnn/rnn_numpy.py


--------------------------------------------------------------------------------
/python/tests/testsuite.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/legacy_test/testsuite.py


--------------------------------------------------------------------------------
/python/tests/utils.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/test/legacy_test/utils.py


--------------------------------------------------------------------------------
/python/tests/white_list/__init__.py:
--------------------------------------------------------------------------------
 1 | #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # TODO(duanyanhui):
16 | # Devices differ in accuracy, so we need to build a different white_list for
17 | # each device. For example, Ascend does not support int64_t and double very
18 | # well, and casting between data types introduces errors. We need to put such
19 | # kernels in the op_threshold_white_list.
20 | 
21 | # Next, we will build a white_list for each device and put it in backends.
22 | 


--------------------------------------------------------------------------------
/python/tests/white_list/check_shape_white_list.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | NEED_TO_FIX_OP_LIST = [  # op-name strings; consumer not visible here - TODO confirm
16 |     "pow",
17 |     "hard_swish",
18 |     "fused_elemwise_activation",
19 |     "bilinear_tensor_product",
20 |     "conv2d_transpose",
21 |     "depthwise_conv2d_transpose",
22 |     "grid_sampler",
23 |     "lstmp",
24 |     "margin_rank_loss",
25 |     "matmul",
26 |     "scatter",
27 |     "soft_relu",
28 |     "squared_l2_distance",
29 |     "tree_conv",
30 |     "cvm",
31 |     "cudnn_lstm",
32 |     "rnn",
33 |     "multi_dot",
34 |     "index_add",
35 | ]
36 | 


--------------------------------------------------------------------------------
/python/tests/white_list/new_ir_python_api_grad_white_list.py:
--------------------------------------------------------------------------------
1 | ../../../Paddle/test/white_list/new_ir_python_api_grad_white_list.py


--------------------------------------------------------------------------------
/python/tools/__init__.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/tools/__init__.py


--------------------------------------------------------------------------------
/python/tools/static_mode_white_list.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/tools/static_mode_white_list.py


--------------------------------------------------------------------------------
/python/tools/test_runner.py:
--------------------------------------------------------------------------------
1 | ../../Paddle/tools/test_runner.py


--------------------------------------------------------------------------------
/tools/codestyle/.cmakelintrc:
--------------------------------------------------------------------------------
1 | filter=-readability/wonkycase,-syntax,-convention/filename,-package/stdargs,-whitespace/indent,-whitespace/extra,-linelength,-readability/mixedcase
2 | 


--------------------------------------------------------------------------------
/tools/codestyle/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 


--------------------------------------------------------------------------------
/tools/codestyle/clang_format.hook:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Pre-commit wrapper around clang-format: makes sure the pinned release is
 3 | # installed (via pip) and then forwards every argument to clang-format.
 4 | set -e
 5 | 
 6 | readonly VERSION="13.0.0"
 7 | 
 8 | # Probe the installed clang-format. A missing binary must not abort the script
 9 | # under `set -e`: an empty result simply fails the match below, which is what
10 | # triggers the pip install.
11 | version=$(clang-format -version 2>/dev/null || true)
12 | 
13 | # Compare "<major><minor>" of the default python against 36.
14 | if ! [[ $(python -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1$2}') -ge 36 ]]; then
15 |     echo "clang-format installation by pip need python version great equal 3.6,
16 |           please change the default python to higher version."
17 |     exit 1
18 | fi
19 | 
20 | if ! [[ $version == *"$VERSION"* ]]; then
21 |     # low version of pip may not have the source of clang-format whl
22 |     pip install --upgrade pip
23 |     pip install "clang-format==${VERSION}"
24 | fi
25 | 
26 | # Quote "$@" so file names containing spaces or glob characters reach
27 | # clang-format as single, unmodified arguments.
28 | clang-format "$@"
29 | 


--------------------------------------------------------------------------------
/tools/codestyle/cpplint_pre_commit.hook:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Pre-commit wrapper around cpplint: installs the pinned release with pip when
 3 | # the reported version does not match, then forwards every argument to cpplint.
 4 | 
 5 | readonly VERSION="1.6.0"
 6 | 
 7 | version=$(cpplint --version)
 8 | 
 9 | if ! [[ $version == *"$VERSION"* ]]; then
10 |     # Install exactly the release this hook checks for.
11 |     pip install "cpplint==${VERSION}"
12 | fi
13 | 
14 | # Quote "$@" so file names containing spaces or glob characters reach cpplint
15 | # as single, unmodified arguments.
16 | cpplint "$@"
17 | 


--------------------------------------------------------------------------------
/tools/codestyle/pylint_pre_commit.hook:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Pre-commit hook: runs pylint with only the docstring_checker checks enabled
 3 | # on each non-deleted path reported by `git diff --name-status`, and exits
 4 | # non-zero when any pylint run returned a non-zero status.
 5 | 
 6 | TOTAL_ERRORS=0
 7 | 
 8 | 
 9 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
10 | # Prepend this script's directory to PYTHONPATH (presumably where the
11 | # docstring_checker plugin lives -- confirm).
12 | export PYTHONPATH="$DIR:$PYTHONPATH"
13 | 
14 | readonly VERSION="2.12.0"
15 | version=$(pylint --version | grep 'pylint')
16 | 
17 | if ! [[ $version == *"$VERSION"* ]]; then
18 |     pip install "pylint==${VERSION}"
19 | fi
20 | 
21 | # The trick to remove deleted files: https://stackoverflow.com/a/2413151
22 | # Paths are read line by line on fd 3 rather than word-split from a command
23 | # substitution, so they are never glob-expanded and pylint keeps its stdin.
24 | while IFS= read -r file <&3; do
25 |     [[ -n $file ]] || continue
26 |     # skip submodule of Paddle check
27 |     if ! [[ $file == "Paddle" ]]; then
28 |         pylint --disable=all --load-plugins=docstring_checker \
29 |         --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises "$file"
30 |         TOTAL_ERRORS=$((TOTAL_ERRORS + $?))
31 |     fi
32 | done 3< <(git diff --name-status | awk '$1 != "D" {print $2}')
33 | 
34 | # An exit status is truncated to 8 bits, so a sum of 256 would be reported as
35 | # success. Clamp it to keep any failure visible to the caller.
36 | if ((TOTAL_ERRORS > 255)); then
37 |     TOTAL_ERRORS=255
38 | fi
39 | 
40 | exit "$TOTAL_ERRORS"
41 | #For now, just warning:
42 | #exit 0
43 | 


--------------------------------------------------------------------------------