├── .clang-format ├── .flake8 ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .style.yapf ├── Guides ├── image.png ├── paddle_load_customdevice.png ├── pir_plugin_subgraph.md └── subgraph_case.png ├── LICENSE ├── README.md ├── README_en.md ├── README_ja.md ├── backends ├── biren_gpu │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── cmake │ │ ├── dummy.c.in │ │ ├── external │ │ │ ├── gflags.cmake │ │ │ ├── glog.cmake │ │ │ ├── gtest.cmake │ │ │ ├── onednn.cmake │ │ │ ├── pybind11.cmake │ │ │ └── supa.cmake │ │ ├── generic.cmake │ │ ├── paddle.cmake │ │ ├── third_party.cmake │ │ └── version.cmake │ ├── kernels │ │ ├── abs_kernel.cc │ │ └── funcs │ │ │ └── br_paddle_supa.h │ ├── runtime │ │ ├── runtime.cc │ │ └── runtime.h │ ├── setup.py.in │ └── tests │ │ ├── CMakeLists.txt │ │ └── unittests │ │ ├── CMakeLists.txt │ │ └── test_abs_op_supa.py ├── custom_cpu │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── README_ja.md │ ├── cmake │ │ └── paddle.cmake │ ├── kernels │ │ ├── argsort_kernel.cc │ │ ├── assign_value_kernel.cc │ │ ├── cast_kernel.cc │ │ ├── compare_kernel.cc │ │ ├── concat_kernel.cc │ │ ├── contiguous_kernel.cc │ │ ├── cross_entropy_with_softmax_kernel.cc │ │ ├── elementwise_kernel.cc │ │ ├── fill_kernel.cc │ │ ├── full_kernel.cc │ │ ├── kernels.h │ │ ├── matmul_kernel.cc │ │ ├── mean_kernel.cc │ │ ├── memcpy_kernel.cc │ │ ├── phi_funcs.h │ │ ├── reduce_kernel.cc │ │ ├── reshape_kernel.cc │ │ ├── sgd_kernel.cc │ │ ├── slice_kernel.cc │ │ ├── softmax_kernel.cc │ │ ├── strided_copy_kernel.cc │ │ ├── transpose_kernel.cc │ │ └── uniform_kernel.cc │ ├── runtime │ │ └── runtime.cc │ ├── setup.py.in │ ├── tests │ │ ├── CMakeLists.txt │ │ ├── test_MNIST_model.py │ │ └── unittests │ │ │ ├── CMakeLists.txt │ │ │ ├── test_argsort_op.py │ │ │ ├── test_cast_op.py │ │ │ ├── test_compare_op.py │ │ │ ├── test_concat_op.py │ │ │ ├── test_elementwise_mul_op.py │ │ │ ├── test_fill_constant_op.py │ │ │ ├── test_matmul_op.py │ │ │ ├── 
test_matmul_v2_op.py │ │ │ ├── test_mean_op.py │ │ │ ├── test_reduce_op.py │ │ │ ├── test_reshape_op.py │ │ │ ├── test_sgd_op.py │ │ │ ├── test_slice_op.py │ │ │ ├── test_softmax_op.py │ │ │ ├── test_softmax_with_cross_entropy_op.py │ │ │ ├── test_transpose_op.py │ │ │ ├── test_uniform_random_op.py │ │ │ └── test_zero_dim_tensor.py │ └── tools │ │ └── dockerfile │ │ ├── Dockerfile.ubuntu20.aarch64.gcc84 │ │ ├── Dockerfile.ubuntu20.x86_64.gcc84 │ │ ├── build-image.sh │ │ └── root │ │ ├── .bashrc │ │ ├── .gitconfig │ │ ├── .scripts │ │ ├── git-completion.sh │ │ └── git-prompt.sh │ │ └── .vimrc ├── gcu │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── backend │ │ ├── equivalence_trans │ │ │ ├── all_ops.h │ │ │ ├── insensitive_ops │ │ │ │ ├── accuracy.h │ │ │ │ ├── activation.h │ │ │ │ ├── adam.h │ │ │ │ ├── adamw.h │ │ │ │ ├── add_n.h │ │ │ │ ├── argmax.h │ │ │ │ ├── argmin.h │ │ │ │ ├── argsort.h │ │ │ │ ├── assign.h │ │ │ │ ├── assign_value.h │ │ │ │ ├── atan.h │ │ │ │ ├── batch_norm.h │ │ │ │ ├── bilinear_interp_v2.h │ │ │ │ ├── bitwise_not.h │ │ │ │ ├── bmm.h │ │ │ │ ├── cast.h │ │ │ │ ├── check_finite_and_unscale.h │ │ │ │ ├── clip.h │ │ │ │ ├── concat.h │ │ │ │ ├── conv2d.h │ │ │ │ ├── conv3d.h │ │ │ │ ├── cos.h │ │ │ │ ├── cross_entropy.h │ │ │ │ ├── cumsum.h │ │ │ │ ├── dropout.h │ │ │ │ ├── elementwise_binary.h │ │ │ │ ├── elementwise_unary.h │ │ │ │ ├── embedding.h │ │ │ │ ├── equal.h │ │ │ │ ├── expand.h │ │ │ │ ├── expand_as.h │ │ │ │ ├── fill_constant.h │ │ │ │ ├── fill_zeros_like.h │ │ │ │ ├── flatten.h │ │ │ │ ├── flip.h │ │ │ │ ├── floor.h │ │ │ │ ├── full_like.h │ │ │ │ ├── fuse │ │ │ │ │ ├── conv_add_mul_add_hard_swish_mul_add.h │ │ │ │ │ ├── conv_bias_activate.h │ │ │ │ │ ├── conv_bn.h │ │ │ │ │ ├── conv_bn_hard_swish.h │ │ │ │ │ ├── conv_bn_relu.h │ │ │ │ │ ├── dot_bias.h │ │ │ │ │ ├── mul_add.h │ │ │ │ │ └── utility.h │ │ │ │ ├── gather.h │ │ │ │ ├── gather_nd.h │ │ │ │ ├── gelu.h │ │ │ │ ├── grid_sampler.h │ │ │ │ ├── huber_loss.h │ │ 
│ │ ├── increment.h │ │ │ │ ├── index_select.h │ │ │ │ ├── instance_norm.h │ │ │ │ ├── iou_similarity.h │ │ │ │ ├── isinf_v2.h │ │ │ │ ├── label_smooth.h │ │ │ │ ├── layer_norm.h │ │ │ │ ├── log.h │ │ │ │ ├── log_loss.h │ │ │ │ ├── log_softmax.h │ │ │ │ ├── logical_and.h │ │ │ │ ├── logical_not.h │ │ │ │ ├── masked_select.h │ │ │ │ ├── matmul_v2.h │ │ │ │ ├── maximum.h │ │ │ │ ├── mean.h │ │ │ │ ├── meshgrid.h │ │ │ │ ├── minimum.h │ │ │ │ ├── momentum.h │ │ │ │ ├── mul.h │ │ │ │ ├── nearest_interp.h │ │ │ │ ├── nearest_interp_v2.h │ │ │ │ ├── not_equal.h │ │ │ │ ├── one_hot.h │ │ │ │ ├── pool2d.h │ │ │ │ ├── prior_box.h │ │ │ │ ├── range.h │ │ │ │ ├── reduce_x.h │ │ │ │ ├── reshape.h │ │ │ │ ├── reverse.h │ │ │ │ ├── rmsprop.h │ │ │ │ ├── rnn.h │ │ │ │ ├── roi_align.h │ │ │ │ ├── roll.h │ │ │ │ ├── scale.h │ │ │ │ ├── scatter.h │ │ │ │ ├── set_value.h │ │ │ │ ├── shape.h │ │ │ │ ├── share_data.h │ │ │ │ ├── sigmoid_cross_entropy_with_logits.h │ │ │ │ ├── sign.h │ │ │ │ ├── size.h │ │ │ │ ├── slice.h │ │ │ │ ├── softmax.h │ │ │ │ ├── softmax_with_cross_entropy.h │ │ │ │ ├── split.h │ │ │ │ ├── sqrt.h │ │ │ │ ├── squared_l2_norm.h │ │ │ │ ├── squeeze.h │ │ │ │ ├── stack.h │ │ │ │ ├── strided_slice.h │ │ │ │ ├── tanh.h │ │ │ │ ├── tile.h │ │ │ │ ├── topk.h │ │ │ │ ├── transpose.h │ │ │ │ ├── tril_triu.h │ │ │ │ ├── unsqueeze.h │ │ │ │ ├── unstack.h │ │ │ │ ├── where.h │ │ │ │ └── yolo_box.h │ │ │ └── utils.h │ │ ├── executor │ │ │ ├── cast_runner.cc │ │ │ ├── cast_runner.h │ │ │ ├── gcu_node.h │ │ │ ├── single_op_executor.cc │ │ │ ├── single_op_executor.h │ │ │ ├── tops_compiler.cc │ │ │ └── tops_compiler.h │ │ ├── register │ │ │ └── register.h │ │ └── utils │ │ │ ├── gcu_op_desc.cc │ │ │ ├── gcu_op_desc.h │ │ │ ├── types.h │ │ │ ├── utils.cc │ │ │ └── utils.h │ ├── ci_test.sh │ ├── cmake │ │ ├── dummy.c.in │ │ ├── external │ │ │ ├── gcu.cmake │ │ │ ├── gflags.cmake │ │ │ ├── glog.cmake │ │ │ ├── gtest.cmake │ │ │ ├── onednn.cmake │ │ │ ├── pybind11.cmake │ │ │ └── 
topscc.cmake │ │ ├── generic.cmake │ │ ├── paddle.cmake │ │ ├── third_party.cmake │ │ └── version.cmake │ ├── common │ │ ├── flags.cc │ │ ├── gcu_env_list.h │ │ ├── gcu_funcs.h │ │ ├── gcu_op_runner.cc │ │ ├── gcu_op_runner.h │ │ ├── gcu_profiler.h │ │ ├── host_pinned_allocator.h │ │ ├── utils.cc │ │ └── utils.h │ ├── custom_engine │ │ ├── custom_engine_interface.cc │ │ ├── custom_engine_interface.h │ │ ├── custom_engine_op.cc │ │ ├── custom_engine_op.h │ │ ├── gcu_engine.cc │ │ ├── gcu_engine.h │ │ ├── gcu_engine_compiler.cc │ │ ├── gcu_engine_compiler.h │ │ ├── gcu_engine_executor.cc │ │ ├── gcu_engine_executor.h │ │ └── ir_translator │ │ │ ├── operators │ │ │ ├── activation_ops.cc │ │ │ ├── batch_norm.cc │ │ │ ├── binary_ops.cc │ │ │ ├── builtin_ops.cc │ │ │ ├── cast.cc │ │ │ ├── concat.cc │ │ │ ├── conv2d.cc │ │ │ ├── full.cc │ │ │ ├── full_int_array.cc │ │ │ ├── full_like.cc │ │ │ ├── isnan.cc │ │ │ ├── matmul.cc │ │ │ ├── nearest_interp.cc │ │ │ ├── pool2d.cc │ │ │ ├── reshape.cc │ │ │ ├── scale.cc │ │ │ ├── shape.cc │ │ │ ├── slice.cc │ │ │ ├── softmax.cc │ │ │ ├── where.cc │ │ │ └── yield.cc │ │ │ ├── translator_registry.h │ │ │ └── utils │ │ │ ├── utils.cc │ │ │ └── utils.h │ ├── custom_op │ │ ├── conv_add_mul_add_hard_swish_mul_add_op.cc │ │ ├── conv_bias_activate_op.cc │ │ ├── conv_bn_hard_swish_op.cc │ │ ├── conv_bn_op.cc │ │ ├── conv_bn_relu_op.cc │ │ ├── custom_op_common.cc │ │ ├── custom_op_common.h │ │ ├── dot_bias_op.cc │ │ ├── fused_add_rms_norm_op.cc │ │ ├── fused_conv2d_add_op.cc │ │ ├── fused_conv2d_transpose_bias_act_op.cc │ │ ├── fused_fc_elementwise_layernorm_op.cc │ │ ├── fused_multi_head_attention.cc │ │ ├── fused_rotary_embedding_op.cc │ │ ├── fused_sdp_flash_attention_op.cc │ │ ├── fused_self_attention.cc │ │ ├── gcu_netoutput_op.cc │ │ ├── mul_add_op.cc │ │ ├── my_add_n_op.cc │ │ ├── rms_norm_op.cc │ │ └── test_for_custom_engine_op.cc │ ├── kernels │ │ ├── accuracy_kernel.cc │ │ ├── activation_kernels.cc │ │ ├── adam_kernel.cc │ │ ├── 
add_n_kernel.cc │ │ ├── arange_kernel.cc │ │ ├── arg_min_max_kernels.cc │ │ ├── argsort_kernel.cc │ │ ├── assign_kernel.cc │ │ ├── atan2_kernel.cc │ │ ├── batch_norm_kernel.cc │ │ ├── binary_kernels.cc │ │ ├── bitwise_kernel.cc │ │ ├── cast_kernel.cc │ │ ├── cholesky_kernel.cc │ │ ├── clip_kernel.cc │ │ ├── compare_kernels.cc │ │ ├── concat_kernel.cc │ │ ├── contiguous_kernel.cc │ │ ├── conv_kernel.cc │ │ ├── conv_transpose_kernel.cc │ │ ├── copysign_kernel.cc │ │ ├── cross_entropy_kernel.cc │ │ ├── cross_kernel.cc │ │ ├── cumulate_kernel.cc │ │ ├── diag_kernel.cc │ │ ├── diagonal_kernel.cc │ │ ├── dropout_kernel.cc │ │ ├── einsum_kernel.cc │ │ ├── embedding_kernel.cc │ │ ├── expand_as_kernel.cc │ │ ├── expand_kernel.cc │ │ ├── eye_kernel.cc │ │ ├── fc_kernel.cc │ │ ├── flatten_kernel.cc │ │ ├── flip_kernel.cc │ │ ├── full_kernel.cc │ │ ├── funcs │ │ │ ├── common_ops.cc │ │ │ ├── common_ops.h │ │ │ ├── gcu_kernel_funcs.cc │ │ │ ├── gcu_kernel_funcs.h │ │ │ ├── gcu_layout_funcs.cc │ │ │ ├── gcu_layout_funcs.h │ │ │ ├── op_utils.cc │ │ │ ├── op_utils.h │ │ │ ├── topsaten_op_launch.h │ │ │ ├── topsaten_op_utils.cc │ │ │ └── topsaten_op_utils.h │ │ ├── fused_conv2d_add_act_kernel.cc │ │ ├── fused_fc_elementwise_layernorm_kernel.cc │ │ ├── gather_kernel.cc │ │ ├── gather_nd_kernel.cc │ │ ├── gaussian_kernel.cc │ │ ├── grid_sample_kernel.cc │ │ ├── huber_loss_kernel.cc │ │ ├── increment_kernel.cc │ │ ├── index_add_kernel.cc │ │ ├── index_put_kernel.cc │ │ ├── index_sample_kernel.cc │ │ ├── index_select_kernel.cc │ │ ├── instance_norm_kernel.cc │ │ ├── interpolate_kernels.cc │ │ ├── is_empty_kernel.cc │ │ ├── isclose_kernel.cc │ │ ├── isfinite_kernel.cc │ │ ├── isinf_kernel.cc │ │ ├── isnan_kernel.cc │ │ ├── label_smooth_kernel.cc │ │ ├── layer_norm_kernel.cc │ │ ├── lerp_kernel.cc │ │ ├── llama_stub_kernels.cc │ │ ├── log_loss_kernel.cc │ │ ├── log_softmax_kernel.cc │ │ ├── logcumsumexp_kernel.cc │ │ ├── logical_kernels.cc │ │ ├── logsumexp_kernel.cc │ │ ├── 
masked_select_kernel.cc │ │ ├── matmul_kernel.cc │ │ ├── mean_all_kernel.cc │ │ ├── memcpy_kernels.cc │ │ ├── merged_adam_kernel.cc │ │ ├── merged_momentum_kernel.cc │ │ ├── meshgrid_kernel.cc │ │ ├── momentum_kernel.cc │ │ ├── multiclass_nms3_kernel.cc │ │ ├── multinomial_kernel.cc │ │ ├── nms_kernel.cc │ │ ├── numel_kernel.cc │ │ ├── one_hot_kernel.cc │ │ ├── pool2d_kernel.cc │ │ ├── prior_box_kernel.cc │ │ ├── randperm_kernel.cc │ │ ├── reduce_kernels.cc │ │ ├── reshape_kernel.cc │ │ ├── rmsprop_kernel.cc │ │ ├── rnn_kernel.cc │ │ ├── roi_align_kernel.cc │ │ ├── roll_kernel.cc │ │ ├── scale_kernel.cc │ │ ├── scatter_kernel.cc │ │ ├── set_value_kernel.cc │ │ ├── sigmoid_cross_entropy_with_logits_kernel.cc │ │ ├── sign_kernel.cc │ │ ├── slice_kernel.cc │ │ ├── softmax_kernel.cc │ │ ├── split_kernel.cc │ │ ├── squared_l2_norm_kernel.cc │ │ ├── squeeze_kernel.cc │ │ ├── stack_kernel.cc │ │ ├── strided_copy_kernel.cc │ │ ├── strided_slice_kernel.cc │ │ ├── swiglu_kernel.cc │ │ ├── take_along_axis.cc │ │ ├── temporal_shift_kernel.cc │ │ ├── tile_kernel.cc │ │ ├── top_p_sampling_kernel.cc │ │ ├── topk_kernel.cc │ │ ├── transfer_layout_kernel.cc │ │ ├── transpose_kernel.cc │ │ ├── tril_triu_kernel.cc │ │ ├── trunc_kernel.cc │ │ ├── truncated_gaussian_random_kernel.cc │ │ ├── uniform_kernel.cc │ │ ├── unsqueeze_kernel.cc │ │ ├── where_kernel.cc │ │ └── yolo_box_kernel.cc │ ├── paddle_gcu_export.map │ ├── passes │ │ ├── __init__.py │ │ ├── common.py │ │ ├── custom_addn_pass.cc │ │ ├── gcu_conv2d_add_act_fuse_pass.cc │ │ ├── gcu_conv2d_add_fuse_pass.cc │ │ ├── gcu_conv2d_bn_fuse_pass.cc │ │ ├── gcu_custom_passes.h │ │ ├── gcu_fused_conv2d_add_act_append_pass.cc │ │ ├── gcu_op_marker_pass.cc │ │ ├── gcu_pass_pipeline.cc │ │ ├── gcu_pass_pipeline.h │ │ ├── gcu_replace_with_engine_op_pass.cc │ │ ├── gcu_sub_graph_extract_pass.cc │ │ └── legacy_ir_passes │ │ │ ├── gcu_conv_add_mul_add_hard_swish_mul_add_fuse.py │ │ │ ├── gcu_conv_bias_activate_fuse.py │ │ │ ├── 
gcu_conv_bn_fuse.py │ │ │ ├── gcu_conv_bn_hard_swish_fuse.py │ │ │ ├── gcu_conv_depthwise_elementwise_add_fuse_pass.py │ │ │ ├── gcu_conv_elementwise_add_fuse_pass.py │ │ │ ├── gcu_conv_transpose_elementwise_add_act_fuse_pass.py │ │ │ ├── gcu_conv_transpose_elementwise_add_fuse_pass.py │ │ │ ├── gcu_dot_bias_fuse.py │ │ │ ├── gcu_fused_conv2d_add_act_append_pass.py │ │ │ ├── gcu_linear_fuse.py │ │ │ ├── gcu_mul_add_fuse.py │ │ │ ├── gcu_multi_head_attn_fuse.py │ │ │ ├── gcu_netoutput_pass.py │ │ │ └── gcu_sdp_attn_fuse.py │ ├── runtime │ │ ├── flags.h │ │ ├── runtime.cc │ │ └── runtime.h │ ├── setup.py.in │ ├── tests │ │ ├── CMakeLists.txt │ │ ├── fuse_pass │ │ │ ├── CMakeLists.txt │ │ │ ├── model │ │ │ │ ├── conv_bn.pdiparams │ │ │ │ ├── conv_bn.pdmodel │ │ │ │ ├── conv_bn_hard_swish.pdiparams │ │ │ │ ├── conv_bn_hard_swish.pdmodel │ │ │ │ ├── conv_bn_relu.pdiparams │ │ │ │ └── conv_bn_relu.pdmodel │ │ │ ├── model_graph │ │ │ │ ├── test_graph.json │ │ │ │ └── test_graph.pdiparams │ │ │ ├── test_conv_bn_hard_swish_pass.py │ │ │ ├── test_conv_bn_pass.py │ │ │ ├── test_conv_bn_relu_pass.py │ │ │ ├── test_custom_addn_pass.py │ │ │ ├── test_custom_engine.py │ │ │ └── test_custom_engine_ppocr_cls.py │ │ ├── unittests │ │ │ ├── CMakeLists.txt │ │ │ ├── api_base.py │ │ │ ├── test_accuracy.py │ │ │ ├── test_add_n.py │ │ │ ├── test_arange.py │ │ │ ├── test_argmax.py │ │ │ ├── test_argmin.py │ │ │ ├── test_argsort.py │ │ │ ├── test_assign.py │ │ │ ├── test_atan2.py │ │ │ ├── test_batch_norm.py │ │ │ ├── test_binary_ops.py │ │ │ ├── test_bitwise_ops.py │ │ │ ├── test_cast.py │ │ │ ├── test_cholesky.py │ │ │ ├── test_clip.py │ │ │ ├── test_compare_ops.py │ │ │ ├── test_concat.py │ │ │ ├── test_contiguous.py │ │ │ ├── test_conv_ops.py │ │ │ ├── test_conv_transpose_ops.py │ │ │ ├── test_copysign.py │ │ │ ├── test_cross.py │ │ │ ├── test_cumulate_ops.py │ │ │ ├── test_diag.py │ │ │ ├── test_diagonal.py │ │ │ ├── test_dropout.py │ │ │ ├── test_einsum.py │ │ │ ├── test_embedding.py 
│ │ │ ├── test_expand.py │ │ │ ├── test_expand_as.py │ │ │ ├── test_eye_kernel.py │ │ │ ├── test_fc.py │ │ │ ├── test_flatten.py │ │ │ ├── test_flip.py │ │ │ ├── test_full.py │ │ │ ├── test_fused_add_rms_norm.py │ │ │ ├── test_fused_conv2d_add.py │ │ │ ├── test_fused_conv2d_add_act.py │ │ │ ├── test_fused_conv2d_transpose_bias_act.py │ │ │ ├── test_fused_rotary_embedding.py │ │ │ ├── test_fused_sdp_flash_attention.py │ │ │ ├── test_gather_nd.py │ │ │ ├── test_gather_op.py │ │ │ ├── test_gaussian_random.py │ │ │ ├── test_grid_sample.py │ │ │ ├── test_hard_sigmoid.py │ │ │ ├── test_huber_loss.py │ │ │ ├── test_increment.py │ │ │ ├── test_index_add.py │ │ │ ├── test_index_put.py │ │ │ ├── test_index_sample.py │ │ │ ├── test_index_select.py │ │ │ ├── test_instance_norm.py │ │ │ ├── test_interpolate.py │ │ │ ├── test_is_empty.py │ │ │ ├── test_isclose.py │ │ │ ├── test_isfinite.py │ │ │ ├── test_isinf.py │ │ │ ├── test_isnan.py │ │ │ ├── test_layer_norm.py │ │ │ ├── test_lerp.py │ │ │ ├── test_log_loss.py │ │ │ ├── test_log_softmax.py │ │ │ ├── test_logcumsumexp.py │ │ │ ├── test_logical_ops.py │ │ │ ├── test_logsumexp.py │ │ │ ├── test_masked_select.py │ │ │ ├── test_matmul.py │ │ │ ├── test_mean_all.py │ │ │ ├── test_meshgrid.py │ │ │ ├── test_multinomial.py │ │ │ ├── test_nms.py │ │ │ ├── test_numel.py │ │ │ ├── test_one_hot.py │ │ │ ├── test_pool2d.py │ │ │ ├── test_reduce_ops.py │ │ │ ├── test_rms_norm.py │ │ │ ├── test_rnn.py │ │ │ ├── test_roll.py │ │ │ ├── test_scale.py │ │ │ ├── test_scatter.py │ │ │ ├── test_set_value.py │ │ │ ├── test_sign.py │ │ │ ├── test_slice.py │ │ │ ├── test_softmax.py │ │ │ ├── test_split.py │ │ │ ├── test_squeeze.py │ │ │ ├── test_stack.py │ │ │ ├── test_strided_slice.py │ │ │ ├── test_swiglu.py │ │ │ ├── test_take_along_axis.py │ │ │ ├── test_tile.py │ │ │ ├── test_topk.py │ │ │ ├── test_transpose.py │ │ │ ├── test_tril_triu.py │ │ │ ├── test_trunc.py │ │ │ ├── test_unary_ops.py │ │ │ ├── test_unbind.py │ │ │ ├── 
test_uniform_random.py │ │ │ ├── test_unsqueeze.py │ │ │ ├── test_unstack.py │ │ │ ├── test_where.py │ │ │ └── test_x_custom_engine_op.py │ │ ├── unittests_jit │ │ │ ├── CMakeLists.txt │ │ │ ├── api_base.py │ │ │ ├── ctest.cmake │ │ │ ├── mise.cmake │ │ │ ├── pytest.ini │ │ │ ├── test_abs.py │ │ │ ├── test_accuracy.py │ │ │ ├── test_adam.py │ │ │ ├── test_adamw.py │ │ │ ├── test_add_n.py │ │ │ ├── test_argmax.py │ │ │ ├── test_argmin.py │ │ │ ├── test_argsort.py │ │ │ ├── test_assign.py │ │ │ ├── test_assign_value.py │ │ │ ├── test_atan.py │ │ │ ├── test_batch_norm.py │ │ │ ├── test_bilinear_interp_v2.py │ │ │ ├── test_bmm.py │ │ │ ├── test_cast.py │ │ │ ├── test_check_finite_and_unscale.py │ │ │ ├── test_clip.py │ │ │ ├── test_collective_gcu.py │ │ │ ├── test_concat.py │ │ │ ├── test_conv2d.py │ │ │ ├── test_conv2d_transpose.py │ │ │ ├── test_conv3d.py │ │ │ ├── test_conv3d_transpose.py │ │ │ ├── test_cos.py │ │ │ ├── test_cross_entropy.py │ │ │ ├── test_cross_entropy2.py │ │ │ ├── test_cumsum.py │ │ │ ├── test_depthwise_conv2d.py │ │ │ ├── test_dropout.py │ │ │ ├── test_eager_collective.py │ │ │ ├── test_eager_fp_bp_no_trans.py │ │ │ ├── test_elementwise_add.py │ │ │ ├── test_elementwise_div.py │ │ │ ├── test_elementwise_mul.py │ │ │ ├── test_elementwise_pow.py │ │ │ ├── test_elementwise_sub.py │ │ │ ├── test_embedding.py │ │ │ ├── test_equal.py │ │ │ ├── test_exp.py │ │ │ ├── test_expand.py │ │ │ ├── test_expand_as.py │ │ │ ├── test_fc.py │ │ │ ├── test_fill_constant.py │ │ │ ├── test_flatten.py │ │ │ ├── test_flip.py │ │ │ ├── test_floor.py │ │ │ ├── test_full_like.py │ │ │ ├── test_gather.py │ │ │ ├── test_gather_nd.py │ │ │ ├── test_gelu.py │ │ │ ├── test_greater_equal.py │ │ │ ├── test_greater_than.py │ │ │ ├── test_grid_sample.py │ │ │ ├── test_hard_sigmoid.py │ │ │ ├── test_hard_swish.py │ │ │ ├── test_huber_loss.py │ │ │ ├── test_increment.py │ │ │ ├── test_index_select.py │ │ │ ├── test_instance_norm.py │ │ │ ├── test_iou_similarity.py │ │ │ ├── 
test_isinf.py │ │ │ ├── test_label_smooth.py │ │ │ ├── test_layer_norm.py │ │ │ ├── test_leaky_relu.py │ │ │ ├── test_less_equal.py │ │ │ ├── test_less_than.py │ │ │ ├── test_log.py │ │ │ ├── test_log_softmax.py │ │ │ ├── test_logical_and.py │ │ │ ├── test_logical_not.py │ │ │ ├── test_lstm.py │ │ │ ├── test_masked_select.py │ │ │ ├── test_matmul_v2.py │ │ │ ├── test_maximum.py │ │ │ ├── test_mean.py │ │ │ ├── test_merged_adam.py │ │ │ ├── test_merged_momentum.py │ │ │ ├── test_meshgrid.py │ │ │ ├── test_minimum.py │ │ │ ├── test_momentum.py │ │ │ ├── test_nearest_interp.py │ │ │ ├── test_nearest_interp_v2.py │ │ │ ├── test_not_equal.py │ │ │ ├── test_one_hot.py │ │ │ ├── test_one_hot_v2.py │ │ │ ├── test_pool2d_avg.py │ │ │ ├── test_pool2d_max.py │ │ │ ├── test_pow.py │ │ │ ├── test_prior_box.py │ │ │ ├── test_randperm.py │ │ │ ├── test_range.py │ │ │ ├── test_reciprocal.py │ │ │ ├── test_reduce_max.py │ │ │ ├── test_reduce_mean.py │ │ │ ├── test_reduce_min.py │ │ │ ├── test_reduce_prod.py │ │ │ ├── test_reduce_sum.py │ │ │ ├── test_relu.py │ │ │ ├── test_relu6.py │ │ │ ├── test_reshape.py │ │ │ ├── test_reverse.py │ │ │ ├── test_rmsprop.py │ │ │ ├── test_roi_align.py │ │ │ ├── test_roll.py │ │ │ ├── test_scale.py │ │ │ ├── test_scatter.py │ │ │ ├── test_set_value.py │ │ │ ├── test_shape.py │ │ │ ├── test_sigmoid.py │ │ │ ├── test_sigmoid_cross_entropy_with_logits.py │ │ │ ├── test_sign.py │ │ │ ├── test_silu.py │ │ │ ├── test_size.py │ │ │ ├── test_slice.py │ │ │ ├── test_softmax.py │ │ │ ├── test_softmax_cross_entropy_hardlabel.py │ │ │ ├── test_softmax_cross_entropy_softlabel.py │ │ │ ├── test_split.py │ │ │ ├── test_sqrt.py │ │ │ ├── test_square.py │ │ │ ├── test_squared_l2_norm.py │ │ │ ├── test_squeeze.py │ │ │ ├── test_squeeze_v2.py │ │ │ ├── test_stack.py │ │ │ ├── test_strided_slice.py │ │ │ ├── test_swish.py │ │ │ ├── test_tanh.py │ │ │ ├── test_tile.py │ │ │ ├── test_topk.py │ │ │ ├── test_transpose.py │ │ │ ├── test_tril_triu.py │ │ │ ├── 
test_truncated_gaussian_random.py │ │ │ ├── test_unsqueeze.py │ │ │ ├── test_unstack.py │ │ │ ├── test_where.py │ │ │ └── test_yolo_box.py │ │ └── unittests_legacy │ │ │ ├── CMakeLists.txt │ │ │ ├── test_abs_op_gcu.py │ │ │ ├── test_accuracy_op_gcu.py │ │ │ ├── test_activation_op_gcu.py │ │ │ ├── test_adam_op_gcu.py │ │ │ ├── test_adamw_op_gcu.py │ │ │ ├── test_add_n_op_gcu.py │ │ │ ├── test_arange_op_gcu.py │ │ │ ├── test_arg_max_op_gcu.py │ │ │ ├── test_arg_min_op_gcu.py │ │ │ ├── test_argsort_op_gcu.py │ │ │ ├── test_assign_op_gcu.py │ │ │ ├── test_assign_value_op_gcu.py │ │ │ ├── test_batch_norm_op_gcu.py │ │ │ ├── test_bilinear_interp_op_gcu.py │ │ │ ├── test_bitwise_op_gcu.py │ │ │ ├── test_cast_op_gcu.py │ │ │ ├── test_clip_op_gcu.py │ │ │ ├── test_compare_op_gcu.py │ │ │ ├── test_concat_op_gcu.py │ │ │ ├── test_conv2d_op_gcu.py │ │ │ ├── test_conv2d_transposed_op_gcu.py │ │ │ ├── test_conv3d_op_gcu.py │ │ │ ├── test_conv_bn_relu.py │ │ │ ├── test_depthwise_conv2d_op_gcu.py │ │ │ ├── test_dropout_op_gcu.py │ │ │ ├── test_elementwise_add_op_gcu.py │ │ │ ├── test_elementwise_div_op_gcu.py │ │ │ ├── test_elementwise_max_op_gcu.py │ │ │ ├── test_elementwise_min_op_gcu.py │ │ │ ├── test_elementwise_mul_op_gcu.py │ │ │ ├── test_elementwise_sub_op_gcu.py │ │ │ ├── test_expand_as_v2_op_gcu.py │ │ │ ├── test_expand_v2_op_gcu.py │ │ │ ├── test_fill_any_like_op_gcu.py │ │ │ ├── test_fill_constant_op_gcu.py │ │ │ ├── test_flatten_op_gcu.py │ │ │ ├── test_gather_nd_op_gcu.py │ │ │ ├── test_gather_op_gcu.py │ │ │ ├── test_grid_sample_op_gcu.py │ │ │ ├── test_huber_loss_op_gcu.py │ │ │ ├── test_instance_norm_op_gcu.py │ │ │ ├── test_isinf_v2_op_gcu.py │ │ │ ├── test_label_smooth_op_gcu.py │ │ │ ├── test_layer_norm_op_gcu.py │ │ │ ├── test_log_loss_op_gcu.py │ │ │ ├── test_log_softmax_op_gcu.py │ │ │ ├── test_logical_op_gcu.py │ │ │ ├── test_lookup_table_v2_op_gcu.py │ │ │ ├── test_matmul_op_gcu.py │ │ │ ├── test_mean_op_gcu.py │ │ │ ├── test_memcpy_op_gcu.py │ │ │ ├── 
test_merged_adam_op_gcu.py │ │ │ ├── test_merged_momentum_op_gcu.py │ │ │ ├── test_meshgrid_op_gcu.py │ │ │ ├── test_momentum_op_gcu.py │ │ │ ├── test_nearest_interp_op_gcu.py │ │ │ ├── test_numel_op_gcu.py │ │ │ ├── test_one_hot_v2_op_gcu.py │ │ │ ├── test_pool2d_op_gcu.py │ │ │ ├── test_prior_box_op_gcu.py │ │ │ ├── test_randperm_op_gcu.py │ │ │ ├── test_reduce_max_op_gcu.py │ │ │ ├── test_reduce_mean_op_gcu.py │ │ │ ├── test_reduce_min_op_gcu.py │ │ │ ├── test_reduce_prod_op_gcu.py │ │ │ ├── test_reduce_sum_op_gcu.py │ │ │ ├── test_reshape_op_gcu.py │ │ │ ├── test_rmsprop_op_gcu.py │ │ │ ├── test_roi_align_op_gcu.py │ │ │ ├── test_scale_op_gcu.py │ │ │ ├── test_scatter_op_gcu.py │ │ │ ├── test_set_value_op_gcu.py │ │ │ ├── test_sigmoid_cross_entropy_with_logits_op_gcu.py │ │ │ ├── test_slice_op_gcu.py │ │ │ ├── test_softmax_op_gcu.py │ │ │ ├── test_softmax_with_cross_entropy_op_gcu.py │ │ │ ├── test_split_op_gcu.py │ │ │ ├── test_squared_l2_norm_op_gcu.py │ │ │ ├── test_squeeze_op_gcu.py │ │ │ ├── test_stack_op_gcu.py │ │ │ ├── test_strided_slice_op_gcu.py │ │ │ ├── test_tile_op_gcu.py │ │ │ ├── test_top_k_op_gcu.py │ │ │ ├── test_transpose_op_gcu.py │ │ │ ├── test_tril_triu_op_gcu.py │ │ │ ├── test_unsqueeze_op_gcu.py │ │ │ ├── test_where_op_gcu.py │ │ │ └── test_yolo_box_op_gcu.py │ ├── tools │ │ └── dockerfile │ │ │ ├── Dockerfile.gcu.ubuntu20.gcc84 │ │ │ └── build-image.sh │ └── topscc_custom_kernels │ │ └── topscc_custom_demo_kernel.cc ├── iluvatar_gpu │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── clean_paddle.sh │ ├── cmake │ │ ├── cblas.cmake │ │ ├── cuda.cmake │ │ ├── dummy.c.in │ │ ├── external │ │ │ ├── cccl.cmake │ │ │ ├── eigen.cmake │ │ │ ├── mklml.cmake │ │ │ ├── protobuf.cmake │ │ │ ├── xxhash.cmake │ │ │ └── zlib.cmake │ │ ├── generic.cmake │ │ ├── paddle.cmake │ │ ├── third_party.cmake │ │ └── version.cmake │ ├── common │ │ └── cuda_flags.cc │ ├── install_paddle.sh │ ├── kernels │ │ ├── cuda_kernels │ │ │ ├── 
abs_grad_kernel_register.cc │ │ │ ├── abs_kernel_register.cc │ │ │ ├── activation_grad_kernel_register.cc │ │ │ ├── activation_kernel_register.cc │ │ │ ├── adamw_kernel_register.cc │ │ │ ├── adamw_kernel_row_register.cc │ │ │ ├── addmm_kernel_register.cc │ │ │ ├── amp_kernel_register.cc │ │ │ ├── arange_kernel_register.cc │ │ │ ├── assign_kernel_register.cc │ │ │ ├── bitwise_kernel_register.cc │ │ │ ├── c_embedding_grad_kernel_register.cc │ │ │ ├── c_embedding_kernel_register.cc │ │ │ ├── c_identity_kernel_register.cc │ │ │ ├── c_softmax_with_cross_entropy_grad_kernel_register.cc │ │ │ ├── c_softmax_with_cross_entropy_kernel_register.cc │ │ │ ├── cast_kernel_register.cc │ │ │ ├── clip_grad_kernel_register.cc │ │ │ ├── clip_kernel_register.cc │ │ │ ├── compare_kernel_register.cc │ │ │ ├── concat_grad_kernel_register.cc │ │ │ ├── concat_kernel_register.cc │ │ │ ├── contiguous_kernel_register.cc │ │ │ ├── cross_entropy_grad_kernel.cu │ │ │ ├── cross_entropy_kernel.cu │ │ │ ├── cum_grad_kernel_register.cc │ │ │ ├── cum_kernel_register.cc │ │ │ ├── einsum_kernel_register.cc │ │ │ ├── elementwise_grad_kernel_register.cc │ │ │ ├── elementwise_kernel_register.cc │ │ │ ├── embedding_grad_kernel_register.cc │ │ │ ├── embedding_kernel_register.cc │ │ │ ├── empty_kernel_register.cc │ │ │ ├── expand_kernel_register.cc │ │ │ ├── fill_kernel_register.cc │ │ │ ├── flatten_grad_kernel_register.cc │ │ │ ├── flatten_kernel_register.cc │ │ │ ├── full_kernel_register.cc │ │ │ ├── fused_rope_grad_kernel.cu │ │ │ ├── fused_rope_kernel.cu │ │ │ ├── gather_grad_kernel_register.cc │ │ │ ├── gather_kernel_register.cc │ │ │ ├── gather_nd_grad_kernel_register.cc │ │ │ ├── gather_nd_kernel_register.cc │ │ │ ├── gaussian_kernel_register.cc │ │ │ ├── index_put_kernel_register.cc │ │ │ ├── layer_norm_grad_kernel.cu │ │ │ ├── layer_norm_kernel.cu │ │ │ ├── logical_kernel_register.cc │ │ │ ├── logsumexp_kernel_register.cc │ │ │ ├── matmul_grad_kernel.cu │ │ │ ├── matmul_kernel.cu │ │ │ ├── 
mean_all_grad_kernel_register.cc │ │ │ ├── mean_all_kernel_register.cc │ │ │ ├── multinomial_kernel_register.cc │ │ │ ├── nonzero_kernel_register.cc │ │ │ ├── numel_kernel_register.cc │ │ │ ├── one_hot_kernel_register.cc │ │ │ ├── p_norm_grad_kernel_register.cc │ │ │ ├── p_norm_kernel_register.cc │ │ │ ├── pad_grad_kernel_register.cc │ │ │ ├── pad_kernel_register.cc │ │ │ ├── put_along_axis_kernel_register.cc │ │ │ ├── randint_kernel_register.cc │ │ │ ├── reduce_all_kernel_register.cc │ │ │ ├── reduce_any_kernel_register.cc │ │ │ ├── reduce_kernel_kps_register.cc │ │ │ ├── reduce_kernel_register.cc │ │ │ ├── reduce_max_kernel_register.cc │ │ │ ├── reduce_mean_kernel_register.cc │ │ │ ├── reduce_sum_kernel_register.cc │ │ │ ├── reshape_grad_kernel_register.cc │ │ │ ├── reshape_kernel_register.cc │ │ │ ├── rms_norm_grad_kernel_register.cc │ │ │ ├── rms_norm_kernel_register.cc │ │ │ ├── scale_kernel_register.cc │ │ │ ├── scatter_nd_add_grad_kernel.cu │ │ │ ├── scatter_nd_add_kernel.cu │ │ │ ├── set_value_grad_kernel_register.cc │ │ │ ├── set_value_kernel_register.cc │ │ │ ├── shape_kernel_register.cc │ │ │ ├── sign_kernel_register.cc │ │ │ ├── slice_grad_kernel_register.cc │ │ │ ├── slice_kernel_register.cc │ │ │ ├── softmax_grad_kernel.cu │ │ │ ├── softmax_kernel.cu │ │ │ ├── split_kernel_register.cc │ │ │ ├── squared_l2_norm_kernel_register.cc │ │ │ ├── squeeze_grad_kernel_register.cc │ │ │ ├── squeeze_kernel_register.cc │ │ │ ├── stack_kernel_register.cc │ │ │ ├── strided_copy_kernel_register.cc │ │ │ ├── strided_slice_grad_kernel_register.cc │ │ │ ├── strided_slice_kernel_register.cc │ │ │ ├── swiglu_grad_kernel_register.cc │ │ │ ├── swiglu_kernel_register.cc │ │ │ ├── take_along_axis_kernel_register.cc │ │ │ ├── tile_grad_kernel_register.cc │ │ │ ├── tile_kernel_register.cc │ │ │ ├── top_k_grad_kernel.cu │ │ │ ├── top_k_kernel.cu │ │ │ ├── transpose_grad_kernel_register.cc │ │ │ ├── transpose_kernel_register.cc │ │ │ ├── tril_triu_kernel_register.cc │ │ │ ├── 
unbind_kernel_register.cc │ │ │ ├── uniform_kernel_register.cc │ │ │ ├── unsqueeze_grad_kernel_register.cc │ │ │ ├── unsqueeze_kernel_register.cc │ │ │ └── where_kernel_register.cc │ │ ├── ernie_core │ │ │ ├── cal_aux_loss_grad_kernel_register.cc │ │ │ ├── expand_modality_expert_id_kernel_register.cc │ │ │ ├── fused_bias_act_kernel_register.cc │ │ │ ├── int_bincount_kernel_register.cc │ │ │ ├── layer_norm_cuda_kernel_register.cc │ │ │ ├── moe_combine_grad_kernel_register.cc │ │ │ ├── moe_combine_kernel_register.cc │ │ │ ├── moe_gate_dispatch_grad_kernel_register.cc │ │ │ ├── moe_gate_dispatch_kernel_register.cc │ │ │ ├── moe_gate_dispatch_permute_grad_kernel_register.cc │ │ │ ├── moe_gate_dispatch_permute_kernel_register.cc │ │ │ ├── moe_ops_partial_nosoftmaxtopk_grad_kernel_register.cc │ │ │ ├── moe_ops_partial_nosoftmaxtopk_kernel_register.cc │ │ │ ├── register_build_src_rank_and_local_expert_id_kernel.cc │ │ │ ├── rms_norm_kernel_register.cc │ │ │ └── top_p_sampling_kernel_register.cc │ │ ├── funcs │ │ │ ├── blas │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── blas.cc │ │ │ │ ├── blas.h │ │ │ │ ├── blas_impl.cu.h │ │ │ │ ├── blas_impl.h │ │ │ │ ├── blaslt_gemm_search.h │ │ │ │ └── blaslt_impl.cu.h │ │ │ ├── fake_quantize_functor.h │ │ │ ├── layer_norm_impl.cu.h │ │ │ ├── layer_norm_util.h │ │ │ ├── math_cuda_utils.h │ │ │ └── top_k_function_cuda.h │ │ ├── gpudnn │ │ │ └── softmax_gpudnn.h │ │ └── impl │ │ │ ├── addmm_kernel_impl.h │ │ │ ├── matmul_grad_kernel_impl.h │ │ │ └── matmul_kernel_impl.h │ ├── patches │ │ ├── eigen │ │ │ ├── Core │ │ │ ├── Tensor │ │ │ └── TensorAssign.h │ │ └── paddle-corex.patch │ ├── runtime │ │ └── runtime.cc │ ├── setup.py.in │ └── tests │ │ ├── CMakeLists.txt │ │ ├── run_test.sh │ │ └── unittests │ │ ├── test_abs_iluvatar.py │ │ ├── test_addmm_op_iluvatar.py │ │ ├── test_arange_iluvatar.py │ │ ├── test_assign_op_iluvatar.py │ │ ├── test_bfloat16_embedding_iluvatar.py │ │ ├── test_bitwise_op_iluvatar.py │ │ ├── test_cast_op_iluvatar.py │ │ 
├── test_concat_op_iluvatar.py │ │ ├── test_count_nonzero_api_iluvatar.py │ │ ├── test_cumsum_op_iluvatar.py │ │ ├── test_einsum_iluvatar.py │ │ ├── test_elementwise_div_op_iluvatar.py │ │ ├── test_empty_op_iluvatar.py │ │ ├── test_flatten2_op_iluvatar.py │ │ ├── test_full_like_op_iluvatar.py │ │ ├── test_full_op_iluvatar.py │ │ ├── test_fused_rope_iluvatar.py │ │ ├── test_gather_op_iluvatar.py │ │ ├── test_greater_equal_op_iluvatar.py │ │ ├── test_layer_norm_op_iluvatar.py │ │ ├── test_layer_norm_op_v2_iluvatar.py │ │ ├── test_matmul_op_iluvatar.py │ │ ├── test_max_op_iluvatar.py │ │ ├── test_maximum_op_iluvatar.py │ │ ├── test_nonzero_api_iluvatar.py │ │ ├── test_one_hot_v2_op_iluvatar.py │ │ ├── test_p_norm_op_iluvatar.py │ │ ├── test_pad_op_iluvatar.py │ │ ├── test_rms_norm_op_iluvatar.py │ │ ├── test_scale_op_iluvatar.py │ │ ├── test_set_value_op_iluvatar.py │ │ ├── test_shape_op_iluvatar.py │ │ ├── test_sign_op_iluvatar.py │ │ ├── test_softmax_op_iluvatar.py │ │ ├── test_squared_l2_norm_op_iluvatar.py │ │ ├── test_squeeze2_op_iluvatar.py │ │ ├── test_stack_op_iluvatar.py │ │ ├── test_strided_slice_op_iluvatar.py │ │ ├── test_subtract_op_iluvatar.py │ │ ├── test_sum_op_iluvatar.py │ │ ├── test_tile_op_iluvatar.py │ │ ├── test_top_k_op_iluvatar.py │ │ ├── test_transpose_op_iluvatar.py │ │ └── test_zeros_like_op_iluvatar.py ├── intel_gpu │ ├── CMakeLists.txt │ ├── README.md │ ├── cmake │ │ ├── dummy.c.in │ │ ├── external │ │ │ ├── gflags.cmake │ │ │ ├── glog.cmake │ │ │ ├── gtest.cmake │ │ │ ├── onednn.cmake │ │ │ ├── onednn_gpu.cmake │ │ │ └── pybind11.cmake │ │ ├── generic.cmake │ │ ├── paddle.cmake │ │ └── third_party.cmake │ ├── kernels │ │ ├── argsort_kernel.cc │ │ ├── assign_value_kernel.cc │ │ ├── cast_kernel.cc │ │ ├── compare_kernel.cc │ │ ├── dnn_support.hpp │ │ ├── elementwise_kernel.cc │ │ ├── full_kernel.cc │ │ ├── kernels.h │ │ ├── mean_kernel.cc │ │ ├── memcpy_kernel.cc │ │ ├── phi_funcs.h │ │ ├── reduce_kernel.cc │ │ ├── reshape_kernel.cc │ │ ├── 
slice_kernel.cc │ │ ├── softmax_kernel.cc │ │ ├── transpose_kernel.cc │ │ └── uniform_random_kernel.cc │ ├── load.sh │ ├── runtime │ │ └── runtime.cc │ ├── setup.py.in │ └── tests │ │ ├── CMakeLists.txt │ │ ├── test_MNIST_model.py │ │ ├── test_mean_op.py │ │ └── unittests │ │ ├── CMakeLists.txt │ │ ├── test_argsort_op.py │ │ ├── test_assign_value_op.py │ │ ├── test_cast_op.py │ │ ├── test_compare_op.py │ │ ├── test_elementwise_mul_op.py │ │ ├── test_fill_constant_op.py │ │ ├── test_mean_op.py │ │ ├── test_memcpy_op.py │ │ ├── test_reduce_op.py │ │ ├── test_reshape_op.py │ │ ├── test_slice_op.py │ │ ├── test_softmax_op.py │ │ ├── test_transpose_op.py │ │ └── test_uniform_random_op.py ├── intel_hpu │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── cmake │ │ ├── dummy.c.in │ │ ├── external │ │ │ ├── custom_tpc_lib.cmake │ │ │ ├── gflags.cmake │ │ │ ├── glog.cmake │ │ │ ├── gtest.cmake │ │ │ ├── onednn.cmake │ │ │ ├── pybind11.cmake │ │ │ └── synapse.cmake │ │ ├── generic.cmake │ │ ├── paddle.cmake │ │ ├── third_party.cmake │ │ └── version.cmake │ ├── custom_ops │ │ ├── llama_infer │ │ │ ├── block_attn_pp_kernels.cc │ │ │ ├── fused_block_attention.cc │ │ │ ├── fused_flatpa_proj.cc │ │ │ ├── fused_fp8_gemm.cc │ │ │ ├── fused_fp8_rms_qkv_rope_t.cc │ │ │ ├── fused_fp8_sdpa.cc │ │ │ ├── fused_get_rotary_embedding.cc │ │ │ ├── fused_mlp.cc │ │ │ ├── fused_rms_mlp.cc │ │ │ ├── fused_rms_mlp_add.cc │ │ │ ├── fused_rms_qkv_rope.cc │ │ │ ├── fused_rms_qkv_rope_t.cc │ │ │ ├── fused_rms_qkv_rope_v2.cc │ │ │ ├── fused_rms_qkv_rope_v3.cc │ │ │ ├── fused_sdpa_proj.cc │ │ │ ├── fused_sdpa_proj_dec.cc │ │ │ ├── fused_sdpa_proj_t.cc │ │ │ ├── fused_sdpa_proj_v2.cc │ │ │ ├── get_output.cc │ │ │ ├── get_padding_offset.cc │ │ │ ├── pp_kernels.cc │ │ │ ├── prepare_block_metadata.cc │ │ │ ├── recover_block.cc │ │ │ ├── save_with_output_msg.cc │ │ │ ├── speculate_get_output.cc │ │ │ ├── step.cc │ │ │ └── update_inputs_v2.cc │ │ ├── python │ │ │ └── paddlenlp_ops │ │ │ │ ├── 
__init__.py │ │ │ │ ├── layers.py │ │ │ │ └── llama_block_atten.py │ │ ├── setup.py │ │ ├── src │ │ │ ├── fake_gpu_kernels.cc │ │ │ ├── fused_moe.cc │ │ │ ├── index_copy.cc │ │ │ └── index_reduce.cc │ │ └── tests │ │ │ ├── test_flatPA_proj.py │ │ │ ├── test_fused_block_attention.py │ │ │ ├── test_fused_fp8_rms_qkv_rope_t.py │ │ │ ├── test_fused_mlp.py │ │ │ ├── test_fused_rms_mlp.py │ │ │ ├── test_index_copy.py │ │ │ ├── test_inplace_cumsum.py │ │ │ ├── test_mixture_of_experts.py │ │ │ ├── test_rms_qkv_rope.py │ │ │ ├── test_sdpa_proj.py │ │ │ └── test_sdpa_proj_v2.py │ ├── kernels │ │ ├── all_kernel.cc │ │ ├── any_kernel.cc │ │ ├── arange_kernel.cc │ │ ├── arg_min_max_kernel.cc │ │ ├── assign_kernel.cc │ │ ├── bitwise_kernel.cc │ │ ├── c_identity_kernel.cc │ │ ├── cast_kernel.cc │ │ ├── compare_kernel.cc │ │ ├── concat_kernel.cc │ │ ├── contiguous.cc │ │ ├── cum_kernel.cc │ │ ├── einsum_kernel.cc │ │ ├── elementwise_kernel.cc │ │ ├── expand_kernel.cc │ │ ├── full_kernel.cc │ │ ├── funcs.h │ │ ├── gather_kernel.cc │ │ ├── gather_nd_kernel.cc │ │ ├── gaussian_kernel.cc │ │ ├── hpu_funcs.h │ │ ├── hpu_operator.cc │ │ ├── hpu_operator.h │ │ ├── index_put_kernel.cc │ │ ├── index_sample_kernel.cc │ │ ├── index_select_kernel.cc │ │ ├── logical_kernel.cc │ │ ├── matmul_kernel.cc │ │ ├── memcpy_kernel.cc │ │ ├── multiply_kernel.cc │ │ ├── one_hot_kernel.cc │ │ ├── phi_funcs.h │ │ ├── reduce_kernel.cc │ │ ├── rms_norm_kernel.cc │ │ ├── rope_kernel.cc │ │ ├── scale_kernel.cc │ │ ├── scatter_kernel.cc │ │ ├── sdpa_kernel.cc │ │ ├── set_value_kernel.cc │ │ ├── slice_kernel.cc │ │ ├── softmax_kernel.cc │ │ ├── split_kernel.cc │ │ ├── squeeze_kernel.cc │ │ ├── swiglu_kernel.cc │ │ ├── tile_kernel.cc │ │ ├── top_k.cc │ │ ├── top_p.cc │ │ ├── top_p_hpu.cc │ │ ├── transpose_kernel.cc │ │ ├── tril_triu_kernel.cc │ │ ├── unary_kernel.cc │ │ ├── uniform_kernel.cc │ │ └── where_kernel.cc │ ├── runtime │ │ ├── flags.h │ │ ├── runtime.cc │ │ └── runtime.h │ ├── setup.py.in │ ├── tests │ 
│ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── benchmark_paddlepaddle_cli.sh │ │ ├── ccl │ │ │ ├── all2all.py │ │ │ ├── allgather.py │ │ │ ├── allreduce.py │ │ │ └── readme.md │ │ ├── config.py │ │ ├── inference_hpu.py │ │ ├── junitxml.py │ │ ├── pr-test-run.py │ │ ├── run_distributed_generation.sh │ │ ├── run_generation.py │ │ ├── test_MNIST_model.py │ │ ├── test_addmul_model.py │ │ ├── test_hpu_memory_stat.py │ │ ├── test_index_reduce.py │ │ ├── test_kvcache.py │ │ ├── test_llama.py │ │ ├── test_llama_2x.sh │ │ ├── test_profiler.py │ │ ├── test_softmax_model.py │ │ ├── test_vllm_kvcache.py │ │ └── unittests │ │ │ ├── CMakeLists.txt │ │ │ ├── api_base.py │ │ │ ├── test_abs_op.py │ │ │ ├── test_abs_op_eager.py │ │ │ ├── test_activation_op.py │ │ │ ├── test_arg_max_op.py │ │ │ ├── test_block_attn_pp_stages.py │ │ │ ├── test_cast.py │ │ │ ├── test_concat.py │ │ │ ├── test_contiguous.py │ │ │ ├── test_cumsum_op.py │ │ │ ├── test_elementwise_add_op.py │ │ │ ├── test_elementwise_div_op.py │ │ │ ├── test_elementwise_max_op.py │ │ │ ├── test_elementwise_min_op.py │ │ │ ├── test_elementwise_mod_op.py │ │ │ ├── test_elementwise_mul_op.py │ │ │ ├── test_elementwise_pow_op.py │ │ │ ├── test_elementwise_sub_op.py │ │ │ ├── test_expand.py │ │ │ ├── test_floor.py │ │ │ ├── test_full_like_op.py │ │ │ ├── test_full_op.py │ │ │ ├── test_fused_fp8_gemm.py │ │ │ ├── test_fused_fp8_sdpa.py │ │ │ ├── test_fused_mlp.py │ │ │ ├── test_fused_rms_mlp.py │ │ │ ├── test_fused_rms_qkv_rope.py │ │ │ ├── test_fused_sdpa_proj.py │ │ │ ├── test_gather.py │ │ │ ├── test_gather_nd.py │ │ │ ├── test_get_token_multiscores.py │ │ │ ├── test_index_copy.py │ │ │ ├── test_index_put_op.py │ │ │ ├── test_index_sample.py │ │ │ ├── test_index_select.py │ │ │ ├── test_logical_op.py │ │ │ ├── test_one_hot.py │ │ │ ├── test_pow.py │ │ │ ├── test_reduce_all.py │ │ │ ├── test_reduce_any.py │ │ │ ├── test_reduce_max.py │ │ │ ├── test_reduce_mean.py │ │ │ ├── test_reduce_min.py │ │ │ ├── test_reduce_prod.py │ │ │ 
├── test_reduce_sum.py │ │ │ ├── test_relu.py │ │ │ ├── test_rms_norm.py │ │ │ ├── test_scale.py │ │ │ ├── test_scatter_hpu.py │ │ │ ├── test_set_value_afbi.py │ │ │ ├── test_set_value_flags.py │ │ │ ├── test_sigmoid.py │ │ │ ├── test_silu.py │ │ │ ├── test_sin.py │ │ │ ├── test_slice.py │ │ │ ├── test_softmax.py │ │ │ ├── test_split.py │ │ │ ├── test_sqrt.py │ │ │ ├── test_squeeze.py │ │ │ ├── test_swiglu_op.py │ │ │ ├── test_tanh.py │ │ │ ├── test_tile.py │ │ │ ├── test_top_k.py │ │ │ ├── test_top_p_sampling.py │ │ │ ├── test_transpose_op_eager.py │ │ │ ├── test_tril_triu.py │ │ │ ├── test_uniform.py │ │ │ ├── test_update_inputs_v2.py │ │ │ ├── test_where_op.py │ │ │ └── util.py │ ├── tools │ │ ├── pr_hpu_ci.sh │ │ └── testresult_analyse.py │ └── utils │ │ ├── hlml_shm.h │ │ ├── hpu_tracer.cc │ │ ├── hpu_tracer.h │ │ ├── hpu_utils.h │ │ ├── mem_hlml.cc │ │ ├── mem_hlml.h │ │ ├── utils.cc │ │ └── utils.h ├── metax_gpu │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── build.sh │ ├── cmake │ │ ├── cblas.cmake │ │ ├── dummy.c.in │ │ ├── external │ │ │ └── mklml.cmake │ │ ├── generic.cmake │ │ ├── paddle.cmake │ │ ├── third_party.cmake │ │ └── version.cmake │ ├── kernels │ │ ├── c_softmax_with_cross_entropy_kernel.cu │ │ ├── cross_entropy_grad_kernel_register.cu │ │ ├── cross_entropy_kernel_register.cu │ │ ├── cuda_kernels │ │ │ ├── abs_grad_kernel_register.cu │ │ │ ├── abs_kernel_register.cu │ │ │ ├── accuracy_check_kernel_register.cu │ │ │ ├── accuracy_kernel_register.cu │ │ │ ├── activation_grad_kernel_register.cu │ │ │ ├── activation_kernel_register.cu │ │ │ ├── adadelta_kernel_register.cu │ │ │ ├── adagrad_kernel_register.cu │ │ │ ├── adam_kernel_register.cu │ │ │ ├── adamax_kernel_register.cu │ │ │ ├── adamw_kernel_register.cu │ │ │ ├── adamw_kernel_row_register.cu │ │ │ ├── add_n_kernel_register.cu │ │ │ ├── all_gather_kernel_register.cu │ │ │ ├── all_reduce_kernel_register.cu │ │ │ ├── all_to_all_kernel_register.cu │ │ │ ├── 
allclose_kernel_register.cu │ │ │ ├── amp_kernel_register.cu │ │ │ ├── angle_grad_kernel_register.cu │ │ │ ├── angle_kernel_register.cu │ │ │ ├── apply_per_channel_scale_kernel_register.cu │ │ │ ├── arange_kernel_register.cu │ │ │ ├── arg_min_max_kernel_register.cu │ │ │ ├── array_kernel_register.cu │ │ │ ├── as_complex_kernel_register.cu │ │ │ ├── as_real_kernel_register.cu │ │ │ ├── asgd_kernel_register.cu │ │ │ ├── assign_kernel_register.cu │ │ │ ├── assign_pos_kernel_register.cu │ │ │ ├── bincount_kernel_register.cu │ │ │ ├── bitwise_kernel_register.cu │ │ │ ├── c_embedding_grad_kernel_register.cu │ │ │ ├── c_embedding_kernel_register.cu │ │ │ ├── c_identity_kernel_register.cu │ │ │ ├── c_softmax_with_cross_entropy_grad_kernel_register.cu │ │ │ ├── cast_kernel_register.cu │ │ │ ├── clip_grad_kernel_register.cu │ │ │ ├── clip_kernel_register.cu │ │ │ ├── compare_kernel_legacy_register.cu │ │ │ ├── compare_kernel_register.cu │ │ │ ├── concat_grad_kernel_register.cu │ │ │ ├── concat_kernel_register.cu │ │ │ ├── contiguous_kernel_register.cu │ │ │ ├── cum_kernel_register.cu │ │ │ ├── einsum_grad_kernel_register.cu │ │ │ ├── einsum_kernel_register.cu │ │ │ ├── elementwise_grad_kernel_register.cu │ │ │ ├── elementwise_kernel_register.cu │ │ │ ├── elementwise_legacy_kernel.cu │ │ │ ├── embedding_grad_kernel_register.cu │ │ │ ├── embedding_kernel_register.cu │ │ │ ├── embedding_with_scaled_gradient_grad_kernel_register.cu │ │ │ ├── empty_kernel_register.cu │ │ │ ├── expand_as_grad_kernel_register.cu │ │ │ ├── expand_as_kernel_register.cu │ │ │ ├── expand_grad_kernel_register.cu │ │ │ ├── expand_kernel_register.cu │ │ │ ├── fill_diagonal_grad_kernel_register.cu │ │ │ ├── fill_diagonal_kernel_register.cu │ │ │ ├── fill_diagonal_tensor_grad_kernel_register.cu │ │ │ ├── fill_diagonal_tensor_kernel_register.cu │ │ │ ├── fill_grad_kernel_register.cu │ │ │ ├── fill_kernel_register.cu │ │ │ ├── flatten_grad_kernel_register.cu │ │ │ ├── flatten_kernel_register.cu │ │ │ ├── 
full_kernel_register.cu │ │ │ ├── gather_nd_grad_kernel_register.cu │ │ │ ├── gather_nd_kernel_register.cu │ │ │ ├── gaussian_kernel_register.cu │ │ │ ├── gelu_grad_kernel_register.cu │ │ │ ├── gelu_kernel_register.cu │ │ │ ├── index_add_kernel_register.cu │ │ │ ├── index_put_grad_kernel_register.cu │ │ │ ├── index_put_kernel_register.cu │ │ │ ├── logical_kernel_register.cu │ │ │ ├── logsumexp_kernel_register.cu │ │ │ ├── matmul_grad_kernel_register.cu │ │ │ ├── matmul_kernel_register.cu │ │ │ ├── mean_all_grad_kernel_register.cu │ │ │ ├── mean_all_kernel_register.cu │ │ │ ├── multiplex_grad_kernel_register.cu │ │ │ ├── multiplex_kernel_register.cu │ │ │ ├── nonzero_kernel_register.cu │ │ │ ├── numel_kernel_register.cu │ │ │ ├── one_hot_kernel_register.cu │ │ │ ├── p_norm_grad_kernel_register.cu │ │ │ ├── p_norm_kernel_register.cu │ │ │ ├── pad_kernel_register.cu │ │ │ ├── pow2_decay_with_linear_warmup_kernel_register.cu │ │ │ ├── put_along_axis_kernel_register.cu │ │ │ ├── randint_kernel_register.cu │ │ │ ├── reduce_all_kernel_register.cc │ │ │ ├── reduce_any_kernel_register.cc │ │ │ ├── reduce_kernel_kps_register.cu │ │ │ ├── reduce_kernel_register.cu │ │ │ ├── reduce_max_kernel_register.cu │ │ │ ├── reduce_mean_kernel_register.cu │ │ │ ├── reduce_sum_kernel_register.cu │ │ │ ├── reshape_grad_kernel_register.cu │ │ │ ├── reshape_kernel_register.cu │ │ │ ├── scale_kernel_register.cu │ │ │ ├── scatter_nd_add_kernel_register.cu │ │ │ ├── set_value_kernel_register.cu │ │ │ ├── shape_kernel_register.cu │ │ │ ├── sigmoid_cross_entropy_with_logits_grad_kernel_register.cu │ │ │ ├── sign_kernel_register.cu │ │ │ ├── slice_grad_kernel_register.cu.cc │ │ │ ├── slice_kernel_register.cu.cc │ │ │ ├── soft_relu_kernel_register.cu │ │ │ ├── split_kernel_register.cu │ │ │ ├── squared_l2_norm_grad_kernel_register.cu │ │ │ ├── squared_l2_norm_kernel_register.cu │ │ │ ├── stack_kernel_register.cu │ │ │ ├── strided_copy_kernel_register.cu │ │ │ ├── 
strided_slice_grad_kernel_register.cu │ │ │ ├── strided_slice_kernel_register.cu │ │ │ ├── swiglu_grad_kernel_register.cu │ │ │ ├── swiglu_kernel_register.cu │ │ │ ├── take_along_axis_grad_kernel_register.cu │ │ │ ├── take_along_axis_kernel_register.cu │ │ │ ├── tile_kernel_register.cu │ │ │ ├── top_k_grad_kernel_register.cu │ │ │ ├── top_k_kernel_register.cu │ │ │ ├── transpose_grad_kernel_register.cu │ │ │ ├── transpose_kernel_register.cu │ │ │ ├── tril_indices_kernel_register.cu │ │ │ ├── tril_triu_grad_kernel_register.cu │ │ │ ├── tril_triu_kernel_register.cu │ │ │ ├── triu_indices_kernel_register.cu │ │ │ ├── unbind_kernel_register.cu │ │ │ ├── uniform_inplace_grad_kernel_register.cu │ │ │ ├── uniform_inplace_kernel_register.cu │ │ │ ├── uniform_kernel_register.cu │ │ │ ├── uniform_random_batch_size_like_kernel_register.cu │ │ │ ├── unsqueeze_grad_kernel_register.cu │ │ │ ├── unsqueeze_kernel_register.cu │ │ │ ├── where_grad_kernel_register.cu │ │ │ └── where_kernel_register.cu │ │ ├── dynload │ │ │ ├── cupti_lib_path.h │ │ │ ├── dynamic_loader.cc │ │ │ └── dynamic_loader.h │ │ ├── ernie_core │ │ │ ├── fused_bias_act_kernel_register.cu │ │ │ ├── rms_norm_kernel_register.cu │ │ │ └── top_p_sampling_kernel_register.cu │ │ ├── flags_declare.cu │ │ ├── funcs │ │ │ ├── blas │ │ │ │ ├── blas.cc │ │ │ │ ├── blas.h │ │ │ │ ├── blas_impl.cu.h │ │ │ │ ├── blas_impl.h │ │ │ │ ├── blaslt_gemm_search.h │ │ │ │ ├── blaslt_impl.cu.h │ │ │ │ ├── cublas.cc │ │ │ │ ├── cublas.h │ │ │ │ ├── cublaslt.h │ │ │ │ ├── port.cc │ │ │ │ └── port.h │ │ │ ├── layer_norm_util.h │ │ │ └── quant_dequant.h │ │ ├── gpudnn │ │ │ ├── mxdnn_helper.h │ │ │ └── softmax_gpudnn.h │ │ ├── impl │ │ │ ├── addmm_kernel_impl.h │ │ │ ├── matmul_grad_kernel_impl.h │ │ │ ├── matmul_kernel_impl.h │ │ │ └── matmul_kernel_impl_maca.h │ │ └── layer_norm_kernel_register.cu │ ├── patch │ │ ├── mcEigen_3.4.0_paddle_final.zip │ │ └── paddle.patch │ ├── runtime │ │ └── runtime.cc │ ├── setup.py.in │ └── tests │ │ ├── 
CMakeLists.txt │ │ ├── error_pytest │ │ ├── test_activation_op.py │ │ ├── test_bincount_op_metax.py │ │ ├── test_bitwise_op_metax.py │ │ ├── test_c_embedding_op_metax.py │ │ ├── test_cumsum_op_metax.py │ │ ├── test_einsum_op.py │ │ ├── test_elementwise_add_op_metax.py │ │ ├── test_fill_constant_op_metax.py │ │ ├── test_flatten_contiguous_range_op_metax.py │ │ ├── test_index_put_op.py │ │ ├── test_logical_op_metax.py │ │ ├── test_maximum_op_metax.py │ │ ├── test_p_norm_op_metax.py │ │ ├── test_rms_norm_op_meatx.py │ │ ├── test_split_op_metax.py │ │ ├── test_stack_op.py │ │ └── test_sum_op_metax.py │ │ ├── run_test.sh │ │ └── unittest │ │ ├── test_abs_metax.py │ │ ├── test_adamw_op_metax.py │ │ ├── test_arange_metax.py │ │ ├── test_cast_op_metax.py │ │ ├── test_clip_op_metax.py │ │ ├── test_compare_op_metax.py │ │ ├── test_concat_op_metax.py │ │ ├── test_elementwise_floordiv_op_metax.py │ │ ├── test_elementwise_mul_op_metax.py │ │ ├── test_elementwise_pow_op_metax.py │ │ ├── test_empty_op_metax.py │ │ ├── test_fill_any_op_metax.py │ │ ├── test_full_like_op_metax.py │ │ ├── test_fused_bias_act_op.py │ │ ├── test_gather_nd_op_metax.py │ │ ├── test_index_add_op_metax.py │ │ ├── test_one_hot_v2_op.py │ │ ├── test_pad_op_metax.py │ │ ├── test_reduce_op_metax.py │ │ ├── test_reshape_op_metax.py │ │ ├── test_sign_op_metax.py │ │ ├── test_subtract_op_metax.py │ │ ├── test_top_k_v2_op_metax.py │ │ ├── test_top_p_sampling.py │ │ └── test_uniform_random_op_metax.py ├── mlu │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── cmake │ │ ├── dummy.c.in │ │ ├── external │ │ │ ├── concurrentqueue.cmake │ │ │ ├── gflags.cmake │ │ │ ├── glog.cmake │ │ │ ├── gtest.cmake │ │ │ ├── neuware.cmake │ │ │ ├── onednn.cmake │ │ │ └── pybind11.cmake │ │ ├── generic.cmake │ │ ├── paddle.cmake │ │ ├── third_party.cmake │ │ └── version.cmake │ ├── kernels │ │ ├── abs_kernel.cc │ │ ├── accuracy_kernel.cc │ │ ├── activation_kernel.cc │ │ ├── adam_kernel.cc │ │ ├── add_n_kernel.cc │ │ ├── 
arange_kernel.cc │ │ ├── arg_max_kernel.cc │ │ ├── argsort_kernel.cc │ │ ├── assign_kernel.cc │ │ ├── batch_norm_kernel.cc │ │ ├── bce_loss_kernel.cc │ │ ├── bitwise_kernel.cc │ │ ├── cast_kernel.cc │ │ ├── check_finite_and_unscale_kernel.cc │ │ ├── clip_kernel.cc │ │ ├── coalesce_tensor_kernel.cc │ │ ├── compare_kernel.cc │ │ ├── concat_kernel.cc │ │ ├── contiguous_kernel.cc │ │ ├── conv_kernel.cc │ │ ├── conv_transpose_kernel.cc │ │ ├── cross_entropy_kernel.cc │ │ ├── cumsum_kernel.cc │ │ ├── data_kernel.cc │ │ ├── deformable_conv_kernel.cc │ │ ├── dropout_kernel.cc │ │ ├── elementwise_add_kernel.cc │ │ ├── elementwise_div_kernel.cc │ │ ├── elementwise_max_kernel.cc │ │ ├── elementwise_min_kernel.cc │ │ ├── elementwise_mul_kernel.cc │ │ ├── elementwise_pow_kernel.cc │ │ ├── elementwise_sub_kernel.cc │ │ ├── expand_as_kernel.cc │ │ ├── expand_kernel.cc │ │ ├── fill_kernel.cc │ │ ├── flash_attn_kernel.cc │ │ ├── flip_kernel.cc │ │ ├── full_kernel.cc │ │ ├── funcs │ │ │ ├── conv_utils.h │ │ │ ├── elementwise_utils.h │ │ │ ├── logic_op.h │ │ │ ├── mlu_baseop.cc │ │ │ ├── mlu_baseop.h │ │ │ ├── mlu_funcs.h │ │ │ ├── range_op.h │ │ │ └── reduce_op.h │ │ ├── gather_kernel.cc │ │ ├── gather_nd_kernel.cc │ │ ├── gaussian_kernel.cc │ │ ├── generate_proposals_kernel.cc │ │ ├── grid_sample_kernel.cc │ │ ├── huber_loss_kernel.cc │ │ ├── index_sample_kernel.cc │ │ ├── index_select_kernel.cc │ │ ├── interpolate_kernel.cc │ │ ├── kldiv_loss_kernel.cc │ │ ├── label_smooth_kernel.cc │ │ ├── layer_norm_kernel.cc │ │ ├── log_softmax_kernel.cc │ │ ├── logical_kernel.cc │ │ ├── lookup_table_v2_op_kernel.cc │ │ ├── masked_select_kernel.cc │ │ ├── matmul_kernel.cc │ │ ├── mean_all_kernel.cc │ │ ├── memcpy_kernel.cc │ │ ├── meshgrid_kernel.cc │ │ ├── momentum_kernel.cc │ │ ├── multinomial_kernel.cc │ │ ├── nonzero_kernel.cc │ │ ├── numel_kernel.cc │ │ ├── one_hot_kernel.cc │ │ ├── p_norm_kernel.cc │ │ ├── pool2d_kernel.cc │ │ ├── prior_box_kernel.cc │ │ ├── prod_kernel.cc │ │ ├── 
randperm_kernel.cc │ │ ├── reduce_all_kernel.cc │ │ ├── reduce_any_kernel.cc │ │ ├── reduce_max_kernel.cc │ │ ├── reduce_mean_kernel.cc │ │ ├── reduce_min_kernel.cc │ │ ├── reduce_sum_kernel.cc │ │ ├── rnn_kernel.cc │ │ ├── roi_align_kernel.cc │ │ ├── roll_kernel.cc │ │ ├── scale_kernel.cc │ │ ├── scatter_kernel.cc │ │ ├── set_value_kernel.cc │ │ ├── sgd_kernel.cc │ │ ├── sigmoid_cross_with_logits_kernel.cc │ │ ├── slice_kernel.cc │ │ ├── softmax_kernel.cc │ │ ├── split_kernel.cc │ │ ├── squared_l2_norm_kernel.cc │ │ ├── squeeze_kernel.cc │ │ ├── stack_kernel.cc │ │ ├── strided_copy_kernel.cc │ │ ├── strided_slice_kernel.cc │ │ ├── sync_batch_norm_kernel.cc │ │ ├── tile_kernel.cc │ │ ├── top_k_kernel.cc │ │ ├── transpose_kernel.cc │ │ ├── tril_triu_op_mlu.cc │ │ ├── truncated_gaussian_random_kernel.cc │ │ ├── uniform_kernel.cc │ │ ├── unsqueeze_kernel.cc │ │ ├── unstack_kernel.cc │ │ ├── where_kernel.cc │ │ └── yolo_box_kernel.cc │ ├── plugin_ops │ │ ├── fused_rms_norm_mlu.cc │ │ ├── fused_rope_mlu.cc │ │ └── my_add_n_op.cc │ ├── runtime │ │ ├── CNRTEvent.h │ │ ├── flags.h │ │ ├── process_cnpapi_data.cc │ │ ├── process_cnpapi_data.h │ │ ├── runtime.cc │ │ ├── runtime.h │ │ └── thread_data_registry.h │ ├── setup.py.in │ ├── tests │ │ ├── CMakeLists.txt │ │ ├── test_LeNet_MNIST.py │ │ ├── test_MNIST_model.py │ │ └── unittests │ │ │ ├── CMakeLists.txt │ │ │ ├── dygraph_group_sharded_stage2.py │ │ │ ├── dygraph_group_sharded_stage2_comm_overlap.py │ │ │ ├── dygraph_group_sharded_stage2_offload.py │ │ │ ├── dygraph_group_sharded_stage3.py │ │ │ ├── dygraph_group_sharded_stage3_offload.py │ │ │ ├── model_parallel │ │ │ ├── hybrid_column_parallel_mp_layers.py │ │ │ ├── hybrid_cross_entropy_parallel_mp_layers.py │ │ │ ├── hybrid_embedding_parallel_mp_layers.py │ │ │ ├── hybrid_parallel_pp_amp.py │ │ │ ├── hybrid_parallel_pp_clip_grad.py │ │ │ ├── hybrid_parallel_pp_embedding.py │ │ │ ├── hybrid_parallel_pp_fp16.py │ │ │ ├── hybrid_parallel_pp_layer.py │ │ │ ├── 
hybrid_parallel_pp_recompute.py │ │ │ ├── hybrid_parallel_pp_save_load.py │ │ │ ├── hybrid_parallel_pp_transformer.py │ │ │ ├── hybrid_parallel_shared_weight.py │ │ │ └── hybrid_row_parallel_mp_layers.py │ │ │ ├── parallel_dygraph_sync_batch_norm.py │ │ │ ├── process_group_xccl.py │ │ │ ├── test_abs_op_mlu.py │ │ │ ├── test_accuracy_op_mlu.py │ │ │ ├── test_adam_op_mlu.py │ │ │ ├── test_adamw_op_mlu.py │ │ │ ├── test_arange_op_mlu.py │ │ │ ├── test_arg_max_op_mlu.py │ │ │ ├── test_argsort_op_mlu.py │ │ │ ├── test_assign_op_mlu.py │ │ │ ├── test_assign_value_op_mlu.py │ │ │ ├── test_atan_op_mlu.py │ │ │ ├── test_batch_norm_op_mlu.py │ │ │ ├── test_batch_norm_op_mlu_v2.py │ │ │ ├── test_bce_loss_mlu.py │ │ │ ├── test_bilinear_interp_v2_op_mlu.py │ │ │ ├── test_bitwise_op_mlu.py │ │ │ ├── test_c_embedding_op_mlu.py │ │ │ ├── test_cast_op_mlu.py │ │ │ ├── test_check_finite_and_unscale_op_mlu.py │ │ │ ├── test_clip_op_mlu.py │ │ │ ├── test_coalesce_tensor_op_mlu.py │ │ │ ├── test_collective_api.py │ │ │ ├── test_compare_op_mlu.py │ │ │ ├── test_concat_op_mlu.py │ │ │ ├── test_conv2d_op_depthwise_conv_mlu.py │ │ │ ├── test_conv2d_op_mlu.py │ │ │ ├── test_conv2d_transposed_op_mlu.py │ │ │ ├── test_cos_op_mlu.py │ │ │ ├── test_cumsum_op_mlu.py │ │ │ ├── test_custom_pass_mlu.py │ │ │ ├── test_deformable_conv_op_mlu.py │ │ │ ├── test_dist_base.py │ │ │ ├── test_dropout_op_mlu.py │ │ │ ├── test_dygraph_recompute_for_eager.py │ │ │ ├── test_dygraph_sharding_stage_2.py │ │ │ ├── test_dygraph_sharding_stage_3.py │ │ │ ├── test_elementwise_add_op_mlu.py │ │ │ ├── test_elementwise_div_op_mlu.py │ │ │ ├── test_elementwise_max_op_mlu.py │ │ │ ├── test_elementwise_min_op_mlu.py │ │ │ ├── test_elementwise_mul_op_mlu.py │ │ │ ├── test_elementwise_pow_op_mlu.py │ │ │ ├── test_elementwise_sub_op_mlu.py │ │ │ ├── test_embedding_op_mlu.py │ │ │ ├── test_exp_op_mlu.py │ │ │ ├── test_expand_as_v2_op_mlu.py │ │ │ ├── test_expand_v2_op_mlu.py │ │ │ ├── test_fill_any_like_op_mlu.py │ │ │ ├── 
test_fill_constant_batch_size_like_op_mlu.py │ │ │ ├── test_fill_constant_op_mlu.py │ │ │ ├── test_flash_attention_op_mlu.py │ │ │ ├── test_flip_op_mlu.py │ │ │ ├── test_floor_op_mlu.py │ │ │ ├── test_full_with_tensor_op_mlu.py │ │ │ ├── test_gather_nd_op_mlu.py │ │ │ ├── test_gather_op_mlu.py │ │ │ ├── test_gaussian_random_op_mlu.py │ │ │ ├── test_gelu_op_mlu.py │ │ │ ├── test_generate_proposals_v2_op_mlu.py │ │ │ ├── test_grid_sample_op_mlu.py │ │ │ ├── test_hard_sigmoid_op_mlu.py │ │ │ ├── test_hard_swish_op_mlu.py │ │ │ ├── test_huber_loss_op_mlu.py │ │ │ ├── test_index_sample_op_mlu.py │ │ │ ├── test_index_select_op_mlu.py │ │ │ ├── test_kldiv_loss_op_mlu.py │ │ │ ├── test_label_smooth_op_mlu.py │ │ │ ├── test_layer_norm_op_mlu.py │ │ │ ├── test_leaky_relu_op_mlu.py │ │ │ ├── test_log_op_mlu.py │ │ │ ├── test_log_softmax_op_mlu.py │ │ │ ├── test_logical_op_mlu.py │ │ │ ├── test_lookup_table_v2_op_mlu.py │ │ │ ├── test_masked_select_op_mlu.py │ │ │ ├── test_matmul_op_mlu.py │ │ │ ├── test_mean_op_mlu.py │ │ │ ├── test_merged_adam_op_mlu.py │ │ │ ├── test_merged_momentum_op_mlu.py │ │ │ ├── test_meshgrid_op_mlu.py │ │ │ ├── test_momentum_op_mlu.py │ │ │ ├── test_multinomial_op_mlu.py │ │ │ ├── test_nearest_interp_v2_op_mlu.py │ │ │ ├── test_numel_op_mlu.py │ │ │ ├── test_one_hot_v2_op_mlu.py │ │ │ ├── test_p_norm_op_mlu.py │ │ │ ├── test_parallel_dygraph_mp_layers.py │ │ │ ├── test_parallel_dygraph_pipeline_parallel.py │ │ │ ├── test_pool2d_op_mlu.py │ │ │ ├── test_pow_op_mlu.py │ │ │ ├── test_prior_box_op_mlu.py │ │ │ ├── test_randperm_op_mlu.py │ │ │ ├── test_reciprocal_op_mlu.py │ │ │ ├── test_reduce_all_op_mlu.py │ │ │ ├── test_reduce_any_op_mlu.py │ │ │ ├── test_reduce_max_op_mlu.py │ │ │ ├── test_reduce_mean_op_mlu.py │ │ │ ├── test_reduce_min_op_mlu.py │ │ │ ├── test_reduce_prod_op_mlu.py │ │ │ ├── test_reduce_sum_op_mlu.py │ │ │ ├── test_relu6_op_mlu.py │ │ │ ├── test_relu_op_mlu.py │ │ │ ├── test_rms_norm_op_mlu.py │ │ │ ├── test_rnn_op_mlu.py │ │ │ ├── 
test_roi_align_op_mlu.py │ │ │ ├── test_roll_op_mlu.py │ │ │ ├── test_rope_op_mlu.py │ │ │ ├── test_round_op_mlu.py │ │ │ ├── test_rsqrt_op_mlu.py │ │ │ ├── test_scale_op_mlu.py │ │ │ ├── test_scatter_nd_op_mlu.py │ │ │ ├── test_scatter_op_mlu.py │ │ │ ├── test_set_value_op_mlu.py │ │ │ ├── test_sgd_op_mlu.py │ │ │ ├── test_shape_op_mlu.py │ │ │ ├── test_sigmoid_cross_entropy_with_logits_op_mlu.py │ │ │ ├── test_silu_op_mlu.py │ │ │ ├── test_sin_op_mlu.py │ │ │ ├── test_slice_op_mlu.py │ │ │ ├── test_softmax_op_mlu.py │ │ │ ├── test_softmax_with_cross_entropy_op_mlu.py │ │ │ ├── test_split_op_mlu.py │ │ │ ├── test_sqrt_op_mlu.py │ │ │ ├── test_square_op_mlu.py │ │ │ ├── test_squared_l2_norm_op_mlu.py │ │ │ ├── test_squeeze_op_mlu.py │ │ │ ├── test_stack_op_mlu.py │ │ │ ├── test_static_print_mlu.py │ │ │ ├── test_stride_mlu.py │ │ │ ├── test_strided_slice_op_mlu.py │ │ │ ├── test_sum_op_mlu.py │ │ │ ├── test_swish_op_mlu.py │ │ │ ├── test_sync_batch_norm_op_mlu.py │ │ │ ├── test_take_along_axis_op_mlu.py │ │ │ ├── test_tanh_op_mlu.py │ │ │ ├── test_tile_op_mlu.py │ │ │ ├── test_top_k_op_mlu.py │ │ │ ├── test_transpose_op_mlu.py │ │ │ ├── test_tril_triu_op_mlu.py │ │ │ ├── test_truncated_gaussian_random_op_mlu.py │ │ │ ├── test_uniform_random_op_mlu.py │ │ │ ├── test_unsqueeze_op_mlu.py │ │ │ ├── test_unstack_op_mlu.py │ │ │ ├── test_where_index_op_mlu.py │ │ │ ├── test_where_op_mlu.py │ │ │ ├── test_yolo_box_op_mlu.py │ │ │ └── test_zero_dim_tensor_mlu.py │ └── tools │ │ ├── compile.sh │ │ ├── disable_ut_mlu │ │ ├── dockerfile │ │ ├── Dockerfile.mlu.kylinv10.gcc82.py310 │ │ ├── Dockerfile.mlu.ubuntu20.gcc84.py310 │ │ └── build-image.sh │ │ └── pr_ci_mlu.sh ├── mps │ ├── .clang-format │ ├── CMakeLists.txt │ ├── README.md │ ├── cmake │ │ ├── external │ │ │ ├── gflags.cmake │ │ │ ├── glog.cmake │ │ │ └── gtest.cmake │ │ ├── paddle.cmake │ │ └── third_party.cmake │ ├── kernels │ │ ├── activation_impl.h │ │ ├── activation_impl.mm │ │ ├── activation_kernl.cc │ │ ├── 
elementwise_impl.h │ │ ├── elementwise_impl.mm │ │ ├── elementwise_kernel.cc │ │ ├── matmul_impl.h │ │ ├── matmul_impl.mm │ │ ├── matmul_kernel.cc │ │ ├── op_utils.h │ │ ├── op_utils.mm │ │ ├── phi_funcs.h │ │ ├── reshape_kernel.cc │ │ ├── softmax_impl.h │ │ ├── softmax_impl.mm │ │ ├── softmax_kernel.cc │ │ ├── squeeze_kernel.cc │ │ └── unsqueeze_kernel.cc │ ├── runtime │ │ ├── mps_device.h │ │ ├── mps_device.mm │ │ ├── mps_runtime.h │ │ ├── mps_stream.h │ │ ├── mps_stream.mm │ │ └── runtime.cc │ ├── setup.py.in │ └── tests │ │ ├── CMakeLists.txt │ │ └── unittests │ │ ├── CMakeLists.txt │ │ └── test_softmax_op.py ├── npu │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── cmake │ │ ├── dummy.c.in │ │ ├── external │ │ │ ├── ascend.cmake │ │ │ ├── gflags.cmake │ │ │ ├── glog.cmake │ │ │ ├── gtest.cmake │ │ │ ├── onednn.cmake │ │ │ └── pybind11.cmake │ │ ├── generic.cmake │ │ ├── paddle.cmake │ │ ├── third_party.cmake │ │ └── version.cmake │ ├── custom_op │ │ ├── fused_allgather_mm.cc │ │ ├── fused_attention_npu.cc │ │ ├── fused_mm_allreduce.cc │ │ ├── fused_mm_reduce_scatter.cc │ │ ├── fused_rms_norm_npu.cc │ │ ├── fused_rope_npu.cc │ │ ├── llama_infer │ │ │ ├── atb_ops │ │ │ │ ├── atb_layers │ │ │ │ │ ├── fused_blha_layer.cc │ │ │ │ │ ├── fused_blha_layer.h │ │ │ │ │ ├── fused_lm_head_layer.cc │ │ │ │ │ ├── fused_lm_head_layer.h │ │ │ │ │ ├── linear.cc │ │ │ │ │ ├── linear.h │ │ │ │ │ ├── mixed_gate_up_act.cc │ │ │ │ │ ├── mixed_gate_up_act.h │ │ │ │ │ ├── qkv_split.cc │ │ │ │ │ ├── qkv_split.h │ │ │ │ │ ├── runner.cc │ │ │ │ │ ├── runner.h │ │ │ │ │ ├── smooth_quant.cc │ │ │ │ │ └── smooth_quant.h │ │ │ │ ├── fused_blha_layer_op.cc │ │ │ │ ├── fused_blha_layer_op_utils.cc │ │ │ │ ├── fused_blha_layer_op_utils.h │ │ │ │ ├── fused_lm_head_op.cc │ │ │ │ └── remove_padding_op.cc │ │ │ ├── dequant_int8.cc │ │ │ ├── encode_rotary_qk.cc │ │ │ ├── fused_get_rope.cc │ │ │ ├── get_output.cc │ │ │ ├── get_padding_offset.cc │ │ │ ├── get_padding_offset_v2.cc │ │ │ 
├── qkv_transpose_split.cc │ │ │ ├── quant_int8.cc │ │ │ ├── rebuild_padding.cc │ │ │ ├── rebuild_padding_v2.cc │ │ │ ├── save_with_output.cc │ │ │ ├── save_with_output_msg.cc │ │ │ ├── set_value_by_flags.cc │ │ │ ├── set_value_by_flags_v2.cc │ │ │ ├── step.cc │ │ │ ├── stop_generation_multi_ends.cc │ │ │ ├── stop_generation_multi_ends_v2.cc │ │ │ ├── token_penalty_multi_scores.cc │ │ │ ├── token_penalty_multi_scores_v2.cc │ │ │ ├── transpose_removing_padding.cc │ │ │ ├── update_inputs.cc │ │ │ ├── write_cache_kv.cc │ │ │ └── write_int8_cache_kv.cc │ │ └── my_add_n_op.cc │ ├── kernels │ │ ├── abs_kernel.cc │ │ ├── accuracy_kernel.cc │ │ ├── activation_kernel.cc │ │ ├── adadelta_kernel.cc │ │ ├── adagrad_kernel.cc │ │ ├── adam_kernel.cc │ │ ├── add_n_kernel.cc │ │ ├── amp │ │ │ ├── check_finite_and_unscale_kernel.cc │ │ │ └── update_loss_scaling_kernel.cc │ │ ├── arange_kernel.cc │ │ ├── arg_min_max_kernel.cc │ │ ├── argsort_grad_kernel.cc │ │ ├── argsort_kernel.cc │ │ ├── assign_kernel.cc │ │ ├── batch_norm_kernel.cc │ │ ├── bce_loss_kernel.cc │ │ ├── bitwise_kernel.cc │ │ ├── bmm_kernel.cc │ │ ├── box_coder_kernel.cc │ │ ├── c_identity_kernel.cc │ │ ├── cast_kernel.cc │ │ ├── clip_by_norm_kernel.cc │ │ ├── clip_kernel.cc │ │ ├── coalesce_tensor_kernel.cc │ │ ├── compare_kernel.cc │ │ ├── concat_kernel.cc │ │ ├── contiguous_kernel.cc │ │ ├── conv2d_kernel.cc │ │ ├── conv_kernel.cc │ │ ├── conv_transpose_kernel.cc │ │ ├── cross_entropy_kernel.cc │ │ ├── cum_kernel.cc │ │ ├── cumprod_kernel.cc │ │ ├── deformable_conv_kernel.cc │ │ ├── diag_kernel.cc │ │ ├── diagonal_kernel.cc │ │ ├── dropout_kernel.cc │ │ ├── einsum_kernel.cc │ │ ├── einsum_kernel.h │ │ ├── elementwise_add_kernel.cc │ │ ├── elementwise_div_kernel.cc │ │ ├── elementwise_floordiv_kernel.cc │ │ ├── elementwise_max_kernel.cc │ │ ├── elementwise_min_kernel.cc │ │ ├── elementwise_mod_kernel.cc │ │ ├── elementwise_mul_kernel.cc │ │ ├── elementwise_pow_kernel.cc │ │ ├── elementwise_sub_kernel.cc │ │ ├── 
expand_as_kernel.cc │ │ ├── expand_kernel.cc │ │ ├── eye_kernel.cc │ │ ├── fill_diagonal_kernel.cc │ │ ├── fill_diagonal_tensor_kernel.cc │ │ ├── fill_kernel.cc │ │ ├── flip_kernel.cc │ │ ├── full_kernel.cc │ │ ├── funcs │ │ │ ├── conv_util.h │ │ │ ├── format_utils.cc │ │ │ ├── format_utils.h │ │ │ ├── npu_enforce.h │ │ │ ├── npu_funcs.h │ │ │ ├── npu_op_prepare.h │ │ │ ├── npu_op_runner.cc │ │ │ ├── npu_op_runner.h │ │ │ ├── slice_utils.h │ │ │ ├── string_helper.cc │ │ │ └── string_helper.h │ │ ├── fused_gemm_epilogue_grad_kernel.cc │ │ ├── fused_gemm_epilogue_kernel.cc │ │ ├── fused_linear_param_grad_add_kernel.cc │ │ ├── fusion │ │ │ ├── blha_get_max_len.cc │ │ │ ├── block_multihead_attention_kernel.cc │ │ │ ├── fused_bias_act_kernel.cc │ │ │ ├── fused_bias_residual_layernorm_kernel.cc │ │ │ ├── masked_multihead_attention_kernel.cc │ │ │ ├── rms_norm_kernel.cc │ │ │ ├── topp_sampling_kernel.cc │ │ │ └── variable_length_memory_efficient_attention_kernel.cc │ │ ├── gather_kernel.cc │ │ ├── gather_nd_kernel.cc │ │ ├── gaussian_kernel.cc │ │ ├── grid_sample_kernel.cc │ │ ├── group_norm_kernel.cc │ │ ├── histogram_kernel.cc │ │ ├── huber_loss_kernel.cc │ │ ├── increment_kernel.cc │ │ ├── index_put_kernel.cc │ │ ├── index_sample_kernel.cc │ │ ├── index_select_kernel.cc │ │ ├── interpolate_kernel.cc │ │ ├── inverse_kernel.cc │ │ ├── is_empty_kernel.cc │ │ ├── isfinite_kernel.cc │ │ ├── kldiv_loss_kernel.cc │ │ ├── label_smooth_kernel.cc │ │ ├── layer_norm_kernel.cc │ │ ├── linspace_kernel.cc │ │ ├── log_loss_kernel.cc │ │ ├── log_softmax_kernel.cc │ │ ├── logical_kernel.cc │ │ ├── lookup_table_v2_op_npu_kernel.cc │ │ ├── masked_select_kernel.cc │ │ ├── matmul_kernel.cc │ │ ├── mean_all_kernel.cc │ │ ├── memcpy_kernel.cc │ │ ├── merged_momentum.cc │ │ ├── meshgrid_kernel.cc │ │ ├── momentum_kernel.cc │ │ ├── multinomial_kernel.cc │ │ ├── nll_loss_kernel.cc │ │ ├── nonzero_kernel.cc │ │ ├── norm_kernel.cc │ │ ├── npu_identity_kernel.cc │ │ ├── one_hot_kernel.cc │ │ ├── 
p_norm_kernel.cc │ │ ├── pad3d_kernel.cc │ │ ├── pad_kernel.cc │ │ ├── pool2d_kernel.cc │ │ ├── prelu_kernel.cc │ │ ├── prior_box_kernel.cc │ │ ├── randint_kernel.cc │ │ ├── randperm_kernel.cc │ │ ├── reduce_all_kernel.cc │ │ ├── reduce_any_kernel.cc │ │ ├── reduce_max_kernel.cc │ │ ├── reduce_mean_kernel.cc │ │ ├── reduce_min_kernel.cc │ │ ├── reduce_prod_kernel.cc │ │ ├── reduce_sum_kernel.cc │ │ ├── rmsprop_kernel.cc │ │ ├── rnn_kernel.cc │ │ ├── roi_align_kernel.cc │ │ ├── roll_kernel.cc │ │ ├── scale_kernel.cc │ │ ├── scatter_kernel.cc │ │ ├── scatter_nd_add_kernel.cc │ │ ├── sequence_mask.cc │ │ ├── set_value_kernel.cc │ │ ├── sgd_kernel.cc │ │ ├── shard_index_kernel.cc │ │ ├── sigmoid_cross_entropy_with_logits_kernel.cc │ │ ├── sign_kernel.cc │ │ ├── slice_kernel.cc │ │ ├── softmax_kernel.cc │ │ ├── split_kernel.cc │ │ ├── squared_l2_norm_kernel.cc │ │ ├── squeeze_kernel.cc │ │ ├── stack_kernel.cc │ │ ├── strided_copy_kernel.cc │ │ ├── strided_slice_kernel.cc │ │ ├── swiglu_kernel.cc │ │ ├── take_along_axis_kernel.cc │ │ ├── tile_kernel.cc │ │ ├── top_k_kernel.cc │ │ ├── transpose_kernel.cc │ │ ├── tril_triu_kernel.cc │ │ ├── truncated_gaussian_random_kernel.cc │ │ ├── unbind_kernel.cc │ │ ├── uniform_kernel.cc │ │ ├── unsqueeze_kernel.cc │ │ ├── unstack_kernel.cc │ │ ├── warpctc_kernel.cc │ │ └── where_kernel.cc │ ├── passes │ │ ├── __init__.py │ │ ├── chatglm.py │ │ ├── common.py │ │ └── llama.py │ ├── profile │ │ ├── __init__.py │ │ └── cann_export.py │ ├── runtime │ │ ├── flags.h │ │ ├── runtime.cc │ │ └── runtime.h │ ├── setup.py.in │ ├── tests │ │ ├── CMakeLists.txt │ │ ├── test_LeNet_MNIST.py │ │ └── unittests │ │ │ ├── CMakeLists.txt │ │ │ ├── big_shape_cases.json │ │ │ ├── dygraph_group_sharded_stage2.py │ │ │ ├── dygraph_group_sharded_stage2_comm_overlap.py │ │ │ ├── dygraph_group_sharded_stage2_offload.py │ │ │ ├── dygraph_group_sharded_stage3.py │ │ │ ├── dygraph_group_sharded_stage3_offload.py │ │ │ ├── hybrid_parallel_mp_layers.py │ │ │ ├── 
npu_utils.py │ │ │ ├── process_group_xccl.py │ │ │ ├── test_abs_op_npu.py │ │ │ ├── test_abs_op_npu_eager.py │ │ │ ├── test_accuracy_op_npu.py │ │ │ ├── test_activation_op.py │ │ │ ├── test_activation_op_eager.py │ │ │ ├── test_adadelta_op_npu.py │ │ │ ├── test_adagrad_op_npu.py │ │ │ ├── test_adam_op_npu.py │ │ │ ├── test_adam_op_npu_eager.py │ │ │ ├── test_adamw_op_npu.py │ │ │ ├── test_arg_max_op_npu.py │ │ │ ├── test_arg_min_op_npu.py │ │ │ ├── test_argsort_op_npu.py │ │ │ ├── test_assign_op_npu.py │ │ │ ├── test_assign_op_npu_eager.py │ │ │ ├── test_assign_value_op_npu.py │ │ │ ├── test_batch_norm_op_npu.py │ │ │ ├── test_bce_loss_npu.py │ │ │ ├── test_big_shape_npu.py │ │ │ ├── test_bilinear_interp_v2_op_npu.py │ │ │ ├── test_bitwise_op_npu.py │ │ │ ├── test_bmm_op_npu.py │ │ │ ├── test_box_coder_op_npu.py │ │ │ ├── test_cast_op_npu.py │ │ │ ├── test_cast_op_npu_eager.py │ │ │ ├── test_check_finite_and_unscale_op_npu.py │ │ │ ├── test_check_finite_and_unscale_op_npu_eager.py │ │ │ ├── test_check_nan_inf_op_npu.py │ │ │ ├── test_clip_by_norm_op_npu.py │ │ │ ├── test_clip_op_npu.py │ │ │ ├── test_coalesce_tensor_op_npu.py │ │ │ ├── test_collective_api.py │ │ │ ├── test_compare_op_npu.py │ │ │ ├── test_concat_op_npu.py │ │ │ ├── test_concat_op_npu_eager.py │ │ │ ├── test_contiguous_op_npu.py │ │ │ ├── test_conv2d_op_depthwise_conv_npu.py │ │ │ ├── test_conv2d_op_npu.py │ │ │ ├── test_conv2d_transpose_op_npu.py │ │ │ ├── test_conv3d_op_npu.py │ │ │ ├── test_cumprod_op_npu.py │ │ │ ├── test_cumsum_op_npu.py │ │ │ ├── test_custom_pass_npu.py │ │ │ ├── test_deformable_conv_op_npu.py │ │ │ ├── test_diag_op_npu.py │ │ │ ├── test_diagonal_op_npu.py │ │ │ ├── test_divide_op_npu_eager.py │ │ │ ├── test_dropout_op_npu.py │ │ │ ├── test_dygraph_recompute_for_eager.py │ │ │ ├── test_dygraph_sharding_stage_2.py │ │ │ ├── test_dygraph_sharding_stage_3.py │ │ │ ├── test_einsum_op_npu.py │ │ │ ├── test_elementwise_add_op_npu.py │ │ │ ├── test_elementwise_add_op_npu_eager.py │ │ 
│ ├── test_elementwise_div_op_npu.py │ │ │ ├── test_elementwise_floordiv_op_npu.py │ │ │ ├── test_elementwise_max_op_npu.py │ │ │ ├── test_elementwise_max_op_npu_eager.py │ │ │ ├── test_elementwise_min_op_npu.py │ │ │ ├── test_elementwise_mod_op_npu.py │ │ │ ├── test_elementwise_mul_op_npu.py │ │ │ ├── test_elementwise_mul_op_npu_eager.py │ │ │ ├── test_elementwise_pow_op_npu.py │ │ │ ├── test_elementwise_pow_op_npu_eager.py │ │ │ ├── test_elementwise_sub_op_npu.py │ │ │ ├── test_elu_op_npu.py │ │ │ ├── test_expand_as_v2_op_npu.py │ │ │ ├── test_expand_as_v2_op_npu_eager.py │ │ │ ├── test_expand_v2_op_npu.py │ │ │ ├── test_expand_v2_op_npu_eager.py │ │ │ ├── test_eye_op_npu.py │ │ │ ├── test_fill_any_like_op_npu.py │ │ │ ├── test_fill_constant_batch_size_like_op_npu.py │ │ │ ├── test_fill_constant_op_npu.py │ │ │ ├── test_fill_diagonal_op_npu.py │ │ │ ├── test_fill_diagonal_tensor_op_npu.py │ │ │ ├── test_fill_op_npu_eager.py │ │ │ ├── test_flashattention_npu.py │ │ │ ├── test_flip_op_npu.py │ │ │ ├── test_full_like_op.py │ │ │ ├── test_full_op.py │ │ │ ├── test_full_with_tensor_op_on_npu.py │ │ │ ├── test_fused_linear_param_grad_add_op_npu.py │ │ │ ├── test_fused_matmul_bias_op_npu.py │ │ │ ├── test_gather_nd_op_npu.py │ │ │ ├── test_gather_nd_op_npu_eager.py │ │ │ ├── test_gather_op_npu.py │ │ │ ├── test_gaussian_random_op_npu.py │ │ │ ├── test_gelu_op_npu.py │ │ │ ├── test_grid_sample_op_npu.py │ │ │ ├── test_group_norm_op_npu.py │ │ │ ├── test_hard_shrink_op_npu.py │ │ │ ├── test_hard_sigmoid_op_npu.py │ │ │ ├── test_hard_swish_op_npu.py │ │ │ ├── test_hard_tanh_op_npu.py │ │ │ ├── test_histogram_op_npu.py │ │ │ ├── test_huber_loss_op_npu.py │ │ │ ├── test_increment_op_npu.py │ │ │ ├── test_index_put_op_npu_eager.py │ │ │ ├── test_index_sample_op_npu.py │ │ │ ├── test_index_select_op_npu.py │ │ │ ├── test_inverse_op_npu.py │ │ │ ├── test_is_empty_op_npu.py │ │ │ ├── test_isfinite_v2_op_npu.py │ │ │ ├── test_kldiv_loss_op_npu.py │ │ │ ├── 
test_label_smooth_op_npu.py │ │ │ ├── test_layer_norm_op_npu.py │ │ │ ├── test_linear_op_npu.py │ │ │ ├── test_linspace_op_npu.py │ │ │ ├── test_log_loss_op_npu.py │ │ │ ├── test_log_softmax_op_npu.py │ │ │ ├── test_logical_op_npu.py │ │ │ ├── test_lookup_table_v2_op_npu.py │ │ │ ├── test_masked_select_op_npu.py │ │ │ ├── test_matmul_op_npu_eager.py │ │ │ ├── test_matmulv2_op_npu.py │ │ │ ├── test_mean_op_npu.py │ │ │ ├── test_memcpy_op_npu.py │ │ │ ├── test_merged_momentum_op_npu.py │ │ │ ├── test_meshgrid_op_npu.py │ │ │ ├── test_momentum_op_npu.py │ │ │ ├── test_multinomial_op_npu.py │ │ │ ├── test_nearest_interp_op_npu.py │ │ │ ├── test_nll_loss_op_npu.py │ │ │ ├── test_norm_op_npu.py │ │ │ ├── test_npu_identity_op.py │ │ │ ├── test_one_dim_tensor_npu.py │ │ │ ├── test_one_hot_v2_op_npu.py │ │ │ ├── test_p_norm_op_npu.py │ │ │ ├── test_pad3d_op_npu.py │ │ │ ├── test_pad_op_npu.py │ │ │ ├── test_pad_op_npu_eager.py │ │ │ ├── test_parallel_dygraph_mp_layers.py │ │ │ ├── test_pool2d_op_npu.py │ │ │ ├── test_prelu_op_npu.py │ │ │ ├── test_prior_box_op_npu.py │ │ │ ├── test_randint_op_npu.py │ │ │ ├── test_randperm_op_npu.py │ │ │ ├── test_range_npu.py │ │ │ ├── test_reciprocal_op_npu.py │ │ │ ├── test_reduce_all_op_npu.py │ │ │ ├── test_reduce_any_op_npu.py │ │ │ ├── test_reduce_max_op_npu.py │ │ │ ├── test_reduce_max_op_npu_eager.py │ │ │ ├── test_reduce_mean_op_npu.py │ │ │ ├── test_reduce_mean_op_npu_eager.py │ │ │ ├── test_reduce_min_op_npu.py │ │ │ ├── test_reduce_prod_op_npu.py │ │ │ ├── test_reduce_sum_op_npu.py │ │ │ ├── test_reduce_sum_op_npu_eager.py │ │ │ ├── test_rms_norm_npu.py │ │ │ ├── test_rmsprop_op_npu.py │ │ │ ├── test_rnn_op_npu.py │ │ │ ├── test_roi_align_op_npu.py │ │ │ ├── test_roll_op_npu.py │ │ │ ├── test_rope_npu.py │ │ │ ├── test_rsqrt_op_npu.py │ │ │ ├── test_rsqrt_op_npu_eager.py │ │ │ ├── test_scale_op_npu.py │ │ │ ├── test_scale_op_npu_eager.py │ │ │ ├── test_scatter_nd_add_op_npu.py │ │ │ ├── test_scatter_op_npu.py │ │ │ ├── 
test_selu_op_npu.py │ │ │ ├── test_sequence_mask_op_npu.py │ │ │ ├── test_set_value_op_npu.py │ │ │ ├── test_sgd_op_npu.py │ │ │ ├── test_shard_index_op_npu.py │ │ │ ├── test_sigmoid_cross_entropy_with_logits_op_npu.py │ │ │ ├── test_sign_op_npu.py │ │ │ ├── test_slice_op_npu.py │ │ │ ├── test_slice_op_npu_eager.py │ │ │ ├── test_softmax_op_npu.py │ │ │ ├── test_softmax_with_cross_entropy_op_npu.py │ │ │ ├── test_split_op_npu.py │ │ │ ├── test_split_op_npu_eager.py │ │ │ ├── test_squared_l2_norm_op_npu.py │ │ │ ├── test_squared_l2_norm_op_npu_eager.py │ │ │ ├── test_squeeze_op_npu.py │ │ │ ├── test_squeeze_op_npu_eager.py │ │ │ ├── test_stack_op_npu.py │ │ │ ├── test_stack_op_npu_eager.py │ │ │ ├── test_strided_slice_op_npu.py │ │ │ ├── test_strided_slice_op_npu_eager.py │ │ │ ├── test_sum_op_npu.py │ │ │ ├── test_swiglu_op_npu.py │ │ │ ├── test_take_along_axis_op_npu.py │ │ │ ├── test_tile_op_npu.py │ │ │ ├── test_top_k_v2_op_npu.py │ │ │ ├── test_transpose_op_npu.py │ │ │ ├── test_transpose_op_npu_eager.py │ │ │ ├── test_tril_triu_op_npu.py │ │ │ ├── test_truncated_gaussian_random_op_npu.py │ │ │ ├── test_unbind_op_npu.py │ │ │ ├── test_uniform_random_op_npu.py │ │ │ ├── test_unsqueeze_op_npu.py │ │ │ ├── test_unsqueeze_op_npu_eager.py │ │ │ ├── test_unstack_op_npu.py │ │ │ ├── test_update_loss_scaling_op_npu.py │ │ │ ├── test_warpctc_op_npu.py │ │ │ ├── test_where_index_npu.py │ │ │ ├── test_where_op_npu.py │ │ │ ├── test_where_op_npu_eager.py │ │ │ └── test_zero_dim_tensor_npu.py │ └── tools │ │ ├── compile.sh │ │ ├── coverage │ │ ├── coverage_diff.py │ │ ├── coverage_process.sh │ │ └── pull_request.py │ │ ├── disable_ut_npu │ │ ├── disable_ut_npu_910b │ │ ├── dockerfile │ │ ├── Dockerfile.npu.ubuntu20.gcc84 │ │ └── build-image.sh │ │ ├── important_ut_npu │ │ ├── pr_ci_llama_npu.sh │ │ └── pr_ci_npu.sh └── sdaa │ ├── CMakeLists.txt │ ├── README.md │ ├── README_cn.md │ ├── cmake │ ├── dummy.c.in │ ├── external │ │ ├── gflags.cmake │ │ ├── glog.cmake │ │ ├── 
gtest.cmake │ │ ├── onednn.cmake │ │ └── tabulate.cmake │ ├── generic.cmake │ ├── paddle.cmake │ ├── teco.cmake │ ├── third_party.cmake │ └── version.cmake │ ├── compile.sh │ ├── dynload │ ├── dynamic_loader.cc │ ├── dynamic_loader.h │ ├── sdpti.cc │ └── sdpti.h │ ├── external │ └── customsdaastream.h │ ├── kernels │ ├── abs_kernel.cc │ ├── accuracy_kernel.cc │ ├── activation_kernel.cc │ ├── adam_kernel.cc │ ├── add_n_kernel.cc │ ├── amp │ │ ├── amp_funcs.h │ │ ├── check_finite_and_scaling_kernel.cc │ │ └── update_loss_scaling_kernel.cc │ ├── arange_kernel.cc │ ├── arg_max_min_kernel.cc │ ├── argsort_kernel.cc │ ├── assign_kernel.cc │ ├── batch_norm_kernel.cc │ ├── bce_loss_kernel.cc │ ├── bitwise_kernel.cc │ ├── bmm_kernel.cc │ ├── cast_kernel.cc │ ├── clip_kernel.cc │ ├── coalesce_tensor_kernel.cc │ ├── compare_kernel.cc │ ├── concat_kernel.cc │ ├── contiguous_kernel.cc │ ├── conv2d_kernel.cc │ ├── conv_transpose_kernel.cc │ ├── cross_entropy_kernel.cc │ ├── cum_kernel.cc │ ├── distribute_fpn_proposals_kernel.cc │ ├── dropout_kernel.cc │ ├── element_add_kernel.cc │ ├── element_div_kernel.cc │ ├── element_mul_kernel.cc │ ├── element_sub_kernel.cc │ ├── elementwise_floordiv_kernel.cc │ ├── elementwise_max_kernel.cc │ ├── elementwise_min_kernel.cc │ ├── elementwise_mod_kernel.cc │ ├── elementwise_pow_kernel.cc │ ├── embedding_kernel.cc │ ├── expand_as_kernel.cc │ ├── expand_kernel.cc │ ├── fill_kernel.cc │ ├── flash_attenttion_kernel.cc │ ├── flip_kernel.cc │ ├── full_kernel.cc │ ├── funcs │ │ ├── contiguous │ │ │ ├── contiguous_register.h │ │ │ ├── copy_stride_opt.cc │ │ │ └── transpose_opt.cc │ │ ├── elementwise_functor.h │ │ ├── high_precision_op_list.cc │ │ ├── high_precision_op_list.h │ │ ├── nv_align.h │ │ ├── sdaa_baseop.cc │ │ ├── sdaa_baseop.h │ │ ├── sdaa_funcs.h │ │ ├── slice_utils.h │ │ ├── strided_copy_utils.cc │ │ ├── strided_copy_utils.h │ │ ├── tblas_baseop.h │ │ └── tecodnn_conv_impl.h │ ├── gather_kernel.cc │ ├── gather_nd_kernel.cc │ ├── 
gaussian_random_kernel.cc │ ├── generate_proposals_kernel.cc │ ├── grid_sample_kernel.cc │ ├── group_norm_kernel.cc │ ├── huber_loss_kernel.cc │ ├── identity_kernel.cc │ ├── increment_kernel.cc │ ├── index_put_kernel.cc │ ├── index_sample_kernel.cc │ ├── index_select_kernel.cc │ ├── instance_norm_kernel.cc │ ├── interpolate_kernel.cc │ ├── is_empty_kernel.cc │ ├── isfinite_kernel.cc │ ├── label_smooth_kernel.cc │ ├── layer_norm_kernel.cc │ ├── linspace_kernel.cc │ ├── log_loss_kernel.cc │ ├── log_softmax_kernel.cc │ ├── logical_kernel.cc │ ├── masked_select_kernel.cc │ ├── matmul_kernel.cc │ ├── memcpy_kernel.cc │ ├── merged_adam_kernel.cc │ ├── merged_momentum_kernel.cc │ ├── meshgrid_kernel.cc │ ├── momentum_kernel.cc │ ├── multiclass_nms3_kernel.cc │ ├── nll_loss_kernel.cc │ ├── nonzero_kernel.cc │ ├── one_hot_kernel.cc │ ├── p_norm_kernel.cc │ ├── pool2d_kernel.cc │ ├── prelu_kernel.cc │ ├── prior_box_kernel.cc │ ├── profiler │ │ ├── RecordEvent.cc │ │ ├── RecordEvent.h │ │ ├── os_info.cc │ │ ├── os_info.h │ │ ├── sdaa_wrapper.h │ │ ├── tcpx_util.cc │ │ └── tcpx_util.h │ ├── randint_kernel.cc │ ├── reduce_logic_kernel.cc │ ├── reduce_max_kernel.cc │ ├── reduce_mean_kernel.cc │ ├── reduce_min_kernel.cc │ ├── reduce_prod_kernel.cc │ ├── reduce_sum_kernel.cc │ ├── rnn_kernel.cc │ ├── roi_align_kernel.cc │ ├── scale_kernel.cc │ ├── scatter_kernel.cc │ ├── scatter_nd_add_kernel.cc │ ├── set_value_kernel.cc │ ├── sigmoid_cross_entropy_with_logits_kernel.cc │ ├── slice_kernel.cc │ ├── softmax_kernel.cc │ ├── split_kernel.cc │ ├── squared_l2_norm_kernel.cc │ ├── squeeze_kernel.cc │ ├── stack_kernel.cc │ ├── stride_slice_kernel.cc │ ├── strided_copy_kernel.cc │ ├── sync_batch_norm_kernel.cc │ ├── tile_kernel.cc │ ├── top_k_kernel.cc │ ├── transpose_kernel.cc │ ├── tril_triu_kernel.cc │ ├── truncated_gaussian_random_kernel.cc │ ├── unbind_kernel.cc │ ├── uniform_random_kernel.cc │ ├── unsqueeze_kernel.cc │ ├── unstack_kernel.cc │ ├── warpctc_kernel.cc │ └── 
where_kernel.cc │ ├── pr_ci_sdaa.sh │ ├── runtime │ ├── flags.h │ ├── runtime.cc │ ├── runtime.h │ └── sdaaEvent.h │ ├── sdaa_ext │ ├── python │ │ ├── __init__.py │ │ ├── custom_parallel │ │ │ ├── Adam.py │ │ │ ├── AdamW.py │ │ │ ├── Momentum.py │ │ │ ├── __init__.py │ │ │ ├── device_map.py │ │ │ ├── distributed_gradscalar.py │ │ │ └── distributed_optimizer.py │ │ ├── demo │ │ │ ├── __init__.py │ │ │ └── demo.py │ │ ├── ops │ │ │ ├── __init__.py │ │ │ ├── fused_swiglu.py │ │ │ ├── high_performance_op.py │ │ │ ├── rms_norm.py │ │ │ └── rope.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── conv_bn_fused_pass.py │ │ │ └── ir_custom_pass_patch.py │ │ ├── patch │ │ │ ├── __init__.py │ │ │ └── adaptor_mpu.py │ │ ├── storage │ │ │ ├── __init__.py │ │ │ └── storage.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── extension_utils.py │ │ │ ├── install_check.py │ │ │ ├── load_balance.py │ │ │ ├── sdaa_extension.py │ │ │ └── utils.py │ │ └── version │ │ │ ├── __init__.py │ │ │ └── version_query.py │ ├── sdaa_add_n_op.cc │ ├── sdaa_add_op.cc │ ├── sdaa_custom_tan.cc │ ├── sdaa_fc_op.cc │ ├── sdaa_fused_conv_bn.cc │ ├── sdaa_fused_rms_norm_op.cc │ ├── sdaa_fused_rotary_position_embedding.cc │ ├── sdaa_rankid.cc │ ├── sdaa_sgemmex_op.cc │ ├── sdaa_swiglu_op.cc │ ├── sdaa_version_query_op.cc │ ├── setup.py.in │ └── tensot_storage.cc │ ├── sdaac_ops │ ├── CMakeLists.txt │ ├── contrib │ │ ├── README.md │ │ └── sdaacops_contrib.h │ ├── custom_sdaacops.h │ └── custom_tan.scpp │ ├── setup.py.in │ ├── tests │ ├── CMakeLists.txt │ ├── distribution │ │ ├── CMakeLists.txt │ │ ├── allgather_api_test_case.py │ │ ├── allreduce_api_test_case.py │ │ ├── broadcast_api_test_case.py │ │ ├── communication_api_test_base.py │ │ ├── ddp_optimizer.py │ │ ├── hybrid_column_parallel_mp_layers.py │ │ ├── hybrid_cross_entropy_parallel_mp_layers.py │ │ ├── hybrid_embedding_parallel_mp_layers.py │ │ ├── hybrid_parallel_mp_amp.py │ │ ├── hybrid_parallel_mp_model.py │ │ ├── 
hybrid_parallel_mp_model_with_sequence_parallel.py │ │ ├── hybrid_parallel_pp_amp.py │ │ ├── hybrid_parallel_pp_embedding.py │ │ ├── hybrid_parallel_pp_layer.py │ │ ├── hybrid_parallel_sep_model.py │ │ ├── hybrid_parallel_shared_weight.py │ │ ├── hybrid_row_parallel_mp_layers.py │ │ ├── multi_nodes_ddp_test │ │ │ ├── multi_nodes_test.sh │ │ │ ├── test_all_gather.py │ │ │ ├── test_all_reduce.py │ │ │ └── test_broadcast.py │ │ ├── profiler_api_test_case.py │ │ ├── reduce_api_test_case.py │ │ ├── reduce_scatter_api_test_case.py │ │ ├── sdaa_dygraph_group_sharded_api.py │ │ ├── sdaa_dygraph_group_sharded_stage2.py │ │ ├── sdaa_dygraph_group_sharded_stage2_loss_stable.py │ │ ├── sdaa_dygraph_group_sharded_stage2_offload.py │ │ ├── sdaa_dygraph_group_sharded_stage3.py │ │ ├── sdaa_dygraph_group_sharded_stage3_offload.py │ │ ├── sendrecv_api_test_case.py │ │ ├── test_communication_stream_allgather_api.py │ │ ├── test_communication_stream_allreduce_api.py │ │ ├── test_communication_stream_broadcast_api.py │ │ ├── test_communication_stream_profiler_api.py │ │ ├── test_communication_stream_reduce_api.py │ │ ├── test_communication_stream_reduce_scatter_api.py │ │ ├── test_communication_stream_sendrecv_api.py │ │ ├── test_ddp_optimizer.py │ │ ├── test_dygraph_sharding_stage_2.py │ │ ├── test_dygraph_sharding_stage_3.py │ │ ├── test_parallel_dygraph_mp_layers.py │ │ ├── test_parallel_dygraph_pipeline_parallel.py │ │ ├── test_parallel_dygraph_pp_layers.py │ │ ├── test_parallel_dygraph_sep_parallel.py │ │ └── test_parallel_dygraph_tensor_parallel.py │ ├── rnn │ │ ├── convert.py │ │ └── rnn_numpy.py │ ├── runtime │ │ ├── CMakeLists.txt │ │ ├── test_profiler.py │ │ ├── test_profiler_with_kernel.py │ │ └── test_runtime.cc │ ├── test_MNIST_model.py │ └── unittests │ │ ├── CMakeLists.txt │ │ ├── op_test_dy.py │ │ ├── parallel_dygraph_sync_batch_norm.py │ │ ├── test_abs_op_sdaa.py │ │ ├── test_accuracy_op_sdaa.py │ │ ├── test_adam_op_sdaa.py │ │ ├── test_add_n_op_sdaa.py │ │ ├── 
test_all_any_op_sdaa.py │ │ ├── test_amp_level_sdaa.py │ │ ├── test_arange_op_sdaa.py │ │ ├── test_arg_max_op_sdaa.py │ │ ├── test_arg_min_op_sdaa.py │ │ ├── test_argsort_op_sdaa.py │ │ ├── test_assign_op_sdaa.py │ │ ├── test_assign_value_op_sdaa.py │ │ ├── test_atan_op_sdaa.py │ │ ├── test_batch_norm_op_sdaa.py │ │ ├── test_bce_loss_sdaa.py │ │ ├── test_bitwise_op_sdaa.py │ │ ├── test_bmm_op_sdaa.py │ │ ├── test_cast_op_sdaa.py │ │ ├── test_ceil_op_sdaa.py │ │ ├── test_check_finite_and_unscale_op_sdaa.py │ │ ├── test_clip_op_sdaa.py │ │ ├── test_coalesce_tensor_op_sdaa.py │ │ ├── test_compare_op_sdaa.py │ │ ├── test_concat_op_sdaa.py │ │ ├── test_contiguous_op_sdaa.py │ │ ├── test_conv2d_op_depthwise_conv_sdaa.py │ │ ├── test_conv2d_transposed_op_sdaa.py │ │ ├── test_conv_op_sdaa.py │ │ ├── test_cos_op_sdaa.py │ │ ├── test_cumsum_op_sdaa.py │ │ ├── test_custom_api_sdaa.py │ │ ├── test_custom_linear_op_sdaa.py │ │ ├── test_custom_matmul_op_sdaa.py │ │ ├── test_custom_pass_sdaa.py │ │ ├── test_custom_tan_op_sdaa.py │ │ ├── test_dist_base.py │ │ ├── test_distribute_fpn_proposals_op_sdaa.py │ │ ├── test_dropout_op_sdaa.py │ │ ├── test_dy2static_mlp_sdaa.py │ │ ├── test_dy_and_inplace_sdaa.py │ │ ├── test_dygraph_recompute_for_eager.py │ │ ├── test_elementwise_add_op_sdaa.py │ │ ├── test_elementwise_div_op_sdaa.py │ │ ├── test_elementwise_floordiv_op_sdaa.py │ │ ├── test_elementwise_max_op_sdaa.py │ │ ├── test_elementwise_min_op_sdaa.py │ │ ├── test_elementwise_mod_op_sdaa.py │ │ ├── test_elementwise_mul_op_sdaa.py │ │ ├── test_elementwise_pow_op_sdaa.py │ │ ├── test_elementwise_sub_op_sdaa.py │ │ ├── test_elu_op_sdaa.py │ │ ├── test_embedding_op_sdaa.py │ │ ├── test_erf_op_sdaa.py │ │ ├── test_exp_op_sdaa.py │ │ ├── test_expand_as_v2_op_sdaa.py │ │ ├── test_expand_v2_op_sdaa.py │ │ ├── test_fill_constant_batch_size_like_op_sdaa.py │ │ ├── test_fill_op_sdaa.py │ │ ├── test_flash_attn_op_sdaa.py │ │ ├── test_flip_op_sdaa.py │ │ ├── test_floor_op_sdaa.py │ │ ├── 
test_full_like_op_sdaa.py │ │ ├── test_full_op_sdaa.py │ │ ├── test_gather_nd_op_sdaa.py │ │ ├── test_gather_op_sdaa.py │ │ ├── test_gaussian_random_sdaa.py │ │ ├── test_gelu_op_sdaa.py │ │ ├── test_generate_proposals_sdaa.py │ │ ├── test_grid_sampler_op_sdaa.py │ │ ├── test_group_norm_op_sdaa.py │ │ ├── test_hard_sigmoid_op_sdaa.py │ │ ├── test_hard_swish_op_sdaa.py │ │ ├── test_hard_tanh_op_sdaa.py │ │ ├── test_highperformance_conv.py │ │ ├── test_highperformance_convtranspose.py │ │ ├── test_highperformance_gemm.py │ │ ├── test_huber_loss_op_sdaa.py │ │ ├── test_identity_op_sdaa.py │ │ ├── test_increment_op_sdaa.py │ │ ├── test_index_put_op_sdaa.py │ │ ├── test_index_sample_op_sdaa.py │ │ ├── test_index_select_op_sdaa.py │ │ ├── test_instance_norm_op_v2_sdaa.py │ │ ├── test_is_empty_op_sdaa.py │ │ ├── test_isnan_op_sdaa.py │ │ ├── test_label_smooth_op_sdaa.py │ │ ├── test_layer_norm_op_sdaa.py │ │ ├── test_leaky_relu_op_sdaa.py │ │ ├── test_linspace_op_sdaa.py │ │ ├── test_log2_op_sdaa.py │ │ ├── test_log_loss_op_sdaa.py │ │ ├── test_log_op_sdaa.py │ │ ├── test_log_softmax_sdaa.py │ │ ├── test_logical_op_sdaa.py │ │ ├── test_logsigmoid_op_sdaa.py │ │ ├── test_masked_select_op_sdaa.py │ │ ├── test_matmul_op_sdaa.py │ │ ├── test_max_op_sdaa.py │ │ ├── test_mean_all_op_sdaa.py │ │ ├── test_mean_op_sdaa.py │ │ ├── test_memcpy_op_sdaa.py │ │ ├── test_merged_adam_op_sdaa.py │ │ ├── test_merged_momentum_op_sdaa.py │ │ ├── test_meshgrid_op_sdaa.py │ │ ├── test_min_op_sdaa.py │ │ ├── test_mish_op_sdaa.py │ │ ├── test_momentum_op_sdaa.py │ │ ├── test_mul_op_sdaa.py │ │ ├── test_multiclass_nms_op_sdaa.py │ │ ├── test_nearest_interp_v2_op_sdaa.py │ │ ├── test_nll_loss_op_sdaa.py │ │ ├── test_no_event_pool.py │ │ ├── test_nonzero_op_sdaa.py │ │ ├── test_one_hot_v2_op_sdaa.py │ │ ├── test_p_norm_op_sdaa.py │ │ ├── test_pool2d_op_sdaa.py │ │ ├── test_pow_op_sdaa.py │ │ ├── test_prelu_op_sdaa.py │ │ ├── test_prior_box_op_sdaa.py │ │ ├── test_randint_op_sdaa.py │ │ ├── 
test_reciprocal_op_sdaa.py │ │ ├── test_reduce_prod_op_sdaa.py │ │ ├── test_reduce_sum_op_sdaa.py │ │ ├── test_relu6_op_sdaa.py │ │ ├── test_relu_op_sdaa.py │ │ ├── test_rms_norm_op_sdaa.py │ │ ├── test_rnn_op_sdaa.py │ │ ├── test_roi_align_sdaa.py │ │ ├── test_rope_op_sdaa.py │ │ ├── test_rsqrt_op_sdaa.py │ │ ├── test_scale_op_sdaa.py │ │ ├── test_scatter_op_sdaa.py │ │ ├── test_sdaa_matmul_scale.py │ │ ├── test_set_device.py │ │ ├── test_set_value_op_sdaa.py │ │ ├── test_sigmoid_cross_entropy_with_logits_op_sdaa.py │ │ ├── test_sigmoid_op_sdaa.py │ │ ├── test_silu_op_sdaa.py │ │ ├── test_sin_op_sdaa.py │ │ ├── test_slice_op_sdaa.py │ │ ├── test_softmax_sdaa.py │ │ ├── test_softmax_with_cross_entropy_op_sdaa.py │ │ ├── test_softplus_op_sdaa.py │ │ ├── test_softsign_op_sdaa.py │ │ ├── test_split_op_sdaa.py │ │ ├── test_sqrt_op_sdaa.py │ │ ├── test_square_op_sdaa.py │ │ ├── test_squared_l2_norm_op_sdaa.py │ │ ├── test_squeeze_op_sdaa.py │ │ ├── test_stack_op_sdaa.py │ │ ├── test_strided_op_sdaa.py │ │ ├── test_strided_slice_op_sdaa.py │ │ ├── test_swiglu_op_sdaa.py │ │ ├── test_swish_op_sdaa.py │ │ ├── test_sync_batch_norm_op_sdaa.py │ │ ├── test_tanh_op_sdaa.py │ │ ├── test_tcpx.py │ │ ├── test_tile_op_sdaa.py │ │ ├── test_topk_op_sdaa.py │ │ ├── test_transpose_op_sdaa.py │ │ ├── test_tril_triu_op_sdaa.py │ │ ├── test_truncated_gaussian_random_op_sdaa.py │ │ ├── test_unbind_op_sdaa.py │ │ ├── test_uniform_random_op_sdaa.py │ │ ├── test_unsqueeze_op_sdaa.py │ │ ├── test_unstack_op_sdaa.py │ │ ├── test_update_loss_scaling_op_sdaa.py │ │ ├── test_version_sdaa.py │ │ ├── test_warpctc_op_sdaa.py │ │ ├── test_where_op_sdaa.py │ │ └── test_zero_dim_tensor_sdaa.py │ └── tools │ └── version │ ├── dump.cc │ ├── minimum_supported_version.h.in │ ├── query.cc │ └── query.h ├── cmake ├── dummy.c.in ├── external │ ├── gflags.cmake │ ├── glog.cmake │ ├── gtest.cmake │ ├── onednn.cmake │ └── pybind11.cmake ├── generic.cmake ├── paddle.cmake ├── third_party.cmake └── version.cmake 
├── python ├── __init__.py ├── tests │ ├── auto_parallel_op_test.py │ ├── config.py │ ├── convert.py │ ├── op.py │ ├── op_test.py │ ├── prim_op_test.py │ ├── rnn_numpy.py │ ├── testsuite.py │ ├── utils.py │ └── white_list │ │ ├── __init__.py │ │ ├── check_op_sequence_batch_1_input_white_list.py │ │ ├── check_op_sequence_instance_0_input_white_list.py │ │ ├── check_shape_white_list.py │ │ ├── compile_vs_runtime_white_list.py │ │ ├── new_ir_python_api_grad_white_list.py │ │ ├── no_check_set_white_list.py │ │ ├── no_grad_set_white_list.py │ │ ├── op_accuracy_white_list.py │ │ └── op_threshold_white_list.py └── tools │ ├── __init__.py │ ├── static_mode_white_list.py │ └── test_runner.py └── tools └── codestyle ├── .cmakelintrc ├── .gitignore ├── clang_format.hook ├── copyright.hook ├── cpplint_pre_commit.hook ├── pre_commit.sh └── pylint_pre_commit.hook /.clang-format: -------------------------------------------------------------------------------- 1 | # This file is used by clang-format to autoformat paddle source code 2 | # 3 | # clang-format is part of the LLVM toolchain. 4 | # LLVM and clang must be installed to format the source code. 5 | # 6 | # The basic usage is: 7 | # clang-format -i -style=file PATH/TO/SOURCE/CODE 8 | # 9 | # -style=file implicitly uses the ".clang-format" file located in one of 10 | # the parent directories. 11 | # -i means the files are changed in place. 12 | # 13 | # The clang-format documentation is at 14 | # http://clang.llvm.org/docs/ClangFormat.html 15 | # http://clang.llvm.org/docs/ClangFormatStyleOptions.html 16 | --- 17 | Language: Cpp 18 | BasedOnStyle: Google 19 | IndentWidth: 2 20 | TabWidth: 2 21 | ContinuationIndentWidth: 4 22 | AccessModifierOffset: -1 # The private/protected/public has no indent in class 23 | Standard: Cpp11 24 | AllowAllParametersOfDeclarationOnNextLine: true 25 | BinPackParameters: false 26 | BinPackArguments: false 27 | ...
28 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = C,E,F,W 3 | exclude = 4 | ./build, 5 | ./backends/npu/build, 6 | ./backends/mlu/build, 7 | ./backends/custom_cpu/build 8 | ignore = 9 | # E, see https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes 10 | E203, 11 | E401,E402, 12 | E501, 13 | E721,E722,E731,E741, 14 | 15 | # F, see https://flake8.pycqa.org/en/latest/user/error-codes.html 16 | F405, 17 | F811,F841, 18 | 19 | # W, see https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes 20 | W503 21 | per-file-ignores = 22 | # Ignore unused imports in __init__.py 23 | __init__.py: F401 24 | # Ignore undefined variables in CMake config and some dygraph_to_static tests 25 | .cmake-format.py: F821 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.vs 3 | build/ 4 | *.user 5 | *.tmp 6 | *.pyc 7 | *.swp 8 | *.diff 9 | *.run 10 | 11 | .vscode 12 | .idea 13 | .project 14 | .cproject 15 | .pydevproject 16 | .settings/ 17 | CMakeSettings.json 18 | Makefile 19 | .test_env/ 20 | 21 | *~ 22 | bazel-* 23 | 24 | build_* 25 | # clion workspace. 
26 | cmake-build-* 27 | 28 | Testing 29 | tools/__pycache__ 30 | 31 | # ignore npu cache 32 | kernel_meta*/ 33 | ascend_install.info 34 | version.info 35 | 36 | # ignore Paddle change 37 | Paddle/ 38 | 39 | # ignore clangd cache 40 | .cache/ 41 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Paddle"] 2 | path = Paddle 3 | url = https://github.com/PaddlePaddle/Paddle.git 4 | branch = develop 5 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit = 80 4 | -------------------------------------------------------------------------------- /Guides/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/Guides/image.png -------------------------------------------------------------------------------- /Guides/paddle_load_customdevice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/Guides/paddle_load_customdevice.png -------------------------------------------------------------------------------- /Guides/subgraph_case.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/Guides/subgraph_case.png -------------------------------------------------------------------------------- /README_ja.md: -------------------------------------------------------------------------------- 1 | # PaddleCustomDevice 2 | 3 | [English](./README_en.md) | 
[简体中文](./README.md) | 日本語 4 | 5 | PaddlePaddle カスタムデバイスの実装。 6 | 7 | ## ユーザーガイド 8 | 9 | プログラム設計ドキュメントは[カスタムデバイスの概要](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/dev_guides/custom_device_docs/custom_device_overview_cn.html)を、開発ガイドラインについては、[新しいハードウェアのアクセス例](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/dev_guides/custom_device_docs/custom_device_example_cn.html)を、デモコードは [CustomCPU](backends/custom_cpu/README_ja.md) を参照してください。 10 | 11 | ## ハードウェアバックエンド 12 | 13 | PaddleCustomDevice は以下のバックエンドをサポートしています: 14 | 15 | - [Ascend NPU 用 PaddlePaddle カスタムデバイス実装](backends/npu/README.md) 16 | - [Cambricon MLU 用 PaddlePaddle カスタムデバイス実装](backends/mlu/README.md) 17 | - [Intel GPU 用 PaddlePaddle カスタムデバイス実装](backends/intel_gpu/README.md) 18 | - [Apple MPS 用 PaddlePaddle カスタムデバイス実装](backends/mps/README.md) 19 | - [Biren GPU 用 PaddlePaddle カスタムデバイス実装](backends/biren_gpu/README.md) 20 | - [Enflame GCU 用 PaddlePaddle カスタムデバイス実装](backends/gcu/README.md) 21 | - [Tecorigin SDAA 用 PaddlePaddle カスタムデバイス実装](backends/sdaa/README.md) 22 | 23 | ## 著作権とライセンス 24 | 25 | PaddleCustomDevice は [Apache-2.0 license](LICENSE) の下で提供されています。 26 | -------------------------------------------------------------------------------- /backends/biren_gpu/README.md: -------------------------------------------------------------------------------- 1 | # PaddlePaddle Custom Device Implementation for Biren GPU 2 | 3 | English | [简体中文](./README_cn.md) 4 | 5 | Please refer to the following steps to compile, install and verify the custom device implementation for Biren GPU. 6 | 7 | ## Compile and Install 8 | 9 | ```bash 10 | # Acquire Biren PaddlePaddle Docker Image 11 | 12 | # Clone PaddleCustomDevice source code 13 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice 14 | 15 | # Compile Source Code and Install 16 | cd PaddleCustomDevice/backends/biren_gpu 17 | mkdir -p build 18 | pushd build 19 | cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug ..
20 | ninja 21 | pip3 install --no-index --find-links=offline dist/paddle_custom_supa-*.whl --force-reinstall 22 | ``` 23 | 24 | ## Verification 25 | 26 | ```bash 27 | # build with -DWITH_TESTING=ON 28 | cmake -G Ninja -DWITH_TESTING=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug .. 29 | 30 | # ctest 31 | cd build 32 | ninja test 33 | ``` 34 | -------------------------------------------------------------------------------- /backends/biren_gpu/README_cn.md: -------------------------------------------------------------------------------- 1 | # 飞桨自定义接入硬件后端(壁仞GPU) 2 | 3 | 简体中文 | [English](./README.md) 4 | 5 | 请参考以下步骤进行编译安装与验证 6 | 7 | ## 编译安装 8 | 9 | ```bash 10 | # 获取壁仞PaddlePaddle Docker镜像 11 | 12 | # 克隆PaddleCustomDevice源码 13 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice 14 | 15 | # 编译安装 16 | cd PaddleCustomDevice/backends/biren_gpu 17 | mkdir -p build 18 | pushd build 19 | cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug .. 20 | ninja 21 | pip3 install --no-index --find-links=offline dist/paddle_custom_supa-*.whl --force-reinstall 22 | ``` 23 | 24 | ## 验证 25 | 26 | ```bash 27 | # -DWITH_TESTING=ON 28 | cmake -G Ninja -DWITH_TESTING=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug ..
29 | 30 | # ctest 31 | cd build 32 | ninja test 33 | ``` 34 | -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../../../cmake/dummy.c.in -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gflags.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/external/glog.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/glog.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gtest.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/external/onednn.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/onednn.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/pybind11.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/external/supa.cmake: -------------------------------------------------------------------------------- 1 | if(DEFINED ENV{SUPA_CUSTOM_PATH}) 2 | set(SUPA_DIR $ENV{SUPA_CUSTOM_PATH}) 3 | else() 4 | set(SUPA_DIR /usr/local/supa) 5 | endif() 6 | 7 | set(SUPA_CL_DIR ${SUPA_DIR}) 8 | set(supa_cl_lib 
${SUPA_CL_DIR}/lib/sucl/libsupa_cl.so) 9 | 10 | set(SUPA_CL_INC_DIR ${SUPA_CL_DIR}/include/ ${SUPA_CL_DIR}/include/sucl/) 11 | 12 | message(STATUS "SUPA_CL_INC_DIR ${SUPA_CL_INC_DIR}") 13 | message(STATUS "SUPA_CL_DIR ${SUPA_CL_DIR}") 14 | 15 | include_directories(${SUPA_CL_INC_DIR}) 16 | -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/generic.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/generic.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/paddle.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/paddle.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/third_party.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/third_party.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/cmake/version.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/version.cmake -------------------------------------------------------------------------------- /backends/biren_gpu/setup.py.in: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Distribution 2 | 3 | packages = [] 4 | package_data = {} 5 | 6 | class BinaryDistribution(Distribution): 7 | def has_ext_modules(self): 8 | return True 9 | 10 | setup( 11 | name = '@CMAKE_PROJECT_NAME@', 12 | version='@PADDLE_VERSION@', 13 | description='Paddle SUPA plugin', 14 | long_description='', 15 | long_description_content_type="text/markdown", 16 | author_email="Paddle-better@baidu.com", 17 | maintainer="PaddlePaddle", 18 | maintainer_email="Paddle-better@baidu.com", 19 | project_urls={}, 20 | license='Apache 
Software License', 21 | packages= [ 22 | 'paddle_custom_device', 23 | ], 24 | include_package_data=True, 25 | package_data = { 26 | '': ['*.so', '*.h', '*.py', '*.hpp'], 27 | }, 28 | package_dir = { 29 | '': 'python', 30 | }, 31 | zip_safe=False, 32 | distclass=BinaryDistribution, 33 | entry_points={ 34 | 'console_scripts': [ 35 | ] 36 | }, 37 | classifiers=[ 38 | ], 39 | keywords='Paddle SUPA plugin', 40 | ) 41 | -------------------------------------------------------------------------------- /backends/biren_gpu/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(py_test_modules TARGET_NAME) 2 | set(options SERIAL) 3 | set(oneValueArgs "") 4 | set(multiValueArgs MODULES DEPS ENVS) 5 | cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" 6 | "${multiValueArgs}" ${ARGN}) 7 | 8 | add_test( 9 | NAME ${TARGET_NAME} 10 | COMMAND 11 | ${CMAKE_COMMAND} -E env 12 | CUSTOM_DEVICE_ROOT=${CMAKE_BINARY_DIR}/python/paddle_custom_device/ 13 | PYTHONPATH=${PYTHON_SOURCE_DIR}:${PYTHON_SOURCE_DIR}/tests:$ENV{PYTHONPATH} 14 | ${py_test_modules_ENVS} python ${PYTHON_SOURCE_DIR}/tools/test_runner.py 15 | ${py_test_modules_MODULES} 16 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) 17 | 18 | if(py_test_modules_SERIAL) 19 | set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) 20 | endif() 21 | endfunction() 22 | 23 | add_subdirectory(unittests) 24 | -------------------------------------------------------------------------------- /backends/biren_gpu/tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file( 2 | GLOB TEST_OPS 3 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 4 | "test_*.py") 5 | 6 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") 7 | 8 | foreach(TEST_OP ${TEST_OPS}) 9 | py_test_modules(${TEST_OP} MODULES ${TEST_OP}) 10 | endforeach() 11 | 12 | set_tests_properties(${TEST_OPS} PROPERTIES TIMEOUT 1000) 13 | 
-------------------------------------------------------------------------------- /backends/custom_cpu/cmake/paddle.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/paddle.cmake -------------------------------------------------------------------------------- /backends/custom_cpu/setup.py.in: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Distribution 2 | 3 | packages = [] 4 | package_data = {} 5 | 6 | class BinaryDistribution(Distribution): 7 | def has_ext_modules(self): 8 | return True 9 | 10 | setup( 11 | name = '@CMAKE_PROJECT_NAME@', 12 | version='@PLUGIN_VERSION@', 13 | description='Paddle CustomCPU plugin', 14 | long_description='', 15 | long_description_content_type="text/markdown", 16 | author_email="Paddle-better@baidu.com", 17 | maintainer="PaddlePaddle", 18 | maintainer_email="Paddle-better@baidu.com", 19 | project_urls={}, 20 | license='Apache Software License', 21 | packages= [ 22 | 'paddle_custom_device', 23 | ], 24 | include_package_data=True, 25 | package_data = { 26 | '': ['*.so', '*.h', '*.py', '*.hpp'], 27 | }, 28 | package_dir = { 29 | '': 'python', 30 | }, 31 | zip_safe=False, 32 | distclass=BinaryDistribution, 33 | entry_points={ 34 | 'console_scripts': [ 35 | ] 36 | }, 37 | classifiers=[ 38 | ], 39 | keywords='Paddle CustomCPU plugin', 40 | ) 41 | -------------------------------------------------------------------------------- /backends/custom_cpu/tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. 
You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | file( 16 | GLOB TEST_OPS 17 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 18 | "test_*.py") 19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") 20 | 21 | foreach(TEST_OP ${TEST_OPS}) 22 | py_test_modules(${TEST_OP} MODULES ${TEST_OP}) 23 | endforeach() 24 | -------------------------------------------------------------------------------- /backends/custom_cpu/tools/dockerfile/root/.bashrc: -------------------------------------------------------------------------------- 1 | # .bashrc 2 | 3 | # Source global definitions 4 | if [ -f /etc/bashrc ]; then 5 | . 
/etc/bashrc 6 | fi 7 | 8 | # no auto logout 9 | export TMOUT=0 10 | 11 | # Locales 12 | export LC_ALL=en_US.UTF-8 13 | export LANG=en_US.UTF-8 14 | export LANGUAGE=en_US.UTF-8 15 | 16 | # Aliases 17 | 18 | alias rm='rm -i' 19 | alias cp='cp -i' 20 | alias mv='mv -i' 21 | 22 | alias l='ls -lF' 23 | alias ll='ls -alF' 24 | alias lt='ls -ltrF' 25 | alias lls='ls -alSrF' 26 | alias llt='ls -altrF' 27 | 28 | # Colorize directory listing 29 | alias ls="ls -p --color=auto" 30 | alias pstall='pip install -U --no-deps --force-reinstall' 31 | 32 | # Colorize grep only when writing to a terminal, so pipes and redirects stay free of escape codes 33 | if echo hello|grep --color=auto l >/dev/null 2>&1; then 34 | alias grep='grep --color=auto' 35 | export GREP_COLOR="1;31" 36 | fi 37 | 38 | # Shell 39 | export CLICOLOR="1" 40 | 41 | source ~/.scripts/git-prompt.sh 42 | export PS1="\[\e[1;33m\]λ\[\e[0m\] \h \[\e[1;32m\]\w\[\e[1;33m\]\$(__git_ps1 \" \[\e[35m\]{\[\e[36m\]%s\[\e[35m\]}\") \[\e[0m\]" 43 | source ~/.scripts/git-completion.sh 44 | -------------------------------------------------------------------------------- /backends/custom_cpu/tools/dockerfile/root/.gitconfig: -------------------------------------------------------------------------------- 1 | [user] 2 | name = 3 | email = 4 | 5 | [alias] 6 | st = status --branch --short 7 | ci = commit 8 | br = branch 9 | co = checkout 10 | df = diff 11 | l = log --pretty=format:\"%h %ad | %s%d [%an]\" --graph --date=short 12 | ll = log --stat 13 | lg = log --oneline -10 14 | 15 | [merge] 16 | tool = vimdiff 17 | 18 | [core] 19 | excludesfile = ~/.gitignore 20 | editor = vim 21 | 22 | [color] 23 | branch = auto 24 | diff = auto 25 | status = auto 26 | 27 | [color "branch"] 28 | current = yellow reverse 29 | local = yellow 30 | remote = green 31 | 32 | [color "diff"] 33 | meta = yellow bold 34 | frag = magenta bold 35 | old = red bold 36 | new = green bold 37 | 38 | [color "status"] 39 | added = yellow 40 | changed = green 41 | untracked = cyan 42 | 43 | [push] 44 | default = matching 45
| [credential] 46 | helper = store 47 | -------------------------------------------------------------------------------- /backends/custom_cpu/tools/dockerfile/root/.vimrc: -------------------------------------------------------------------------------- 1 | set nocompatible 2 | filetype plugin indent on 3 | 4 | set nu 5 | syntax enable 6 | syntax on 7 | set hlsearch 8 | set incsearch 9 | set fileencodings=utf-8,ucs-bom,gb18030,gbk,gb2312,cp936 10 | set termencoding=utf-8 11 | set encoding=utf-8 12 | set cursorline 13 | set paste 14 | set mouse=a 15 | set showmode 16 | set showcmd 17 | 18 | " expand tab to space 19 | set expandtab 20 | " The width of a hard tabstop measured in "spaces" 21 | set tabstop=4 22 | " The size of an "indent" 23 | set shiftwidth=4 24 | " insert a combination of spaces to simulate tab stops 25 | set softtabstop=4 26 | 27 | "remember last update or view position" 28 | " Only do this part when compiled with support for autocommands 29 | if has("autocmd") 30 | " In text files, always limit the width of text to 78 characters 31 | autocmd BufRead *.txt set tw=78 32 | " When editing a file, always jump to the last cursor position 33 | autocmd BufReadPost * 34 | \ if line("'\"") > 0 && line ("'\"") <= line("$") | 35 | \ exe "normal g'\"" | 36 | \ endif 37 | endif 38 | -------------------------------------------------------------------------------- /backends/gcu/backend/equivalence_trans/insensitive_ops/assign.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #include "backend/register/register.h" 20 | 21 | namespace backend { 22 | const char* const kAssign = "assign"; 23 | 24 | IMPLEMT_EQUIVALENCE_TRANS_FUNC( 25 | gcu_builder, op, map_inputs, running_mode, AssignEquivalenceTrans) { 26 | auto input = *(map_inputs["X"].at(0)); 27 | auto out = builder::Reshape(input, input.GetType()); 28 | return std::make_shared(out); 29 | } 30 | 31 | EQUIVALENCE_TRANS_FUNC_REG(kAssign, INSENSITIVE, AssignEquivalenceTrans); 32 | 33 | } // namespace backend 34 | -------------------------------------------------------------------------------- /backends/gcu/backend/equivalence_trans/insensitive_ops/equal.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/ 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #include "backend/register/register.h" 20 | 21 | namespace backend { 22 | const char *const kEqual = "equal"; 23 | 24 | IMPLEMT_EQUIVALENCE_TRANS_FUNC( 25 | gcu_builder, op, map_inputs, running_mode, EqualEquivalenceTrans) { 26 | GcuOp X = *(map_inputs["X"].at(0)); 27 | GcuOp Y = *(map_inputs["Y"].at(0)); 28 | auto result = builder::Equal(X, Y); 29 | return std::make_shared(result); 30 | } 31 | 32 | EQUIVALENCE_TRANS_FUNC_REG(kEqual, INSENSITIVE, EqualEquivalenceTrans); 33 | 34 | } // namespace backend 35 | -------------------------------------------------------------------------------- /backends/gcu/backend/equivalence_trans/insensitive_ops/log.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/ 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include "backend/register/register.h" 22 | 23 | namespace backend { 24 | const char *const kLog = "log"; 25 | 26 | IMPLEMT_EQUIVALENCE_TRANS_FUNC( 27 | gcu_builder, op, map_inputs, running_mode, LogEquivalenceTrans) { 28 | GcuOp data = *(map_inputs["X"].at(0)); 29 | auto result = builder::Log(data); 30 | return std::make_shared(result); 31 | } 32 | 33 | EQUIVALENCE_TRANS_FUNC_REG(kLog, INSENSITIVE, LogEquivalenceTrans); 34 | 35 | } // namespace backend 36 | -------------------------------------------------------------------------------- /backends/gcu/backend/equivalence_trans/insensitive_ops/sign.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/ 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #include "backend/register/register.h" 20 | 21 | namespace backend { 22 | const char *const kSign = "sign"; 23 | 24 | IMPLEMT_EQUIVALENCE_TRANS_FUNC( 25 | gcu_builder, op, map_inputs, running_mode, SignEquivalenceTrans) { 26 | builder::Op input = *(map_inputs["X"].at(0)); 27 | auto output = builder::Sign(input); 28 | return std::make_shared(output); 29 | } 30 | 31 | EQUIVALENCE_TRANS_FUNC_REG(kSign, INSENSITIVE, SignEquivalenceTrans); 32 | 33 | } // namespace backend 34 | -------------------------------------------------------------------------------- /backends/gcu/backend/executor/cast_runner.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | #include 17 | 18 | #include 19 | 20 | #include "backend/executor/gcu_node.h" 21 | #include "backend/utils/utils.h" 22 | 23 | namespace backend { 24 | 25 | void CastRunner(const topsStream_t stream, 26 | const std::vector dims, 27 | const phi::DataType src_data_type, 28 | const phi::DataType dst_data_type, 29 | const void* src_buf, 30 | void* dst_buf); 31 | 32 | } // namespace backend 33 | -------------------------------------------------------------------------------- /backends/gcu/backend/executor/tops_compiler.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/ 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | #include 20 | 21 | namespace hlir { 22 | class Module; 23 | } 24 | 25 | namespace backend { 26 | topsExecutable_t CompileTopsExecutable( 27 | const std::shared_ptr &module); 28 | 29 | } // namespace backend 30 | -------------------------------------------------------------------------------- /backends/gcu/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../../../cmake/dummy.c.in -------------------------------------------------------------------------------- /backends/gcu/cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gflags.cmake -------------------------------------------------------------------------------- /backends/gcu/cmake/external/glog.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/glog.cmake -------------------------------------------------------------------------------- /backends/gcu/cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gtest.cmake -------------------------------------------------------------------------------- /backends/gcu/cmake/external/onednn.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/onednn.cmake -------------------------------------------------------------------------------- /backends/gcu/cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/pybind11.cmake -------------------------------------------------------------------------------- /backends/gcu/cmake/generic.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/generic.cmake 
-------------------------------------------------------------------------------- /backends/gcu/cmake/paddle.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/paddle.cmake -------------------------------------------------------------------------------- /backends/gcu/cmake/third_party.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/third_party.cmake -------------------------------------------------------------------------------- /backends/gcu/cmake/version.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/version.cmake -------------------------------------------------------------------------------- /backends/gcu/common/flags.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "paddle/common/flags.h" 16 | 17 | #include 18 | 19 | #include "runtime/flags.h" 20 | 21 | FLAGS_DEFINE_int32(custom_engine_min_group_size, 22 | 3, 23 | "when the custom device subgraph size is not larger than " 24 | "`custom_engine_min_group_size`, the group will fallback to " 25 | "original graph."); 26 | -------------------------------------------------------------------------------- /backends/gcu/common/gcu_env_list.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | // #include "runtime/flags.h" 19 | 20 | namespace env { 21 | const char *const kUseJitKernels = "PADDLE_GCU_USE_JIT_KERNELS_ONLY"; 22 | const char *const kProfiler = "PADDLE_GCU_PROFILE"; 23 | const char *const kStreamAsync = "PADDLE_RUN_ASYNC"; 24 | const char *const kEnableTransOpt = "PADDLE_ENABLE_TRANSPOSE_OPT"; 25 | } // namespace env 26 | -------------------------------------------------------------------------------- /backends/gcu/passes/gcu_custom_passes.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "paddle/pir/include/pass/pass_registry.h" 18 | 19 | USE_PIR_PASS(addn_replace_pass); 20 | USE_PIR_PASS(gcu_op_marker_pass); 21 | USE_PIR_PASS(gcu_sub_graph_extract_pass); 22 | USE_PIR_PASS(gcu_replace_with_engine_op_pass); 23 | -------------------------------------------------------------------------------- /backends/gcu/passes/gcu_pass_pipeline.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | 20 | const std::vector* GetPirGcuPasses(); 21 | -------------------------------------------------------------------------------- /backends/gcu/tests/fuse_pass/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | file( 16 | GLOB TEST_FUSE_PASSES 17 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 18 | "test_*.py") 19 | string(REPLACE ".py" "" TEST_FUSE_PASSES "${TEST_FUSE_PASSES}") 20 | 21 | foreach(TEST_PASS ${TEST_FUSE_PASSES}) 22 | py_test_modules(${TEST_PASS} false MODULES fuse_pass/${TEST_PASS}.py) 23 | message(STATUS "with fuse pass: ${TEST_PASS}") 24 | endforeach() 25 | 26 | set_tests_properties(${TEST_FUSE_PASSES} PROPERTIES TIMEOUT 1000) 27 | -------------------------------------------------------------------------------- /backends/gcu/tests/fuse_pass/model/conv_bn.pdiparams: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn.pdiparams -------------------------------------------------------------------------------- /backends/gcu/tests/fuse_pass/model/conv_bn.pdmodel: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn.pdmodel -------------------------------------------------------------------------------- /backends/gcu/tests/fuse_pass/model/conv_bn_hard_swish.pdiparams: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn_hard_swish.pdiparams -------------------------------------------------------------------------------- /backends/gcu/tests/fuse_pass/model/conv_bn_hard_swish.pdmodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn_hard_swish.pdmodel -------------------------------------------------------------------------------- /backends/gcu/tests/fuse_pass/model/conv_bn_relu.pdiparams: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn_relu.pdiparams -------------------------------------------------------------------------------- /backends/gcu/tests/fuse_pass/model/conv_bn_relu.pdmodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model/conv_bn_relu.pdmodel -------------------------------------------------------------------------------- /backends/gcu/tests/fuse_pass/model_graph/test_graph.pdiparams: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/gcu/tests/fuse_pass/model_graph/test_graph.pdiparams -------------------------------------------------------------------------------- /backends/gcu/tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | file( 16 | GLOB TEST_OPS 17 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 18 | "test_*.py") 19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") 20 | 21 | foreach(TEST_OP ${TEST_OPS}) 22 | py_test_modules(${TEST_OP} false MODULES unittests/${TEST_OP}.py) 23 | message(STATUS "with op unittest: ${TEST_OP}") 24 | endforeach() 25 | 26 | set_tests_properties(${TEST_OPS} PROPERTIES TIMEOUT 1000) 27 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. 
You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | include(ctest.cmake) 16 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_atan.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase(func=paddle.atan, feed_names=["data"], feed_shapes=[[4]]) 21 | 22 | 23 | @pytest.mark.atan 24 | @pytest.mark.filterwarnings("ignore::UserWarning") 25 | def test_atan(): 26 | data = np.array([-0.4, -0.2, 0.1, 0.3], dtype=np.float32) 27 | test.run(feed=[data]) 28 | 29 | 30 | test_atan() 31 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_bmm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.bmm, 22 | feed_names=["lhs", "rhs"], 23 | feed_shapes=[[10, 3, 4], [10, 4, 5]], 24 | is_train=True, 25 | ) 26 | 27 | 28 | @pytest.mark.bmm 29 | @pytest.mark.filterwarnings("ignore::UserWarning") 30 | def test_bmm(): 31 | input1 = np.random.random([10, 3, 4]).astype("float32") 32 | input2 = np.random.random([10, 4, 5]).astype("float32") 33 | test.run(feed=[input1, input2]) 34 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_clip.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase(func=paddle.clip, feed_names=["data"], feed_shapes=[[2, 2]]) 21 | 22 | 23 | @pytest.mark.clip 24 | @pytest.mark.filterwarning("ignore::UserWarning") 25 | def test_clip(): 26 | data = np.array([[1.2, 3.5], [4.5, 6.4]]).astype("float32") 27 | test.run(feed=[data], min=3.5, max=5.0) 28 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_cos.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase(func=paddle.cos, feed_names=["data"], feed_shapes=[[4]]) 21 | 22 | 23 | @pytest.mark.cos 24 | @pytest.mark.filterwarnings("ignore::UserWarning") 25 | def test_cos(): 26 | data = np.array([-0.4, -0.2, 0.1, 0.3], dtype=np.float32) 27 | test.run(feed=[data]) 28 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_equal.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.equal, feed_names=["X", "Y"], feed_shapes=[[3], [3]], is_train=False 22 | ) 23 | 24 | 25 | @pytest.mark.equal 26 | @pytest.mark.filterwarnings("ignore::UserWarning") 27 | def test_equal(): 28 | x = np.array([-0.4, -0.2, 1], dtype=np.float32) 29 | y = np.array([-0.4, -0.2, 1], dtype=np.float32) 30 | test.run(feed=[x, y]) 31 | 32 | 33 | test_equal() 34 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_expand_as.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.expand_as, 22 | feed_names=["data", "target"], 23 | feed_shapes=[[3], [2, 3]], 24 | is_train=False, 25 | ) 26 | 27 | 28 | @pytest.mark.expand_as_v2 29 | @pytest.mark.filterwarnings("ignore::UserWarning") 30 | def test_expand_as_v2(): 31 | np.random.seed(1) 32 | data = np.random.uniform(0, 1, (3,)).astype("float32") 33 | target = np.random.randn(2, 3).astype("float32") 34 | test.run(feed=[data, target]) 35 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_fc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.static.nn.fc, 22 | feed_names=["data"], 23 | feed_shapes=[[2, 3, 4, 5]], 24 | input_is_list=False, 25 | is_train=True, 26 | ) 27 | 28 | 29 | @pytest.mark.fc 30 | @pytest.mark.filterwarnings("ignore::UserWarning") 31 | def test_fc(): 32 | np.random.seed(1) 33 | data = np.random.uniform(0, 1, (2, 3, 4, 5)).astype("float32") 34 | test.run(feed=[data], size=10) 35 | 36 | 37 | test_fc() 38 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_full_like.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.full_like, 22 | feed_names=["data"], 23 | is_train=False, 24 | feed_shapes=[[2, 3, 4, 5]], 25 | ) 26 | 27 | 28 | @pytest.mark.layer_norm 29 | @pytest.mark.filterwarnings("ignore::UserWarning") 30 | def test_full_like(): 31 | data = np.random.random(size=[2, 3, 4, 5]).astype("float32") 32 | test.run(feed=[data], fill_value=3.0, dtype=np.int64) 33 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_gelu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.nn.functional.gelu, feed_names=["data"], feed_shapes=[[2, 3]] 22 | ) 23 | 24 | 25 | @pytest.mark.gelu 26 | @pytest.mark.filterwarnings("ignore::UserWarning") 27 | def test_gelu(): 28 | data = np.random.randn(2, 3).astype("float32") 29 | test.run(feed=[data]) 30 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_greater_equal.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.greater_equal, 22 | feed_names=["lhs", "rhs"], 23 | feed_shapes=[[3], [3]], 24 | is_train=False, 25 | ) 26 | 27 | 28 | @pytest.mark.greater_equal 29 | @pytest.mark.filterwarnings("ignore::UserWarning") 30 | def test_greater_equal(): 31 | lhs = np.array([-1, 0, 1], dtype=np.float32) 32 | rhs = np.array([1, 0, -1], dtype=np.float32) 33 | test.run(feed=[lhs, rhs]) 34 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_greater_than.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.greater_than, 22 | feed_names=["lhs", "rhs"], 23 | feed_shapes=[[3], [3]], 24 | is_train=False, 25 | ) 26 | 27 | 28 | @pytest.mark.greater_than 29 | @pytest.mark.filterwarnings("ignore::UserWarning") 30 | def test_greater_than(): 31 | lhs = np.array([-1, 0, 1], dtype=np.float32) 32 | rhs = np.array([1, 0, -1], dtype=np.float32) 33 | test.run(feed=[lhs, rhs]) 34 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_isinf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.isinf, 22 | feed_names=["data"], 23 | is_train=False, 24 | feed_shapes=[[7]], 25 | threshold=1.0e-5, 26 | ) 27 | 28 | 29 | @pytest.mark.isinf_v2 30 | @pytest.mark.filterwarnings("ignore::UserWarning") 31 | def test_isinfv2(): 32 | np.random.seed(1) 33 | data = np.array( 34 | [float("-inf"), -2, 3.6, float("inf"), 0, float("-nan"), float("nan")] 35 | ).astype("float32") 36 | test.run(feed=[data]) 37 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_label_smooth.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.nn.functional.label_smooth, 22 | feed_names=["data"], 23 | feed_shapes=[[1, 2, 3]], 24 | is_train=False, 25 | ) 26 | 27 | 28 | @pytest.mark.label_smooth 29 | @pytest.mark.filterwarnings("ignore::UserWarning") 30 | def test_label_smooth(): 31 | data = np.array([[[0, 1, 0], [1, 0, 1]]], dtype=np.float32) 32 | test.run(feed=[data]) 33 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_leaky_relu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.nn.functional.leaky_relu, 22 | feed_names=["data"], 23 | feed_shapes=[[2, 3, 224, 224]], 24 | ) 25 | 26 | 27 | @pytest.mark.leaky_relu 28 | @pytest.mark.filterwarnings("ignore::UserWarning") 29 | def test_leaky_relu(): 30 | data = np.random.randn(2, 3, 224, 224).astype("float32") 31 | test.run(feed=[data], negative_slope=0.2) 32 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_log.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test1 = ApiBase( 21 | func=paddle.log, 22 | feed_names=["data"], 23 | is_train=False, 24 | feed_shapes=[[2, 3]], 25 | threshold=1.0e-5, 26 | ) 27 | 28 | 29 | @pytest.mark.log 30 | @pytest.mark.filterwarnings("ignore::UserWarning") 31 | def test_log(): 32 | np.random.seed(1) 33 | data = np.array([[10, 20, 30], [5, 78, 96]]).astype("float32") 34 | test1.run(feed=[data]) 35 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_maximum.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.maximum, 22 | feed_names=["x", "y"], 23 | # is_train=False, 24 | feed_shapes=[[2, 2, 2], [2, 2]], 25 | ) 26 | 27 | 28 | @pytest.mark.maximum 29 | @pytest.mark.filterwarning("ignore::UserWarning") 30 | def test_maximum(): 31 | x = np.array([[[2, 3], [4, 6]], [[7, 8], [5, 9]]]).astype("float32") 32 | y = np.array([[2, 3.1], [1, 8.1]]).astype("float32") 33 | test.run(feed=[x, y]) 34 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_mean.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.mean, feed_names=["data"], feed_shapes=[[2, 3]], is_train=True 22 | ) 23 | 24 | 25 | @pytest.mark.mean 26 | @pytest.mark.filterwarnings("ignore::UserWarning") 27 | def test_mean(): 28 | np.random.seed(1) 29 | data = np.random.random(size=[2, 3]).astype("float32") 30 | test.run(feed=[data]) 31 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_minimum.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.minimum, 22 | feed_names=["data1", "data2"], 23 | feed_shapes=[[1, 2, 3], [3]], 24 | is_train=True, 25 | ) 26 | 27 | 28 | @pytest.mark.minimum 29 | @pytest.mark.filterwarnings("ignore::UserWarning") 30 | def test_minimum(): 31 | data1 = np.array([[[-0.4, 1, -0.2], [0.1, 2, 0.3]]], dtype=np.float32) 32 | data2 = np.array([1, -1, 0], dtype=np.float32) 33 | test.run(feed=[data1, data2]) 34 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_one_hot.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.nn.functional.one_hot, 22 | feed_names=["data"], 23 | # is_train=False, 24 | feed_shapes=[[4, 1]], 25 | feed_dtypes=["int64"], 26 | ) 27 | 28 | 29 | @pytest.mark.one_hot 30 | @pytest.mark.filterwarnings("ignore::UserWarning") 31 | def test_one_hot(): 32 | data = np.array([[1], [1], [3], [5]]).astype("int64") 33 | test.run(feed=[data], num_classes=6) 34 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_reverse.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | 21 | test = ApiBase(func=paddle.reverse, feed_names=["data"], feed_shapes=[[7, 8, 9, 10]]) 22 | 23 | 24 | @pytest.mark.reverse 25 | @pytest.mark.filterwarnings("ignore::UserWarning") 26 | def test_reverse(): 27 | data = np.random.random(size=[7, 8, 9, 10]).astype("float32") 28 | test.run(feed=[data], axis=[-1, 0, 2]) 29 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_shape.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.shape, feed_names=["data"], feed_shapes=[[2, 4]], is_train=False 22 | ) 23 | np.random.seed(1) 24 | 25 | 26 | @pytest.mark.shape 27 | @pytest.mark.filterwarnings("ignore::UserWarning") 28 | def test_shape(): 29 | data = np.random.uniform(1, 10, (2, 4)).astype("float32") 30 | test.run(feed=[data]) 31 | 32 | 33 | test_shape() 34 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_sign.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | 21 | test = ApiBase(func=paddle.sign, feed_names=["data"], feed_shapes=[[2, 3]]) 22 | 23 | 24 | @pytest.mark.sign 25 | @pytest.mark.filterwarnings("ignore::UserWarning") 26 | def test_sign(): 27 | data = np.array([[-45.432, -0.23, 55.55], [3.3, 0.0, -43.6]]).astype("float32") 28 | test.run(feed=[data]) 29 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_silu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.nn.functional.silu, feed_names=["data"], feed_shapes=[[2, 3]] 22 | ) 23 | 24 | 25 | @pytest.mark.silu 26 | @pytest.mark.filterwarnings("ignore::UserWarning") 27 | def test_silu(): 28 | data = np.random.randn(2, 3).astype("float32") 29 | test.run(feed=[data]) 30 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_squeeze_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase( 21 | func=paddle.squeeze, 22 | feed_names=["data"], 23 | feed_shapes=[[2, 1, 4]], 24 | input_is_list=False, 25 | is_train=True, 26 | ) 27 | 28 | 29 | @pytest.mark.squeeze2 30 | @pytest.mark.filterwarnings("ignore::UserWarning") 31 | def test_squeeze(): 32 | np.random.seed(1) 33 | data = np.random.uniform(0, 1, (2, 1, 4)).astype("float32") 34 | test.run(feed=[data], axis=[1]) 35 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_jit/test_tanh.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from api_base import ApiBase 16 | import paddle 17 | import pytest 18 | import numpy as np 19 | 20 | test = ApiBase(func=paddle.tanh, feed_names=["data"], feed_shapes=[[2, 3]]) 21 | 22 | 23 | @pytest.mark.tanh 24 | @pytest.mark.filterwarnings("ignore::UserWarning") 25 | def test_tanh(): 26 | data = np.array([[-0.4, 0.6, 2.3], [1.0, 5.0, -3.2]], dtype=np.float32) 27 | test.run(feed=[data]) 28 | -------------------------------------------------------------------------------- /backends/gcu/tests/unittests_legacy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | file( 16 | GLOB TEST_OPS 17 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 18 | "test_*.py") 19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") 20 | 21 | foreach(TEST_OP ${TEST_OPS}) 22 | py_test_modules(${TEST_OP} false MODULES unittests_legacy/${TEST_OP}.py) 23 | message(STATUS "with op unittest: ${TEST_OP}") 24 | endforeach() 25 | 26 | set_tests_properties(${TEST_OPS} PROPERTIES TIMEOUT 1000) 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/README.md: -------------------------------------------------------------------------------- 1 | # PaddlePaddle Custom Device Implementation for Iluvatar GPU 2 | 3 | English | [简体中文](./README_cn.md) 4 | 5 | Please refer to the following steps to compile, install and verify the custom device implementation for Iluvatar GPU. 6 | 7 | ## Compilation and Installation 8 | 9 | ```bash 10 | # Please contact Iluvatar customer support (services@iluvatar.com) to obtain the SDK image 11 | 12 | # Clone PaddleCustomDevice source code 13 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice 14 | 15 | # Set environment variables 16 | export PATH=/usr/local/corex-4.3.0/bin:$PATH 17 | export LD_LIBRARY_PATH=/usr/local/corex-4.3.0/lib 18 | export LIBRARY_PATH=/usr/local/corex-4.3.0/lib 19 | 20 | # Compile Paddle Custom Device 21 | cd backends/iluvatar_gpu 22 | bash build_paddle.sh 23 | 24 | # Install 25 | bash install_paddle.sh 26 | ``` 27 | 28 | ## Verification 29 | 30 | ```bash 31 | # Run tests 32 | cd tests 33 | bash run_test.sh 34 | ``` 35 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/README_cn.md: -------------------------------------------------------------------------------- 1 | # 飞桨自定义接入硬件后端(天数GPU) 2 | 3 | 简体中文 | [English](./README.md) 4 | 5 | 请参考以下步骤进行编译安装与验证 6 | 7 | ## 编译安装 8 | 9 | ```bash 10 
| # 请联系天数智芯客户支持(services@iluvatar.com)获取SDK镜像 11 | 12 | # 克隆PaddleCustomDevice源码 13 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice 14 | 15 | # 设置环境变量 16 | export PATH=/usr/local/corex-4.3.0/bin:$PATH 17 | export LD_LIBRARY_PATH=/usr/local/corex-4.3.0/lib 18 | export LIBRARY_PATH=/usr/local/corex-4.3.0/lib 19 | 20 | # 编译 Paddle Custom Device 21 | cd backends/iluvatar_gpu 22 | bash build_paddle.sh 23 | 24 | # 安装 25 | bash install_paddle.sh 26 | ``` 27 | 28 | ## 验证 29 | 30 | ```bash 31 | # 运行测试 32 | cd tests 33 | bash run_test.sh 34 | ``` 35 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/clean_paddle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | 17 | rm -rf build 18 | rm -rf build_pip 19 | rm -rf build_lib 20 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/cblas.cmake: -------------------------------------------------------------------------------- 1 | ../../../Paddle/cmake/cblas.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/cuda.cmake: -------------------------------------------------------------------------------- 1 | ../../../Paddle/cmake/cuda.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../../../Paddle/cmake/dummy.c.in -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/external/cccl.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/cccl.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/external/mklml.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/mklml.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/external/xxhash.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/xxhash.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/external/zlib.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/zlib.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/generic.cmake: 
-------------------------------------------------------------------------------- 1 | ../../../Paddle/cmake/generic.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/paddle.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/paddle.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/third_party.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/third_party.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/cmake/version.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/version.cmake -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/addmm_kernel_register.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/ 14 | 15 | #include "../impl/addmm_kernel_impl.h" 16 | #include "paddle/phi/backends/gpu/gpu_context.h" 17 | #include "paddle/phi/core/kernel_registry.h" 18 | #include "paddle/phi/kernels/addmm_kernel.h" 19 | /* Registers phi::AddmmKernel under the op name addmm for the iluvatar_gpu backend (float, float16, bfloat16); the kernel body is presumably supplied by ../impl/addmm_kernel_impl.h. */ 20 | PD_CUSTOM_KERNEL_REGISTER(addmm, 21 | iluvatar_gpu, 22 | ALL_LAYOUT, 23 | phi::AddmmKernel, 24 | float, 25 | phi::dtype::float16, 26 | phi::dtype::bfloat16) {} 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/c_embedding_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/c_embedding_grad_kernel.h" 17 | /* Registers phi::CEmbeddingGradKernel under the op name c_embedding_grad for the iluvatar_gpu backend. phi::dtype::complex is a class template, so the complex entry must spell out its element type; complex<float> is assumed here to match the float entry (TODO confirm against the upstream file). */ 18 | PD_CUSTOM_KERNEL_REGISTER(c_embedding_grad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::CEmbeddingGradKernel, 22 | float, 23 | phi::dtype::bfloat16, 24 | phi::dtype::float16, 25 | phi::dtype::complex<float>) {} 26 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/c_embedding_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/c_embedding_kernel.h" 17 | /* Registers phi::CEmbeddingKernel under the op name c_embedding for the iluvatar_gpu backend. phi::dtype::complex is a class template, so the complex entry must spell out its element type; complex<float> is assumed here to match the float entry (TODO confirm against the upstream file). */ 18 | PD_CUSTOM_KERNEL_REGISTER(c_embedding, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::CEmbeddingKernel, 22 | float, 23 | phi::dtype::bfloat16, 24 | phi::dtype::float16, 25 | phi::dtype::complex<float>) {} 26 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/c_identity_kernel_register.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
*/ 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/c_identity_kernel.h" 17 | /* Registers phi::CIdentityKernel under the op name c_identity for the iluvatar_gpu backend (float, int, int64_t, bfloat16, float16); the empty braces add no per-kernel configuration. */ 18 | PD_CUSTOM_KERNEL_REGISTER(c_identity, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::CIdentityKernel, 22 | float, 23 | int, 24 | int64_t, 25 | phi::dtype::bfloat16, 26 | phi::dtype::float16) {} 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/c_softmax_with_cross_entropy_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu" // NOLINT 17 | /* Registers phi::CSoftmaxWithCrossEntropyGradKernel under the op name c_softmax_with_cross_entropy_grad for the iluvatar_gpu backend (float, float16). The upstream .cu source is included textually above, presumably so the kernel template is compiled into this translation unit. */ 18 | PD_CUSTOM_KERNEL_REGISTER(c_softmax_with_cross_entropy_grad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::CSoftmaxWithCrossEntropyGradKernel, 22 | float, 23 | phi::dtype::float16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/c_softmax_with_cross_entropy_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_kernel.cu" // NOLINT 17 | /* Registers phi::CSoftmaxWithCrossEntropyKernel under the op name c_softmax_with_cross_entropy for the iluvatar_gpu backend (float, float16). The upstream .cu source is included textually above, presumably so the kernel template is compiled into this translation unit. */ 18 | PD_CUSTOM_KERNEL_REGISTER(c_softmax_with_cross_entropy, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::CSoftmaxWithCrossEntropyKernel, 22 | float, 23 | phi::dtype::float16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/clip_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/clip_grad_kernel.h" 17 | /* Registers phi::ClipGradKernel under the op name clip_grad for the iluvatar_gpu backend (float, int, int64_t, bfloat16, float16). */ 18 | PD_CUSTOM_KERNEL_REGISTER(clip_grad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::ClipGradKernel, 22 | float, 23 | int, 24 | int64_t, 25 | phi::dtype::bfloat16, 26 | phi::dtype::float16) {} 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/clip_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/clip_kernel.h" 17 | /* Registers phi::ClipKernel under the op name clip for the iluvatar_gpu backend, with the same dtype set as clip_grad (float, int, int64_t, float16, bfloat16). */ 18 | PD_CUSTOM_KERNEL_REGISTER(clip, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::ClipKernel, 22 | float, 23 | int, 24 | int64_t, 25 | phi::dtype::float16, 26 | phi::dtype::bfloat16) {} 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/embedding_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/embedding_kernel.h" 17 | /* Registers phi::EmbeddingKernel under the op name embedding for the iluvatar_gpu backend. phi::dtype::complex is a class template, so the complex entry must spell out its element type; complex<float> is assumed here to match the float entry (TODO confirm against the upstream file). */ 18 | PD_CUSTOM_KERNEL_REGISTER(embedding, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::EmbeddingKernel, 22 | float, 23 | int8_t, 24 | phi::dtype::float16, 25 | phi::dtype::bfloat16, 26 | phi::dtype::complex<float>) {} 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/logsumexp_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/logsumexp_kernel.h" 17 | /* Registers phi::LogsumexpKernel under the op name logsumexp for the iluvatar_gpu backend (float, float16, bfloat16). */ 18 | PD_CUSTOM_KERNEL_REGISTER(logsumexp, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::LogsumexpKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/mean_all_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/mean_all_grad_kernel.h" 17 | /* Registers phi::MeanAllGradKernel under the op name mean_all_grad for the iluvatar_gpu backend. phi::dtype::complex is a class template, so the complex entry must spell out its element type; complex<float> is assumed here to match the float entry (TODO confirm against the upstream file). */ 18 | PD_CUSTOM_KERNEL_REGISTER(mean_all_grad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::MeanAllGradKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::complex<float>) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/mean_all_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/mean_all_kernel.h" 17 | /* Registers phi::MeanAllKernel under the op name mean_all for the iluvatar_gpu backend. phi::dtype::complex is a class template, so the complex entry must spell out its element type; complex<float> is assumed here to match the float entry (TODO confirm against the upstream file). */ 18 | PD_CUSTOM_KERNEL_REGISTER(mean_all, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::MeanAllKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::complex<float>) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/multinomial_kernel_register.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
*/ 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/multinomial_kernel.h" 17 | /* Registers phi::MultinomialKernel under the op name multinomial for the iluvatar_gpu backend (float16, bfloat16, float). The registration body pins output 0 to INT64, so the output dtype does not follow the registered input dtype. */ 18 | PD_CUSTOM_KERNEL_REGISTER(multinomial, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::MultinomialKernel, 22 | phi::dtype::float16, 23 | phi::dtype::bfloat16, 24 | float) { 25 | kernel->OutputAt(0).SetDataType(phi::DataType::INT64); 26 | } 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/one_hot_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/one_hot_kernel.h" 17 | /* Registers phi::OneHotKernel under the op name one_hot for the iluvatar_gpu backend (int, int64_t inputs). The registration body pins output 0 to FLOAT32 regardless of the integer input dtype. */ 18 | PD_CUSTOM_KERNEL_REGISTER( 19 | one_hot, iluvatar_gpu, ALL_LAYOUT, phi::OneHotKernel, int, int64_t) { 20 | kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32); 21 | } 22 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/p_norm_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/p_norm_grad_kernel.h" 17 | /* Registers phi::PNormGradKernel under the op name p_norm_grad for the iluvatar_gpu backend (float, float16, bfloat16). */ 18 | PD_CUSTOM_KERNEL_REGISTER(p_norm_grad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::PNormGradKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/p_norm_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/p_norm_kernel.h" 17 | /* Registers phi::PNormKernel under the op name p_norm for the iluvatar_gpu backend (float, float16, bfloat16). */ 18 | PD_CUSTOM_KERNEL_REGISTER(p_norm, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::PNormKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/pad_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/pad_grad_kernel.h" 17 | /* Registers phi::PadGradKernel under the op name pad_grad for the iluvatar_gpu backend. phi::dtype::complex is a class template, so the complex entry must spell out its element type; complex<float> is assumed here to match the float entry (TODO confirm against the upstream file). */ 18 | PD_CUSTOM_KERNEL_REGISTER(pad_grad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::PadGradKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16, 25 | phi::dtype::complex<float>) {} 26 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/pad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/pad_kernel.h" 17 | /* Registers phi::PadKernel under the op name pad for the iluvatar_gpu backend. phi::dtype::complex is a class template, so the complex entry must spell out its element type; complex<float> is assumed here to match the float entry (TODO confirm against the upstream file). */ 18 | PD_CUSTOM_KERNEL_REGISTER(pad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::PadKernel, 22 | float, 23 | int, 24 | int64_t, 25 | phi::dtype::float16, 26 | phi::dtype::bfloat16, 27 | phi::dtype::complex<float>) {} 28 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/put_along_axis_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/put_along_axis_kernel.h" 17 | /* Registers phi::PutAlongAxisKernel under the op name put_along_axis for the iluvatar_gpu backend (float, int64_t, int, float16, bfloat16). */ 18 | PD_CUSTOM_KERNEL_REGISTER(put_along_axis, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::PutAlongAxisKernel, 22 | float, 23 | int64_t, 24 | int, 25 | phi::dtype::float16, 26 | phi::dtype::bfloat16) {} 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/randint_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/randint_kernel.h" 17 | /* Registers phi::RandintKernel under the op name randint for the iluvatar_gpu backend; only the integer dtypes int and int64_t are listed. */ 18 | PD_CUSTOM_KERNEL_REGISTER( 19 | randint, iluvatar_gpu, ALL_LAYOUT, phi::RandintKernel, int, int64_t) {} 20 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/reshape_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/reshape_grad_kernel.h" 17 | /* Registers phi::ReshapeGradKernel under the op name reshape_grad for the iluvatar_gpu backend through the FOR_ALL_DTYPE macro variant, so no explicit dtype list is given here. */ 18 | PD_CUSTOM_KERNEL_REGISTER_FOR_ALL_DTYPE(reshape_grad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::ReshapeGradKernel) {} 22 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/reshape_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/reshape_kernel.h" 17 | /* Registers phi::ReshapeKernel under the op name reshape for the iluvatar_gpu backend through the FOR_ALL_DTYPE macro variant, so no explicit dtype list is given here. */ 18 | PD_CUSTOM_KERNEL_REGISTER_FOR_ALL_DTYPE(reshape, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::ReshapeKernel) {} 22 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/squared_l2_norm_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/squared_l2_norm_kernel.h" 17 | /* Registers phi::SquaredL2NormKernel under the op name squared_l2_norm for the iluvatar_gpu backend (float, float16, bfloat16). */ 18 | PD_CUSTOM_KERNEL_REGISTER(squared_l2_norm, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::SquaredL2NormKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/swiglu_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/swiglu_grad_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(swiglu_grad, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::SwiGLUGradKernel, 22 | float, 23 | double, 24 | phi::dtype::float16, 25 | phi::dtype::bfloat16) {} 26 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/swiglu_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/swiglu_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(swiglu, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::SwiGLUKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/take_along_axis_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/take_along_axis_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(take_along_axis, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::TakeAlongAxisKernel, 22 | float, 23 | int64_t, 24 | int, 25 | phi::dtype::float16, 26 | phi::dtype::bfloat16) {} 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/uniform_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/uniform_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(uniform, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::UniformKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/cuda_kernels/where_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/where_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(where, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::WhereKernel, 22 | float, 23 | int, 24 | bool, 25 | int64_t, 26 | phi::dtype::float16, 27 | phi::dtype::bfloat16) {} 28 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/cal_aux_loss_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/cal_aux_loss_grad_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(cal_aux_loss_grad, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::CalAuxLossGradKernel, 21 | float) {} 22 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/expand_modality_expert_id_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/expand_modality_expert_id_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(expand_modality_expert_id, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::ExpandModalityExpertIDKernel, 21 | int, 22 | int64_t) {} 23 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/fused_bias_act_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu" //NOLINT 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(fused_bias_act, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::fusion::FusedBiasActKernel, 22 | float, 23 | phi::dtype::bfloat16, 24 | phi::dtype::float16, 25 | int32_t) {} 26 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/int_bincount_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/int_bincount.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER( 18 | int_bincount, iluvatar_gpu, ALL_LAYOUT, phi::IntBincount, int64_t, int) {} 19 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/layer_norm_cuda_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/gpu/layer_norm_cuda_kernel.cu" //NOLINT 16 | 17 | PD_CUSTOM_KERNEL_REGISTER( 18 | fused_rms_norm, iluvatar_gpu, ALL_LAYOUT, phi::RMSLnFwd, float) {} 19 | 20 | PD_CUSTOM_KERNEL_REGISTER( 21 | fused_rms_norm_grad, iluvatar_gpu, ALL_LAYOUT, phi::RMSLnBwd, float) {} 22 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/moe_combine_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/moe_combine_grad_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(moe_combine_grad, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::MoeCombineGradKernel, 21 | float, 22 | phi::dtype::bfloat16, 23 | phi::dtype::float16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/moe_combine_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/moe_combine_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(moe_combine, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::MoeCombineKernel, 21 | float, 22 | phi::dtype::bfloat16, 23 | phi::dtype::float16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/moe_gate_dispatch_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/moe_gate_dispatch_grad_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_grad, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::MoeGateDispatchGradKernel, 21 | float, 22 | phi::dtype::float16, 23 | phi::dtype::bfloat16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/moe_gate_dispatch_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/moe_gate_dispatch_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::MoeGradDispatchKernel, 21 | float, 22 | phi::dtype::float16, 23 | phi::dtype::bfloat16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/moe_gate_dispatch_permute_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/moe_gate_dispatch_permute_grad_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_permute_grad, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::MoeGateDispatchGradKernel, 21 | float, 22 | phi::dtype::float16, 23 | phi::dtype::bfloat16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/moe_gate_dispatch_permute_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/moe_gate_dispatch_permute_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_permute, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::MoEDispatchPermuteKernel, 21 | float, 22 | phi::dtype::float16, 23 | phi::dtype::bfloat16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/moe_ops_partial_nosoftmaxtopk_grad_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/moe_ops_partial_nosoftmaxtopk_grad_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_partial_nosoftmaxtopk_grad, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::MoeGateDispatchPartialNoSoftMaxTopkGradKernel, 21 | float, 22 | phi::dtype::float16, 23 | phi::dtype::bfloat16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/moe_ops_partial_nosoftmaxtopk_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/moe_ops_partial_nosoftmaxtopk_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(moe_gate_dispatch_partial_nosoftmaxtopk, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::MoeGateDispatchPartialNoSoftMaxTopkKernel, 21 | float, 22 | phi::dtype::bfloat16, 23 | phi::dtype::float16) {} 24 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/register_build_src_rank_and_local_expert_id_kernel.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/build_src_rank_and_local_expert_id_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(build_src_rank_and_local_expert_id, 18 | iluvatar_gpu, 19 | ALL_LAYOUT, 20 | phi::BuildSrcRankAndLocalExpertIdKernel, 21 | int32_t, 22 | int64_t) {} 23 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/rms_norm_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/rms_norm_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(rms_norm, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::RmsNormKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/ernie_core/top_p_sampling_kernel_register.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/top_p_sampling_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(top_p_sampling, 19 | iluvatar_gpu, 20 | ALL_LAYOUT, 21 | phi::TopPSamplingKernel, 22 | float, 23 | int, 24 | int64_t, 25 | phi::dtype::float16, 26 | phi::dtype::bfloat16) {} 27 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/kernels/funcs/blas/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | collect_srcs(kernels_srcs SRCS blas.cc) 2 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/setup.py.in: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Distribution 2 | 3 | packages = [] 4 | package_data = {} 5 | 6 | class BinaryDistribution(Distribution): 7 | def has_ext_modules(self): 8 | return True 9 | 10 | setup( 11 | name = '@CMAKE_PROJECT_NAME@', 12 | version='@PLUGIN_VERSION@', 13 | description='Paddle iluvatar_gpu plugin', 14 | long_description='', 15 | long_description_content_type="text/markdown", 16 | author_email="Paddle-better@baidu.com", 17 | maintainer="PaddlePaddle", 18 | maintainer_email="Paddle-better@baidu.com", 19 | project_urls={}, 20 | license='Apache Software License', 21 | packages= [ 22 | 'paddle_custom_device', 23 | ], 24 | include_package_data=True, 25 | package_data = { 26 | '': ['*.so', '*.h', '*.py', '*.hpp'], 27 | }, 28 | package_dir = { 29 | '': 'python', 30 | }, 31 | zip_safe=False, 32 | distclass=BinaryDistribution, 33 | entry_points={ 34 | 'console_scripts': [ 35 | ] 36 | }, 37 | classifiers=[ 38 | ], 39 | keywords='Paddle iluvatar_gpu plugin', 40 | ) 41 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 
cmake_minimum_required(VERSION 3.14) 2 | project(MyProject) 3 | 4 | find_package(Python REQUIRED COMPONENTS Interpreter) 5 | 6 | add_custom_target( 7 | run_test 8 | COMMAND pytest ${CMAKE_SOURCE_DIR}/unittests/ 9 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 10 | COMMENT "Running unit tests with pytest") 11 | 12 | set(CMAKE_DEFAULT_MAKE_TARGET run_test) 13 | -------------------------------------------------------------------------------- /backends/iluvatar_gpu/tests/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # Add Paddle's test/legacy_test helpers to PYTHONPATH, resolved relative to this script (repo root is three levels up) rather than a developer-specific absolute path. 17 | export PYTHONPATH=${PYTHONPATH}:$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/Paddle/test/legacy_test 18 | mkdir -p build && cd build && cmake ..
19 | make run_test 20 | cd - 21 | rm -rf build 22 | -------------------------------------------------------------------------------- /backends/intel_gpu/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../../../Paddle/cmake/dummy.c.in -------------------------------------------------------------------------------- /backends/intel_gpu/cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/gflags.cmake -------------------------------------------------------------------------------- /backends/intel_gpu/cmake/external/glog.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/glog.cmake -------------------------------------------------------------------------------- /backends/intel_gpu/cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/gtest.cmake -------------------------------------------------------------------------------- /backends/intel_gpu/cmake/external/onednn.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/onednn.cmake -------------------------------------------------------------------------------- /backends/intel_gpu/cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/pybind11.cmake -------------------------------------------------------------------------------- /backends/intel_gpu/cmake/generic.cmake: -------------------------------------------------------------------------------- 1 | ../../../Paddle/cmake/generic.cmake -------------------------------------------------------------------------------- /backends/intel_gpu/load.sh: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Environment helper for the intel_gpu backend: extends PYTHONPATH and sources the oneAPI component environments. 16 | # PaddleDev is the directory two levels above the current working directory, so the result depends on where this is run from. 17 | PaddleDev=$(dirname "$(dirname "$(pwd)")") 18 | 19 | echo "$PaddleDev" 20 | export PYTHONPATH=$PYTHONPATH:${PaddleDev}/python/tests/ 21 | 22 | # oneAPI components whose env/vars.sh is sourced below (looked up under ${HOME}/intel/oneapi). 23 | comp="dnnl tbb compiler" 24 | 25 | for item in $comp; 26 | do 27 | # Print each vars.sh path, then source it into the current shell. 28 | P="${HOME}/intel/oneapi/$item/latest/env/vars.sh" 29 | echo "$P" 30 | source "$P" 31 | done 32 | -------------------------------------------------------------------------------- /backends/intel_gpu/setup.py.in: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Distribution 2 | 3 | packages = [] 4 | package_data = {} 5 | 6 | class BinaryDistribution(Distribution): 7 | def has_ext_modules(self): 8 | return True 9 | 10 | setup( 11 | name = '@CMAKE_PROJECT_NAME@', 12 | version='@PLUGIN_VERSION@', 13 | description='Paddle intel_gpu plugin', 14 | long_description='', 15 | long_description_content_type="text/markdown", 16 | author_email="Paddle-better@baidu.com", 17 | maintainer="PaddlePaddle", 18 | maintainer_email="Paddle-better@baidu.com", 19 | project_urls={}, 20 | license='Apache Software License', 21 | packages= [ 22 | 'paddle_custom_device', 23 | ], 24 | include_package_data=True, 25 | package_data = { 26 | '': ['*.so', '*.h', '*.py', '*.hpp'], 27 | }, 
28 | package_dir = { 29 | '': 'python', 30 | }, 31 | zip_safe=False, 32 | distclass=BinaryDistribution, 33 | entry_points={ 34 | 'console_scripts': [ 35 | ] 36 | }, 37 | classifiers=[ 38 | ], 39 | keywords='Paddle intel_gpu plugin', 40 | ) 41 | -------------------------------------------------------------------------------- /backends/intel_gpu/tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | file( 16 | GLOB TEST_OPS 17 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 18 | "test_*.py") 19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") 20 | 21 | foreach(TEST_OP ${TEST_OPS}) 22 | py_test_modules(${TEST_OP} MODULES ${TEST_OP}) 23 | endforeach() 24 | -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../../../cmake/dummy.c.in -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/external/custom_tpc_lib.cmake: -------------------------------------------------------------------------------- 1 | set(DOWNLOAD_URL "https://paddle-ci.cdn.bcebos.com/libcustom_tpc_perf_lib.so") 2 | set(TARGET_DIR "${CMAKE_BINARY_DIR}/python/paddle_custom_device/intel_hpu") 3 | set(TARGET_PATH "${TARGET_DIR}/libcustom_tpc_perf_lib.so") 4 | 5 | file(MAKE_DIRECTORY ${TARGET_DIR}) 6 | file(DOWNLOAD ${DOWNLOAD_URL} ${TARGET_PATH} STATUS download_status) 7 | 8 | list(GET download_status 0 download_success) 9 | if(NOT (download_success EQUAL 0)) 10 | message(FATAL_ERROR "Failed to download ${DOWNLOAD_URL} to ${TARGET_PATH}") 11 | endif() 12 | 13 | message(STATUS "Downloaded ${DOWNLOAD_URL} to ${TARGET_PATH}") 14 | -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gflags.cmake -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/external/glog.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/glog.cmake 
-------------------------------------------------------------------------------- /backends/intel_hpu/cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gtest.cmake -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/external/onednn.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/onednn.cmake -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/pybind11.cmake -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/external/synapse.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License. 
14 | execute_process( 15 | COMMAND dpkg-query --show --showformat='\${Version}' habanalabs-graph 16 | OUTPUT_VARIABLE SYNAPSE_VERSION 17 | OUTPUT_STRIP_TRAILING_WHITESPACE) 18 | 19 | if(SYNAPSE_VERSION) 20 | message(STATUS "Synapse version: ${SYNAPSE_VERSION}") 21 | else() 22 | message(FATAL_ERROR "Synapse is not installed.") 23 | endif() 24 | -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/generic.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/generic.cmake -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/paddle.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/paddle.cmake -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/third_party.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/third_party.cmake -------------------------------------------------------------------------------- /backends/intel_hpu/cmake/version.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/version.cmake -------------------------------------------------------------------------------- /backends/intel_hpu/custom_ops/python/paddlenlp_ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from paddle_custom_device.intel_hpu.ops import * # noqa 16 | from .layers import * # noqa 17 | from .llama_block_atten import * # noqa 18 | -------------------------------------------------------------------------------- /backends/intel_hpu/tests/ccl/allgather.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import paddle 16 | import paddle.distributed as dist 17 | 18 | dist.init_parallel_env() 19 | tensor_list = [] 20 | if dist.get_rank() == 0: 21 | data = paddle.to_tensor([[4, 5, 6], [4, 5, 6]], dtype="float32") 22 | else: 23 | data = paddle.to_tensor([[1, 2, 3], [1, 2, 3]], dtype="float32") 24 | dist.all_gather(tensor_list, data) 25 | print(tensor_list) 26 | # [[[4, 5, 6], [4, 5, 6]], [[1, 2, 3], [1, 2, 3]]] (2 GPUs) 27 | -------------------------------------------------------------------------------- /backends/intel_hpu/tests/ccl/allreduce.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import paddle 16 | import paddle.distributed as dist 17 | 18 | paddle.set_device("intel_hpu") 19 | 20 | dist.init_parallel_env() 21 | if dist.get_rank() == 0: 22 | data = paddle.to_tensor([[4, 5, 6], [4, 5, 6]], dtype="float32") 23 | else: 24 | data = paddle.to_tensor([[1, 2, 3], [1, 2, 3]], dtype="float32") 25 | dist.all_reduce(data) 26 | print(data) 27 | -------------------------------------------------------------------------------- /backends/intel_hpu/tests/ccl/readme.md: -------------------------------------------------------------------------------- 1 | 2 | # cmd 3 | 4 | 5 | `INTEL_HPU_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PADDLE_DISTRI_BACKEND=xccl PADDLE_XCCL_BACKEND=intel_hpu python -m paddle.distributed.launch --devices "6,7" --log_level=DEBUG allreduce.py` 6 | -------------------------------------------------------------------------------- /backends/intel_hpu/tests/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 6 | # not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | skip_case_lst = {} 18 | 19 | # when the 'stable' filter is passed down, this list is loaded 20 | # it lists the unstable test cases to skip 21 | skip_case_lst = [ 22 | "test_cast.py", 23 | ] 24 | -------------------------------------------------------------------------------- /backends/intel_hpu/tests/test_softmax_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 4 | # not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import paddle 16 | 17 | paddle.set_device("intel_hpu") 18 | # paddle.set_device("custom_cpu") 19 | 20 | # x = paddle.randn([2, 3, 4], dtype="bfloat16") 21 | x = paddle.to_tensor( 22 | [ 23 | [[2.0, 3.0, 4.0, 5.0], [3.0, 4.0, 5.0, 6.0], [7.0, 8.0, 8.0, 9.0]], 24 | [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [6.0, 7.0, 8.0, 9.0]], 25 | ], 26 | dtype="float32", 27 | ) 28 | 29 | m = paddle.nn.Softmax() 30 | 31 | out = m(x) 32 | out = m(out) 33 | out = m(out) 34 | 35 | print(x.shape) 36 | print(out.shape) 37 | print(x) 38 | print(out) 39 | -------------------------------------------------------------------------------- /backends/intel_hpu/tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | file( 16 | GLOB TEST_OPS 17 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 18 | "test_*.py") 19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") 20 | 21 | foreach(TEST_OP ${TEST_OPS}) 22 | py_test_modules(${TEST_OP} MODULES ${TEST_OP}) 23 | endforeach() 24 | -------------------------------------------------------------------------------- /backends/intel_hpu/tests/unittests/util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 4 | # not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from __future__ import print_function, division 16 | 17 | import paddle 18 | 19 | 20 | # switch Paddle between dynamic mode (0) and static mode (any other value) 21 | def enable_paddle_static_mode(intel_hpus_static_mode): 22 | if int(intel_hpus_static_mode) == 0: # dynamic mode 23 | paddle.disable_static() 24 | else: # static mode 25 | paddle.enable_static() 26 | -------------------------------------------------------------------------------- /backends/intel_hpu/utils/hpu_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you may 4 | // not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | -------------------------------------------------------------------------------- /backends/metax_gpu/README.md: -------------------------------------------------------------------------------- 1 | # PaddlePaddle Custom Device Implementation for METAX GPU 2 | 3 | English | [简体中文](./README_cn.md) 4 | 5 | Please refer to the following steps to compile, install and verify the custom device implementation for METAX GPU.
6 | 7 | ## Install Paddle 8 | 9 | python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ 10 | 11 | ## Compile and Install 12 | 13 | ```bash 14 | # Acquire Metax PaddlePaddle Docker Image 15 | 16 | # Clone PaddleCustomDevice source code 17 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice 18 | 19 | # Compile Source Code 20 | cd backends/metax_gpu 21 | bash build.sh 22 | 23 | # Install PaddlePaddle 24 | # bash install_paddle.sh 25 | ``` 26 | 27 | ## Verification 28 | 29 | ```bash 30 | # build with BUILD_TEST=1 31 | 32 | # run_test 33 | cd tests 34 | bash run_test.sh 35 | ``` 36 | -------------------------------------------------------------------------------- /backends/metax_gpu/README_cn.md: -------------------------------------------------------------------------------- 1 | # 飞桨自定义接入硬件后端(沐曦GPU) 2 | 3 | 简体中文 | [English](./README.md) 4 | 5 | 请参考以下步骤进行编译安装与验证 6 | 7 | ## 安装paddle-cpu 8 | python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ 9 | 10 | ## 编译安装 11 | 12 | ```bash 13 | # 获取沐曦PaddlePaddle Docker镜像 14 | 15 | # 克隆PaddleCustomDevice源码 16 | git clone https://github.com/PaddlePaddle/PaddleCustomDevice 17 | 18 | # 编译安装 19 | cd backends/metax_gpu 20 | bash build.sh 21 | ``` 22 | 23 | ## 验证 24 | 25 | ```bash 26 | 27 | # 运行测试 28 | cd tests 29 | bash run_test.sh 30 | ``` 31 | -------------------------------------------------------------------------------- /backends/metax_gpu/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | // Generated by @dummy_GENERATOR@. DO NOT EDIT!!! 
2 | 3 | const char *dummy = "@dummy_CONTENT@"; 4 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/as_complex_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | #include "paddle/phi/common/type_traits.h" 16 | #include "paddle/phi/core/kernel_registry.h" 17 | #include "paddle/phi/kernels/as_complex_kernel.h" 18 | #include "paddle/phi/kernels/impl/as_complex_impl.h" 19 | 20 | PD_CUSTOM_KERNEL_REGISTER( 21 | as_complex, metax_gpu, ALL_LAYOUT, phi::AsComplexKernel, float, double) { 22 | kernel->OutputAt(0).SetDataType(phi::dtype::ToComplex(kernel_key.dtype())); 23 | } 24 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/asgd_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 
3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/asgd_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(asgd, 19 | metax_gpu, 20 | ALL_LAYOUT, 21 | phi::ASGDKernel, 22 | phi::dtype::float16, 23 | phi::dtype::bfloat16, 24 | float, 25 | double) {} 26 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/assign_pos_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/assign_pos_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER( 19 | assign_pos, metax_gpu, ALL_LAYOUT, phi::AssignPosKernel, int64_t) {} 20 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/bincount_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/bincount_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(bincount, 18 | metax_gpu, 19 | ALL_LAYOUT, 20 | phi::BincountKernel, 21 | double, 22 | float, 23 | int, 24 | int64_t) { 25 | kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED); 26 | } 27 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/c_embedding_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/c_embedding_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(c_embedding, 18 | metax_gpu, 19 | ALL_LAYOUT, 20 | phi::CEmbeddingKernel, 21 | float, 22 | double, 23 | phi::dtype::float16, 24 | phi::dtype::complex<float>, 25 | phi::dtype::complex<double>) {} 26 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/clip_grad_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/clip_grad_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(clip_grad, 19 | metax_gpu, 20 | ALL_LAYOUT, 21 | phi::ClipGradKernel, 22 | float, 23 | double, 24 | int, 25 | int64_t, 26 | phi::dtype::bfloat16, 27 | phi::dtype::float16) {} 28 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/clip_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/clip_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(clip, 18 | metax_gpu, 19 | ALL_LAYOUT, 20 | phi::ClipKernel, 21 | float, 22 | double, 23 | int, 24 | int64_t, 25 | phi::dtype::float16, 26 | phi::dtype::bfloat16) {} 27 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/index_add_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/index_add_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(index_add, 18 | metax_gpu, 19 | ALL_LAYOUT, 20 | phi::IndexAddKernel, 21 | float, 22 | double, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16, 25 | int, 26 | int64_t) {} 27 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/one_hot_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/one_hot_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER( 19 | one_hot, metax_gpu, ALL_LAYOUT, phi::OneHotKernel, int, int64_t) { 20 | kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32); 21 | } 22 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/p_norm_grad_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/p_norm_grad_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(p_norm_grad, 19 | metax_gpu, 20 | ALL_LAYOUT, 21 | phi::PNormGradKernel, 22 | float, 23 | double, 24 | phi::dtype::float16, 25 | phi::dtype::bfloat16) {} 26 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/p_norm_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/p_norm_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER(p_norm, 18 | metax_gpu, 19 | ALL_LAYOUT, 20 | phi::PNormKernel, 21 | float, 22 | double, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/randint_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/randint_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER( 19 | randint, metax_gpu, ALL_LAYOUT, phi::RandintKernel, int, int64_t) {} 20 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/reshape_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | #include "paddle/phi/core/kernel_registry.h" 15 | #include "paddle/phi/kernels/reshape_kernel.h" 16 | 17 | PD_CUSTOM_KERNEL_REGISTER_FOR_ALL_DTYPE(reshape, 18 | metax_gpu, 19 | ALL_LAYOUT, 20 | phi::ReshapeKernel) {} 21 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/tril_indices_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/tril_indices_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER( 19 | tril_indices, metax_gpu, ALL_LAYOUT, phi::TrilIndicesKernel, int, int64_t) { 20 | } 21 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/cuda_kernels/triu_indices_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights 2 | // Reserved. Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/triu_indices_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER( 19 | triu_indices, metax_gpu, ALL_LAYOUT, phi::TriuIndicesKernel, int, int64_t) { 20 | } 21 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/dynload/cupti_lib_path.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #pragma once 16 | 17 | #define CUPTI_LIB_PATH "/root/cu-bridge/CUDA_DIR/extras/CUPTI/lib64" 18 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/ernie_core/fused_bias_act_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu" //NOLINT 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(fused_bias_act, 19 | metax_gpu, 20 | ALL_LAYOUT, 21 | phi::fusion::FusedBiasActKernel, 22 | float, 23 | phi::dtype::bfloat16, 24 | phi::dtype::float16, 25 | int32_t) {} 26 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/ernie_core/rms_norm_kernel_register.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/phi/core/kernel_registry.h" 16 | #include "paddle/phi/kernels/rms_norm_kernel.h" 17 | 18 | PD_CUSTOM_KERNEL_REGISTER(rms_norm, 19 | metax_gpu, 20 | ALL_LAYOUT, 21 | phi::RmsNormKernel, 22 | float, 23 | phi::dtype::float16, 24 | phi::dtype::bfloat16) {} 25 | -------------------------------------------------------------------------------- /backends/metax_gpu/kernels/flags_declare.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | namespace paddle_flags { 16 | bool FLAGS_cudnn_deterministic = false; 17 | bool FLAGS_embedding_deterministic = false; 18 | bool FLAGS_enable_cublas_tensor_op_math = false; 19 | bool FLAGS_gemm_use_half_precision_compute_type = false; 20 | bool FLAGS_use_fast_math = false; 21 | } // namespace paddle_flags 22 | -------------------------------------------------------------------------------- /backends/metax_gpu/patch/mcEigen_3.4.0_paddle_final.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PaddlePaddle/PaddleCustomDevice/ac0419ccf7f7e2a05717191cbc875ba44860e85a/backends/metax_gpu/patch/mcEigen_3.4.0_paddle_final.zip -------------------------------------------------------------------------------- /backends/metax_gpu/setup.py.in: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Distribution 2 | 3 | packages = [] 4 | package_data = {} 5 | 6 | class BinaryDistribution(Distribution): 7 | def has_ext_modules(self): 8 | return True 9 | 10 | setup( 11 | name = '@CMAKE_PROJECT_NAME@', 12 | version='@PLUGIN_VERSION@', 13 | description='Paddle metax_gpu plugin', 14 | long_description='', 15 | long_description_content_type="text/markdown", 16 | author_email="Paddle-better@baidu.com", 17 | maintainer="PaddlePaddle", 18 | 
maintainer_email="Paddle-better@baidu.com", 19 | project_urls={}, 20 | license='Apache Software License', 21 | packages= [ 22 | 'paddle_custom_device', 23 | ], 24 | include_package_data=True, 25 | package_data = { 26 | '': ['*.so', '*.h', '*.py', '*.hpp'], 27 | }, 28 | package_dir = { 29 | '': 'python', 30 | }, 31 | zip_safe=False, 32 | distclass=BinaryDistribution, 33 | entry_points={ 34 | 'console_scripts': [ 35 | ] 36 | }, 37 | classifiers=[ 38 | ], 39 | keywords='Paddle metax_gpu plugin', 40 | ) 41 | -------------------------------------------------------------------------------- /backends/metax_gpu/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(MyProject) 3 | 4 | find_package(Python REQUIRED COMPONENTS Interpreter) 5 | 6 | add_custom_target( 7 | run_tests 8 | COMMAND pytest ${CMAKE_SOURCE_DIR}/unittest/ 9 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 10 | COMMENT "Running unit tests with pytest") 11 | 12 | set(CMAKE_DEFAULT_MAKE_TARGET run_tests) 13 | -------------------------------------------------------------------------------- /backends/metax_gpu/tests/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # export PATH=/usr/local/corex-4.3.0/bin:$PATH 18 | # export LD_LIBRARY_PATH=/usr/local/corex-4.3.0/lib 19 | # export LIBRARY_PATH=/usr/local/corex-4.3.0/lib 20 | export PYTHONPATH=${PYTHONPATH}:${PADDLE_SOURCE_DIR}/test/legacy_test 21 | 22 | mkdir -p build && cd build && cmake .. 23 | make run_tests 24 | -------------------------------------------------------------------------------- /backends/mlu/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../../../cmake/dummy.c.in -------------------------------------------------------------------------------- /backends/mlu/cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gflags.cmake -------------------------------------------------------------------------------- /backends/mlu/cmake/external/glog.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/glog.cmake -------------------------------------------------------------------------------- /backends/mlu/cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gtest.cmake -------------------------------------------------------------------------------- /backends/mlu/cmake/external/onednn.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/onednn.cmake -------------------------------------------------------------------------------- /backends/mlu/cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/pybind11.cmake -------------------------------------------------------------------------------- /backends/mlu/cmake/generic.cmake: -------------------------------------------------------------------------------- 1 | 
../../../cmake/generic.cmake -------------------------------------------------------------------------------- /backends/mlu/cmake/paddle.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/paddle.cmake -------------------------------------------------------------------------------- /backends/mlu/cmake/third_party.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/third_party.cmake -------------------------------------------------------------------------------- /backends/mlu/cmake/version.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/version.cmake -------------------------------------------------------------------------------- /backends/mlu/tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | file( 16 | GLOB TEST_OPS 17 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 18 | "test_*.py") 19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") 20 | 21 | foreach(TEST_OP ${TEST_OPS}) 22 | py_test_modules(${TEST_OP} MODULES ${TEST_OP}) 23 | endforeach() 24 | -------------------------------------------------------------------------------- /backends/mlu/tests/unittests/test_collective_api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | 17 | from test_parallel_dygraph_mp_layers import TestMultipleCustomDevices 18 | 19 | 20 | class TestProcessGroup(TestMultipleCustomDevices): 21 | def test_process_group_xccl(self): 22 | self.run_mnist_2_custom_devices("process_group_xccl.py", "mlu") 23 | 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /backends/mlu/tests/unittests/test_dygraph_sharding_stage_2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /backends/mlu/tools/disable_ut_mlu: -------------------------------------------------------------------------------- 1 | disable_ut_mlu 2 | test_dygraph_recompute_for_eager 3 | test_parallel_dygraph_mp_layers 4 | test_zero_dim_tensor_mlu 5 | test_collective_api 6 | test_parallel_dygraph_pipeline_parallel 7 | test_set_value_op_mlu 8 | test_dygraph_sharding_stage_3 9 | test_compare_op_mlu 10 | test_kldiv_loss_op_mlu 11 | test_flash_attention_op_mlu 12 | test_adamw_op_mlu 13 | test_rms_norm_op_mlu 14 | test_sync_batch_norm_op_mlu 15 | test_unsqueeze_op_mlu 16 | test_LeNet_MNIST 17 | -------------------------------------------------------------------------------- /backends/mps/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: ObjC 3 | BasedOnStyle: Google 4 | IndentWidth: 2 5 | TabWidth: 2 6 | ContinuationIndentWidth: 4 7 | AccessModifierOffset: -1 # The private/protected/public has no indent in class 8 | Standard: Cpp11 9 | AllowAllParametersOfDeclarationOnNextLine: true 10 | BinPackParameters: false 11 | BinPackArguments: false 12 | ... 
13 | -------------------------------------------------------------------------------- /backends/mps/cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/gflags.cmake -------------------------------------------------------------------------------- /backends/mps/cmake/external/glog.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/glog.cmake -------------------------------------------------------------------------------- /backends/mps/cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../../../Paddle/cmake/external/gtest.cmake -------------------------------------------------------------------------------- /backends/mps/kernels/activation_impl.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include <vector> 18 | 19 | namespace mps_kernel { 20 | 21 | enum ActivationOP { EXP, SIGMOID, SIN, COS }; 22 | 23 | void Activation(const float *x, 24 | float *out, 25 | const std::vector &dims, 26 | ActivationOP op); 27 | 28 | void Pow(const float *x, 29 | float *out, 30 | const std::vector &dims, 31 | float factor); 32 | 33 | } // namespace mps_kernel 34 | -------------------------------------------------------------------------------- /backends/mps/kernels/elementwise_impl.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include <vector> 18 | 19 | namespace mps_kernel { 20 | 21 | enum ElementwiseOP { 22 | ADD, 23 | SUB, 24 | MUL, 25 | DIV, 26 | }; 27 | 28 | void Elementwise(const float *x, 29 | const float *y, 30 | float *out, 31 | const std::vector &dims, 32 | ElementwiseOP op); 33 | 34 | } // namespace mps_kernel 35 | -------------------------------------------------------------------------------- /backends/mps/kernels/matmul_impl.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include <vector> 18 | 19 | namespace mps_kernel { 20 | 21 | void Matmul(const float* x, 22 | const float* y, 23 | float* out, 24 | const std::vector& x_dims, 25 | const std::vector& y_dims, 26 | bool transpose_x, 27 | bool transpose_y); 28 | 29 | } // namespace mps_kernel 30 | -------------------------------------------------------------------------------- /backends/mps/kernels/softmax_impl.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #pragma once 16 | 17 | #include <vector> 18 | 19 | namespace mps_kernel { 20 | 21 | void Softmax(const float *in, 22 | float *out, 23 | const std::vector &dims, 24 | int axis); 25 | void SoftmaxGrad(const float *out, 26 | const float *out_grad, 27 | const std::vector &dims, 28 | int axis, 29 | float *in_grad); 30 | 31 | } // namespace mps_kernel 32 | -------------------------------------------------------------------------------- /backends/mps/runtime/mps_runtime.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #pragma once 16 | 17 | namespace mps { 18 | 19 | bool init_device(void); 20 | 21 | bool alloc_memory(void** ptr, size_t size); 22 | 23 | bool dealloc_memory(void* ptr); 24 | 25 | bool memcpy_d2d(void* dst, const void* src, size_t size); 26 | 27 | bool memcpy_d2h(void* dst, const void* src, size_t size); 28 | 29 | bool memcpy_h2d(void* dst, const void* src, size_t size); 30 | 31 | } // namespace mps 32 | -------------------------------------------------------------------------------- /backends/mps/setup.py.in: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Distribution 2 | 3 | packages = [] 4 | package_data = {} 5 | 6 | class BinaryDistribution(Distribution): 7 | def has_ext_modules(self): 8 | return True 9 | 10 | setup( 11 | name = '@CMAKE_PROJECT_NAME@', 12 | version='@PLUGIN_VERSION@', 13 | description='Paddle MPS plugin', 14 | long_description='', 15 | long_description_content_type="text/markdown", 16 | author_email="Paddle-better@baidu.com", 17 | maintainer="PaddlePaddle", 18 | maintainer_email="Paddle-better@baidu.com", 19 | project_urls={}, 20 | license='Apache Software License', 21 | packages= [ 22 | 'paddle_custom_device', 23 | ], 24 | include_package_data=True, 25 | package_data = { 26 | '': ['*.dylib', '*.h', '*.py', '*.hpp'], 27 | }, 28 | package_dir = { 29 | '': 'python', 30 | }, 31 | zip_safe=False, 32 | distclass=BinaryDistribution, 33 | entry_points={ 34 | 'console_scripts': [ 35 | ] 36 | }, 37 | classifiers=[ 38 | ], 39 | keywords='Paddle MPS plugin', 40 | ) 41 | -------------------------------------------------------------------------------- /backends/mps/tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 4 | # use this file except in compliance with the License. You may obtain a copy of 5 | # the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 | # License for the specific language governing permissions and limitations under 13 | # the License 14 | 15 | file( 16 | GLOB TEST_OPS 17 | RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 18 | "test_*.py") 19 | string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") 20 | 21 | foreach(TEST_OP ${TEST_OPS}) 22 | py_test_modules(${TEST_OP} MODULES ${TEST_OP}) 23 | endforeach() 24 | -------------------------------------------------------------------------------- /backends/npu/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../../../cmake/dummy.c.in -------------------------------------------------------------------------------- /backends/npu/cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gflags.cmake -------------------------------------------------------------------------------- /backends/npu/cmake/external/glog.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/glog.cmake -------------------------------------------------------------------------------- /backends/npu/cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gtest.cmake -------------------------------------------------------------------------------- /backends/npu/cmake/external/onednn.cmake: 
-------------------------------------------------------------------------------- 1 | ../../../../cmake/external/onednn.cmake -------------------------------------------------------------------------------- /backends/npu/cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/pybind11.cmake -------------------------------------------------------------------------------- /backends/npu/cmake/generic.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/generic.cmake -------------------------------------------------------------------------------- /backends/npu/cmake/paddle.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/paddle.cmake -------------------------------------------------------------------------------- /backends/npu/cmake/third_party.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/third_party.cmake -------------------------------------------------------------------------------- /backends/npu/cmake/version.cmake: -------------------------------------------------------------------------------- 1 | ../../../cmake/version.cmake -------------------------------------------------------------------------------- /backends/npu/custom_op/llama_infer/write_cache_kv.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "paddle/extension.h" 16 | 17 | void WriteCacheKV(const paddle::Tensor& input_k, 18 | const paddle::Tensor& input_v, 19 | const paddle::Tensor& cache_kv, 20 | const paddle::Tensor& sequence_lengths_shape) {} 21 | 22 | PD_BUILD_OP(write_cache_kv) 23 | .Inputs({"input_k", "input_v", "cache_kv", "sequence_lengths"}) 24 | .Outputs({"cache_kv_out"}) 25 | .SetInplaceMap({{"cache_kv", "cache_kv_out"}}) 26 | .SetKernelFn(PD_KERNEL(WriteCacheKV)); 27 | -------------------------------------------------------------------------------- /backends/npu/passes/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .common import setUp 16 | from .common import addPasses 17 | -------------------------------------------------------------------------------- /backends/npu/profile/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .cann_export import cann_parse_enabled as cann_parse_enabled 16 | -------------------------------------------------------------------------------- /backends/npu/tests/unittests/test_collective_api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import unittest 16 | 17 | from test_parallel_dygraph_mp_layers import TestMultipleCustomDevices 18 | 19 | 20 | class TestProcessGroup(TestMultipleCustomDevices): 21 | def test_process_group_xccl(self): 22 | self.run_mnist_2_custom_devices("process_group_xccl.py", "npu") 23 | 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /backends/npu/tests/unittests/test_dygraph_sharding_stage_2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /backends/npu/tools/disable_ut_npu: -------------------------------------------------------------------------------- 1 | disable_ut_npu 2 | test_softmax_with_cross_entropy_op_npu 3 | test_parallel_dygraph_mp_layers 4 | test_dygraph_recompute_for_eager 5 | test_slice_op_npu 6 | test_set_value_op_npu 7 | test_dygraph_sharding_stage_3 8 | test_zero_dim_tensor_npu 9 | test_momentum_op_npu 10 | test_elementwise_sub_op_npu 11 | test_index_sample_op_npu 12 | test_flashattention_npu 13 | test_rmsprop_op_npu 14 | test_einsum_op_npu 15 | test_fused_matmul_bias_op_npu 16 | test_set_value_op_npu.py 17 | -------------------------------------------------------------------------------- /backends/npu/tools/disable_ut_npu_910b: -------------------------------------------------------------------------------- 1 | disable_ut_npu 2 | test_check_nan_inf_op_npu 3 | test_conv3d_op_npu 4 | test_elementwise_mod_op_npu 5 | test_matmulv2_op_npu 6 | test_zero_dim_tensor_npu 7 | test_group_norm_op_npu 8 | -------------------------------------------------------------------------------- /backends/npu/tools/important_ut_npu: -------------------------------------------------------------------------------- 1 | test_assign_op_npu_eager 2 | test_bitwise_op_npu 3 | test_concat_op_npu_eager 4 | test_clip_op_npu 5 | test_elementwise_add_op_npu_eager 6 | test_elementwise_div_op_npu_eager 7 | test_elementwise_max_op_npu_eager 8 | test_elementwise_mul_op_npu_eager 9 | test_elementwise_sub_op_npu_eager 10 | test_is_empty_op_npu 11 | test_fill_any_like_op_npu 12 | test_fill_constant_op_npu 13 | test_full_op 14 | test_logical_op_npu 15 | test_lookup_table_v2_op_npu 16 | test_matmulv2_op_npu 17 | test_reduce_max_op_npu_eager 18 | test_reduce_sum_op_npu_eager 19 | test_scale_op_npu_eager 20 | test_slice_op_npu_eager 21 | test_split_op_npu_eager 22 | test_squared_l2_norm_op_npu_eager 23 | test_stack_op_npu_eager 24 | 
test_tril_triu_op_npu 25 | -------------------------------------------------------------------------------- /backends/sdaa/cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../../../Paddle/cmake/dummy.c.in -------------------------------------------------------------------------------- /backends/sdaa/cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gflags.cmake -------------------------------------------------------------------------------- /backends/sdaa/cmake/external/glog.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/glog.cmake -------------------------------------------------------------------------------- /backends/sdaa/cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/gtest.cmake -------------------------------------------------------------------------------- /backends/sdaa/cmake/external/onednn.cmake: -------------------------------------------------------------------------------- 1 | ../../../../cmake/external/onednn.cmake -------------------------------------------------------------------------------- /backends/sdaa/cmake/generic.cmake: -------------------------------------------------------------------------------- 1 | ../../../Paddle/cmake/generic.cmake -------------------------------------------------------------------------------- /backends/sdaa/dynload/dynamic_loader.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #pragma once 16 | 17 | namespace custom_dynload { 18 | #ifndef _WIN32 19 | #define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__)) 20 | #else 21 | #define DECLARE_TYPE(__name, ...) decltype(auto) 22 | #endif 23 | 24 | void* GetSDPTIDsoHandle(); 25 | 26 | } // namespace custom_dynload 27 | -------------------------------------------------------------------------------- /backends/sdaa/pr_ci_sdaa.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | # Install paddle whl 16 | pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/ 17 | 18 | # make PaddleCustomDevice 19 | bash compile.sh 20 | pip install build/dist/*.whl --force-reinstall 21 | 22 | # Test 23 | cd build/tests/unittests 24 | ctest --output-on-failure -j 1 -E "test_highperformance_conv" 25 | -------------------------------------------------------------------------------- /backends/sdaa/sdaac_ops/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10.2) 2 | project(customsdaacops) 3 | 4 | set(CMAKE_C_COMPILER "$ENV{SDAA_ROOT}/bin/tecocc") 5 | set(CMAKE_CXX_COMPILER "$ENV{SDAA_ROOT}/bin/tecocc") 6 | 7 | set(CMAKE_C_FLAGS "-std=c99") 8 | set(CMAKE_CXX_FLAGS "-std=c++17") 9 | 10 | set(SDAAC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ${SDAA_INC}) 11 | 12 | file( 13 | GLOB_RECURSE SDAA_C_OPS 14 | RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} 15 | *.scpp) 16 | 17 | set(complie_options -flto -fPIC) 18 | 19 | add_library(sdaacops_objs OBJECT ${SDAA_C_OPS}) 20 | set_source_files_properties(${SDAA_C_OPS} PROPERTIES LANGUAGE CXX) 21 | set_target_properties(sdaacops_objs PROPERTIES LINKER_LANGUAGE CXX) 22 | target_include_directories(sdaacops_objs PUBLIC ${SDAAC_INCLUDE_DIR}) 23 | target_compile_options(sdaacops_objs PRIVATE ${complie_options}) 24 | add_custom_target( 25 | customsdaacops ALL 26 | COMMAND 27 | tecocc $<TARGET_OBJECTS:sdaacops_objs> -flto -fPIC -shared --sdaa-link 28 | -sdaa-static-lib -fuse-ld=lld -lm -L ${SDAA_LIB} -o 29 | ${CUSTOM_SDAA_C_OPS_LIB}/libcustomsdaacops.a 30 | COMMAND_EXPAND_LISTS) 31 | add_dependencies(customsdaacops sdaacops_objs) 32 | -------------------------------------------------------------------------------- /backends/sdaa/tests/distribution/test_parallel_dygraph_pp_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import unittest 17 | 18 | from test_parallel_dygraph_mp_layers import TestMultipleCustomDevices 19 | 20 | 21 | class TestHybridPipeParallel(TestMultipleCustomDevices): 22 | def test_hybrid_parallel_pp_layer(self): 23 | self.run_mnist_custom_devices( 24 | os.path.abspath("hybrid_parallel_pp_layer.py"), "sdaa" 25 | ) 26 | 27 | 28 | if __name__ == "__main__": 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /backends/sdaa/tests/distribution/test_parallel_dygraph_sep_parallel.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import unittest 16 | 17 | from test_parallel_dygraph_mp_layers import TestMultipleCustomDevices 18 | 19 | 20 | class TestHybridParallel(TestMultipleCustomDevices): 21 | def test_hybrid_parallel_sep_model(self): 22 | self.run_mnist_custom_devices( 23 | "hybrid_parallel_sep_model.py", "sdaa", need_envs={} 24 | ) 25 | 26 | 27 | if __name__ == "__main__": 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /backends/sdaa/tests/runtime/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if("${TEST_CUSTOM_DEVICE_ROOT}" STREQUAL "") 2 | return() 3 | endif() 4 | 5 | add_executable(test_runtime test_runtime.cc) 6 | add_dependencies(test_runtime third_party) 7 | target_link_libraries(test_runtime gtest gtest_main ${SDAA_LIB} ${TECODNN_LIB}) 8 | add_test(test_runtime test_runtime) 9 | 10 | py_test_modules(test_profiler MODULES test_profiler ENVS ENABLE_SDPTI=0) 11 | py_test_modules( 12 | test_profiler_with_kernel MODULES test_profiler_with_kernel ENVS 13 | FLAGS_allocator_strategy=naive_best_fit FLAGS_init_allocated_mem=True) 14 | py_test_modules( 15 | test_profiler_with_kernel_with_env 16 | MODULES 17 | test_profiler_with_kernel 18 | ENVS 19 | FLAGS_allocator_strategy=naive_best_fit 20 | FLAGS_init_allocated_mem=True 21 | FLAGS_sdaa_error_check=True 22 | ENABLE_SDPTI=1) 23 | -------------------------------------------------------------------------------- /cmake/dummy.c.in: -------------------------------------------------------------------------------- 1 | ../Paddle/cmake/dummy.c.in -------------------------------------------------------------------------------- /cmake/external/gflags.cmake: -------------------------------------------------------------------------------- 1 | ../../Paddle/cmake/external/gflags.cmake -------------------------------------------------------------------------------- /cmake/external/glog.cmake: 
-------------------------------------------------------------------------------- 1 | ../../Paddle/cmake/external/glog.cmake -------------------------------------------------------------------------------- /cmake/external/gtest.cmake: -------------------------------------------------------------------------------- 1 | ../../Paddle/cmake/external/gtest.cmake -------------------------------------------------------------------------------- /cmake/external/onednn.cmake: -------------------------------------------------------------------------------- 1 | ../../Paddle/cmake/external/onednn.cmake -------------------------------------------------------------------------------- /cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- 1 | ../../Paddle/cmake/external/pybind11.cmake -------------------------------------------------------------------------------- /cmake/generic.cmake: -------------------------------------------------------------------------------- 1 | ../Paddle/cmake/generic.cmake -------------------------------------------------------------------------------- /cmake/version.cmake: -------------------------------------------------------------------------------- 1 | ../Paddle/cmake/version.cmake -------------------------------------------------------------------------------- /python/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/tests/auto_parallel_op_test.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/test/legacy_test/auto_parallel_op_test.py -------------------------------------------------------------------------------- /python/tests/config.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/test/legacy_test/config.py -------------------------------------------------------------------------------- /python/tests/convert.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/test/rnn/convert.py -------------------------------------------------------------------------------- /python/tests/op.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/test/legacy_test/op.py -------------------------------------------------------------------------------- /python/tests/op_test.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/test/legacy_test/op_test.py -------------------------------------------------------------------------------- /python/tests/prim_op_test.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/test/legacy_test/prim_op_test.py -------------------------------------------------------------------------------- /python/tests/rnn_numpy.py: 
-------------------------------------------------------------------------------- 1 | ../../Paddle/test/rnn/rnn_numpy.py -------------------------------------------------------------------------------- /python/tests/testsuite.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/test/legacy_test/testsuite.py -------------------------------------------------------------------------------- /python/tests/utils.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/test/legacy_test/utils.py -------------------------------------------------------------------------------- /python/tests/white_list/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # TODO(duanyanhui): 16 | # Devices differ in accuracy, so we need to build a different white_list for 17 | # each device. For example, Ascend does not support int64_t and double 18 | # very well. The cast of data type will bring errors. We need to put that 19 | # kernel in the op_threshold_white_list. 20 | 21 | # Next, we will build a white_list for each device and put it under backends. 
22 | -------------------------------------------------------------------------------- /python/tests/white_list/check_shape_white_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | NEED_TO_FIX_OP_LIST = [ 16 | "pow", 17 | "hard_swish", 18 | "fused_elemwise_activation", 19 | "bilinear_tensor_product", 20 | "conv2d_transpose", 21 | "depthwise_conv2d_transpose", 22 | "grid_sampler", 23 | "lstmp", 24 | "margin_rank_loss", 25 | "matmul", 26 | "scatter", 27 | "soft_relu", 28 | "squared_l2_distance", 29 | "tree_conv", 30 | "cvm", 31 | "cudnn_lstm", 32 | "rnn", 33 | "multi_dot", 34 | "index_add", 35 | ] 36 | -------------------------------------------------------------------------------- /python/tests/white_list/new_ir_python_api_grad_white_list.py: -------------------------------------------------------------------------------- 1 | ../../../Paddle/test/white_list/new_ir_python_api_grad_white_list.py -------------------------------------------------------------------------------- /python/tools/__init__.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/tools/__init__.py -------------------------------------------------------------------------------- /python/tools/static_mode_white_list.py: 
-------------------------------------------------------------------------------- 1 | ../../Paddle/tools/static_mode_white_list.py -------------------------------------------------------------------------------- /python/tools/test_runner.py: -------------------------------------------------------------------------------- 1 | ../../Paddle/tools/test_runner.py -------------------------------------------------------------------------------- /tools/codestyle/.cmakelintrc: -------------------------------------------------------------------------------- 1 | filter=-readability/wonkycase,-syntax,-convention/filename,-package/stdargs,-whitespace/indent,-whitespace/extra,-linelength,-readability/mixedcase 2 | -------------------------------------------------------------------------------- /tools/codestyle/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /tools/codestyle/clang_format.hook: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | readonly VERSION="13.0.0" 5 | 6 | version=$(clang-format -version) 7 | 8 | if ! [[ $(python -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1$2}') -ge 36 ]]; then 9 | echo "clang-format installation by pip need python version great equal 3.6, 10 | please change the default python to higher version." 11 | exit 1 12 | fi 13 | 14 | if ! 
[[ $version == *"$VERSION"* ]]; then 15 | # low version of pip may not have the source of clang-format whl 16 | pip install --upgrade pip 17 | pip install clang-format==13.0.0 18 | fi 19 | 20 | clang-format $@ 21 | -------------------------------------------------------------------------------- /tools/codestyle/cpplint_pre_commit.hook: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | readonly VERSION="1.6.0" 4 | 5 | version=$(cpplint --version) 6 | 7 | if ! [[ $version == *"$VERSION"* ]]; then 8 | pip install cpplint==1.6.0 9 | fi 10 | 11 | cpplint $@ 12 | -------------------------------------------------------------------------------- /tools/codestyle/pylint_pre_commit.hook: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TOTAL_ERRORS=0 4 | 5 | 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | export PYTHONPATH=$DIR:$PYTHONPATH 8 | 9 | readonly VERSION="2.12.0" 10 | version=$(pylint --version | grep 'pylint') 11 | 12 | if ! [[ $version == *"$VERSION"* ]]; then 13 | pip install pylint==2.12.0 14 | fi 15 | 16 | # The trick to remove deleted files: https://stackoverflow.com/a/2413151 17 | for file in $(git diff --name-status | awk '$1 != "D" {print $2}'); do 18 | # skip submodule of Paddle check 19 | if ! [[ $file == "Paddle" ]]; then 20 | pylint --disable=all --load-plugins=docstring_checker \ 21 | --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises $file; 22 | TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); 23 | fi 24 | done 25 | 26 | exit $TOTAL_ERRORS 27 | #For now, just warning: 28 | #exit 0 29 | --------------------------------------------------------------------------------