├── .clang-format ├── .github ├── CODEOWNERS ├── ci │ └── fix-exit-multi-npu.patch └── workflows │ ├── format.yaml │ └── main.yml ├── .gitignore ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CODE_OF_CONDUCT_cn.md ├── Contributors.md ├── LICENSE ├── README.md ├── assets └── dlinfer_arch.png ├── benchmark ├── profile_generation.py └── profile_restful_api.py ├── cmake ├── FindATB.cmake ├── FindCANNToolkit.cmake ├── FindTorch_npu.cmake └── ascend.cmake ├── dlinfer ├── __init__.py ├── framework │ ├── __init__.py │ ├── lmdeploy_ext │ │ ├── __init__.py │ │ ├── cudagraph │ │ │ ├── __init__.py │ │ │ ├── camb_cudagraph.py │ │ │ └── maca_cudagraph.py │ │ ├── device │ │ │ ├── __init__.py │ │ │ ├── ascend.py │ │ │ └── camb.py │ │ ├── dynamo │ │ │ └── graph_mode_patch.py │ │ └── quants │ │ │ ├── __init__.py │ │ │ └── ascend_awq.py │ └── transformers_ext │ │ ├── __init__.py │ │ ├── cogvlm.py │ │ ├── internlm2.py │ │ ├── internvl.py │ │ └── patch.py ├── graph │ ├── __init__.py │ ├── custom_op.py │ └── dicp │ │ ├── __init__.py │ │ ├── dynamo_bridge │ │ ├── __init__.py │ │ ├── compile.py │ │ ├── compile_fx.py │ │ ├── conversion.py │ │ ├── decompositions.py │ │ ├── graph.py │ │ ├── op_transformer.py │ │ ├── operator.py │ │ ├── pt_patch.py │ │ ├── torch_version.py │ │ └── utils.py │ │ └── vendor │ │ ├── AtbGraph │ │ ├── __init__.py │ │ ├── atb_op.py │ │ ├── codegen │ │ │ ├── __init__.py │ │ │ ├── atb.py │ │ │ ├── atb_graph.py │ │ │ ├── atb_infer_param.py │ │ │ ├── atb_op.py │ │ │ ├── load_and_run.py │ │ │ ├── runtime │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── dicp_model.cpp │ │ │ │ ├── dicp_model.h │ │ │ │ ├── model.cpp │ │ │ │ ├── model.h │ │ │ │ ├── ops │ │ │ │ │ ├── aclnn_ops │ │ │ │ │ │ ├── acl_nn_operation.cpp │ │ │ │ │ │ ├── acl_nn_operation.h │ │ │ │ │ │ ├── add_operation.cpp │ │ │ │ │ │ ├── add_operation.h │ │ │ │ │ │ ├── add_rms_norm_operation.cpp │ │ │ │ │ │ ├── add_rms_norm_operation.h │ │ │ │ │ │ ├── adds_operation.cpp │ │ │ │ │ │ ├── adds_operation.h │ │ │ │ │ │ ├── 
arange_operation.cpp │ │ │ │ │ │ ├── arange_operation.h │ │ │ │ │ │ ├── batch_matmul_operation.cpp │ │ │ │ │ │ ├── batch_matmul_operation.h │ │ │ │ │ │ ├── bincount_operation.cpp │ │ │ │ │ │ ├── bincount_operation.h │ │ │ │ │ │ ├── bitwise_not_operation.cpp │ │ │ │ │ │ ├── bitwise_not_operation.h │ │ │ │ │ │ ├── cast_operation.cpp │ │ │ │ │ │ ├── cast_operation.h │ │ │ │ │ │ ├── cat_operation.cpp │ │ │ │ │ │ ├── cat_operation.h │ │ │ │ │ │ ├── cumsum_operation.cpp │ │ │ │ │ │ ├── cumsum_operation.h │ │ │ │ │ │ ├── div_operation.cpp │ │ │ │ │ │ ├── div_operation.h │ │ │ │ │ │ ├── divs_operation.cpp │ │ │ │ │ │ ├── divs_operation.h │ │ │ │ │ │ ├── dynamic_quant_operation.cpp │ │ │ │ │ │ ├── dynamic_quant_operation.h │ │ │ │ │ │ ├── expand_operation.cpp │ │ │ │ │ │ ├── expand_operation.h │ │ │ │ │ │ ├── gather_operation.cpp │ │ │ │ │ │ ├── gather_operation.h │ │ │ │ │ │ ├── ge_scalar_operation.cpp │ │ │ │ │ │ ├── ge_scalar_operation.h │ │ │ │ │ │ ├── grouped_matmul_operation.cpp │ │ │ │ │ │ ├── grouped_matmul_operation.h │ │ │ │ │ │ ├── gt_scalar_operation.cpp │ │ │ │ │ │ ├── gt_scalar_operation.h │ │ │ │ │ │ ├── index_select_operation.cpp │ │ │ │ │ │ ├── index_select_operation.h │ │ │ │ │ │ ├── inplace_copy_operation.cpp │ │ │ │ │ │ ├── inplace_copy_operation.h │ │ │ │ │ │ ├── inplace_div_operation.cpp │ │ │ │ │ │ ├── inplace_div_operation.h │ │ │ │ │ │ ├── inplace_index_copy_operation.cpp │ │ │ │ │ │ ├── inplace_index_copy_operation.h │ │ │ │ │ │ ├── inplace_masked_fill_scalar_operation.cpp │ │ │ │ │ │ ├── inplace_masked_fill_scalar_operation.h │ │ │ │ │ │ ├── inplace_scatter_operation.cpp │ │ │ │ │ │ ├── inplace_scatter_operation.h │ │ │ │ │ │ ├── max_operation.cpp │ │ │ │ │ │ ├── max_operation.h │ │ │ │ │ │ ├── moe_finalize_routing_operation.cpp │ │ │ │ │ │ ├── moe_finalize_routing_operation.h │ │ │ │ │ │ ├── moe_gating_topk_softmax.cpp │ │ │ │ │ │ ├── moe_gating_topk_softmax.h │ │ │ │ │ │ ├── moe_init_routing_operation.cpp │ │ │ │ │ │ ├── 
moe_init_routing_operation.h │ │ │ │ │ │ ├── moe_token_permute_operation.cpp │ │ │ │ │ │ ├── moe_token_permute_operation.h │ │ │ │ │ │ ├── moe_token_unpermute_operation.cpp │ │ │ │ │ │ ├── moe_token_unpermute_operation.h │ │ │ │ │ │ ├── mul_operation.cpp │ │ │ │ │ │ ├── mul_operation.h │ │ │ │ │ │ ├── muls_operation.cpp │ │ │ │ │ │ ├── muls_operation.h │ │ │ │ │ │ ├── permute_operation.cpp │ │ │ │ │ │ ├── permute_operation.h │ │ │ │ │ │ ├── pow_tensor_scalar_operation.cpp │ │ │ │ │ │ ├── pow_tensor_scalar_operation.h │ │ │ │ │ │ ├── pow_tensor_tensor_operation.cpp │ │ │ │ │ │ ├── pow_tensor_tensor_operation.h │ │ │ │ │ │ ├── quant_matmul_operation.cpp │ │ │ │ │ │ ├── quant_matmul_operation.h │ │ │ │ │ │ ├── reciprocal_operation.cpp │ │ │ │ │ │ ├── reciprocal_operation.h │ │ │ │ │ │ ├── reduce_sum_operation.cpp │ │ │ │ │ │ ├── reduce_sum_operation.h │ │ │ │ │ │ ├── s_where_operation.cpp │ │ │ │ │ │ ├── s_where_operation.h │ │ │ │ │ │ ├── scatter_value_operation.cpp │ │ │ │ │ │ ├── scatter_value_operation.h │ │ │ │ │ │ ├── slice_operation.cpp │ │ │ │ │ │ ├── slice_operation.h │ │ │ │ │ │ ├── softmax_operation.cpp │ │ │ │ │ │ ├── softmax_operation.h │ │ │ │ │ │ ├── split_with_size_operation.cpp │ │ │ │ │ │ ├── split_with_size_operation.h │ │ │ │ │ │ ├── sub_operation.cpp │ │ │ │ │ │ ├── sub_operation.h │ │ │ │ │ │ ├── subs_operation.cpp │ │ │ │ │ │ ├── subs_operation.h │ │ │ │ │ │ ├── topk_operation.cpp │ │ │ │ │ │ └── topk_operation.h │ │ │ │ │ ├── atb_ops │ │ │ │ │ │ ├── activation.cpp │ │ │ │ │ │ ├── allreduce.cpp │ │ │ │ │ │ ├── atb_ops.h │ │ │ │ │ │ ├── concat.cpp │ │ │ │ │ │ ├── elewise.cpp │ │ │ │ │ │ ├── gather.cpp │ │ │ │ │ │ ├── linear.cpp │ │ │ │ │ │ ├── linear_parallel.cpp │ │ │ │ │ │ ├── paged_attention.cpp │ │ │ │ │ │ ├── reduce.cpp │ │ │ │ │ │ ├── reshape_and_cache.cpp │ │ │ │ │ │ ├── rms_norm.cpp │ │ │ │ │ │ ├── rope.cpp │ │ │ │ │ │ ├── self_attention.cpp │ │ │ │ │ │ ├── slice.cpp │ │ │ │ │ │ ├── softmax.cpp │ │ │ │ │ │ ├── sort.cpp │ │ │ │ │ │ ├── 
split.cpp │ │ │ │ │ │ ├── transdata.cpp │ │ │ │ │ │ └── transpose.cpp │ │ │ │ │ ├── custom_ops │ │ │ │ │ │ ├── masked_fill_scalar_operation.cpp │ │ │ │ │ │ ├── masked_fill_scalar_operation.h │ │ │ │ │ │ ├── new_empty_operation.cpp │ │ │ │ │ │ ├── new_empty_operation.h │ │ │ │ │ │ ├── prepare_moe_operation.cpp │ │ │ │ │ │ ├── prepare_moe_operation.h │ │ │ │ │ │ ├── renormalize_operation.cpp │ │ │ │ │ │ ├── renormalize_operation.h │ │ │ │ │ │ ├── reshape_operation.cpp │ │ │ │ │ │ ├── reshape_operation.h │ │ │ │ │ │ ├── scalar_tensor_operaion.cpp │ │ │ │ │ │ ├── scalar_tensor_operation.h │ │ │ │ │ │ ├── slice_scatter_operation.cpp │ │ │ │ │ │ ├── slice_scatter_operation.h │ │ │ │ │ │ ├── squeeze_operation.cpp │ │ │ │ │ │ ├── squeeze_operation.h │ │ │ │ │ │ ├── unsqueeze_operation.cpp │ │ │ │ │ │ ├── unsqueeze_operation.h │ │ │ │ │ │ ├── view_operation.cpp │ │ │ │ │ │ ├── view_operation.h │ │ │ │ │ │ ├── zeros_like_operation.cpp │ │ │ │ │ │ ├── zeros_like_operation.h │ │ │ │ │ │ ├── zeros_operation.cpp │ │ │ │ │ │ └── zeros_operation.h │ │ │ │ │ ├── operation_creator.cpp │ │ │ │ │ └── operation_creator.h │ │ │ │ ├── third_party │ │ │ │ │ ├── half │ │ │ │ │ │ └── include │ │ │ │ │ │ │ └── half.hpp │ │ │ │ │ ├── json │ │ │ │ │ │ └── single_include │ │ │ │ │ │ │ └── nlohmann │ │ │ │ │ │ │ ├── json.hpp │ │ │ │ │ │ │ └── json_fwd.hpp │ │ │ │ │ └── spdlog │ │ │ │ │ │ └── include │ │ │ │ │ │ └── spdlog │ │ │ │ │ │ ├── async.h │ │ │ │ │ │ ├── async_logger-inl.h │ │ │ │ │ │ ├── async_logger.h │ │ │ │ │ │ ├── cfg │ │ │ │ │ │ ├── argv.h │ │ │ │ │ │ ├── env.h │ │ │ │ │ │ ├── helpers-inl.h │ │ │ │ │ │ └── helpers.h │ │ │ │ │ │ ├── common-inl.h │ │ │ │ │ │ ├── common.h │ │ │ │ │ │ ├── details │ │ │ │ │ │ ├── backtracer-inl.h │ │ │ │ │ │ ├── backtracer.h │ │ │ │ │ │ ├── circular_q.h │ │ │ │ │ │ ├── console_globals.h │ │ │ │ │ │ ├── file_helper-inl.h │ │ │ │ │ │ ├── file_helper.h │ │ │ │ │ │ ├── fmt_helper.h │ │ │ │ │ │ ├── log_msg-inl.h │ │ │ │ │ │ ├── log_msg.h │ │ │ │ │ │ ├── 
log_msg_buffer-inl.h │ │ │ │ │ │ ├── log_msg_buffer.h │ │ │ │ │ │ ├── mpmc_blocking_q.h │ │ │ │ │ │ ├── null_mutex.h │ │ │ │ │ │ ├── os-inl.h │ │ │ │ │ │ ├── os.h │ │ │ │ │ │ ├── periodic_worker-inl.h │ │ │ │ │ │ ├── periodic_worker.h │ │ │ │ │ │ ├── registry-inl.h │ │ │ │ │ │ ├── registry.h │ │ │ │ │ │ ├── synchronous_factory.h │ │ │ │ │ │ ├── tcp_client-windows.h │ │ │ │ │ │ ├── tcp_client.h │ │ │ │ │ │ ├── thread_pool-inl.h │ │ │ │ │ │ ├── thread_pool.h │ │ │ │ │ │ ├── udp_client-windows.h │ │ │ │ │ │ ├── udp_client.h │ │ │ │ │ │ └── windows_include.h │ │ │ │ │ │ ├── fmt │ │ │ │ │ │ ├── bin_to_hex.h │ │ │ │ │ │ ├── bundled │ │ │ │ │ │ │ ├── args.h │ │ │ │ │ │ │ ├── chrono.h │ │ │ │ │ │ │ ├── color.h │ │ │ │ │ │ │ ├── compile.h │ │ │ │ │ │ │ ├── core.h │ │ │ │ │ │ │ ├── fmt.license.rst │ │ │ │ │ │ │ ├── format-inl.h │ │ │ │ │ │ │ ├── format.h │ │ │ │ │ │ │ ├── locale.h │ │ │ │ │ │ │ ├── os.h │ │ │ │ │ │ │ ├── ostream.h │ │ │ │ │ │ │ ├── printf.h │ │ │ │ │ │ │ ├── ranges.h │ │ │ │ │ │ │ ├── std.h │ │ │ │ │ │ │ └── xchar.h │ │ │ │ │ │ ├── chrono.h │ │ │ │ │ │ ├── compile.h │ │ │ │ │ │ ├── fmt.h │ │ │ │ │ │ ├── ostr.h │ │ │ │ │ │ ├── ranges.h │ │ │ │ │ │ ├── std.h │ │ │ │ │ │ └── xchar.h │ │ │ │ │ │ ├── formatter.h │ │ │ │ │ │ ├── fwd.h │ │ │ │ │ │ ├── logger-inl.h │ │ │ │ │ │ ├── logger.h │ │ │ │ │ │ ├── mdc.h │ │ │ │ │ │ ├── pattern_formatter-inl.h │ │ │ │ │ │ ├── pattern_formatter.h │ │ │ │ │ │ ├── sinks │ │ │ │ │ │ ├── android_sink.h │ │ │ │ │ │ ├── ansicolor_sink-inl.h │ │ │ │ │ │ ├── ansicolor_sink.h │ │ │ │ │ │ ├── base_sink-inl.h │ │ │ │ │ │ ├── base_sink.h │ │ │ │ │ │ ├── basic_file_sink-inl.h │ │ │ │ │ │ ├── basic_file_sink.h │ │ │ │ │ │ ├── callback_sink.h │ │ │ │ │ │ ├── daily_file_sink.h │ │ │ │ │ │ ├── dist_sink.h │ │ │ │ │ │ ├── dup_filter_sink.h │ │ │ │ │ │ ├── hourly_file_sink.h │ │ │ │ │ │ ├── kafka_sink.h │ │ │ │ │ │ ├── mongo_sink.h │ │ │ │ │ │ ├── msvc_sink.h │ │ │ │ │ │ ├── null_sink.h │ │ │ │ │ │ ├── ostream_sink.h │ │ │ │ │ │ ├── qt_sinks.h │ 
│ │ │ │ │ ├── ringbuffer_sink.h │ │ │ │ │ │ ├── rotating_file_sink-inl.h │ │ │ │ │ │ ├── rotating_file_sink.h │ │ │ │ │ │ ├── sink-inl.h │ │ │ │ │ │ ├── sink.h │ │ │ │ │ │ ├── stdout_color_sinks-inl.h │ │ │ │ │ │ ├── stdout_color_sinks.h │ │ │ │ │ │ ├── stdout_sinks-inl.h │ │ │ │ │ │ ├── stdout_sinks.h │ │ │ │ │ │ ├── syslog_sink.h │ │ │ │ │ │ ├── systemd_sink.h │ │ │ │ │ │ ├── tcp_sink.h │ │ │ │ │ │ ├── udp_sink.h │ │ │ │ │ │ ├── win_eventlog_sink.h │ │ │ │ │ │ ├── wincolor_sink-inl.h │ │ │ │ │ │ └── wincolor_sink.h │ │ │ │ │ │ ├── spdlog-inl.h │ │ │ │ │ │ ├── spdlog.h │ │ │ │ │ │ ├── stopwatch.h │ │ │ │ │ │ ├── tweakme.h │ │ │ │ │ │ └── version.h │ │ │ │ └── utils │ │ │ │ │ ├── common.cpp │ │ │ │ │ ├── common.h │ │ │ │ │ ├── config.cpp │ │ │ │ │ ├── config.h │ │ │ │ │ ├── global_dict.cpp │ │ │ │ │ ├── global_dict.h │ │ │ │ │ ├── log.h │ │ │ │ │ ├── misc.cpp │ │ │ │ │ ├── misc.h │ │ │ │ │ ├── operation_util.h │ │ │ │ │ ├── scalar.cpp │ │ │ │ │ ├── scalar.h │ │ │ │ │ ├── tensor_utils.cpp │ │ │ │ │ ├── tensor_utils.h │ │ │ │ │ ├── timer.h │ │ │ │ │ ├── workspace.cpp │ │ │ │ │ └── workspace.h │ │ │ └── utils.py │ │ ├── compile_job.py │ │ ├── config.py │ │ ├── conversion.py │ │ ├── ext_ops.py │ │ ├── infer_res_utils.py │ │ ├── opset_convert.py │ │ └── pattern_replacement.py │ │ ├── CMakeLists.txt │ │ └── __init__.py ├── ops │ ├── __init__.py │ └── llm.py ├── utils │ ├── __init__.py │ ├── config.py │ ├── registry.py │ └── type_annotation.py └── vendor │ ├── __init__.py │ ├── ascend │ ├── CMakeLists.txt │ ├── __init__.py │ ├── csrc │ │ ├── ascend_ops.hpp │ │ ├── flash_attention.cpp │ │ ├── init.cpp │ │ ├── moe_gating_topk_softmax.cpp │ │ ├── op_api_common.cpp │ │ ├── op_api_common.hpp │ │ ├── torch_npu_symbol_fix.cpp │ │ ├── torch_npu_utils.cpp │ │ └── torch_npu_utils.hpp │ ├── pytorch_patch.py │ ├── torch_npu_ops.py │ └── utils.py │ ├── camb │ ├── CMakeLists.txt │ ├── __init__.py │ ├── camb_ops.py │ └── pytorch_patch.py │ └── maca │ ├── CMakeLists.txt │ ├── __init__.py 
│ ├── context_flashattention.py │ ├── csrc │ ├── CMakeLists.txt │ ├── activation_kernels.cu │ ├── attention │ │ ├── attention_dtypes.h │ │ ├── attention_generic.cuh │ │ ├── attention_kernels.cu │ │ ├── attention_utils.cuh │ │ ├── dtype_bfloat16.cuh │ │ ├── dtype_float16.cuh │ │ ├── dtype_float32.cuh │ │ └── dtype_fp8.cuh │ ├── cache.h │ ├── cache_kernels.cu │ ├── cuda_compat.h │ ├── dispatch_utils.h │ ├── layernorm_kernels.cu │ ├── moe │ │ ├── moe_ops.h │ │ └── topk_softmax_kernels.cu │ ├── moe_align_block_size_kernels.cu │ ├── ops.h │ ├── pos_encoding_kernels.cu │ ├── pybind.cpp │ ├── quantization │ │ └── fp8 │ │ │ ├── amd │ │ │ ├── hip_float8.h │ │ │ ├── hip_float8_impl.h │ │ │ └── quant_utils.cuh │ │ │ ├── common.cu │ │ │ ├── fp8_marlin.cu │ │ │ └── nvidia │ │ │ └── quant_utils.cuh │ └── reduction_utils.cuh │ └── maca_ops.py ├── docs └── quant │ ├── ascend_kv_quant.md │ └── ascend_scales_offsets.py ├── pyproject.toml ├── requirements ├── ascend │ ├── build.txt │ ├── cann.txt │ ├── full.txt │ ├── runtime.txt │ └── torch.txt ├── camb │ ├── build.txt │ ├── full.txt │ ├── runtime.txt │ └── torch.txt └── maca │ ├── build.txt │ ├── full.txt │ ├── runtime.txt │ └── torch.txt ├── run_format.sh ├── scripts ├── build_wheel.sh └── build_wheel_allpy.sh ├── setup.py └── tests ├── readme.md └── test_lmdeploy ├── __init__.py ├── e2e ├── __init__.py ├── config.yaml ├── conftest.py ├── prompt_case.yaml ├── pytest.ini ├── test_model_tp1.py └── test_model_tp2.py ├── scripts └── test_model_tp2.sh └── utils ├── __init__.py ├── config_utils.py ├── pipeline_chat.py └── rule_condition_assert.py /.clang-format: -------------------------------------------------------------------------------- 1 | # This file is used by clang-format to autoformat dlinfer source code 2 | # 3 | # The clang-format is part of llvm toolchain. 4 | # It need to install llvm and clang to format source code style. 
5 | # 6 | # The basic usage is, 7 | # clang-format -i -style=file PATH/TO/SOURCE/CODE 8 | # 9 | # The -style=file implicit use ".clang-format" file located in one of 10 | # parent directory. 11 | # The -i means inplace change. 12 | # 13 | # The document of clang-format is 14 | # http://clang.llvm.org/docs/ClangFormat.html 15 | # http://clang.llvm.org/docs/ClangFormatStyleOptions.html 16 | --- 17 | Language: Cpp 18 | BasedOnStyle: Google 19 | IndentWidth: 4 20 | TabWidth: 4 21 | ContinuationIndentWidth: 4 22 | AccessModifierOffset: -4 # The private/protected/public has no indent in class 23 | Standard: Cpp11 24 | AllowAllParametersOfDeclarationOnNextLine: true 25 | BinPackParameters: true 26 | BinPackArguments: false 27 | BreakAfterAttributes: Leave 28 | ColumnLimit: 160 29 | DerivePointerAlignment: false 30 | PointerAlignment: Left 31 | ReferenceAlignment: Left 32 | InsertNewlineAtEOF: true 33 | SpacesBeforeTrailingComments: 2 34 | IncludeIsMainSourceRegex: (\.cu)$ 35 | IncludeCategories: 36 | - Regex: '^<.*\.h(pp)?>' 37 | Priority: 1 38 | - Regex: '^<.*' 39 | Priority: 2 40 | - Regex: '.*' 41 | Priority: 3 42 | ... 43 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # IMPORTANT: 2 | # This file is ONLY used to merge PRs. Approvals from people in this file are required for merging. 3 | # 4 | # WARNING: The last matching pattern takes the most precedence and OVERWRITES previous rules. 5 | # Please be very careful when adding new patterns. 
6 | 7 | # ---------- base ---------- 8 | * @jinminxi104 9 | 10 | # ---------- ci ---------- 11 | /cmake/ @jinminxi104 @CyCle1024 12 | 13 | # ---------- ci ---------- 14 | /tests/ @jinminxi104 @wugeshui 15 | -------------------------------------------------------------------------------- /.github/workflows/format.yaml: -------------------------------------------------------------------------------- 1 | name: dlinfer format ci 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | branches: 7 | - main 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | markdownlint: 14 | runs-on: ubuntu-latest 15 | if: github.repository == 'DeepLink-org/dlinfer' 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 16 21 | - name: Collect changed files 22 | uses: tj-actions/changed-files@v40 23 | id: changed-files 24 | with: 25 | files: '**/*.md' 26 | separator: ',' 27 | - name: MarkdownLint 28 | if: steps.changed-files.outputs.any_changed == 'true' 29 | uses: DavidAnson/markdownlint-cli2-action@v14 30 | with: 31 | globs: ${{ steps.changed-files.outputs.all_changed_files }} 32 | separator: ',' 33 | 34 | clang-format: 35 | needs: markdownlint 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: cpp-linter/cpp-linter-action@v2 40 | id: cpp-lint 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.CI_TOKEN}} 43 | with: 44 | style: file 45 | ignore: 'dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/*' 46 | tidy-checks: '-*' # disable clang tidy at this stage 47 | version: 17 48 | - name: Fail test 49 | if: steps.cpp-lint.outputs.checks-failed > 0 50 | run: echo "Some files failed the linting checks!" 
&& exit 1 51 | 52 | python-black: 53 | needs: markdownlint 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/checkout@v4 57 | - uses: psf/black@stable 58 | env: 59 | GITHUB_TOKEN: ${{ secrets.CI_TOKEN}} 60 | with: # see: https://black.readthedocs.io/en/stable/getting_started.html 61 | version: "~= 24.3.0" 62 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(dlinfer LANGUAGES CXX) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | 8 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 9 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) 10 | 11 | # the default CMAKE_BUILD_TYPE is Release 12 | if(NOT CMAKE_BUILD_TYPE) 13 | set(CMAKE_BUILD_TYPE "Release") 14 | endif() 15 | 16 | set(DEVICE "" CACHE STRING "device string, default empty string") 17 | string(TOLOWER "${DEVICE}" DEVICE) 18 | 19 | list(APPEND SUPPORTED_DEVICE "ascend" "maca" "camb") 20 | 21 | if(NOT DEVICE) 22 | message(FATAL_ERROR "Please specify variable DEVICE of dlinfer!") 23 | elseif(NOT DEVICE IN_LIST SUPPORTED_DEVICE) 24 | message(FATAL_ERROR "Device ${DEVICE} is not supported! 
Supported devices: ${SUPPORTED_DEVICE}") 25 | endif() 26 | 27 | add_subdirectory(dlinfer/vendor/${DEVICE}) 28 | add_subdirectory(dlinfer/graph/dicp/vendor) 29 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT_cn.md: -------------------------------------------------------------------------------- 1 | # 参与者公约 2 | 3 | ## 我们的保证 4 | 5 | 为了促进一个开放透明且友好的环境,我们作为贡献者和维护者保证:无论年龄、种族、民族、性别认同和表达(方式)、体型、身体健全与否、经验水平、国籍、个人表现、宗教或性别取向,参与者在我们项目和社区中都免于骚扰。 6 | 7 | ## 我们的标准 8 | 9 | 有助于创造正面环境的行为包括但不限于: 10 | * 使用友好和包容性语言 11 | * 尊重不同的观点和经历 12 | * 耐心地接受建设性批评 13 | * 关注对社区最有利的事情 14 | * 友善对待其他社区成员 15 | 16 | 身为参与者不能接受的行为包括但不限于: 17 | * 使用与性有关的言语或是图像,以及不受欢迎的性骚扰 18 | * 捣乱/煽动/造谣的行为或进行侮辱/贬损的评论,人身攻击及政治攻击 19 | * 公开或私下的骚扰 20 | * 未经许可地发布他人的个人资料,例如住址或是电子地址 21 | * 其他可以被合理地认定为不恰当或者违反职业操守的行为 22 | 23 | ## 我们的责任 24 | 25 | 项目维护者有责任为「可接受的行为」标准做出诠释,以及对已发生的不被接受的行为采取恰当且公平的纠正措施。 26 | 27 | 项目维护者有权利及责任去删除、编辑、拒绝与本行为标准有所违背的评论(comments)、提交(commits)、代码、wiki 编辑、问题(issues)和其他贡献,以及项目维护者可暂时或永久性的禁止任何他们认为有不适当、威胁、冒犯、有害行为的贡献者。 28 | 29 | ## 使用范围 30 | 31 | 当一个人代表该项目或是其社区时,本行为标准适用于其项目平台和公共平台。 32 | 33 | 代表项目或是社区的情况,举例来说包括使用官方项目的电子邮件地址、通过官方的社区媒体账号发布或线上或线下事件中担任指定代表。 34 | 35 | 该项目的呈现方式可由其项目维护者进行进一步的定义及解释。 36 | 37 | ## 强制执行 38 | 39 | 可以通过peizhilin@pjlab.org.cn,来联系项目团队来举报滥用、骚扰或其他不被接受的行为。 40 | 41 | 任何维护团队认为有必要且适合的所有投诉都将进行审查及调查,并做出相对应的回应。项目小组有对事件回报者有保密的义务。具体执行的方针近一步细节可能会单独公布。 42 | 43 | 没有切实地遵守或是执行本行为标准的项目维护人员,可能会因项目领导人或是其他成员的决定,暂时或是永久地取消其参与资格。 44 | 45 | ## 来源 46 | 47 | 本行为标准改编自[贡献者公约][主页],版本 1.4 48 | 可在此观看https://www.contributor-covenant.org/zh-cn/version/1/4/code-of-conduct.html 49 | 50 | [主页]: https://www.contributor-covenant.org 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, DeepLink 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the 
following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /assets/dlinfer_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/assets/dlinfer_arch.png -------------------------------------------------------------------------------- /cmake/FindATB.cmake: -------------------------------------------------------------------------------- 1 | include(FindPackageHandleStandardArgs) 2 | 3 | if (DEFINED ENV{ATB_HOME_PATH}) 4 | set(ATB_HOME_PATH $ENV{ATB_HOME_PATH} 5 | CACHE STRING "atb default home") 6 | else() 7 | set(ATB_HOME_PATH "/usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_0" 8 | CACHE STRING "atb toolkit default home") 9 | endif() 10 | 11 | # Include directories. 12 | find_path(ATB_INCLUDE_DIRS 13 | NAMES atb/atb_infer.h 14 | PATHS ${ATB_HOME_PATH}/include 15 | ) 16 | 17 | # Library dependencies. 18 | find_library(ATB_LIBRARY 19 | NAMES atb 20 | PATHS ${ATB_HOME_PATH}/lib 21 | ) 22 | set(ATB_LIBRARIES ${ATB_LIBRARY}) 23 | 24 | #TODO (chenchiyu): construct modern cmake target for ATB 25 | message(STATUS "Found ATB: ATB_LIBRARIES: ${ATB_LIBRARIES}, ATB_INCLUDE_DIRS: ${ATB_INCLUDE_DIRS}") 26 | find_package_handle_standard_args(ATB DEFAULT_MSG ATB_LIBRARIES ATB_INCLUDE_DIRS) 27 | -------------------------------------------------------------------------------- /cmake/FindCANNToolkit.cmake: -------------------------------------------------------------------------------- 1 | include(FindPackageHandleStandardArgs) 2 | 3 | if (DEFINED ENV{ASCEND_TOOLKIT_HOME}) 4 | set(ASCEND_TOOLKIT_HOME $ENV{ASCEND_TOOLKIT_HOME} 5 | CACHE STRING "ascend toolkit default home") 6 | else() 7 | set(ASCEND_TOOLKIT_HOME "/usr/local/Ascend/ascend-toolkit/latest" 8 | CACHE STRING "ascend toolkit default home") 9 | endif() 10 | 11 | # Include directories. 
12 | find_path(CANN_INCLUDE_DIRS 13 | NAMES acl/acl.h acl/acl_rt.h hccl/hccl.h 14 | PATHS ${ASCEND_TOOLKIT_HOME}/include 15 | ) 16 | 17 | # Library dependencies. 18 | find_library(HCCL_LIB 19 | NAMES hccl 20 | PATHS ${ASCEND_TOOLKIT_HOME}/lib64 21 | ) 22 | if (HCCL_LIB) 23 | list(APPEND CANN_LIBRARY ${HCCL_LIB}) 24 | else() 25 | message(FATAL_ERROR "libhccl.so not found") 26 | endif() 27 | 28 | find_library(OPAPI_LIB 29 | NAMES opapi 30 | PATHS ${ASCEND_TOOLKIT_HOME}/lib64 31 | ) 32 | if (OPAPI_LIB) 33 | list(APPEND CANN_LIBRARY ${OPAPI_LIB}) 34 | else() 35 | message(FATAL_ERROR "libopapi.so not found") 36 | endif() 37 | 38 | find_library(ASCENDCL_LIB 39 | NAMES ascendcl 40 | PATHS ${ASCEND_TOOLKIT_HOME}/lib64 41 | ) 42 | if (ASCENDCL_LIB) 43 | list(APPEND CANN_LIBRARY ${ASCENDCL_LIB}) 44 | else() 45 | message(FATAL_ERROR "libascendcl.so not found") 46 | endif() 47 | 48 | set(CANN_LIBRARIES ${CANN_LIBRARY}) 49 | 50 | #TODO (chenchiyu): construct modern cmake target for CANNToolkit 51 | message(STATUS "Found CANN Toolkit: CANN_LIBRARIES: ${CANN_LIBRARIES}, CANN_INCLUDE_DIRS: ${CANN_INCLUDE_DIRS}") 52 | find_package_handle_standard_args(CANNToolkit DEFAULT_MSG CANN_LIBRARIES CANN_INCLUDE_DIRS) 53 | -------------------------------------------------------------------------------- /cmake/FindTorch_npu.cmake: -------------------------------------------------------------------------------- 1 | include(FindPackageHandleStandardArgs) 2 | 3 | # Include directories. 4 | find_path(TORCH_NPU_INCLUDE_DIRS NAMES torch_npu/csrc/include/ops.h) 5 | 6 | # Library dependencies. 
7 | find_library(TORCH_NPU_LIBRARY NAMES torch_npu npu_profiler) 8 | 9 | if (CMAKE_SYSTEM_NAME STREQUAL "Linux") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'") 11 | endif() 12 | set(TORCH_NPU_LIBRARIES ${TORCH_NPU_LIBRARY}) 13 | 14 | # torch/csrc/python_headers depends Python.h 15 | find_package(Python COMPONENTS Interpreter Development) 16 | 17 | #TODO (chenchiyu): construct modern cmake target for Torch_npu 18 | message(STATUS "Found Torch_npu: TORCH_NPU_LIBRARY: ${TORCH_NPU_LIBRARY}, TORCH_NPU_INCLUDE_DIRS: ${TORCH_NPU_INCLUDE_DIRS}") 19 | find_package_handle_standard_args(Torch_npu DEFAULT_MSG TORCH_NPU_LIBRARY TORCH_NPU_INCLUDE_DIRS) 20 | -------------------------------------------------------------------------------- /cmake/ascend.cmake: -------------------------------------------------------------------------------- 1 | execute_process( 2 | COMMAND python -c "from torch.utils import cmake_prefix_path; \ 3 | print(cmake_prefix_path + '/Torch', end='')" 4 | OUTPUT_VARIABLE Torch_DIR 5 | ) 6 | 7 | execute_process( 8 | COMMAND python -c "from importlib.metadata import distribution; \ 9 | print(str(distribution('torch_npu').locate_file('torch_npu')), end='')" 10 | OUTPUT_VARIABLE Torch_npu_ROOT 11 | ) 12 | 13 | execute_process( 14 | COMMAND python -c "import torch; \ 15 | print('1' if torch.compiled_with_cxx11_abi() else '0', end='')" 16 | OUTPUT_VARIABLE _GLIBCXX_USE_CXX11_ABI 17 | ) 18 | 19 | execute_process( 20 | COMMAND python -c "import torch; from packaging import version; \ 21 | torch_version = version.parse(torch.__version__).base_version; \ 22 | print('1' if version.parse(torch_version) > version.parse('2.3.1') else '0', end='')" 23 | OUTPUT_VARIABLE Torch_npu_VERSION_HIGHER_THAN_231 24 | ) 25 | 26 | find_package(Torch REQUIRED) 27 | find_package(Torch_npu REQUIRED) 28 | find_package(CANNToolkit REQUIRED) 29 | find_package(ATB) 30 | 
-------------------------------------------------------------------------------- /dlinfer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import dlinfer.vendor as vendor 3 | 4 | vendor.vendor_torch_init() 5 | __version__ = "0.2.0" 6 | -------------------------------------------------------------------------------- /dlinfer/framework/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | -------------------------------------------------------------------------------- /dlinfer/framework/lmdeploy_ext/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import dlinfer.framework.transformers_ext 3 | from . import quants 4 | from . import cudagraph 5 | from . import device 6 | -------------------------------------------------------------------------------- /dlinfer/framework/lmdeploy_ext/cudagraph/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import importlib 3 | from functools import lru_cache 4 | from dlinfer.vendor import vendor_name 5 | 6 | 7 | graph_vendor = ["maca", "camb"] 8 | 9 | 10 | @lru_cache(1) 11 | def import_vendor_module(vendor_name_str): 12 | if vendor_name_str in graph_vendor: 13 | importlib.import_module(f".{vendor_name_str}_cudagraph", __package__) 14 | 15 | 16 | def vendor_graph_init(): 17 | import_vendor_module(vendor_name) 18 | 19 | 20 | vendor_graph_init() 21 | -------------------------------------------------------------------------------- /dlinfer/framework/lmdeploy_ext/device/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 
2 | import importlib 3 | from functools import lru_cache 4 | from dlinfer.vendor import vendor_name 5 | 6 | 7 | vendor = ["camb", "ascend"] 8 | 9 | 10 | @lru_cache(1) 11 | def import_vendor_module(vendor_name_str): 12 | if vendor_name_str in vendor: 13 | importlib.import_module(f".{vendor_name_str}", __package__) 14 | 15 | 16 | def vendor_device_init(): 17 | import_vendor_module(vendor_name) 18 | 19 | 20 | vendor_device_init() 21 | -------------------------------------------------------------------------------- /dlinfer/framework/lmdeploy_ext/quants/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import importlib 3 | from functools import lru_cache 4 | from dlinfer.vendor import vendor_name 5 | 6 | 7 | awq_vendor = ["ascend"] 8 | 9 | 10 | @lru_cache(1) 11 | def import_vendor_module(vendor_name_str): 12 | if vendor_name_str in awq_vendor: 13 | importlib.import_module(f".{vendor_name_str}_awq", __package__) 14 | 15 | 16 | def vendor_quant_init(): 17 | import_vendor_module(vendor_name) 18 | 19 | 20 | vendor_quant_init() 21 | -------------------------------------------------------------------------------- /dlinfer/framework/transformers_ext/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 
2 | import importlib 3 | import os, sys 4 | import typing 5 | from typing import Any, Dict, List, Optional, Union 6 | import transformers 7 | from .patch import apply_model_patches 8 | 9 | 10 | def patched_get_class_in_module(*args, **kwargs) -> typing.Type: 11 | ret_class = transformers_get_class_in_module(*args, **kwargs) 12 | apply_model_patches(importlib.import_module(ret_class.__module__)) 13 | return ret_class 14 | 15 | 16 | transformers_get_class_in_module = transformers.dynamic_module_utils.get_class_in_module 17 | transformers.dynamic_module_utils.get_class_in_module = patched_get_class_in_module 18 | -------------------------------------------------------------------------------- /dlinfer/framework/transformers_ext/cogvlm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import dlinfer.ops as ext_ops 4 | 5 | 6 | def PatchedAttention_forward(self, x: "tensor(B, L, D)") -> "tensor(B, L, D)": 7 | B, L, H = x.shape 8 | qkv = self.query_key_value(x) 9 | qkv = qkv.reshape(B, L, 3, H).permute(2, 0, 1, 3) # 3, B, L, H 10 | q, k, v = qkv[0], qkv[1], qkv[2] 11 | 12 | out = ext_ops.prefill_attention( 13 | q, 14 | k, 15 | v, 16 | None, 17 | None, 18 | L, 19 | self.num_heads, 20 | self.num_heads, 21 | [], 22 | attn_output=q, 23 | ) 24 | output = self.dense(out.view(B, L, -1)) 25 | return output 26 | -------------------------------------------------------------------------------- /dlinfer/framework/transformers_ext/internvl.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 
2 | import torch 3 | import dlinfer.ops as ext_ops 4 | 5 | 6 | def InternAttention_naive_attn(self, x): 7 | B, N, C = x.shape 8 | qkv = self.qkv(x).reshape(B, N, 3, C).permute(2, 0, 1, 3) 9 | q, k, v = qkv.unbind(0) 10 | if self.qk_normalization: 11 | q = self.q_norm(q) 12 | k = self.k_norm(k) 13 | 14 | attn_output = ext_ops.prefill_attention( 15 | q, 16 | k, 17 | v, 18 | None, 19 | None, 20 | N, 21 | self.num_heads, 22 | self.num_heads, 23 | [], 24 | attn_output=q, 25 | ) 26 | 27 | x = self.proj(attn_output.reshape(B, N, C)) 28 | x = self.proj_drop(x) 29 | return x 30 | 31 | 32 | def InternRMSNorm_forward(self, hidden_states): 33 | return ext_ops.rms_norm(hidden_states, self.weight, self.variance_epsilon) 34 | -------------------------------------------------------------------------------- /dlinfer/framework/transformers_ext/patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import transformers 3 | import inspect 4 | 5 | 6 | def apply_model_patches(module): 7 | if module.__name__.endswith(".modeling_internlm2"): 8 | from . import internlm2 9 | 10 | module.InternLM2RMSNorm.forward = ( 11 | internlm2.modeling_internlm2_InternLM2RMSNorm_forward 12 | ) 13 | module.InternLM2Attention.forward = ( 14 | internlm2.modeling_internlm2_InternLM2Attention_forward 15 | ) 16 | module.InternLM2ForCausalLM.prepare_inputs_for_generation = ( 17 | internlm2.modeling_internlm2_InternLM2ForCausalLM_prepare_inputs_for_generation 18 | ) 19 | transformers.cache_utils.DynamicCache.update = ( 20 | internlm2.transformers_cache_utils_dynamiccache_update 21 | ) 22 | elif module.__name__.endswith(".modeling_internvl_chat"): 23 | from . 
# Copyright (c) 2024, DeepLink. All rights reserved.
from dlinfer.utils.config import Config


# Global dlinfer graph-mode switch.  Graph (dicp) compilation is disabled by
# default; integrations (e.g. the lmdeploy extension) flip this flag at setup.
config = Config(enable_graph_mode=False)
class DeviceCompileJob(metaclass=ABCMeta):
    """Abstract description of a single device (vendor) compile job.

    Concrete backends implement :meth:`get_key` (a stable, hashable cache
    key for the job) and :meth:`get_compile_result` (the loaded, runnable
    artifact); ``DeviceKernelCache`` consumes both.

    Fix: the original wrote ``__metaclass__ = ABCMeta`` — Python 2 syntax
    that is inert on Python 3 — so ``@abstractmethod`` was never enforced
    and incomplete subclasses could be instantiated silently.  Declaring
    ``metaclass=ABCMeta`` restores enforcement.  The abstract methods also
    gain the missing ``self`` parameter to match how subclasses define and
    callers invoke them (``device_compile_job.get_key()``).
    """

    def __init__(self):
        pass

    @abstractmethod
    def get_key(self):
        """Return a hashable cache key uniquely identifying this job."""

    @abstractmethod
    def get_compile_result(self):
        """Compile (or load) and return the runnable kernel artifact."""
# Centralized torch-version detection for the dicp dynamo bridge.
import torch
from packaging import version

# base_version strips local/pre-release suffixes (e.g. "2.1.0+cu118" -> "2.1.0")
# so the startswith checks below see a clean release string.
torch_version = version.parse(torch.__version__).base_version

# Exactly one of these flags is set by the chain below; dicp only supports
# the listed torch release lines and refuses to import on anything else.
is_torch_200 = False
is_torch_210 = False
is_torch_220 = False
is_torch_231 = False
is_torch_251 = False

# NOTE(review): the first branch matches any "2.0*" prefix while the later
# ones pin "2.1." / "2.3.1" etc. — presumably intentional, but worth
# confirming against the versions CI actually tests.
if torch_version.startswith("2.0"):
    is_torch_200 = True
elif torch_version.startswith("2.1."):
    is_torch_210 = True
elif torch_version.startswith("2.2."):
    is_torch_220 = True
elif torch_version.startswith("2.3.1"):
    is_torch_231 = True
elif torch_version.startswith("2.5.1"):
    is_torch_251 = True
else:
    raise ValueError(f"unsupported dicp torch version: {torch.__version__}")

# Ordered feature gates used elsewhere in the bridge (e.g. compile.py picks
# the AsyncCompile import location based on is_torch_251_or_higher).
is_torch_210_or_higher = version.parse(torch_version) >= version.parse("2.1")
is_torch_220_or_higher = version.parse(torch_version) >= version.parse("2.2")
is_torch_231_or_higher = version.parse(torch_version) >= version.parse("2.3.1")
is_torch_251_or_higher = version.parse(torch_version) >= version.parse("2.5.1")
class AtbModel:
    """Thin Python wrapper around the DICPModel torch custom class.

    The custom class is registered by the C++ runtime (TORCH_LIBRARY in
    dicp_model.cpp) and executes a serialized ATB graph.
    """

    def __init__(self, model_path) -> None:
        # model_path: path to the serialized graph produced by codegen.
        self.model = torch.classes.DICPModel.DICPModel(model_path)

    @record_function("load_and_run")  # labels this call in torch profiler traces
    def run(self, inputs, outputs, param):
        # Out-variant execution: the runtime writes results into `outputs`
        # (see DICPModel::ExecuteOut in the C++ runtime); nothing is returned.
        self.model.execute_out(inputs, outputs, param)
${CMAKE_CURRENT_SOURCE_DIR}/ops/aclnn_ops/*.cpp 11 | ${CMAKE_CURRENT_SOURCE_DIR}/ops/atb_ops/*.cpp 12 | ${CMAKE_CURRENT_SOURCE_DIR}/ops/custom_ops/*.cpp 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/utils/*.cpp 15 | ) 16 | 17 | set(COMPILE_OPTIONS 18 | -Wno-unused-function 19 | -Wno-unused-variable 20 | -Wno-unused-parameter 21 | -Wno-attributes 22 | -D_GLIBCXX_USE_CXX11_ABI=0 23 | ) 24 | 25 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") 26 | 27 | add_library(dicp_model SHARED ${SOURCES}) 28 | 29 | target_include_directories( 30 | dicp_model PUBLIC 31 | ${THIRD_PARTY_DIR}/json/single_include 32 | ${THIRD_PARTY_DIR}/spdlog/include 33 | ${THIRD_PARTY_DIR}/half/include 34 | ${CMAKE_CURRENT_SOURCE_DIR} 35 | ${TORCH_NPU_INCLUDE_DIRS} 36 | ${CANN_INCLUDE_DIRS} 37 | ${CANN_INCLUDE_DIRS}/aclnn 38 | ${ATB_INCLUDE_DIRS} 39 | ) 40 | 41 | target_compile_options(dicp_model PRIVATE ${COMPILE_OPTIONS}) 42 | 43 | target_link_libraries(dicp_model PUBLIC 44 | Python::Python 45 | torch 46 | ${TORCH_NPU_LIBRARY} 47 | ${CANN_LIBRARY} 48 | ${ATB_LIBRARY} 49 | ) 50 | 51 | file(RELATIVE_PATH OUTPUT_LIB_RELATIVE_PATH "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/../") 52 | install( 53 | TARGETS dicp_model 54 | DESTINATION ${OUTPUT_LIB_RELATIVE_PATH} 55 | ) 56 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/dicp_model.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "dicp_model.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "model.h" 10 | #include "utils/log.h" 11 | #include "utils/misc.h" 12 | #include "utils/tensor_utils.h" 13 | 14 | using namespace dicp; 15 | 16 | DICPModel::DICPModel(const std::string& modelPath) : modelPath_(modelPath) { 17 | modelId_ = utils::GetNewModelId(); 18 | DICP_LOG(INFO) << "DICPModel create start, modelId:" << modelId_ << ", modelPath:" << modelPath_; 19 
| model_ = std::make_shared(std::to_string(modelId_), modelPath); 20 | 21 | atb::Context* rawContext = nullptr; 22 | auto st = atb::CreateContext(&rawContext); 23 | DICP_LOG_IF(st != atb::NO_ERROR, ERROR) << "create atb context failed!"; 24 | context_ = std::move(std::unique_ptr(rawContext, atb::DestroyContext)); 25 | } 26 | 27 | DICPModel::~DICPModel() { context_.reset(); }; 28 | 29 | void DICPModel::ExecuteOut(std::vector atInTensors, std::vector atOutTensors, const std::string& param) { 30 | context_->SetExecuteStream(utils::GetCurrentStream()); 31 | 32 | std::vector inTensors; 33 | tensor_utils::TransferAtTensor2AtbTensor(atInTensors, inTensors); 34 | 35 | std::vector outTensors; 36 | tensor_utils::TransferAtTensor2AtbTensor(atOutTensors, outTensors); 37 | 38 | model_->Execute(context_.get(), inTensors, outTensors, param); 39 | } 40 | 41 | TORCH_LIBRARY(DICPModel, m) { m.class_("DICPModel").def(torch::init()).def("execute_out", &DICPModel::ExecuteOut); } 42 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/dicp_model.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "model.h" 10 | 11 | class DICPModel : public torch::CustomClassHolder { 12 | public: 13 | DICPModel(const std::string& modelPath); 14 | ~DICPModel(); 15 | void ExecuteOut(std::vector atInTensors, std::vector atOutTensors, const std::string& param); 16 | 17 | private: 18 | std::string modelPath_; 19 | std::shared_ptr model_; 20 | int modelId_ = 0; 21 | std::shared_ptr context_; 22 | }; 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/acl_nn_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 
#include 6 | 7 | #include 8 | 9 | #include "atb/operation.h" 10 | #include "ops/operation_creator.h" 11 | #include "utils/log.h" 12 | 13 | namespace dicp { 14 | constexpr size_t SVECTOR_SIZE = 8; 15 | 16 | struct AclNnTensor { 17 | atb::Tensor atbTensor; 18 | aclTensor* tensor = nullptr; 19 | int CreateTensor(const std::string& opName); 20 | int InitTensor(void* executor, const std::string& opName, const size_t index, bool isInput); 21 | }; 22 | 23 | class AclNnOperation : public atb::Operation { 24 | public: 25 | explicit AclNnOperation(const std::string& name); 26 | ~AclNnOperation() override; 27 | std::string GetName() const override; 28 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 29 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 30 | 31 | protected: 32 | aclTensor* CreateAclTensor(const AclNnTensor& aclNnTensor); 33 | atb::Status UpdateAclTensorDataPtr(const atb::VariantPack& variantPack); 34 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 35 | int CreateAclTensors(const atb::VariantPack& variantPack); 36 | std::string opName_; 37 | atb::SVector aclInTensors_; 38 | atb::SVector aclOutTensors_; 39 | aclOpExecutor* aclExecutor_ = nullptr; 40 | 41 | private: 42 | virtual int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) = 0; 43 | virtual int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) = 0; 44 | }; 45 | } // namespace dicp 46 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/add_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnAddOperation : public AclNnOperation { 9 | 
#pragma once
#include "acl_nn_operation.h"

namespace dicp {

// Fused Add + RMSNorm aclnn operation (wraps the aclnnAddRmsNorm kernel
// family via the AclNnOperation workspace/execute hooks).
// NOTE(review): this header was recovered from a lossy extraction — the
// template arguments of atb::SVector (likely atb::TensorDesc) were stripped
// and must be restored against the original source before compiling.
class AclNnAddRmsNormOperation : public AclNnOperation {
public:
    // name: operation instance name; epsilon: RMSNorm stabilizer added to the
    // mean of squares before the reciprocal square root.
    explicit AclNnAddRmsNormOperation(const std::string& name, float epsilon);
    ~AclNnAddRmsNormOperation() override;
    atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override;
    uint32_t GetInputNum() const override;
    uint32_t GetOutputNum() const override;

private:
    float epsilon = 1e-5;  // default used when the constructor value is not applied
    // AclNnOperation hooks: compute workspace size / launch the kernel.
    int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override;
    int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override;
};

}  // namespace dicp
#include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnAddsOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnAddsOperation(const std::string& name, float value, float aplpha, const std::string& dtype); 11 | ~AclNnAddsOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar other_; 18 | DICPScalar alpha_; 19 | aclScalar* aclOther_ = nullptr; 20 | aclScalar* aclAlpha_ = nullptr; 21 | 22 | std::string dtype_; 23 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 24 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 25 | }; 26 | 27 | } // namespace dicp 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/arange_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnArangeOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnArangeOperation(const std::string& name, int64_t start, int64_t end, int64_t step, aclDataType dtype); 10 | ~AclNnArangeOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t start_; 17 | int64_t end_; 18 | int64_t step_; 19 | int64_t sizeArange_; 20 | aclDataType dtype_; 21 | aclScalar* aclStart_ = nullptr; 22 | aclScalar* aclEnd_ = nullptr; 23 | aclScalar* aclStep_ = nullptr; 24 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 25 | int CallAclExecute(uint8_t* workspace, uint64_t 
workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 26 | }; 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/batch_matmul_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnBatchMatMulOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnBatchMatMulOperation(const std::string& name, int8_t cubeMathType); 8 | ~AclNnBatchMatMulOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int8_t cubeMathType = 1; 15 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 16 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 17 | }; 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/bincount_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnBincountOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnBincountOperation(const std::string& name, int64_t minlength); 8 | ~AclNnBincountOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int64_t minlength_; 15 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 16 | int CallAclExecute(uint8_t* 
workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 17 | }; 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/bitwise_not_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnBitwiseNotOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnBitwiseNotOperation(const std::string& name); 8 | ~AclNnBitwiseNotOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/cast_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnCastOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnCastOperation(const std::string& name, aclDataType dtype); 8 | ~AclNnCastOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | aclDataType dtype_; 15 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 16 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* 
aclExecutor, aclrtStream stream) override; 17 | }; 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/cat_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnCatOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnCatOperation(const std::string& name, int32_t inputNum, int32_t concatDim); 8 | ~AclNnCatOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int32_t concatDim = -1; 15 | int32_t inputNum = -1; 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/cumsum_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnCumsumOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnCumsumOperation(const std::string& name, int64_t dim, aclDataType dtype); 8 | ~AclNnCumsumOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int64_t dim_; 15 | aclDataType dtype_; 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* 
workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/div_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class AclNnDivOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnDivOperation(const std::string& name); 10 | ~AclNnDivOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/divs_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class AclNnDivsOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnDivsOperation(const std::string& name, float divisor, const std::string& dtype); 10 | ~AclNnDivsOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | DICPScalar divisor_; 17 | aclScalar* aclDivisor_ = nullptr; 18 | int SetAclNnWorkspaceExecutor(uint64_t& 
workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/dynamic_quant_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnDynamicQuantOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnDynamicQuantOperation(const std::string& name); 8 | ~AclNnDynamicQuantOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/expand_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "acl_nn_operation.h" 6 | 7 | namespace dicp { 8 | class AclNnExpandOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnExpandOperation(const std::string& name, std::vector size); 11 | ~AclNnExpandOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | std::vector size_; 18 | aclIntArray* aclSize_ = nullptr; 19 | bool 
needUpdateSize_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/gather_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "acl_nn_operation.h" 6 | #include "utils/scalar.h" 7 | 8 | namespace dicp { 9 | class AclNnGatherOperation : public AclNnOperation { 10 | public: 11 | explicit AclNnGatherOperation(const std::string& name, int64_t dim); 12 | ~AclNnGatherOperation() override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | private: 18 | int64_t dim_; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/ge_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnGeScalarOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnGeScalarOperation(const std::string& name, float value, const std::string& dtype); 11 | ~AclNnGeScalarOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t 
GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar other_; 18 | aclScalar* aclOther_ = nullptr; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/grouped_matmul_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "acl_nn_operation.h" 7 | 8 | namespace dicp { 9 | 10 | class AclNnGroupedMatmulOperation : public AclNnOperation { 11 | public: 12 | explicit AclNnGroupedMatmulOperation(const std::string& name, int64_t splitItem); 13 | ~AclNnGroupedMatmulOperation() override; 14 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 15 | uint32_t GetInputNum() const override; 16 | uint32_t GetOutputNum() const override; 17 | 18 | private: 19 | int64_t splitItem = 2; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/gt_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnGtScalarOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnGtScalarOperation(const std::string& name, const std::string& value, const 
std::string& dtype); 11 | ~AclNnGtScalarOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar other_; 18 | aclScalar* aclOther_ = nullptr; 19 | bool need_update_value_; 20 | std::string value_; 21 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 22 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 23 | }; 24 | 25 | } // namespace dicp 26 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/index_select_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnIndexSelectOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnIndexSelectOperation(const std::string& name, int64_t dim); 10 | ~AclNnIndexSelectOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t dim_; 17 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 19 | }; 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_copy_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class 
AclNnInplaceCopyOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnInplaceCopyOperation(const std::string& name); 8 | ~AclNnInplaceCopyOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_div_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class AclNnInplaceDivOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnInplaceDivOperation(const std::string& name); 10 | ~AclNnInplaceDivOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_index_copy_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "acl_nn_operation.h" 6 | 7 | namespace dicp { 8 | class 
AclNnInplaceIndexCopyOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnInplaceIndexCopyOperation(const std::string& name, int64_t dim); 11 | ~AclNnInplaceIndexCopyOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | int64_t dim_; 18 | mutable std::vector indexVec_; 19 | mutable aclTensor* index_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_masked_fill_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class AclNnInplaceMaskedFillScalar : public AclNnOperation { 8 | public: 9 | explicit AclNnInplaceMaskedFillScalar(const std::string& name, float value, const std::string& dtype); 10 | ~AclNnInplaceMaskedFillScalar() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | DICPScalar value_; 17 | aclScalar* aclValue_ = nullptr; 18 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- 
/dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_scatter_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "acl_nn_operation.h" 6 | #include "utils/scalar.h" 7 | 8 | namespace dicp { 9 | class AclNnInplaceScatterOperation : public AclNnOperation { 10 | public: 11 | explicit AclNnInplaceScatterOperation(const std::string& name, int64_t dim, int64_t reduceType); 12 | ~AclNnInplaceScatterOperation() override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | private: 18 | int64_t dim_; 19 | int64_t reduceType_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/max_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnMaxOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnMaxOperation(const std::string& name); 8 | ~AclNnMaxOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | 
-------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_finalize_routing_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnMoeFinalizeRoutingOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnMoeFinalizeRoutingOperation(const std::string& name); 10 | ~AclNnMoeFinalizeRoutingOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_gating_topk_softmax.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnMoeGatingTopkSoftmaxOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnMoeGatingTopkSoftmaxOperation(const std::string& name, int64_t topk, int64_t renorm, bool outputSoftmaxResultFlag); 10 | ~AclNnMoeGatingTopkSoftmaxOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t topk_; 17 | int64_t renorm_; 18 | bool outputSoftmaxResultFlag_; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) 
override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_init_routing_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnMoeInitRoutingOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnMoeInitRoutingOperation(const std::string& name, int64_t numExperts); 10 | ~AclNnMoeInitRoutingOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | mutable int64_t activeNum_; 17 | int64_t numExperts_; 18 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_token_permute_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class MoeTokenPermuteOperation : public AclNnOperation { 8 | public: 9 | explicit MoeTokenPermuteOperation(const std::string& name); 10 | ~MoeTokenPermuteOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 
| 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_token_unpermute_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class MoeTokenUnpermuteOperation : public AclNnOperation { 8 | public: 9 | explicit MoeTokenUnpermuteOperation(const std::string& name); 10 | ~MoeTokenUnpermuteOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/mul_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnMulOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnMulOperation(const std::string& name); 11 | ~AclNnMulOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | 
int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 19 | }; 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/muls_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnMulsOperation : public AclNnOperation { 9 | public: 10 | // value might be a SymInt type, we need to get the correct value at runtime. 11 | explicit AclNnMulsOperation(const std::string& name, const std::string& value, const std::string& dtype); 12 | ~AclNnMulsOperation() override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | private: 18 | DICPScalar other_; 19 | aclScalar* aclOther_ = nullptr; 20 | bool need_update_value_; 21 | std::string value_; 22 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 23 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 24 | }; 25 | 26 | } // namespace dicp 27 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/permute_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include "acl_nn_operation.h" 5 | 6 | namespace dicp { 7 | class AclNnPermuteOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnPermuteOperation(const std::string& name, std::vector dims); 10 | ~AclNnPermuteOperation() override; 11 | 
atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | std::vector dims_; 17 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 19 | }; 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/pow_tensor_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnPowTensorScalarOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnPowTensorScalarOperation(const std::string& name, float exponent, const std::string& dtype); 11 | ~AclNnPowTensorScalarOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar exponent_; 18 | aclScalar* aclExponent_; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/pow_tensor_tensor_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnPowTensorTensorOperation : public 
AclNnOperation { 8 | public: 9 | explicit AclNnPowTensorTensorOperation(const std::string& name); 10 | ~AclNnPowTensorTensorOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/quant_matmul_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnQuantMatmulOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnQuantMatmulOperation(const std::string& name, bool hasBias); 8 | ~AclNnQuantMatmulOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | bool hasBias_ = false; 15 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 16 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 17 | }; 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/reciprocal_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnReciprocalOperation : public AclNnOperation { 
6 | public: 7 | explicit AclNnReciprocalOperation(const std::string& name); 8 | ~AclNnReciprocalOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/reduce_sum_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "acl/acl.h" 6 | #include "acl_nn_operation.h" 7 | #include "utils/scalar.h" 8 | namespace dicp { 9 | 10 | class AclNnReduceSumOperation : public AclNnOperation { 11 | public: 12 | explicit AclNnReduceSumOperation(const std::string& name, const std::vector& dims, bool keepDim, const std::string& dtype); 13 | ~AclNnReduceSumOperation() override; 14 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 15 | uint32_t GetInputNum() const override; 16 | uint32_t GetOutputNum() const override; 17 | 18 | private: 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | 22 | private: 23 | std::vector dims_; 24 | aclIntArray* aclDims_ = nullptr; 25 | bool keepDim_; 26 | aclDataType dtype_; 27 | }; 28 | 29 | } // namespace dicp 30 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/s_where_operation.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnSWhereOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnSWhereOperation(const std::string& name); 8 | ~AclNnSWhereOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/scatter_value_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnScatterValueOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnScatterValueOperation(const std::string& name, int64_t dim, float value, const std::string& value_dtype, int64_t reduce); 11 | ~AclNnScatterValueOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | int64_t dim_; 18 | int64_t reduce_; 19 | DICPScalar value_; 20 | aclScalar* aclValue_ = nullptr; 21 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 22 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 23 | }; 24 | 25 | } // namespace dicp 26 | 
-------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/slice_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnSliceOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnSliceOperation(const std::string& name, int64_t dim, int64_t start, int64_t end, int64_t step); 10 | ~AclNnSliceOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t dim_; 17 | int64_t start_; 18 | int64_t end_; 19 | int64_t step_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/softmax_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnSoftmaxOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnSoftmaxOperation(const std::string& name, int64_t dim); 10 | ~AclNnSoftmaxOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t dim_; 17 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* 
aclExecutor, aclrtStream stream) override; 19 | }; 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/split_with_size_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include "acl_nn_operation.h" 5 | 6 | namespace dicp { 7 | class AclNnSplitWithSizeOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnSplitWithSizeOperation(const std::string& name, int64_t splitDim, std::vector splitSizes); 10 | ~AclNnSplitWithSizeOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t splitDim_; 17 | std::vector splitSizes_; 18 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/sub_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnSubOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnSubOperation(const std::string& name, float aplpha, const std::string& dtype); 11 | ~AclNnSubOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar alpha_; 18 | aclScalar* 
aclAlpha_ = nullptr; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/subs_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnSubsOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnSubsOperation(const std::string& name, float value, float aplpha, const std::string& dtype); 11 | ~AclNnSubsOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 19 | 20 | private: 21 | DICPScalar other_; 22 | DICPScalar alpha_; 23 | aclScalar* aclOther_ = nullptr; 24 | aclScalar* aclAlpha_ = nullptr; 25 | }; 26 | 27 | } // namespace dicp 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/topk_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnTopkOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnTopkOperation(const std::string& name, int64_t k, int64_t dim); 10 | ~AclNnTopkOperation() override; 11 | atb::Status InferShape(const atb::SVector& 
inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t k_; 17 | int64_t dim_; 18 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/activation.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* ActivationOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::ActivationParam param; 7 | if (paramJson.contains("activationType")) { 8 | auto value = paramJson["activationType"].get(); 9 | param.activationType = static_cast(value); 10 | } 11 | if (paramJson.contains("scale")) { 12 | param.scale = paramJson["scale"].get(); 13 | } 14 | if (paramJson.contains("dim")) { 15 | param.dim = paramJson["dim"].get(); 16 | } 17 | DICP_LOG(INFO) << "ActivationParam: activationType: " << param.activationType << " scale:" << param.scale << " dim:" << param.dim; 18 | atb::Operation* op = nullptr; 19 | CREATE_OPERATION_NO_RETURN(param, &op); 20 | return op; 21 | } 22 | 23 | REGISTER_ATB_OPERATION("ActivationOperation", ActivationOperationCreate); 24 | 25 | } // namespace dicp 26 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/allreduce.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | namespace dicp { 3 | 4 | atb::Operation* AllReduceOperationCreate(const nlohmann::json& paramJson) { 5 | atb::infer::AllReduceParam param; 6 | if (paramJson.contains("rank")) { 7 
| param.rank = paramJson["rank"].get(); 8 | } 9 | if (paramJson.contains("rankSize")) { 10 | param.rankSize = paramJson["rankSize"].get(); 11 | } 12 | if (paramJson.contains("rankRoot")) { 13 | param.rankRoot = paramJson["rankRoot"].get(); 14 | } 15 | if (paramJson.contains("allReduceType")) { 16 | param.allReduceType = paramJson["allReduceType"].get(); 17 | } 18 | if (paramJson.contains("backend")) { 19 | param.backend = paramJson["backend"].get(); 20 | } 21 | if (paramJson.contains("commMode")) { 22 | auto tmp = paramJson["commMode"].get(); 23 | param.commMode = static_cast(tmp); 24 | } 25 | if (paramJson.contains("commDomain")) { 26 | param.commDomain = paramJson["commDomain"].get(); 27 | } 28 | if (paramJson.contains("rankTableFile")) { 29 | param.rankTableFile = paramJson["rankTableFile"].get(); 30 | } 31 | DICP_LOG(INFO) << "AllReduceParam: rank:" << param.rank << ", rankSize:" << param.rankSize << ", backend:" << param.backend << ", allReduceType" 32 | << param.allReduceType << ". 
commDomain" << param.commDomain << ", rankTableFile" << param.rankTableFile; 33 | atb::Operation* op = nullptr; 34 | 35 | CREATE_OPERATION_NO_RETURN(param, &op); 36 | return op; 37 | } 38 | 39 | REGISTER_ATB_OPERATION("AllReduceOperation", AllReduceOperationCreate); 40 | 41 | } // namespace dicp 42 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/atb_ops.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "atb/infer_op_params.h" 6 | #include "atb/operation.h" 7 | #include "ops/operation_creator.h" 8 | #include "utils/log.h" 9 | #include "utils/operation_util.h" 10 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/concat.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | [[maybe_unused]] atb::Operation* ConcatOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::ConcatParam param; 7 | if (paramJson.contains("concatDim")) { 8 | param.concatDim = paramJson["concatDim"].get(); 9 | } 10 | DICP_LOG(INFO) << "ConcatParam: concatDIm: " << param.concatDim; 11 | atb::Operation* op = nullptr; 12 | CREATE_OPERATION_NO_RETURN(param, &op); 13 | return op; 14 | } 15 | 16 | REGISTER_ATB_OPERATION("ConcatOperation", ConcatOperationCreate); 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/elewise.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* ElewiseOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::ElewiseParam param; 7 | if 
(paramJson.contains("elewiseType")) { 8 | auto tmp = paramJson["elewiseType"].get(); 9 | param.elewiseType = static_cast(tmp); 10 | } 11 | if (paramJson.contains("quantParam")) { 12 | auto quantJson = paramJson["quantParam"]; 13 | atb::infer::ElewiseParam::QuantParam quantParam; 14 | if (quantJson.contains("inputScale")) { 15 | quantParam.inputScale = quantJson["inputScale"].get(); 16 | } 17 | if (quantJson.contains("inputOffset")) { 18 | quantParam.inputOffset = quantJson["inputOffset"].get(); 19 | } 20 | param.quantParam = quantParam; 21 | } 22 | if (paramJson.contains("mulsParam")) { 23 | auto mulsJson = paramJson["mulsParam"]; 24 | atb::infer::ElewiseParam::MulsParam mulsParam; 25 | if (mulsJson.contains("varAttr")) { 26 | mulsParam.varAttr = mulsJson["varAttr"].get(); 27 | } 28 | param.mulsParam = mulsParam; 29 | } 30 | if (paramJson.contains("outTensorType")) { 31 | auto tmp = paramJson["outTensorType"].get(); 32 | param.outTensorType = static_cast(tmp); 33 | } 34 | DICP_LOG(INFO) << "ElewiseParam: elewiseType:" << param.elewiseType << ", outTensorType:" << param.outTensorType; 35 | atb::Operation* op = nullptr; 36 | ; 37 | CREATE_OPERATION_NO_RETURN(param, &op); 38 | return op; 39 | } 40 | 41 | REGISTER_ATB_OPERATION("ElewiseOperation", ElewiseOperationCreate); 42 | 43 | } // namespace dicp 44 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/gather.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* GatherOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::GatherParam param; 7 | if (paramJson.contains("axis")) { 8 | param.axis = paramJson["axis"].get(); 9 | } 10 | DICP_LOG(INFO) << "GatherParam: axis: " << param.axis; 11 | atb::Operation* op = nullptr; 12 | CREATE_OPERATION_NO_RETURN(param, &op); 13 | return op; 14 | } 15 | 16 | 
REGISTER_ATB_OPERATION("GatherOperation", GatherOperationCreate); 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/linear.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* LinearOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::LinearParam param; 7 | if (paramJson.contains("transposeA")) { 8 | param.transposeA = paramJson["transposeA"].get(); 9 | } 10 | if (paramJson.contains("transposeB")) { 11 | param.transposeB = paramJson["transposeB"].get(); 12 | } 13 | if (paramJson.contains("hasBias")) { 14 | param.hasBias = paramJson["hasBias"].get(); 15 | } 16 | if (paramJson.contains("outDataType")) { 17 | param.outDataType = aclDataType(paramJson["outDataType"].get()); 18 | } 19 | DICP_LOG(INFO) << "LinearParam transposeA:" << param.transposeA << ", transposeB:" << param.transposeB << ", hasBias:" << param.hasBias 20 | << ", outDataType:" << param.outDataType; 21 | atb::Operation* op = nullptr; 22 | ; 23 | CREATE_OPERATION_NO_RETURN(param, &op); 24 | return op; 25 | } 26 | 27 | REGISTER_ATB_OPERATION("LinearOperation", LinearOperationCreate); 28 | 29 | } // namespace dicp 30 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/linear_parallel.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | namespace dicp { 3 | 4 | atb::Operation* LinearParallelOperationCreate(const nlohmann::json& paramJson) { 5 | atb::infer::LinearParallelParam param; 6 | if (paramJson.contains("rank")) { 7 | param.rank = paramJson["rank"].get(); 8 | } 9 | if (paramJson.contains("rankSize")) { 10 | param.rankSize = paramJson["rankSize"].get(); 11 | } 12 | if (paramJson.contains("rankRoot")) { 
13 | param.rankRoot = paramJson["rankRoot"].get(); 14 | } 15 | if (paramJson.contains("hasResidual")) { 16 | param.hasResidual = paramJson["hasResidual"].get(); 17 | } 18 | if (paramJson.contains("parallelType")) { 19 | auto type = paramJson["parallelType"].get(); 20 | param.type = static_cast(type); 21 | } 22 | if (paramJson.contains("backend")) { 23 | param.backend = paramJson["backend"].get(); 24 | } 25 | if (paramJson.contains("commDomain")) { 26 | param.commDomain = paramJson["commDomain"].get(); 27 | } 28 | if (paramJson.contains("commMode")) { 29 | auto mode = paramJson["commMode"].get(); 30 | param.commMode = static_cast(mode); 31 | } 32 | if (paramJson.contains("rankTableFile")) { 33 | param.rankTableFile = paramJson["rankTableFile"].get(); 34 | } 35 | DICP_LOG(INFO) << "LinearParallelParam: rank:" << param.rank << ", rankSize:" << param.rankSize << ", outDataType:" << param.outDataType 36 | << " backend:" << param.backend << ", commDomain:" << param.commDomain << ", commMode:" << param.commMode << ", rankTableFile" 37 | << param.rankTableFile; 38 | atb::Operation* op = nullptr; 39 | 40 | CREATE_OPERATION_NO_RETURN(param, &op); 41 | return op; 42 | } 43 | 44 | REGISTER_ATB_OPERATION("LinearParallelOperation", LinearParallelOperationCreate); 45 | 46 | } // namespace dicp 47 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/paged_attention.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* PagedAttentionOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::PagedAttentionParam param; 7 | if (paramJson.contains("headNum")) { 8 | param.headNum = paramJson["headNum"].get(); 9 | } 10 | if (paramJson.contains("qkScale")) { 11 | param.qkScale = paramJson["qkScale"].get(); 12 | } 13 | if (paramJson.contains("kvHeadNum")) { 14 | param.kvHeadNum = 
paramJson["kvHeadNum"].get(); 15 | } 16 | if (paramJson.contains("maskType")) { 17 | auto value = paramJson["maskType"].get(); 18 | param.maskType = static_cast(value); 19 | } 20 | if (paramJson.contains("mlaVHeadSize")) { 21 | param.mlaVHeadSize = paramJson["mlaVHeadSize"].get(); 22 | } 23 | DICP_LOG(INFO) << "PagedAttentionParam: headNum" << param.headNum << " kvHeadNum: " << param.kvHeadNum << " qkScale: " << param.qkScale 24 | << " maskType: " << param.maskType; 25 | atb::Operation* op = nullptr; 26 | CREATE_OPERATION_NO_RETURN(param, &op); 27 | return op; 28 | } 29 | 30 | REGISTER_ATB_OPERATION("PagedAttentionOperation", PagedAttentionOperationCreate); 31 | 32 | } // namespace dicp 33 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/reduce.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | #include "utils/common.h" 3 | 4 | namespace dicp { 5 | 6 | atb::Operation* ReduceOperationCreate(const nlohmann::json& paramJson) { 7 | atb::infer::ReduceParam param; 8 | if (paramJson.contains("reduceType")) { 9 | auto type = paramJson["reduceType"].get(); 10 | param.reduceType = static_cast(type); 11 | } 12 | if (paramJson.contains("axis")) { 13 | auto axis = paramJson["axis"].get>(); 14 | param.axis.resize(axis.size()); 15 | for (size_t i = 0; i < axis.size(); ++i) { 16 | param.axis[i] = axis[i]; 17 | } 18 | } 19 | DICP_LOG(INFO) << "ReduceParam: reduceType: " << param.reduceType << ", axis:" << svectorToString(param.axis); 20 | atb::Operation* op = nullptr; 21 | CREATE_OPERATION_NO_RETURN(param, &op); 22 | return op; 23 | } 24 | 25 | REGISTER_ATB_OPERATION("ReduceOperation", ReduceOperationCreate); 26 | 27 | } // namespace dicp 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/reshape_and_cache.cpp: 
-------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | inline atb::Operation* ReshapeAndCacheOperationCreate([[maybe_unused]] const nlohmann::json& paramJson) { 6 | atb::infer::ReshapeAndCacheParam param; 7 | if (paramJson.contains("KvCacheCfg")) { 8 | auto value = paramJson["KvCacheCfg"].get(); 9 | param.kvCacheCfg = static_cast(value); 10 | } 11 | DICP_LOG(INFO) << "ReshapeAndCacheParam: {}"; 12 | atb::Operation* op = nullptr; 13 | CREATE_OPERATION_NO_RETURN(param, &op); 14 | return op; 15 | } 16 | 17 | REGISTER_ATB_OPERATION("ReshapeAndCacheOperation", ReshapeAndCacheOperationCreate); 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/rope.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* RopeOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::RopeParam param; 7 | if (paramJson.contains("rotaryCoeff")) { 8 | param.rotaryCoeff = paramJson["rotaryCoeff"].get(); 9 | } 10 | if (paramJson.contains("cosFormat")) { 11 | param.cosFormat = paramJson["cosFormat"].get(); 12 | } 13 | DICP_LOG(INFO) << "RopeParam: rotaryCoeff:" << param.rotaryCoeff << ", cosFormat:" << param.cosFormat; 14 | atb::Operation* op = nullptr; 15 | CREATE_OPERATION_NO_RETURN(param, &op); 16 | return op; 17 | } 18 | 19 | REGISTER_ATB_OPERATION("RopeOperation", RopeOperationCreate); 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/slice.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | #include "utils/common.h" 3 | 4 | namespace dicp { 5 | 6 | atb::Operation* 
SliceOperationCreate(const nlohmann::json& paramJson) { 7 | atb::infer::SliceParam param; 8 | if (paramJson.contains("offsets")) { 9 | auto tmp = paramJson["offsets"].get>(); 10 | param.offsets.resize(tmp.size()); 11 | for (size_t i = 0; i < tmp.size(); ++i) { 12 | param.offsets[i] = tmp[i]; 13 | } 14 | } 15 | if (paramJson.contains("size")) { 16 | auto tmp = paramJson["size"].get>(); 17 | param.size.resize(tmp.size()); 18 | for (size_t i = 0; i < tmp.size(); ++i) { 19 | param.size[i] = tmp[i]; 20 | } 21 | } 22 | 23 | DICP_LOG(INFO) << "SliceParam: offsets:" << svectorToString(param.offsets) << ", size:" << svectorToString(param.size); 24 | atb::Operation* op = nullptr; 25 | 26 | CREATE_OPERATION_NO_RETURN(param, &op); 27 | return op; 28 | } 29 | 30 | REGISTER_ATB_OPERATION("SliceOperation", SliceOperationCreate); 31 | 32 | } // namespace dicp 33 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/softmax.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* SoftmaxOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::SoftmaxParam param; 7 | if (paramJson.contains("axes")) { 8 | auto tmp = paramJson["axes"].get>(); 9 | param.axes.resize(tmp.size()); 10 | for (size_t i = 0; i < tmp.size(); ++i) { 11 | param.axes[i] = tmp[i]; 12 | } 13 | } 14 | DICP_LOG(INFO) << "SoftmaxParam: axes.size:" << param.axes.size() << " axes0: " << param.axes[0]; 15 | atb::Operation* op = nullptr; 16 | CREATE_OPERATION_NO_RETURN(param, &op); 17 | return op; 18 | } 19 | 20 | REGISTER_ATB_OPERATION("SoftmaxOperation", SoftmaxOperationCreate); 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/sort.cpp: 
-------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* SortOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::SortParam param; 7 | if (paramJson.contains("num")) { 8 | auto tmp = paramJson["num"].get(); 9 | param.num.resize(1); 10 | param.num[0] = tmp; 11 | } 12 | DICP_LOG(INFO) << "SortParam: topk:" << param.num[0]; 13 | atb::Operation* op = nullptr; 14 | CREATE_OPERATION_NO_RETURN(param, &op); 15 | return op; 16 | } 17 | 18 | REGISTER_ATB_OPERATION("SortOperation", SortOperationCreate); 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/split.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* SplitOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::SplitParam param; 7 | if (paramJson.contains("splitDim")) { 8 | param.splitDim = paramJson["splitDim"].get(); 9 | } 10 | if (paramJson.contains("splitNum")) { 11 | param.splitNum = paramJson["splitNum"].get(); 12 | } 13 | DICP_LOG(INFO) << "SplitParam: splitDim: " << param.splitDim << " splitNum: " << param.splitNum; 14 | atb::Operation* op = nullptr; 15 | CREATE_OPERATION_NO_RETURN(param, &op); 16 | return op; 17 | } 18 | 19 | REGISTER_ATB_OPERATION("SplitOperation", SplitOperationCreate); 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/transdata.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* TransdataOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::TransdataParam param; 7 | if 
(paramJson.contains("transdataType")) { 8 | auto value = paramJson["transdataType"].get(); 9 | param.transdataType = static_cast(value); 10 | } 11 | DICP_LOG(INFO) << "TransdataParam: transdataType: " << param.transdataType; 12 | atb::Operation* op = nullptr; 13 | CREATE_OPERATION_NO_RETURN(param, &op); 14 | return op; 15 | } 16 | 17 | REGISTER_ATB_OPERATION("TransdataOperation", TransdataOperationCreate); 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/transpose.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* TransposeOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::TransposeParam param; 7 | if (paramJson.contains("perm")) { 8 | auto tmp = paramJson["perm"].get>(); 9 | param.perm.resize(tmp.size()); 10 | for (unsigned int i = 0; i < tmp.size(); ++i) { 11 | param.perm[i] = tmp[i]; 12 | } 13 | } 14 | DICP_LOG(INFO) << "TransposeParam: perm: " << param.perm; 15 | atb::Operation* op = nullptr; 16 | CREATE_OPERATION_NO_RETURN(param, &op); 17 | return op; 18 | } 19 | 20 | REGISTER_ATB_OPERATION("TransposeOperation", TransposeOperationCreate); 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/masked_fill_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class MaskedFillScalarOperation : public atb::Operation { 8 | public: 9 | explicit MaskedFillScalarOperation(const std::string& name, float value, const std::string& dtype); 10 | ~MaskedFillScalarOperation() override; 11 | std::string GetName() const 
override; 12 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 13 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 17 | 18 | private: 19 | aclTensor* CreateAclTensor(const AclNnTensor& aclNnTensor); 20 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 21 | int CreateAclTensors(const atb::VariantPack& variantPack); 22 | 23 | private: 24 | std::string opName_; 25 | DICPScalar value_; 26 | aclScalar* aclValue_ = nullptr; 27 | DICPScalar one_; 28 | aclScalar* aclOne_ = nullptr; 29 | aclOpExecutor* aclMulsExecutor_ = nullptr; 30 | aclOpExecutor* aclInplaceMaskedFillScalarExecutor_ = nullptr; 31 | uint64_t mulsWorkspaceSize_ = 0; 32 | uint64_t inplaceMaskedFillScalarWorkspaceSize_ = 0; 33 | 34 | private: 35 | atb::SVector aclInTensors_; 36 | atb::SVector aclOutTensors_; 37 | }; 38 | 39 | } // namespace dicp 40 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/new_empty_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class NewEmptyOperation : public AclNnOperation { 8 | public: 9 | explicit NewEmptyOperation(const std::string& name, const std::vector& size); 10 | ~NewEmptyOperation() override; 11 | std::string GetName() const override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | std::string opName_; 18 | 
std::vector size_; 19 | std::unordered_map dynamic_size_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/prepare_moe_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "ops/aclnn_ops/acl_nn_operation.h" 6 | 7 | namespace dicp { 8 | 9 | class PrepareMoeOperation : public atb::Operation { 10 | public: 11 | explicit PrepareMoeOperation(const std::string& name, int64_t numExperts); 12 | ~PrepareMoeOperation() override; 13 | 14 | std::string GetName() const override; 15 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 16 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 17 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 18 | uint32_t GetInputNum() const override; 19 | uint32_t GetOutputNum() const override; 20 | 21 | protected: 22 | std::string opName_; 23 | int64_t numExperts_; 24 | int64_t topk_; 25 | int64_t seqLength_; 26 | 27 | aclOpExecutor* aclArangeExecutor_ = nullptr; 28 | aclOpExecutor* aclPermuteExecutor_ = nullptr; 29 | aclOpExecutor* aclBincountExecutor_ = nullptr; 30 | aclOpExecutor* aclCumsumExecutor_ = nullptr; 31 | 32 | uint64_t arangeWorkspaceSize_ = 0; 33 | uint64_t permuteWorkspaceSize_ = 0; 34 | uint64_t bincountWorkspaceSize_ = 0; 35 | uint64_t cumsumWorkspaceSize_ = 0; 36 | 37 | private: 38 | atb::SVector aclInTensors_; 39 | atb::SVector aclOutTensors_; 40 | 41 | aclScalar* aclStart_ = nullptr; 42 | aclScalar* aclEnd_ = 
nullptr; 43 | aclScalar* aclStep_ = nullptr; 44 | 45 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 46 | int CreateAclTensors(const atb::VariantPack& variantPack); 47 | }; 48 | 49 | } // namespace dicp 50 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/renormalize_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class RenormalizeOperation : public atb::Operation { 8 | public: 9 | explicit RenormalizeOperation(const std::string& name, int64_t dim); 10 | ~RenormalizeOperation() override; 11 | 12 | std::string GetName() const override; 13 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 14 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 15 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 16 | uint32_t GetInputNum() const override; 17 | uint32_t GetOutputNum() const override; 18 | 19 | protected: 20 | std::string opName_; 21 | int64_t dim_; 22 | 23 | aclOpExecutor* aclReduceSumExecutor_ = nullptr; 24 | aclOpExecutor* aclDivExecutor_ = nullptr; 25 | 26 | uint64_t reduceSumWorkspaceSize_ = 0; 27 | uint64_t divWorkspaceSize_ = 0; 28 | 29 | private: 30 | atb::SVector aclInTensors_; 31 | atb::SVector aclOutTensors_; 32 | 33 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 34 | int CreateAclTensors(const atb::VariantPack& variantPack); 35 | }; 36 | 37 | } // namespace dicp 38 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/reshape_operation.cpp: 
-------------------------------------------------------------------------------- 1 | #include "reshape_operation.h" 2 | 3 | #include "utils/log.h" 4 | 5 | namespace dicp { 6 | 7 | const int NUM1 = 1; 8 | 9 | ReshapeOperation::ReshapeOperation(const std::string& name) : opName_(name) {} 10 | 11 | std::string ReshapeOperation::GetName() const { return opName_; } 12 | 13 | atb::Status ReshapeOperation::Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) { return atb::NO_ERROR; } 14 | 15 | atb::Status ReshapeOperation::Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) { 16 | return atb::NO_ERROR; 17 | } 18 | 19 | uint32_t ReshapeOperation::GetInputNum() const { return NUM1; } 20 | 21 | uint32_t ReshapeOperation::GetOutputNum() const { return NUM1; } 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/reshape_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "atb/operation.h" 6 | 7 | namespace dicp { 8 | 9 | class ReshapeOperation : public atb::Operation { 10 | public: 11 | explicit ReshapeOperation(const std::string& name); 12 | ~ReshapeOperation(){}; 13 | std::string GetName() const override; 14 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 15 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 16 | uint32_t GetInputNum() const override; 17 | uint32_t GetOutputNum() const override; 18 | 19 | protected: 20 | std::string opName_; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- 
/dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/scalar_tensor_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ops/aclnn_ops/acl_nn_operation.h" 8 | #include "utils/scalar.h" 9 | 10 | namespace dicp { 11 | 12 | class ScalarTensorOperation : public atb::Operation { 13 | public: 14 | explicit ScalarTensorOperation(const std::string& name, float value, const std::string& dtype); 15 | ~ScalarTensorOperation(); 16 | std::string GetName() const override; 17 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 18 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 19 | uint32_t GetInputNum() const override; 20 | uint32_t GetOutputNum() const override; 21 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 22 | 23 | private: 24 | aclTensor* CreateAclTensor(const AclNnTensor& aclNnTensor); 25 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 26 | int CreateAclTensors(const atb::VariantPack& variantPack); 27 | 28 | private: 29 | std::string opName_; 30 | DICPScalar value_; 31 | DICPScalar zero_; 32 | DICPScalar alpha_; 33 | aclScalar* aclValue_ = nullptr; 34 | aclScalar* aclZero_ = nullptr; 35 | aclScalar* aclAlpha_ = nullptr; 36 | aclOpExecutor* aclZeroExecutor_ = nullptr; 37 | aclOpExecutor* aclAddsExecutor_ = nullptr; 38 | uint64_t mulsWorkspaceSize_ = 0; 39 | uint64_t addsWorkspaceSize_ = 0; 40 | atb::SVector aclOutTensors_; 41 | }; 42 | 43 | } // namespace dicp 44 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/slice_scatter_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 
2 | 3 | #include 4 | #include 5 | 6 | #include "ops/aclnn_ops/acl_nn_operation.h" 7 | 8 | namespace dicp { 9 | 10 | class SliceScatterOperation : public AclNnOperation { 11 | public: 12 | explicit SliceScatterOperation(const std::string& name, int64_t dim, int64_t start, int64_t end, int64_t step); 13 | ~SliceScatterOperation() override; 14 | 15 | std::string GetName() const override; 16 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 17 | uint32_t GetInputNum() const override; 18 | uint32_t GetOutputNum() const override; 19 | 20 | protected: 21 | std::string opName_; 22 | int64_t dim_, start_, end_, step_; 23 | mutable std::vector beginVec_, endVec_, stridesVec_, axesVec_; 24 | mutable aclIntArray *beginArray_, *endArray_, *stridesArray_, *axesArray_; 25 | 26 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 27 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 28 | }; 29 | 30 | } // namespace dicp 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/squeeze_operation.cpp: -------------------------------------------------------------------------------- 1 | #include "squeeze_operation.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "ops/operation_creator.h" 7 | 8 | namespace dicp { 9 | 10 | SqueezeOperation::SqueezeOperation(const std::string& name, std::vector squeezeDim) : ReshapeOperation(name), squeezeDim_(std::move(squeezeDim)) {} 11 | 12 | atb::Status SqueezeOperation::InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const { 13 | DICP_LOG(INFO) << "SqueezeOperation: " << opName_ << " infer shape start"; 14 | outTensorDescs.at(0).format = inTensorDescs.at(0).format; 15 | outTensorDescs.at(0).dtype = inTensorDescs.at(0).dtype; 16 | 17 | auto& oldShape = inTensorDescs.at(0).shape; 18 | 
std::vector dimValues(oldShape.dims, oldShape.dims + oldShape.dimNum); 19 | for (const auto& d : squeezeDim_) { 20 | int offset = d < 0 ? d + oldShape.dimNum : d; 21 | dimValues.erase(dimValues.begin() + offset); 22 | } 23 | outTensorDescs.at(0).shape.dimNum = dimValues.size(); 24 | std::copy(dimValues.begin(), dimValues.end(), outTensorDescs.at(0).shape.dims); 25 | 26 | DICP_LOG(INFO) << "SqueezeOperation: " << opName_ << " infer shape end, out shape: " << atbDimsToString(outTensorDescs.at(0).shape); 27 | return atb::NO_ERROR; 28 | } 29 | 30 | atb::Operation* CustomSqueezeOperationCreate(const nlohmann::json& paramJson) { 31 | std::string opName; 32 | std::vector squeezeDim; 33 | if (paramJson.contains("name")) { 34 | opName = paramJson["name"].get(); 35 | } 36 | if (paramJson.contains("squeezeDim")) { 37 | squeezeDim = std::move(paramJson["squeezeDim"].get>()); 38 | } 39 | DICP_LOG(INFO) << "CustomSqueezeOperation: name: " << opName << " squeezeDim:" << vectorToString(squeezeDim); 40 | atb::Operation* op = new SqueezeOperation(opName, squeezeDim); 41 | return op; 42 | } 43 | 44 | REGISTER_OPERATION(CustomSqueezeOperation, CustomSqueezeOperationCreate); 45 | 46 | } // namespace dicp 47 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/squeeze_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "atb/operation.h" 9 | #include "reshape_operation.h" 10 | #include "utils/common.h" 11 | #include "utils/log.h" 12 | namespace dicp { 13 | 14 | class SqueezeOperation : public ReshapeOperation { 15 | public: 16 | explicit SqueezeOperation(const std::string& name, std::vector squeezeDim); 17 | ~SqueezeOperation(){}; 18 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 19 | 20 | private: 21 | 
std::vector squeezeDim_; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/unsqueeze_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "atb/operation.h" 9 | #include "reshape_operation.h" 10 | #include "utils/common.h" 11 | #include "utils/log.h" 12 | namespace dicp { 13 | 14 | class UnsqueezeOperation : public ReshapeOperation { 15 | public: 16 | explicit UnsqueezeOperation(const std::string& name, std::vector unsqueezeDim); 17 | ~UnsqueezeOperation(){}; 18 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 19 | 20 | private: 21 | std::vector unsqueezeDim_; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/view_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "atb/operation.h" 10 | #include "reshape_operation.h" 11 | #include "utils/common.h" 12 | #include "utils/log.h" 13 | namespace dicp { 14 | 15 | class ViewOperation : public ReshapeOperation { 16 | public: 17 | explicit ViewOperation(const std::string& name, std::vector viewShape); 18 | ~ViewOperation(){}; 19 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 20 | 21 | private: 22 | std::vector shape_; 23 | bool needInferDim_; 24 | int inferDim_; 25 | int otherProd_ = 1; 26 | }; 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- 
/dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/zeros_like_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class ZerosLikeOperation : public AclNnOperation { 8 | public: 9 | explicit ZerosLikeOperation(const std::string& name); 10 | ~ZerosLikeOperation() override; 11 | 12 | std::string GetName() const override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | protected: 18 | std::string opName_; 19 | std::vector size_; 20 | aclDataType dtype_; 21 | 22 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 23 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 24 | }; 25 | 26 | } // namespace dicp 27 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/zeros_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class ZerosOperation : public AclNnOperation { 8 | public: 9 | explicit ZerosOperation(const std::string& name, const std::vector& size, aclDataType dtype); 10 | ~ZerosOperation() override; 11 | 12 | std::string GetName() const override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | protected: 18 | std::string opName_; 19 | std::vector size_; 20 | std::unordered_map dynamic_size_; 21 | bool has_dynamic_size_; 22 | aclDataType dtype_; 23 | 24 | int 
SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 25 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 26 | }; 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/operation_creator.cpp: -------------------------------------------------------------------------------- 1 | #include "ops/operation_creator.h" 2 | 3 | #include "utils/log.h" 4 | 5 | namespace dicp { 6 | 7 | std::unordered_map& getGlobalFuncMap() { 8 | static std::unordered_map funcMap; 9 | return funcMap; 10 | } 11 | 12 | atb::Operation* CreateOperation(const std::string& opName, const nlohmann::json& paramJson) { 13 | auto g_funcMap = getGlobalFuncMap(); 14 | auto it = g_funcMap.find(opName); 15 | if (it == g_funcMap.end()) { 16 | DICP_LOG(ERROR) << "not support opName:" << opName; 17 | return nullptr; 18 | } 19 | 20 | try { 21 | return it->second(paramJson); 22 | } catch (const std::exception& e) { 23 | DICP_LOG(ERROR) << opName << " parse json fail, error:" << e.what(); 24 | } 25 | return nullptr; 26 | } 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/operation_creator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "atb/operation.h" 10 | 11 | namespace dicp { 12 | 13 | using OperationCreateFunc = std::function; 14 | 15 | std::unordered_map& getGlobalFuncMap(); 16 | 17 | struct RegisterOp { 18 | RegisterOp(const std::string& name, OperationCreateFunc func) { getGlobalFuncMap()[name] = func; } 19 | }; 20 | 21 | #define CONCATENATE_DETAIL(x, y) x##y 22 | #define CONCATENATE(x, y) CONCATENATE_DETAIL(x, y) 23 | #define MAKE_UNIQUE_NAME(prefix) 
CONCATENATE(prefix, __COUNTER__) 24 | 25 | #define REGISTER_OPERATION(OpName, CreateFunc) static RegisterOp reg##OpName(#OpName, CreateFunc); 26 | #define REGISTER_ATB_OPERATION(OpNameStr, CreateFunc) static RegisterOp MAKE_UNIQUE_NAME(reg_)(OpNameStr, CreateFunc); 27 | 28 | atb::Operation* CreateOperation(const std::string& opName, const nlohmann::json& paramJson); 29 | 30 | } // namespace dicp 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/cfg/argv.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | #include 6 | #include 7 | 8 | // 9 | // Init log levels using each argv entry that starts with "SPDLOG_LEVEL=" 10 | // 11 | // set all loggers to debug level: 12 | // example.exe "SPDLOG_LEVEL=debug" 13 | 14 | // set logger1 to trace level 15 | // example.exe "SPDLOG_LEVEL=logger1=trace" 16 | 17 | // turn off all logging except for logger1 and logger2: 18 | // example.exe "SPDLOG_LEVEL=off,logger1=debug,logger2=info" 19 | 20 | namespace spdlog { 21 | namespace cfg { 22 | 23 | // search for SPDLOG_LEVEL= in the args and use it to init the levels 24 | inline void load_argv_levels(int argc, const char** argv) { 25 | const std::string spdlog_level_prefix = "SPDLOG_LEVEL="; 26 | for (int i = 1; i < argc; i++) { 27 | std::string arg = argv[i]; 28 | if (arg.find(spdlog_level_prefix) == 0) { 29 | auto levels_string = arg.substr(spdlog_level_prefix.size()); 30 | helpers::load_levels(levels_string); 31 | } 32 | } 33 | } 34 | 35 | inline void load_argv_levels(int argc, char** argv) { load_argv_levels(argc, const_cast(argv)); } 36 | 37 | } // namespace cfg 38 | } // namespace spdlog 39 | 
-------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/cfg/env.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | #include 6 | #include 7 | #include 8 | 9 | // 10 | // Init levels and patterns from env variables SPDLOG_LEVEL 11 | // Inspired from Rust's "env_logger" crate (https://crates.io/crates/env_logger). 12 | // Note - fallback to "info" level on unrecognized levels 13 | // 14 | // Examples: 15 | // 16 | // set global level to debug: 17 | // export SPDLOG_LEVEL=debug 18 | // 19 | // turn off all logging except for logger1: 20 | // export SPDLOG_LEVEL="*=off,logger1=debug" 21 | // 22 | 23 | // turn off all logging except for logger1 and logger2: 24 | // export SPDLOG_LEVEL="off,logger1=debug,logger2=info" 25 | 26 | namespace spdlog { 27 | namespace cfg { 28 | inline void load_env_levels() { 29 | auto env_val = details::os::getenv("SPDLOG_LEVEL"); 30 | if (!env_val.empty()) { 31 | helpers::load_levels(env_val); 32 | } 33 | } 34 | 35 | } // namespace cfg 36 | } // namespace spdlog 37 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/cfg/helpers.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace spdlog { 11 | namespace cfg { 12 | namespace helpers { 13 | // 14 | // Init levels from given string 15 | // 16 | // Examples: 17 | // 18 | // set global level to debug: "debug" 19 | // turn off all logging except for logger1: "off,logger1=debug" 20 | // turn off all logging except for logger1 and logger2: "off,logger1=debug,logger2=info" 21 | // 22 | SPDLOG_API void load_levels(const std::string& txt); 23 | } // namespace helpers 24 | 25 | } // namespace cfg 26 | } // namespace spdlog 27 | 28 | #ifdef SPDLOG_HEADER_ONLY 29 | #include "helpers-inl.h" 30 | #endif // SPDLOG_HEADER_ONLY 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/backtracer.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | // Store log messages in circular buffer. 14 | // Useful for storing debug data in case of error/warning happens. 15 | 16 | namespace spdlog { 17 | namespace details { 18 | class SPDLOG_API backtracer { 19 | mutable std::mutex mutex_; 20 | std::atomic enabled_{false}; 21 | circular_q messages_; 22 | 23 | public: 24 | backtracer() = default; 25 | backtracer(const backtracer& other); 26 | 27 | backtracer(backtracer&& other) SPDLOG_NOEXCEPT; 28 | backtracer& operator=(backtracer other); 29 | 30 | void enable(size_t size); 31 | void disable(); 32 | bool enabled() const; 33 | void push_back(const log_msg& msg); 34 | bool empty() const; 35 | 36 | // pop all items in the q and apply the given fun on each of them. 
37 | void foreach_pop(std::function fun); 38 | }; 39 | 40 | } // namespace details 41 | } // namespace spdlog 42 | 43 | #ifdef SPDLOG_HEADER_ONLY 44 | #include "backtracer-inl.h" 45 | #endif 46 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/console_globals.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace spdlog { 11 | namespace details { 12 | 13 | struct console_mutex { 14 | using mutex_t = std::mutex; 15 | static mutex_t& mutex() { 16 | static mutex_t s_mutex; 17 | return s_mutex; 18 | } 19 | }; 20 | 21 | struct console_nullmutex { 22 | using mutex_t = null_mutex; 23 | static mutex_t& mutex() { 24 | static mutex_t s_mutex; 25 | return s_mutex; 26 | } 27 | }; 28 | } // namespace details 29 | } // namespace spdlog 30 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/file_helper.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace spdlog { 11 | namespace details { 12 | 13 | // Helper class for file sinks. 14 | // When failing to open a file, retry several times(5) with a delay interval(10 ms). 15 | // Throw spdlog_ex exception on errors. 
16 | 17 | class SPDLOG_API file_helper { 18 | public: 19 | file_helper() = default; 20 | explicit file_helper(const file_event_handlers& event_handlers); 21 | 22 | file_helper(const file_helper&) = delete; 23 | file_helper& operator=(const file_helper&) = delete; 24 | ~file_helper(); 25 | 26 | void open(const filename_t& fname, bool truncate = false); 27 | void reopen(bool truncate); 28 | void flush(); 29 | void sync(); 30 | void close(); 31 | void write(const memory_buf_t& buf); 32 | size_t size() const; 33 | const filename_t& filename() const; 34 | 35 | // 36 | // return file path and its extension: 37 | // 38 | // "mylog.txt" => ("mylog", ".txt") 39 | // "mylog" => ("mylog", "") 40 | // "mylog." => ("mylog.", "") 41 | // "/dir1/dir2/mylog.txt" => ("/dir1/dir2/mylog", ".txt") 42 | // 43 | // the starting dot in filenames is ignored (hidden files): 44 | // 45 | // ".mylog" => (".mylog". "") 46 | // "my_folder/.mylog" => ("my_folder/.mylog", "") 47 | // "my_folder/.mylog.txt" => ("my_folder/.mylog", ".txt") 48 | static std::tuple split_by_extension(const filename_t& fname); 49 | 50 | private: 51 | const int open_tries_ = 5; 52 | const unsigned int open_interval_ = 10; 53 | std::FILE* fd_{nullptr}; 54 | filename_t filename_; 55 | file_event_handlers event_handlers_; 56 | }; 57 | } // namespace details 58 | } // namespace spdlog 59 | 60 | #ifdef SPDLOG_HEADER_ONLY 61 | #include "file_helper-inl.h" 62 | #endif 63 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/log_msg-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | 12 | namespace spdlog { 13 | namespace details { 14 | 15 | SPDLOG_INLINE log_msg::log_msg(spdlog::log_clock::time_point log_time, spdlog::source_loc loc, string_view_t a_logger_name, spdlog::level::level_enum lvl, 16 | spdlog::string_view_t msg) 17 | : logger_name(a_logger_name), 18 | level(lvl), 19 | time(log_time) 20 | #ifndef SPDLOG_NO_THREAD_ID 21 | , 22 | thread_id(os::thread_id()) 23 | #endif 24 | , 25 | source(loc), 26 | payload(msg) { 27 | } 28 | 29 | SPDLOG_INLINE log_msg::log_msg(spdlog::source_loc loc, string_view_t a_logger_name, spdlog::level::level_enum lvl, spdlog::string_view_t msg) 30 | : log_msg(os::now(), loc, a_logger_name, lvl, msg) {} 31 | 32 | SPDLOG_INLINE log_msg::log_msg(string_view_t a_logger_name, spdlog::level::level_enum lvl, spdlog::string_view_t msg) 33 | : log_msg(os::now(), source_loc{}, a_logger_name, lvl, msg) {} 34 | 35 | } // namespace details 36 | } // namespace spdlog 37 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/log_msg.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace spdlog { 11 | namespace details { 12 | struct SPDLOG_API log_msg { 13 | log_msg() = default; 14 | log_msg(log_clock::time_point log_time, source_loc loc, string_view_t logger_name, level::level_enum lvl, string_view_t msg); 15 | log_msg(source_loc loc, string_view_t logger_name, level::level_enum lvl, string_view_t msg); 16 | log_msg(string_view_t logger_name, level::level_enum lvl, string_view_t msg); 17 | log_msg(const log_msg& other) = default; 18 | log_msg& operator=(const log_msg& other) = default; 19 | 20 | string_view_t logger_name; 21 | level::level_enum level{level::off}; 22 | log_clock::time_point time; 23 | size_t thread_id{0}; 24 | 25 | // wrapping the formatted text with color (updated by pattern_formatter). 26 | mutable size_t color_range_start{0}; 27 | mutable size_t color_range_end{0}; 28 | 29 | source_loc source; 30 | string_view_t payload; 31 | }; 32 | } // namespace details 33 | } // namespace spdlog 34 | 35 | #ifdef SPDLOG_HEADER_ONLY 36 | #include "log_msg-inl.h" 37 | #endif 38 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/log_msg_buffer-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | namespace spdlog { 11 | namespace details { 12 | 13 | SPDLOG_INLINE log_msg_buffer::log_msg_buffer(const log_msg& orig_msg) : log_msg{orig_msg} { 14 | buffer.append(logger_name.begin(), logger_name.end()); 15 | buffer.append(payload.begin(), payload.end()); 16 | update_string_views(); 17 | } 18 | 19 | SPDLOG_INLINE log_msg_buffer::log_msg_buffer(const log_msg_buffer& other) : log_msg{other} { 20 | buffer.append(logger_name.begin(), logger_name.end()); 21 | buffer.append(payload.begin(), payload.end()); 22 | update_string_views(); 23 | } 24 | 25 | SPDLOG_INLINE log_msg_buffer::log_msg_buffer(log_msg_buffer&& other) SPDLOG_NOEXCEPT : log_msg{other}, buffer{std::move(other.buffer)} { 26 | update_string_views(); 27 | } 28 | 29 | SPDLOG_INLINE log_msg_buffer& log_msg_buffer::operator=(const log_msg_buffer& other) { 30 | log_msg::operator=(other); 31 | buffer.clear(); 32 | buffer.append(other.buffer.data(), other.buffer.data() + other.buffer.size()); 33 | update_string_views(); 34 | return *this; 35 | } 36 | 37 | SPDLOG_INLINE log_msg_buffer& log_msg_buffer::operator=(log_msg_buffer&& other) SPDLOG_NOEXCEPT { 38 | log_msg::operator=(other); 39 | buffer = std::move(other.buffer); 40 | update_string_views(); 41 | return *this; 42 | } 43 | 44 | SPDLOG_INLINE void log_msg_buffer::update_string_views() { 45 | logger_name = string_view_t{buffer.data(), logger_name.size()}; 46 | payload = string_view_t{buffer.data() + logger_name.size(), payload.size()}; 47 | } 48 | 49 | } // namespace details 50 | } // namespace spdlog 51 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/log_msg_buffer.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 
2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | namespace spdlog { 9 | namespace details { 10 | 11 | // Extend log_msg with internal buffer to store its payload. 12 | // This is needed since log_msg holds string_views that points to stack data. 13 | 14 | class SPDLOG_API log_msg_buffer : public log_msg { 15 | memory_buf_t buffer; 16 | void update_string_views(); 17 | 18 | public: 19 | log_msg_buffer() = default; 20 | explicit log_msg_buffer(const log_msg& orig_msg); 21 | log_msg_buffer(const log_msg_buffer& other); 22 | log_msg_buffer(log_msg_buffer&& other) SPDLOG_NOEXCEPT; 23 | log_msg_buffer& operator=(const log_msg_buffer& other); 24 | log_msg_buffer& operator=(log_msg_buffer&& other) SPDLOG_NOEXCEPT; 25 | }; 26 | 27 | } // namespace details 28 | } // namespace spdlog 29 | 30 | #ifdef SPDLOG_HEADER_ONLY 31 | #include "log_msg_buffer-inl.h" 32 | #endif 33 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
// Distributed under the MIT License (http://opensource.org/licenses/MIT)

#pragma once

#include <atomic>
#include <utility>
// null, no cost dummy "mutex" and dummy "atomic" int

namespace spdlog {
namespace details {
// Drop-in for std::mutex in single-threaded sink variants (*_st sinks).
struct null_mutex {
    void lock() const {}
    void unlock() const {}
};

// Drop-in for std::atomic<int> with no synchronization; the memory_order
// parameters are accepted and ignored to match the std::atomic interface.
struct null_atomic_int {
    int value;
    null_atomic_int() = default;

    explicit null_atomic_int(int new_value) : value(new_value) {}

    int load(std::memory_order = std::memory_order_relaxed) const { return value; }

    void store(int new_value, std::memory_order = std::memory_order_relaxed) { value = new_value; }

    int exchange(int new_value, std::memory_order = std::memory_order_relaxed) {
        std::swap(new_value, value);
        return new_value;  // return value before the call
    }
};

}  // namespace details
}  // namespace spdlog
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | namespace spdlog { 11 | namespace details { 12 | 13 | // stop the worker thread and join it 14 | SPDLOG_INLINE periodic_worker::~periodic_worker() { 15 | if (worker_thread_.joinable()) { 16 | { 17 | std::lock_guard lock(mutex_); 18 | active_ = false; 19 | } 20 | cv_.notify_one(); 21 | worker_thread_.join(); 22 | } 23 | } 24 | 25 | } // namespace details 26 | } // namespace spdlog 27 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/periodic_worker.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | // periodic worker thread - periodically executes the given callback function. 7 | // 8 | // RAII over the owned thread: 9 | // creates the thread on construction. 10 | // stops and joins the thread on destruction (if the thread is executing a callback, wait for it 11 | // to finish first). 
12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | namespace spdlog { 19 | namespace details { 20 | 21 | class SPDLOG_API periodic_worker { 22 | public: 23 | template 24 | periodic_worker(const std::function& callback_fun, std::chrono::duration interval) { 25 | active_ = (interval > std::chrono::duration::zero()); 26 | if (!active_) { 27 | return; 28 | } 29 | 30 | worker_thread_ = std::thread([this, callback_fun, interval]() { 31 | for (;;) { 32 | std::unique_lock lock(this->mutex_); 33 | if (this->cv_.wait_for(lock, interval, [this] { return !this->active_; })) { 34 | return; // active_ == false, so exit this thread 35 | } 36 | callback_fun(); 37 | } 38 | }); 39 | } 40 | std::thread& get_thread() { return worker_thread_; } 41 | periodic_worker(const periodic_worker&) = delete; 42 | periodic_worker& operator=(const periodic_worker&) = delete; 43 | // stop the worker thread and join it 44 | ~periodic_worker(); 45 | 46 | private: 47 | bool active_; 48 | std::thread worker_thread_; 49 | std::mutex mutex_; 50 | std::condition_variable cv_; 51 | }; 52 | } // namespace details 53 | } // namespace spdlog 54 | 55 | #ifdef SPDLOG_HEADER_ONLY 56 | #include "periodic_worker-inl.h" 57 | #endif 58 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/synchronous_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include "registry.h" 7 | 8 | namespace spdlog { 9 | 10 | // Default logger factory- creates synchronous loggers 11 | class logger; 12 | 13 | struct synchronous_factory { 14 | template 15 | static std::shared_ptr create(std::string logger_name, SinkArgs&&... 
args) { 16 | auto sink = std::make_shared(std::forward(args)...); 17 | auto new_logger = std::make_shared(std::move(logger_name), std::move(sink)); 18 | details::registry::instance().initialize_logger(new_logger); 19 | return new_logger; 20 | } 21 | }; 22 | } // namespace spdlog 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/windows_include.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef NOMINMAX 4 | #define NOMINMAX // prevent windows redefining min/max 5 | #endif 6 | 7 | #ifndef WIN32_LEAN_AND_MEAN 8 | #define WIN32_LEAN_AND_MEAN 9 | #endif 10 | 11 | #include 12 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/bundled/fmt.license.rst: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 - present, Victor Zverovich 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | --- Optional exception to the license --- 23 | 24 | As an exception, if, as a result of your compiling your source code, portions 25 | of this Software are embedded into a machine-executable object form of such 26 | source code, you may redistribute such embedded portions in such object form 27 | without including the above copyright and permission notices. 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/bundled/locale.h: -------------------------------------------------------------------------------- 1 | #include "xchar.h" 2 | #warning fmt/locale.h is deprecated, include fmt/format.h or fmt/xchar.h instead 3 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/chrono.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's chrono support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/compile.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's compile-time support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/fmt.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016-2018 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // 9 | // Include a bundled header-only copy of fmtlib or an external one. 10 | // By default spdlog include its own copy. 
11 | // 12 | #include 13 | 14 | #if defined(SPDLOG_USE_STD_FORMAT) // SPDLOG_USE_STD_FORMAT is defined - use std::format 15 | #include 16 | #elif !defined(SPDLOG_FMT_EXTERNAL) 17 | #if !defined(SPDLOG_COMPILED_LIB) && !defined(FMT_HEADER_ONLY) 18 | #define FMT_HEADER_ONLY 19 | #endif 20 | #ifndef FMT_USE_WINDOWS_H 21 | #define FMT_USE_WINDOWS_H 0 22 | #endif 23 | 24 | #include 25 | #include 26 | 27 | #else // SPDLOG_FMT_EXTERNAL is defined - use external fmtlib 28 | #include 29 | #include 30 | #endif 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/ostr.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's ostream support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/ranges.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's ranges support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/std.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's std support (for formatting e.g. 9 | // std::filesystem::path, std::thread::id, std::monostate, std::variant, ...) 10 | // 11 | #include 12 | 13 | #if !defined(SPDLOG_USE_STD_FORMAT) 14 | #if !defined(SPDLOG_FMT_EXTERNAL) 15 | #ifdef SPDLOG_HEADER_ONLY 16 | #ifndef FMT_HEADER_ONLY 17 | #define FMT_HEADER_ONLY 18 | #endif 19 | #endif 20 | #include 21 | #else 22 | #include 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/xchar.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's xchar support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | namespace spdlog { 10 | 11 | class formatter { 12 | public: 13 | virtual ~formatter() = default; 14 | virtual void format(const details::log_msg& msg, memory_buf_t& dest) = 0; 15 | virtual std::unique_ptr clone() const = 0; 16 | }; 17 | } // namespace spdlog 18 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fwd.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
// Distributed under the MIT License (http://opensource.org/licenses/MIT)

#pragma once

// Forward declarations only — lets headers name spdlog types without pulling
// in their full definitions.

namespace spdlog {

class logger;
class formatter;

namespace level {
enum level_enum : int;
}  // namespace level

namespace sinks {
class sink;
}  // namespace sinks

}  // namespace spdlog
21 | 22 | namespace spdlog { 23 | class SPDLOG_API mdc { 24 | public: 25 | using mdc_map_t = std::map; 26 | 27 | static void put(const std::string& key, const std::string& value) { get_context()[key] = value; } 28 | 29 | static std::string get(const std::string& key) { 30 | auto& context = get_context(); 31 | auto it = context.find(key); 32 | if (it != context.end()) { 33 | return it->second; 34 | } 35 | return ""; 36 | } 37 | 38 | static void remove(const std::string& key) { get_context().erase(key); } 39 | 40 | static void clear() { get_context().clear(); } 41 | 42 | static mdc_map_t& get_context() { 43 | static thread_local mdc_map_t context; 44 | return context; 45 | } 46 | }; 47 | 48 | } // namespace spdlog 49 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/base_sink-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | template 17 | SPDLOG_INLINE spdlog::sinks::base_sink::base_sink() : formatter_{details::make_unique()} {} 18 | 19 | template 20 | SPDLOG_INLINE spdlog::sinks::base_sink::base_sink(std::unique_ptr formatter) : formatter_{std::move(formatter)} {} 21 | 22 | template 23 | void SPDLOG_INLINE spdlog::sinks::base_sink::log(const details::log_msg& msg) { 24 | std::lock_guard lock(mutex_); 25 | sink_it_(msg); 26 | } 27 | 28 | template 29 | void SPDLOG_INLINE spdlog::sinks::base_sink::flush() { 30 | std::lock_guard lock(mutex_); 31 | flush_(); 32 | } 33 | 34 | template 35 | void SPDLOG_INLINE spdlog::sinks::base_sink::set_pattern(const std::string& pattern) { 36 | std::lock_guard lock(mutex_); 37 | set_pattern_(pattern); 38 | } 39 | 40 | template 41 | void SPDLOG_INLINE spdlog::sinks::base_sink::set_formatter(std::unique_ptr sink_formatter) { 42 | std::lock_guard lock(mutex_); 43 | set_formatter_(std::move(sink_formatter)); 44 | } 45 | 46 | template 47 | void SPDLOG_INLINE spdlog::sinks::base_sink::set_pattern_(const std::string& pattern) { 48 | set_formatter_(details::make_unique(pattern)); 49 | } 50 | 51 | template 52 | void SPDLOG_INLINE spdlog::sinks::base_sink::set_formatter_(std::unique_ptr sink_formatter) { 53 | formatter_ = std::move(sink_formatter); 54 | } 55 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | // 6 | // base sink templated over a mutex (either dummy or real) 7 | // concrete implementation should override the sink_it_() and flush_() methods. 8 | // locking is taken care of in this class - no locking needed by the 9 | // implementers.. 10 | // 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | namespace spdlog { 17 | namespace sinks { 18 | template 19 | class SPDLOG_API base_sink : public sink { 20 | public: 21 | base_sink(); 22 | explicit base_sink(std::unique_ptr formatter); 23 | ~base_sink() override = default; 24 | 25 | base_sink(const base_sink&) = delete; 26 | base_sink(base_sink&&) = delete; 27 | 28 | base_sink& operator=(const base_sink&) = delete; 29 | base_sink& operator=(base_sink&&) = delete; 30 | 31 | void log(const details::log_msg& msg) final override; 32 | void flush() final override; 33 | void set_pattern(const std::string& pattern) final override; 34 | void set_formatter(std::unique_ptr sink_formatter) final override; 35 | 36 | protected: 37 | // sink formatter 38 | std::unique_ptr formatter_; 39 | Mutex mutex_; 40 | 41 | virtual void sink_it_(const details::log_msg& msg) = 0; 42 | virtual void flush_() = 0; 43 | virtual void set_pattern_(const std::string& pattern); 44 | virtual void set_formatter_(std::unique_ptr sink_formatter); 45 | }; 46 | } // namespace sinks 47 | } // namespace spdlog 48 | 49 | #ifdef SPDLOG_HEADER_ONLY 50 | #include "base_sink-inl.h" 51 | #endif 52 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/basic_file_sink-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace spdlog { 14 | namespace sinks { 15 | 16 | template 17 | SPDLOG_INLINE basic_file_sink::basic_file_sink(const filename_t& filename, bool truncate, const file_event_handlers& event_handlers) 18 | : file_helper_{event_handlers} { 19 | file_helper_.open(filename, truncate); 20 | } 21 | 22 | template 23 | SPDLOG_INLINE const filename_t& basic_file_sink::filename() const { 24 | return file_helper_.filename(); 25 | } 26 | 27 | template 28 | SPDLOG_INLINE void basic_file_sink::sink_it_(const details::log_msg& msg) { 29 | memory_buf_t formatted; 30 | base_sink::formatter_->format(msg, formatted); 31 | file_helper_.write(formatted); 32 | } 33 | 34 | template 35 | SPDLOG_INLINE void basic_file_sink::flush_() { 36 | file_helper_.flush(); 37 | } 38 | 39 | } // namespace sinks 40 | } // namespace spdlog 41 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/callback_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace spdlog { 14 | 15 | // callbacks type 16 | typedef std::function custom_log_callback; 17 | 18 | namespace sinks { 19 | /* 20 | * Trivial callback sink, gets a callback function and calls it on each log 21 | */ 22 | template 23 | class callback_sink final : public base_sink { 24 | public: 25 | explicit callback_sink(const custom_log_callback& callback) : callback_{callback} {} 26 | 27 | protected: 28 | void sink_it_(const details::log_msg& msg) override { callback_(msg); } 29 | void flush_() override{}; 30 | 31 | private: 32 | custom_log_callback callback_; 33 | }; 34 | 35 | using callback_sink_mt = callback_sink; 36 | using callback_sink_st = callback_sink; 37 | 38 | } // namespace sinks 39 | 40 | // 41 | // factory functions 42 | // 43 | template 44 | inline std::shared_ptr callback_logger_mt(const std::string& logger_name, const custom_log_callback& callback) { 45 | return Factory::template create(logger_name, callback); 46 | } 47 | 48 | template 49 | inline std::shared_ptr callback_logger_st(const std::string& logger_name, const custom_log_callback& callback) { 50 | return Factory::template create(logger_name, callback); 51 | } 52 | 53 | } // namespace spdlog 54 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | namespace spdlog { 13 | namespace sinks { 14 | 15 | template 16 | class null_sink : public base_sink { 17 | protected: 18 | void sink_it_(const details::log_msg&) override {} 19 | void flush_() override {} 20 | }; 21 | 22 | using null_sink_mt = null_sink; 23 | using null_sink_st = null_sink; 24 | 25 | } // namespace sinks 26 | 27 | template 28 | inline std::shared_ptr null_logger_mt(const std::string& logger_name) { 29 | auto null_logger = Factory::template create(logger_name); 30 | null_logger->set_level(level::off); 31 | return null_logger; 32 | } 33 | 34 | template 35 | inline std::shared_ptr null_logger_st(const std::string& logger_name) { 36 | auto null_logger = Factory::template create(logger_name); 37 | null_logger->set_level(level::off); 38 | return null_logger; 39 | } 40 | 41 | } // namespace spdlog 42 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace spdlog { 13 | namespace sinks { 14 | template 15 | class ostream_sink final : public base_sink { 16 | public: 17 | explicit ostream_sink(std::ostream& os, bool force_flush = false) : ostream_(os), force_flush_(force_flush) {} 18 | ostream_sink(const ostream_sink&) = delete; 19 | ostream_sink& operator=(const ostream_sink&) = delete; 20 | 21 | protected: 22 | void sink_it_(const details::log_msg& msg) override { 23 | memory_buf_t formatted; 24 | base_sink::formatter_->format(msg, formatted); 25 | ostream_.write(formatted.data(), static_cast(formatted.size())); 26 | if (force_flush_) { 27 | ostream_.flush(); 28 | } 29 | } 30 | 31 | void flush_() override { ostream_.flush(); } 32 | 33 | std::ostream& ostream_; 34 | bool force_flush_; 35 | }; 36 | 37 | using ostream_sink_mt = ostream_sink; 38 | using ostream_sink_st = ostream_sink; 39 | 40 | } // namespace sinks 41 | } // namespace spdlog 42 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/sink-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | 12 | SPDLOG_INLINE bool spdlog::sinks::sink::should_log(spdlog::level::level_enum msg_level) const { return msg_level >= level_.load(std::memory_order_relaxed); } 13 | 14 | SPDLOG_INLINE void spdlog::sinks::sink::set_level(level::level_enum log_level) { level_.store(log_level, std::memory_order_relaxed); } 15 | 16 | SPDLOG_INLINE spdlog::level::level_enum spdlog::sinks::sink::level() const { 17 | return static_cast(level_.load(std::memory_order_relaxed)); 18 | } 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | namespace spdlog { 10 | 11 | namespace sinks { 12 | class SPDLOG_API sink { 13 | public: 14 | virtual ~sink() = default; 15 | virtual void log(const details::log_msg& msg) = 0; 16 | virtual void flush() = 0; 17 | virtual void set_pattern(const std::string& pattern) = 0; 18 | virtual void set_formatter(std::unique_ptr sink_formatter) = 0; 19 | 20 | void set_level(level::level_enum log_level); 21 | level::level_enum level() const; 22 | bool should_log(level::level_enum msg_level) const; 23 | 24 | protected: 25 | // sink log level - default is all 26 | level_t level_{level::trace}; 27 | }; 28 | 29 | } // namespace sinks 30 | } // namespace spdlog 31 | 32 | #ifdef SPDLOG_HEADER_ONLY 33 | #include "sink-inl.h" 34 | #endif 35 | -------------------------------------------------------------------------------- 
/dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/stdout_color_sinks-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace spdlog { 14 | 15 | template 16 | SPDLOG_INLINE std::shared_ptr stdout_color_mt(const std::string& logger_name, color_mode mode) { 17 | return Factory::template create(logger_name, mode); 18 | } 19 | 20 | template 21 | SPDLOG_INLINE std::shared_ptr stdout_color_st(const std::string& logger_name, color_mode mode) { 22 | return Factory::template create(logger_name, mode); 23 | } 24 | 25 | template 26 | SPDLOG_INLINE std::shared_ptr stderr_color_mt(const std::string& logger_name, color_mode mode) { 27 | return Factory::template create(logger_name, mode); 28 | } 29 | 30 | template 31 | SPDLOG_INLINE std::shared_ptr stderr_color_st(const std::string& logger_name, color_mode mode) { 32 | return Factory::template create(logger_name, mode); 33 | } 34 | } // namespace spdlog 35 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/stdout_color_sinks.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifdef _WIN32 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | #include 13 | 14 | namespace spdlog { 15 | namespace sinks { 16 | #ifdef _WIN32 17 | using stdout_color_sink_mt = wincolor_stdout_sink_mt; 18 | using stdout_color_sink_st = wincolor_stdout_sink_st; 19 | using stderr_color_sink_mt = wincolor_stderr_sink_mt; 20 | using stderr_color_sink_st = wincolor_stderr_sink_st; 21 | #else 22 | using stdout_color_sink_mt = ansicolor_stdout_sink_mt; 23 | using stdout_color_sink_st = ansicolor_stdout_sink_st; 24 | using stderr_color_sink_mt = ansicolor_stderr_sink_mt; 25 | using stderr_color_sink_st = ansicolor_stderr_sink_st; 26 | #endif 27 | } // namespace sinks 28 | 29 | template 30 | std::shared_ptr stdout_color_mt(const std::string& logger_name, color_mode mode = color_mode::automatic); 31 | 32 | template 33 | std::shared_ptr stdout_color_st(const std::string& logger_name, color_mode mode = color_mode::automatic); 34 | 35 | template 36 | std::shared_ptr stderr_color_mt(const std::string& logger_name, color_mode mode = color_mode::automatic); 37 | 38 | template 39 | std::shared_ptr stderr_color_st(const std::string& logger_name, color_mode mode = color_mode::automatic); 40 | 41 | } // namespace spdlog 42 | 43 | #ifdef SPDLOG_HEADER_ONLY 44 | #include "stdout_color_sinks-inl.h" 45 | #endif 46 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/udp_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #ifdef _WIN32 10 | #include 11 | #else 12 | #include 13 | #endif 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | // Simple udp client sink 21 | // Sends formatted log via udp 22 | 23 | namespace spdlog { 24 | namespace sinks { 25 | 26 | struct udp_sink_config { 27 | std::string server_host; 28 | uint16_t server_port; 29 | 30 | udp_sink_config(std::string host, uint16_t port) : server_host{std::move(host)}, server_port{port} {} 31 | }; 32 | 33 | template 34 | class udp_sink : public spdlog::sinks::base_sink { 35 | public: 36 | // host can be hostname or ip address 37 | explicit udp_sink(udp_sink_config sink_config) : client_{sink_config.server_host, sink_config.server_port} {} 38 | 39 | ~udp_sink() override = default; 40 | 41 | protected: 42 | void sink_it_(const spdlog::details::log_msg& msg) override { 43 | spdlog::memory_buf_t formatted; 44 | spdlog::sinks::base_sink::formatter_->format(msg, formatted); 45 | client_.send(formatted.data(), formatted.size()); 46 | } 47 | 48 | void flush_() override {} 49 | details::udp_client client_; 50 | }; 51 | 52 | using udp_sink_mt = udp_sink; 53 | using udp_sink_st = udp_sink; 54 | 55 | } // namespace sinks 56 | 57 | // 58 | // factory functions 59 | // 60 | template 61 | inline std::shared_ptr udp_logger_mt(const std::string& logger_name, sinks::udp_sink_config skin_config) { 62 | return Factory::template create(logger_name, skin_config); 63 | } 64 | 65 | } // namespace spdlog 66 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/version.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #define SPDLOG_VER_MAJOR 1 7 | #define SPDLOG_VER_MINOR 14 8 | #define SPDLOG_VER_PATCH 1 9 | 10 | #define SPDLOG_TO_VERSION(major, minor, patch) (major * 10000 + minor * 100 + patch) 11 | #define SPDLOG_VERSION SPDLOG_TO_VERSION(SPDLOG_VER_MAJOR, SPDLOG_VER_MINOR, SPDLOG_VER_PATCH) 12 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/common.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/common.h" 2 | 3 | namespace dicp { 4 | 5 | std::string atbDimsToString(const atb::Dims& d) { 6 | std::ostringstream oss; 7 | oss << "["; 8 | for (uint64_t i = 0; i < d.dimNum; ++i) { 9 | oss << d.dims[i]; 10 | if (i < d.dimNum - 1) { 11 | oss << ", "; 12 | } 13 | } 14 | oss << "]"; 15 | return oss.str(); 16 | } 17 | 18 | aclDataType get_acl_dtype(const std::string& dtype) { 19 | if (dtype == "INT64") { 20 | return ACL_INT64; 21 | } else if (dtype == "INT32") { 22 | return ACL_INT32; 23 | } else if (dtype == "FLOAT") { 24 | return ACL_FLOAT; 25 | } else if (dtype == "FLOAT16") { 26 | return ACL_FLOAT16; 27 | } else if (dtype == "BF16") { 28 | return ACL_BF16; 29 | } else { 30 | throw std::invalid_argument("Unsupported dtype: " + dtype); 31 | } 32 | } 33 | 34 | } // namespace dicp 35 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/config.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/config.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "utils/log.h" 7 | 8 | namespace dicp { 9 | 10 | constexpr int GB_1 = 1024 * 1024 * 1024; 11 | 12 | Config::Config() { 13 | const char* envBufferSize = std::getenv("DICP_WORKSPACE_BUFFER_SIZE"); 14 | if (envBufferSize) { 15 | workspaceBufferSize_ = 
std::stoull(envBufferSize); 16 | } else { 17 | workspaceBufferSize_ = 1 * GB_1; 18 | } 19 | } 20 | 21 | uint64_t Config::WorkspaceBufferSize() { return workspaceBufferSize_; } 22 | 23 | Config& GetConfig() { 24 | static Config config; 25 | return config; 26 | } 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace dicp { 6 | 7 | class Config { 8 | public: 9 | Config(); 10 | ~Config(){}; 11 | uint64_t WorkspaceBufferSize(); 12 | 13 | private: 14 | uint64_t workspaceBufferSize_; 15 | }; 16 | 17 | Config& GetConfig(); 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/global_dict.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/global_dict.h" 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include "utils/config.h" 9 | #include "utils/log.h" 10 | #include "utils/tensor_utils.h" 11 | 12 | namespace dicp { 13 | 14 | GlobalDict::GlobalDict() = default; 15 | 16 | void GlobalDict::Register(const std::string& key) { 17 | current_key_ = key; 18 | data_.try_emplace(key); 19 | } 20 | 21 | std::unordered_map& GlobalDict::GetData() { 22 | if (current_key_.empty() || !data_.count(current_key_)) { 23 | throw std::runtime_error("Invalid GlobalDict access"); 24 | } 25 | return data_.at(current_key_); 26 | } 27 | 28 | GlobalDict& GetGlobalDict_() { 29 | static GlobalDict global_dict; 30 | return global_dict; 31 | } 32 | 33 | void GlobalDict::Set(const std::string& key) { 34 | if (current_key_.empty() || !data_.count(current_key_)) { 35 | throw std::runtime_error("Invalid GlobalDict access"); 36 | } 37 | current_key_ = key; 38 | } 39 | 40 | 
void RegisterToGlobalDict(const std::string& key) { 41 | auto& global_dict = GetGlobalDict_(); 42 | global_dict.Register(key); 43 | } 44 | 45 | void SetGlobalDict(const std::string& key) { 46 | auto& global_dict = GetGlobalDict_(); 47 | global_dict.Set(key); 48 | } 49 | 50 | std::unordered_map& GetGlobalDictData() { return GetGlobalDict_().GetData(); } 51 | 52 | } // namespace dicp 53 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/global_dict.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace dicp { 10 | 11 | class GlobalDict { 12 | public: 13 | GlobalDict(); 14 | ~GlobalDict(){}; 15 | void Register(const std::string& key); 16 | void Set(const std::string& key); 17 | std::unordered_map& GetData(); 18 | 19 | private: 20 | std::string current_key_; 21 | std::unordered_map> data_; 22 | }; 23 | 24 | void RegisterToGlobalDict(const std::string& key); 25 | void SetGlobalDict(const std::string& key); 26 | GlobalDict& GetGlobalDict_(); 27 | 28 | std::unordered_map& GetGlobalDictData(); 29 | 30 | } // namespace dicp 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/misc.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/misc.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "utils/log.h" 7 | 8 | namespace dicp { 9 | namespace utils { 10 | 11 | void* GetCurrentStream() { 12 | int32_t devId = 0; 13 | aclrtGetDevice(&devId); 14 | void* stream = c10_npu::getCurrentNPUStream(devId).stream(); 15 | DICP_LOG_IF(stream == nullptr, ERROR) << "get current stream failed!"; 16 | return stream; 17 | } 18 | 19 | int GetNewModelId() { 20 | static int modelId = 0; 21 | return modelId++; 22 | } 23 | 24 | } // namespace utils 25 
| } // namespace dicp 26 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/misc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace dicp { 4 | namespace utils { 5 | 6 | void* GetCurrentStream(); 7 | int GetNewModelId(); 8 | 9 | } // namespace utils 10 | } // namespace dicp 11 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/operation_util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | namespace dicp { 5 | 6 | #define CREATE_OPERATION(param, operation) \ 7 | do { \ 8 | atb::Status atbStatus = atb::CreateOperation(param, operation); \ 9 | if (atbStatus != atb::NO_ERROR) { \ 10 | return atbStatus; \ 11 | } \ 12 | } while (0) 13 | 14 | #define CREATE_OPERATION_NO_RETURN(param, operation) \ 15 | do { \ 16 | atb::Status atbStatus = atb::CreateOperation(param, operation); \ 17 | if (atbStatus != atb::NO_ERROR) { \ 18 | } \ 19 | } while (0) 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/tensor_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace dicp { 9 | namespace tensor_utils { 10 | 11 | template 12 | struct aclDataTypeMap; 13 | 14 | template <> 15 | struct aclDataTypeMap { 16 | using type = float16_t; 17 | }; 18 | template <> 19 | struct aclDataTypeMap { 20 | using type = int64_t; 21 | }; 22 | template <> 23 | struct aclDataTypeMap { 24 | using type = int32_t; 25 | }; 26 | template <> 27 | struct aclDataTypeMap { 28 | using type = int8_t; 29 | }; 30 | 31 | std::string TensorToString(const atb::Tensor& tensor); 32 
| std::string TensorDescToString(const atb::TensorDesc& tensorDesc); 33 | 34 | atb::Tensor AtTensor2Tensor(const at::Tensor& atTensor); 35 | at::Tensor CreateAtTensorFromTensorDesc(const atb::TensorDesc& tensorDesc); 36 | int64_t TransferAtTensor2AtbTensor(std::vector& atTensors, std::vector& atbTensors); 37 | 38 | template 39 | void copyAndPrint(const atb::Tensor tensor, int64_t tensorSize); 40 | int64_t DumpTensor(const atb::Tensor& tensor); 41 | 42 | } // namespace tensor_utils 43 | } // namespace dicp 44 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/timer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace dicp { 8 | 9 | class Timer { 10 | public: 11 | Timer() : startTimePoint(), endTimePoint(), isRunning(false) {} 12 | 13 | void start() { 14 | startTimePoint = std::chrono::high_resolution_clock::now(); 15 | isRunning = true; 16 | } 17 | 18 | void stop() { 19 | if (isRunning) { 20 | endTimePoint = std::chrono::high_resolution_clock::now(); 21 | isRunning = false; 22 | } 23 | } 24 | 25 | double ElapsedMicroSecond() const { 26 | if (isRunning) { 27 | auto currentTime = std::chrono::high_resolution_clock::now(); 28 | return std::chrono::duration_cast(currentTime - startTimePoint).count(); 29 | } else { 30 | return std::chrono::duration_cast(endTimePoint - startTimePoint).count(); 31 | } 32 | } 33 | 34 | double ElapsedSecond() const { return ElapsedMicroSecond() / 1e6; } 35 | 36 | void reset() { 37 | startTimePoint = std::chrono::time_point(); 38 | endTimePoint = std::chrono::time_point(); 39 | isRunning = false; 40 | } 41 | 42 | private: 43 | std::chrono::time_point startTimePoint; 44 | std::chrono::time_point endTimePoint; 45 | bool isRunning; 46 | }; 47 | 48 | } // namespace dicp 49 | 
-------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/workspace.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/workspace.h" 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "utils/config.h" 8 | #include "utils/log.h" 9 | #include "utils/tensor_utils.h" 10 | 11 | namespace dicp { 12 | 13 | constexpr int KB_1 = 1024; 14 | constexpr int MB_1 = 1024 * 1024; 15 | constexpr int GB_1 = 1024 * 1024 * 1024; 16 | 17 | Workspace::Workspace() { 18 | bufferSize_ = GetConfig().WorkspaceBufferSize(); 19 | 20 | DICP_LOG(INFO) << "Workspace init, bufferSize:" << bufferSize_; 21 | if (bufferSize_ > 0) { 22 | atTensor_ = CreateAtTensor(bufferSize_); 23 | buffer_ = atTensor_.data_ptr(); 24 | } 25 | } 26 | 27 | void* Workspace::GetWorkspaceBuffer(uint64_t bufferSize) { 28 | if (bufferSize <= bufferSize_) { 29 | DICP_LOG(INFO) << "GetWorkspaceBuffer bufferSize:" << bufferSize << "<= bufferSize_:" << bufferSize_; 30 | return atTensor_.data_ptr(); 31 | } 32 | 33 | if (aclrtSynchronizeDevice() != 0) { 34 | return nullptr; 35 | } 36 | 37 | atTensor_.reset(); 38 | atTensor_ = CreateAtTensor(bufferSize); 39 | bufferSize_ = atTensor_.numel(); 40 | DICP_LOG(INFO) << "Workspace new bufferSize:" << bufferSize; 41 | buffer_ = atTensor_.data_ptr(); 42 | return atTensor_.data_ptr(); 43 | } 44 | 45 | torch::Tensor Workspace::CreateAtTensor(uint64_t bufferSize) { 46 | atb::TensorDesc tensorDesc; 47 | tensorDesc.dtype = ACL_UINT8; 48 | tensorDesc.format = ACL_FORMAT_ND; 49 | 50 | tensorDesc.shape.dimNum = 2; 51 | tensorDesc.shape.dims[0] = KB_1; 52 | tensorDesc.shape.dims[1] = (bufferSize + KB_1 - 1) / KB_1; 53 | 54 | return tensor_utils::CreateAtTensorFromTensorDesc(tensorDesc); 55 | } 56 | 57 | void* GetWorkspaceBuffer(uint64_t bufferSize) { 58 | static Workspace workspace; 59 | return workspace.GetWorkspaceBuffer(bufferSize); 60 | } 61 | 62 | } // 
namespace dicp 63 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/workspace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace dicp { 9 | 10 | class Workspace { 11 | public: 12 | Workspace(); 13 | ~Workspace(){}; 14 | void* GetWorkspaceBuffer(uint64_t bufferSize); 15 | 16 | private: 17 | torch::Tensor CreateAtTensor(uint64_t bufferSize); 18 | 19 | private: 20 | void* buffer_ = nullptr; 21 | uint64_t bufferSize_ = 0; 22 | torch::Tensor atTensor_; 23 | }; 24 | 25 | void* GetWorkspaceBuffer(uint64_t bufferSize); 26 | 27 | } // namespace dicp 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/config.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from dlinfer.graph.dicp.dynamo_bridge.decompositions import ( 4 | get_decompositions, 5 | register_decomposition_for_dicp, 6 | ) 7 | 8 | 9 | aten = torch.ops.aten 10 | 11 | 12 | @register_decomposition_for_dicp(aten.select.int) 13 | def select_int(tensor, dim, index): 14 | if ( 15 | not isinstance(tensor.shape[0], torch.SymInt) 16 | and tensor.shape[0] == 1 17 | and dim == 0 18 | and index == 0 19 | ): 20 | view_shape = [-1 if isinstance(x, torch.SymInt) else x for x in tensor.shape] 21 | del view_shape[0] 22 | return tensor.view(view_shape) 23 | slice_res = aten.slice.Tensor(tensor, dim, index, index + 1, 1) 24 | return slice_res.squeeze(dim) 25 | 26 | 27 | def get_decomp(): 28 | return get_decompositions( 29 | [ 30 | aten.count_nonzero.default, 31 | aten.select.int, 32 | ] 33 | ) 34 | 35 | 36 | decomp = get_decomp() 37 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | if (DEVICE STREQUAL "ascend") 2 | add_subdirectory(AtbGraph/codegen/runtime) 3 | endif() 4 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/dlinfer/graph/dicp/vendor/__init__.py -------------------------------------------------------------------------------- /dlinfer/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | from .llm import * 3 | -------------------------------------------------------------------------------- /dlinfer/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | -------------------------------------------------------------------------------- /dlinfer/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | class Config: 3 | def __init__(self, **kwargs): 4 | self._config = kwargs 5 | 6 | def __getattr__(self, name): 7 | if name in self._config: 8 | return self._config[name] 9 | raise AttributeError(f"{type(self).__name__} object has no attribute '{name}'") 10 | 11 | def __setattr__(self, name, value): 12 | if name == "_config": 13 | super().__setattr__(name, value) 14 | else: 15 | self._config[name] = value 16 | 17 | def __repr__(self): 18 | return repr(self._config) 19 | -------------------------------------------------------------------------------- /dlinfer/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. 
All rights reserved. 2 | # decorator usage 3 | def register_ops(registry): 4 | def wrapped_func(ops_func): 5 | registry[ops_func.__name__] = ops_func 6 | return ops_func 7 | 8 | return wrapped_func 9 | -------------------------------------------------------------------------------- /dlinfer/utils/type_annotation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | from torch import Tensor 3 | from typing import Optional, Sequence, Union, Any, Tuple, Callable, Dict 4 | -------------------------------------------------------------------------------- /dlinfer/vendor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import importlib 3 | from pathlib import Path 4 | from functools import lru_cache 5 | import yaml 6 | import torch 7 | 8 | 9 | vendor_ops_registry = dict() 10 | vendor_is_initialized = False 11 | vendor_name_file = Path(__file__).parent / "vendor.yaml" 12 | linear_w8a8_scale_type = torch.Tensor 13 | dynamic_quant_scale_type = torch.Tensor 14 | 15 | 16 | with open(str(vendor_name_file), "r") as f: 17 | config = yaml.safe_load(f) 18 | vendor_name = config["vendor"] 19 | dispatch_key = config["dispatch_key"] 20 | 21 | 22 | @lru_cache(1) 23 | def import_vendor_module(vendor_name_str): 24 | return importlib.import_module(f".{vendor_name_str}", __package__) 25 | 26 | 27 | def vendor_torch_init(): 28 | import_vendor_module(vendor_name) 29 | global vendor_is_initialized 30 | vendor_is_initialized = True 31 | global linear_w8a8_scale_type, dynamic_quant_scale_type 32 | linear_w8a8_scale_type = torch.Tensor if vendor_name in ["ascend"] else float 33 | dynamic_quant_scale_type = torch.Tensor if vendor_name in ["ascend"] else float 34 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(ascend_extension) 3 | 4 | include(ascend) 5 | 6 | set(CSRC_FILES 7 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/init.cpp 8 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/flash_attention.cpp 9 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/moe_gating_topk_softmax.cpp 10 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/op_api_common.cpp 11 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/torch_npu_utils.cpp 12 | ) 13 | 14 | if("${Torch_npu_VERSION_HIGHER_THAN_231}" STREQUAL "1") 15 | list(APPEND CSRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/csrc/torch_npu_symbol_fix.cpp) 16 | endif() 17 | 18 | add_library( 19 | ${PROJECT_NAME} SHARED 20 | ${CSRC_FILES} 21 | ) 22 | 23 | set_target_properties( 24 | ${PROJECT_NAME} PROPERTIES 25 | PREFIX "" 26 | ) 27 | 28 | target_compile_definitions( 29 | ${PROJECT_NAME} PUBLIC 30 | GLIBCXX_USE_CXX11_ABI=${_GLIBCXX_USE_CXX11_ABI} 31 | ) 32 | 33 | target_include_directories( 34 | ${PROJECT_NAME} PUBLIC 35 | ${CMAKE_CURRENT_SOURCE_DIR} 36 | ${TORCH_NPU_INCLUDE_DIRS} 37 | ${CANN_INCLUDE_DIRS} 38 | ) 39 | 40 | target_link_libraries( 41 | ${PROJECT_NAME} PRIVATE 42 | Python::Python 43 | torch 44 | ${TORCH_NPU_LIBRARY} 45 | ${CANN_LIBRARY} 46 | ) 47 | 48 | file(RELATIVE_PATH OUTPUT_LIB_RELATIVE_PATH "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") 49 | install( 50 | TARGETS ${PROJECT_NAME} 51 | DESTINATION ${OUTPUT_LIB_RELATIVE_PATH} 52 | ) 53 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | from pathlib import Path 3 | 4 | import torch 5 | from . 
import pytorch_patch, torch_npu_ops 6 | 7 | torch.ops.load_library(str(Path(__file__).parent / "ascend_extension.so")) 8 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/csrc/init.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024, DeepLink. All rights reserved. 2 | #include 3 | 4 | #include "ascend_ops.hpp" 5 | 6 | namespace { 7 | 8 | TORCH_LIBRARY(npu_ext, m) { 9 | m.def( 10 | "npu_prompt_flash_attention_out(Tensor query, Tensor key, Tensor value, Tensor(a!) attn_output, *, " 11 | "Tensor? padding_mask=None, Tensor? atten_mask=None, int[]? actual_seq_lengths=None, int num_heads=1, " 12 | "float scale_value=1.0, int pre_tokens=2147473647, int next_tokens=0, " 13 | "str input_layout=\"BSH\", int num_key_value_heads=0) -> Tensor(a!)"); 14 | m.def( 15 | "npu_incre_flash_attention_v4_out(Tensor query, Tensor key, Tensor value, Tensor(a!) attn_output, *, " 16 | "Tensor? padding_mask=None, Tensor? atten_mask=None, int[]? actual_seq_lengths=None, " 17 | "Tensor? antiquant_scale=None, Tensor? antiquant_offset=None, Tensor? block_table=None, " 18 | "Tensor? dequant_scale1=None, Tensor? quant_scale1=None, Tensor? dequant_scale2=None, Tensor? quant_scale2=None, " 19 | "Tensor? quant_offset2=None, Tensor? kv_padding_size=None, int num_heads=1, float scale_value=1.0, " 20 | "str input_layout=\"BSH\", int num_key_value_heads=0, int block_size=0, int inner_precise=1) -> Tensor(a!)"); 21 | m.def( 22 | "npu_moe_gating_topk_softmax(Tensor x, Tensor? finished_opt, int topk, Tensor(a!) y_out," 23 | "Tensor(b!) 
expert_idx_out, Tensor row_idx_out) -> (Tensor(a!), Tensor(b!))"); 24 | } 25 | 26 | } // namespace 27 | 28 | namespace { 29 | 30 | TORCH_LIBRARY_IMPL(npu_ext, PrivateUse1, m) { 31 | m.impl("npu_prompt_flash_attention_out", TORCH_FN(dlinfer::ascend::npu_prompt_flash_attention_out)); 32 | m.impl("npu_incre_flash_attention_v4_out", TORCH_FN(dlinfer::ascend::npu_incre_flash_attention_v4_out)); 33 | m.impl("npu_moe_gating_topk_softmax", TORCH_FN(dlinfer::ascend::npu_moe_gating_topk_softmax)); 34 | } 35 | 36 | } // namespace 37 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/csrc/moe_gating_topk_softmax.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024, DeepLink. All rights reserved. 2 | #include 3 | 4 | #include 5 | 6 | #include "ascend_ops.hpp" 7 | #include "op_api_common.hpp" 8 | 9 | namespace dlinfer { 10 | 11 | namespace ascend { 12 | 13 | ::std::tuple npu_moe_gating_topk_softmax(const at::Tensor& x, const c10::optional& finished_opt, int64_t topk, 14 | at::Tensor& y_out, at::Tensor& expert_idx_out, at::Tensor& row_idx_out) { 15 | EXEC_NPU_NO_FORMAT_CHECK_CMD(aclnnMoeGatingTopKSoftmax, x, finished_opt, topk, y_out, expert_idx_out, row_idx_out); 16 | return std::tie(y_out, expert_idx_out); 17 | } 18 | 19 | } // namespace ascend 20 | 21 | } // namespace dlinfer 22 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/csrc/op_api_common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024, DeepLink. All rights reserved. 
2 | #include "op_api_common.hpp" 3 | 4 | void* GetOpApiFuncAddrFromFeatureLib(const char* api_name) { 5 | GET_OP_API_FUNC_FROM_FEATURE_LIB(ops_infer_handler, "libaclnn_ops_infer.so"); 6 | GET_OP_API_FUNC_FROM_FEATURE_LIB(ops_train_handler, "libaclnn_ops_train.so"); 7 | GET_OP_API_FUNC_FROM_FEATURE_LIB(adv_infer_handler, "libaclnn_adv_infer.so"); 8 | GET_OP_API_FUNC_FROM_FEATURE_LIB(adv_train_handler, "libaclnn_adv_train.so"); 9 | GET_OP_API_FUNC_FROM_FEATURE_LIB(dvpp_handler, "libacl_dvpp_op.so"); 10 | GET_OP_API_FUNC_FROM_FEATURE_LIB(sparse_handler, "libaclsparse.so"); 11 | GET_OP_API_FUNC_FROM_FEATURE_LIB(optim_handler, "libacloptim.so"); 12 | GET_OP_API_FUNC_FROM_FEATURE_LIB(fft_handler, "libaclfft.so"); 13 | GET_OP_API_FUNC_FROM_FEATURE_LIB(rand_handler, "libaclrand.so"); 14 | return nullptr; 15 | } 16 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/csrc/torch_npu_symbol_fix.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "acl/acl.h" 7 | #include "acl/acl_rt.h" 8 | 9 | namespace c10_npu { 10 | namespace acl { 11 | 12 | // These functions are reimplemented to handle the missing symbol issue in 13 | // torch-npu >= 2.3.1. If these functions are called, it indicates an environment 14 | // setup issue and the program should terminate 15 | 16 | aclError AclrtPeekAtLastError(aclrtLastErrLevel flag) { 17 | throw std::runtime_error( 18 | "Dlinfer AclrtPeekAtLastError should not be called. " 19 | "Please check your environment setup."); 20 | return ACL_ERROR; 21 | } 22 | } // namespace acl 23 | 24 | bool checkUceErrAndRepair() { 25 | throw std::runtime_error( 26 | "Dlinfer checkUceErrAndRepair should not be called. 
" 27 | "Please check your environment setup."); 28 | return false; 29 | } 30 | 31 | } // namespace c10_npu 32 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/pytorch_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | from packaging import version 3 | 4 | import torch 5 | import torch_npu 6 | 7 | origin_torch_compile = torch.compile 8 | from torch_npu.contrib import transfer_to_npu 9 | 10 | torch.compile = origin_torch_compile 11 | 12 | if version.parse(torch.__version__) >= version.parse("2.2.0"): 13 | from importlib import import_module 14 | 15 | target_module_str = "torch.utils._triton" 16 | target_module = import_module(target_module_str) 17 | func_str = "has_triton" 18 | 19 | def has_triton(): 20 | return False 21 | 22 | setattr(target_module, func_str, has_triton) 23 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/utils.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | import torch 3 | 4 | 5 | class SocVersion: 6 | Ascend310P: str = "Ascend310P" 7 | Ascend910B: str = "Ascend910B" 8 | 9 | @classmethod 10 | @lru_cache(maxsize=1) 11 | def device_name(cls) -> str: 12 | return torch.npu.get_device_name() 13 | 14 | @classmethod 15 | def is_Ascend310P(cls) -> bool: 16 | return cls.device_name().startswith(cls.Ascend310P) 17 | 18 | @classmethod 19 | def is_Ascend910B(cls) -> bool: 20 | return cls.device_name().startswith(cls.Ascend910B) 21 | -------------------------------------------------------------------------------- /dlinfer/vendor/camb/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/dlinfer/vendor/camb/CMakeLists.txt 
-------------------------------------------------------------------------------- /dlinfer/vendor/camb/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from . import pytorch_patch, camb_ops 4 | 5 | # TODO. weitao: camb torch-mlu-ops-v1.2.0 per_token_smooth_quantize need smooth_vec 6 | SMOOTH_VEC = torch.ones(8192, dtype=torch.float32, device="mlu") 7 | 8 | 9 | def next_power_of_2(n: int): 10 | """Return the smallest power of 2 greater than or equal to n.""" 11 | n -= 1 12 | n |= n >> 1 13 | n |= n >> 2 14 | n |= n >> 4 15 | n |= n >> 8 16 | n |= n >> 16 17 | n |= n >> 32 18 | n += 1 19 | return n 20 | 21 | 22 | def update_smooth(length): 23 | global SMOOTH_VEC 24 | if length > SMOOTH_VEC.shape[0]: 25 | SMOOTH_VEC = torch.ones( 26 | next_power_of_2(length), dtype=torch.float32, device="mlu" 27 | ) 28 | return SMOOTH_VEC 29 | -------------------------------------------------------------------------------- /dlinfer/vendor/camb/pytorch_patch.py: -------------------------------------------------------------------------------- 1 | from torch_mlu.utils.gpu_migration import migration 2 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/__init__.py: -------------------------------------------------------------------------------- 1 | from .maca_ops import * 2 | 3 | device_str = "cuda" 4 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/attention/attention_dtypes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "attention_generic.cuh" 4 | #include "dtype_bfloat16.cuh" 5 | #include "dtype_float16.cuh" 6 | #include "dtype_float32.cuh" 7 | #include "dtype_fp8.cuh" 8 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/attention/attention_generic.cuh: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from 3 | * https://github.com/NVIDIA/FasterTransformer/blob/release/v5.3_tag/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h 4 | * Copyright (c) 2023, The vLLM team. 5 | * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | #pragma once 20 | 21 | #include 22 | 23 | namespace vllm { 24 | 25 | // A vector type to store Q, K, V elements. 26 | template 27 | struct Vec {}; 28 | 29 | // A vector type to store FP32 accumulators. 30 | template 31 | struct FloatVec {}; 32 | 33 | // Template vector operations. 
34 | template 35 | inline __device__ Acc mul(A a, B b); 36 | 37 | template 38 | inline __device__ float sum(T v); 39 | 40 | template 41 | inline __device__ float dot(T a, T b) { 42 | return sum(mul(a, b)); 43 | } 44 | 45 | template 46 | inline __device__ float dot(T a, T b) { 47 | return sum(mul(a, b)); 48 | } 49 | 50 | template 51 | inline __device__ void zero(T& dst) { 52 | constexpr int WORDS = sizeof(T) / 4; 53 | union { 54 | T raw; 55 | uint32_t words[WORDS]; 56 | } tmp; 57 | 58 | #pragma unroll 59 | for (int ii = 0; ii < WORDS; ++ii) { 60 | tmp.words[ii] = 0u; 61 | } 62 | dst = tmp.raw; 63 | } 64 | 65 | } // namespace vllm 66 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/attention/dtype_fp8.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "attention_generic.cuh" 4 | 5 | #include 6 | #ifdef ENABLE_FP8 7 | #ifndef USE_ROCM 8 | #include 9 | #endif // USE_ROCM 10 | #endif // ENABLE_FP8 11 | 12 | namespace vllm { 13 | 14 | enum class Fp8KVCacheDataType { 15 | kAuto = 0, 16 | kFp8E4M3 = 1, 17 | kFp8E5M2 = 2, 18 | }; 19 | 20 | // fp8 vector types for quantization of kv cache 21 | template <> 22 | struct Vec { 23 | using Type = uint8_t; 24 | }; 25 | 26 | template <> 27 | struct Vec { 28 | using Type = uint16_t; 29 | }; 30 | 31 | template <> 32 | struct Vec { 33 | using Type = uint32_t; 34 | }; 35 | 36 | template <> 37 | struct Vec { 38 | using Type = uint2; 39 | }; 40 | 41 | } // namespace vllm 42 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/cache.h: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | void swap_blocks(torch::Tensor& src, torch::Tensor& dst, const torch::Tensor& block_mapping); 10 | 11 | // Note: the key_caches and value_caches vectors are constant but 12 | // not the Tensors they contain. The vectors need to be const refs 13 | // in order to satisfy pytorch's C++ operator registration code. 14 | void copy_blocks(std::vector const& key_caches, std::vector const& value_caches, const torch::Tensor& block_mapping); 15 | 16 | void reshape_and_cache(torch::Tensor& key, torch::Tensor& value, torch::Tensor& key_cache, torch::Tensor& value_cache, torch::Tensor& slot_mapping, 17 | const std::string& kv_cache_dtype, const double k_scale, const double v_scale); 18 | 19 | void reshape_and_cache_new(torch::Tensor& key, torch::Tensor& value, torch::Tensor& key_cache, torch::Tensor& value_cache, torch::Tensor& slot_mapping, 20 | const std::string& kv_cache_dtype, const double kv_scale, const double v_scale); 21 | 22 | void reshape_and_cache_flash(torch::Tensor& key, torch::Tensor& value, torch::Tensor& key_cache, torch::Tensor& value_cache, torch::Tensor& slot_mapping, 23 | const std::string& kv_cache_dtype, const double k_scale, const double v_scale); 24 | 25 | // Just for unittest 26 | void convert_fp8(torch::Tensor& dst_cache, torch::Tensor& src_cache, const double scale, const std::string& kv_cache_dtype); 27 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/cuda_compat.h: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights Reserved. 
2 | #pragma once 3 | 4 | #ifdef USE_ROCM 5 | #include 6 | #endif 7 | 8 | #ifndef USE_ROCM 9 | #define WARP_SIZE 32 10 | #else 11 | #define WARP_SIZE warpSize 12 | #endif 13 | 14 | #ifndef USE_ROCM 15 | #define VLLM_LDG(arg) __ldg(arg) 16 | #else 17 | #define VLLM_LDG(arg) *(arg) 18 | #endif 19 | 20 | #ifndef USE_ROCM 21 | #define VLLM_SHFL_XOR_SYNC(var, lane_mask) __shfl_xor_sync(uint32_t(-1), var, lane_mask) 22 | #define VLLM_SHFL_XOR_SYNC_WIDTH(var, lane_mask, width) __shfl_xor_sync(uint32_t(-1), var, lane_mask, width) 23 | #else 24 | #define VLLM_SHFL_XOR_SYNC(var, lane_mask) __shfl_xor(var, lane_mask) 25 | #define VLLM_SHFL_XOR_SYNC_WIDTH(var, lane_mask, width) __shfl_xor(var, lane_mask, width) 26 | #endif 27 | 28 | #ifndef USE_ROCM 29 | #define VLLM_SHFL_SYNC(var, src_lane) __shfl_sync(uint32_t(-1), var, src_lane) 30 | #else 31 | #define VLLM_SHFL_SYNC(var, src_lane) __shfl(var, src_lane) 32 | #endif 33 | 34 | #ifndef USE_ROCM 35 | #define VLLM_SHFL_DOWN_SYNC(var, lane_delta) __shfl_down_sync(uint32_t(-1), var, lane_delta) 36 | #else 37 | #define VLLM_SHFL_DOWN_SYNC(var, lane_delta) __shfl_down(var, lane_delta) 38 | #endif 39 | 40 | #ifndef USE_ROCM 41 | #define VLLM_DevFuncAttribute_SET_MaxDynamicSharedMemorySize(FUNC, VAL) cudaFuncSetAttribute(FUNC, cudaFuncAttributeMaxDynamicSharedMemorySize, VAL) 42 | #else 43 | #define VLLM_DevFuncAttribute_SET_MaxDynamicSharedMemorySize(FUNC, VAL) hipFuncSetAttribute(FUNC, hipFuncAttributeMaxDynamicSharedMemorySize, VAL) 44 | #endif 45 | 46 | #define MXWARP_SIZE 64 47 | #ifndef USE_ROCM 48 | #define MXVLLM_SHFL_SYNC(var, src_lane) __shfl_sync(uint64_t(-1), var, src_lane) 49 | #else 50 | #define MXVLLM_SHFL_SYNC(var, src_lane) __shfl(var, src_lane) 51 | #endif 52 | 53 | #ifndef USE_ROCM 54 | #define MXVLLM_SHFL_XOR_SYNC(var, lane_mask) __shfl_xor_sync(uint64_t(-1), var, lane_mask) 55 | #else 56 | #define MXVLLM_SHFL_XOR_SYNC(var, lane_mask) __shfl_xor(var, lane_mask) 57 | #endif 58 | 
-------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/dispatch_utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from 3 | * https://github.com/pytorch/pytorch/blob/v2.0.1/aten/src/ATen/Dispatch.h 4 | */ 5 | #pragma once 6 | 7 | #include 8 | 9 | #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...) \ 10 | AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \ 11 | AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__) \ 12 | AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) 13 | 14 | #define VLLM_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__)) 15 | 16 | #define VLLM_DISPATCH_CASE_FLOATING_AND_BYTE_TYPES(...) \ 17 | AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \ 18 | AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__) \ 19 | AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) \ 20 | AT_DISPATCH_CASE(at::ScalarType::Byte, __VA_ARGS__) 21 | 22 | #define VLLM_DISPATCH_FLOATING_AND_BYTE_TYPES(TYPE, NAME, ...) AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_AND_BYTE_TYPES(__VA_ARGS__)) 23 | 24 | #define VLLM_DISPATCH_CASE_INTEGRAL_TYPES(...) \ 25 | AT_DISPATCH_CASE(at::ScalarType::Byte, __VA_ARGS__) \ 26 | AT_DISPATCH_CASE(at::ScalarType::Char, __VA_ARGS__) \ 27 | AT_DISPATCH_CASE(at::ScalarType::Short, __VA_ARGS__) \ 28 | AT_DISPATCH_CASE(at::ScalarType::Int, __VA_ARGS__) \ 29 | AT_DISPATCH_CASE(at::ScalarType::Long, __VA_ARGS__) 30 | 31 | #define VLLM_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) 
AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_INTEGRAL_TYPES(__VA_ARGS__)) 32 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/moe/moe_ops.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void topk_softmax(torch::Tensor& topk_weights, torch::Tensor& topk_indices, torch::Tensor& token_expert_indices, torch::Tensor& gating_output); 6 | -------------------------------------------------------------------------------- /docs/quant/ascend_kv_quant.md: -------------------------------------------------------------------------------- 1 | 2 | # KV Cache量化 3 | 4 | 目前在华为Atlas 800T A2设备,由于算子功能限制,在算子模式下,仅支持离线量化。 5 | 6 | ## KV Cache量化前提 7 | 8 | - **依赖** 9 | 10 | ```shell 11 | torch==2.1.0 12 | torchvision==0.16.0 13 | torch-npu==2.1.0.post6 14 | ``` 15 | 16 | - **工具** 17 | 18 | ```shell 19 | amct_pytorch==0.22.2(Ascend-cann-amct_8.0.RC2) 20 | ``` 21 | 22 | ## KV Cache量化示例 23 | 24 | 在当前目录执行如下命令,得到量化因子记录文件,用户根据实际情况修改示例程序中的model_path(VL模型需要用其语言模型的权重)和dataset_path,并根据模型结构修改quant_layers。 25 | 26 | ```python 27 | python3 ascend_scales_offsets.py 28 | ``` 29 | 30 | 推理成功后,在当前目录会生成量化日志文件./amct_log/amct_pytorch.log和./outputs文件夹,该文件夹内包含以下内容: 31 | 32 | - **config.json**:量化配置文件,描述了如何对模型中的每一层进行量化。 33 | - **record.txt**:量化因子记录文件。 34 | 35 | 用户在使用lmdeploy时,通过环境变量ASCEND_QUANT_RECORD_FILE指定量化因子路径,并通过参数quant_policy=8,即可使用量化因子记录文件完成推理。 36 | 示例代码如下: 37 | 38 | ```python 39 | import lmdeploy 40 | from lmdeploy import PytorchEngineConfig 41 | if __name__ == "__main__": 42 | pipe = lmdeploy.pipeline("/path_to_model", 43 | backend_config = PytorchEngineConfig(tp=1, 44 | cache_max_entry_count=0.4, device_type="ascend", 45 | eager_mode=True, quant_policy=8)) 46 | question = ["Shanghai is", "Please introduce China", "How are you?"] 47 | response = pipe(question, request_output_len=256, do_preprocess=False) 48 | for idx, r in enumerate(response): 49 | print(f"Q: 
{question[idx]}") 50 | print(f"A: {r.text}") 51 | print() 52 | ``` 53 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "pyyaml", 4 | "scikit-build", 5 | "setuptools==69.5.1", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | -------------------------------------------------------------------------------- /requirements/ascend/build.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | ninja 3 | setuptools==69.5.1 4 | wheel 5 | scikit-build 6 | cmake>=3.18 7 | -r torch.txt 8 | -------------------------------------------------------------------------------- /requirements/ascend/cann.txt: -------------------------------------------------------------------------------- 1 | decorator 2 | attrs 3 | psutil 4 | absl-py 5 | cloudpickle 6 | ml-dtypes 7 | scipy 8 | tornado 9 | -------------------------------------------------------------------------------- /requirements/ascend/full.txt: -------------------------------------------------------------------------------- 1 | -r build.txt 2 | -r runtime.txt 3 | -------------------------------------------------------------------------------- /requirements/ascend/runtime.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | -r torch.txt 3 | -r cann.txt 4 | -------------------------------------------------------------------------------- /requirements/ascend/torch.txt: -------------------------------------------------------------------------------- 1 | torch==2.3.1 2 | torchvision==0.18.1 3 | torch-npu==2.3.1 4 | numpy<2.0.0 5 | pyyaml 6 | -------------------------------------------------------------------------------- /requirements/camb/build.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | setuptools==69.5.1 3 | 
wheel 4 | scikit-build 5 | cmake>=3.18 6 | -r torch.txt -------------------------------------------------------------------------------- /requirements/camb/full.txt: -------------------------------------------------------------------------------- 1 | -r build.txt 2 | -r runtime.txt -------------------------------------------------------------------------------- /requirements/camb/runtime.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | torch-mlu 3 | torch-mlu-ops 4 | -r torch.txt -------------------------------------------------------------------------------- /requirements/camb/torch.txt: -------------------------------------------------------------------------------- 1 | numpy<2.0.0 2 | pyyaml 3 | torch==2.4.0 -------------------------------------------------------------------------------- /requirements/maca/build.txt: -------------------------------------------------------------------------------- 1 | ninja 2 | setuptools 3 | wheel 4 | scikit-build 5 | cmake>=3.18 6 | -r torch.txt 7 | -------------------------------------------------------------------------------- /requirements/maca/full.txt: -------------------------------------------------------------------------------- 1 | -r build.txt 2 | -r runtime.txt 3 | -------------------------------------------------------------------------------- /requirements/maca/runtime.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | -r torch.txt 3 | -------------------------------------------------------------------------------- /requirements/maca/torch.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | -------------------------------------------------------------------------------- /run_format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | cd "$(dirname 
"$0")" || exit 1 6 | 7 | extract_first_version() { 8 | version_pattern="\b[0-9]+(\.[0-9]+)+\b" 9 | if [[ $1 =~ $version_pattern ]]; then 10 | echo "${BASH_REMATCH[0]}" 11 | else 12 | echo "Unknown" 13 | fi 14 | } 15 | 16 | get_cmd_version() { 17 | cmd=$1 18 | extract_first_version "$($cmd --version 2>&1 | head -n 1)" 19 | } 20 | 21 | check_cmd_version() { 22 | cmd=$1 23 | required_version=$2 24 | required_version_regex=^${required_version//x/[0-9]+} 25 | command -v "$cmd" >/dev/null || (echo "$cmd not found" && exit 1) 26 | current_version=$(get_cmd_version "$cmd") 27 | if [[ $current_version =~ $required_version_regex ]]; then 28 | echo "$cmd $required_version found, version: $current_version" 29 | else 30 | echo "WARNING! GitHub Actions CI uses $cmd $required_version, current version: $current_version" 31 | fi 32 | } 33 | 34 | # format all C/C++ files in current git repository with clang-format 35 | check_cmd_version clang-format 17.x 36 | git ls-files |\ 37 | grep -E '^.+\.(c|h|cpp|cc|cxx|hpp|hh|hxx)$' | 38 | grep -Ev "dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/" | 39 | xargs clang-format -i --style=file 40 | 41 | check_cmd_version black 24.x 42 | # format all Python files in current git repository with black 43 | # now only for dipu 44 | git ls-files |\ 45 | grep -E '^.+\.py$' |\ 46 | xargs black 47 | -------------------------------------------------------------------------------- /scripts/build_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | REPO_ROOT=$(cd $(dirname $(dirname $0)); pwd) 5 | pip install -U build 6 | rm -rf ${REPO_ROOT}/_skbuild ${REPO_ROOT}/dlinfer*.egg* 7 | export DEVICE=${DEVICE:-ascend} 8 | python -m build \ 9 | -C="--build-option=--plat-name" \ 10 | -C="--build-option=manylinux2014_$(uname -m)" \ 11 | -v -w . 
12 | -------------------------------------------------------------------------------- /scripts/build_wheel_allpy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | eval "$(conda shell.bash hook)" 5 | 6 | REPO_ROOT=$(cd $(dirname $(dirname $0)); pwd) 7 | cd ${REPO_ROOT} 8 | 9 | PY_VERSION_LIST=("3.8" "3.9" "3.10") 10 | for PY_VERSION in ${PY_VERSION_LIST[@]}; do 11 | echo start building wheels for python${PY_VERSION} 12 | PY_VERSION_NAME=${PY_VERSION/./} 13 | ENV_NAME=dlinfer_build_py${PY_VERSION_NAME} 14 | conda env remove -n ${ENV_NAME} -y 15 | conda create -n ${ENV_NAME} python=${PY_VERSION} -y 16 | conda activate ${ENV_NAME} 17 | pip install -U build 18 | bash ${REPO_ROOT}/scripts/build_wheel.sh 19 | conda deactivate 20 | conda env remove -n ${ENV_NAME} -y 21 | echo end building wheels for python${PY_VERSION} 22 | done 23 | -------------------------------------------------------------------------------- /tests/readme.md: -------------------------------------------------------------------------------- 1 | # ReadMe for test model for your self 2 | 3 | ## How to add model for CI 4 | 5 | 1. 将模型权重等下载到ci机器的/data2/share_data目录(如/data2/share_data/llama_model_data/llama-2-7b-chat-hf). 6 | 2. 在config.yml中的pytorch_chat_model下添加上述模型文件夹. 7 | 3. 如果该模型的 `tp` > 1,需要在 `config.yml` 中的 `tp_config` 下面添加 8 | "模型名:tp_num"(如 Mixtral-8x7B-Instruct-v0.1: 2)。 9 | 10 | ## How to run test locally 11 | 12 | 1. 修改config.yml中对应的模型路径和log_path 13 | 14 | 2. `export DLINFER_TEST_DIR=/path/to/dlinfer/tests` 15 | 16 | 3. 
运行 17 | 18 | ```bash 19 | #!/bin/bash 20 | cd /path/to/tests 21 | #run tp=1 model on lmdeploy 22 | pytest ./ -m 'lmdeploy' -s -x --alluredir=allure-results --clean-alluredir 23 | #run tp=2 chat_model on lmdeploy 24 | python ./test_lmdeploy/e2e/test_model_tp2.py --model_type=chat --device_type=ascend 25 | #run tp=2 vl_model on lmdeploy 26 | python ./test_lmdeploy/e2e/test_model_tp2.py --model_type=vl --device_type=ascend 27 | ``` 28 | -------------------------------------------------------------------------------- /tests/test_lmdeploy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/tests/test_lmdeploy/__init__.py -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/tests/test_lmdeploy/e2e/__init__.py -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/config.yaml: -------------------------------------------------------------------------------- 1 | tp_config: 2 | Mixtral-8x7B-Instruct-v0.1: 2 3 | InternVL2-26B: 2 4 | cogvlm2-llama3-chat-19B: 2 5 | 6 | graph_config: 7 | internlm3-8b-instruct: True 8 | Meta-Llama-3-8B-Instruct: True 9 | Mixtral-8x7B-Instruct-v0.1: True 10 | Qwen2.5-7B-Instruct: True 11 | Qwen2-VL-7B-Instruct: True 12 | InternVL2-2B: True 13 | InternVL2-26B: True 14 | 15 | pytorch_chat_model: 16 | - internlm_model/internlm3-8b-instruct 17 | - llama_model/Meta-Llama-3.1-8B-Instruct 18 | - mixtral_model/Mixtral-8x7B-Instruct-v0.1 19 | - qwen_model/Qwen2.5-7B-Instruct 20 | 21 | pytorch_vl_model: 22 | - internvl_model/InternVL2-26B 23 | - internvl_model/InternVL2-2B 24 | # - cogvlm_model_data/cogvlm-chat 25 | # - 
cogvlm_model_data/cogvlm2-llama3-chat-19B 26 | - qwen_model/Qwen2-VL-7B-Instruct 27 | -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | 4 | import pytest 5 | 6 | from test_lmdeploy.utils.config_utils import get_config, get_case_config 7 | 8 | 9 | @pytest.fixture(scope="session") 10 | def config(): 11 | return get_config() 12 | 13 | 14 | @pytest.fixture(scope="class", autouse=True) 15 | def common_case_config(): 16 | return get_case_config() 17 | -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/prompt_case.yaml: -------------------------------------------------------------------------------- 1 | identity: 2 | - 你好,你叫什么名字#hi, what's your name: 3 | chinese_poem_case: 4 | - 给我一首中文诗,需要添加标点符号,请用中文回答Give me a Chinese poem in Chinese: 5 | - contain: 6 | - "," 7 | - "。" 8 | - poem 9 | - poetry 10 | - \n 11 | - len_g: 12 | 5 13 | code_testcase: 14 | - 使用python编写一个int数组的冒泡排序代码: 15 | - contain: 16 | - def 17 | - bubble 18 | - 冒泡 19 | - code 20 | - python 21 | - llama2: 22 | - contain: 23 | - def 24 | - bubble 25 | - 冒泡 26 | - code 27 | - python 28 | - assist 29 | - however -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | python_files = test*_*.py # test file 3 | python_classes = Test* # test class 4 | python_functions = test_* # test function 5 | pytest_runtest_call.tryfirst = True 6 | filterwarnings = ignore::UserWarning 7 | reruns = 2 8 | reruns_delay = 10 9 | -------------------------------------------------------------------------------- /tests/test_lmdeploy/scripts/test_model_tp2.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 获取DLINFER_TEST_DIR环境变量 4 | if [ -z "$DLINFER_TEST_DIR" ]; then 5 | echo "DLINFER_TEST_DIR environment variable is not set" 6 | exit 1 7 | fi 8 | echo "DLINFER_TEST_DIR: $DLINFER_TEST_DIR" 9 | 10 | # 获取chat模型列表 11 | echo "Getting chat model list..." 12 | chat_model_tp2_list=$(python -c "from test_lmdeploy.utils.config_utils import get_torch_model_list; print(' '.join(get_torch_model_list(tp_num=2)))") 13 | echo "chat_model_tp2_list: $chat_model_tp2_list" 14 | 15 | # 遍历chat模型列表 16 | for model_case in $chat_model_tp2_list; do 17 | python $DLINFER_TEST_DIR/test_lmdeploy/e2e/test_model_tp2.py --model_case="$model_case" --model_type=chat --device_type=ascend 18 | if [ $? -ne 0 ]; then 19 | echo "The test for chat model $model_case failed. Exiting." 20 | exit 1 21 | fi 22 | done 23 | 24 | # 获取vl模型列表 25 | echo "Getting vl model list..." 26 | vl_model_tp2_list=$(python -c "from test_lmdeploy.utils.config_utils import get_torch_model_list; print(' '.join(get_torch_model_list(tp_num=2, model_type='vl_model')))") 27 | echo "vl_model_tp2_list: $vl_model_tp2_list" 28 | 29 | for model_case in $vl_model_tp2_list; do 30 | python $DLINFER_TEST_DIR/test_lmdeploy/e2e/test_model_tp2.py --model_case="$model_case" --model_type=vl --device_type=ascend 31 | if [ $? -ne 0 ]; then 32 | echo "The test for vl model $model_case failed. Exiting." 33 | exit 1 34 | fi 35 | done -------------------------------------------------------------------------------- /tests/test_lmdeploy/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/tests/test_lmdeploy/utils/__init__.py --------------------------------------------------------------------------------