├── .clang-format ├── .github ├── CODEOWNERS ├── ci │ └── fix-exit-multi-npu.patch └── workflows │ ├── format.yaml │ └── main.yml ├── .gitignore ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CODE_OF_CONDUCT_cn.md ├── Contributors.md ├── LICENSE ├── README.md ├── assets └── dlinfer_arch.png ├── benchmark ├── profile_generation.py └── profile_restful_api.py ├── cmake ├── FindATB.cmake ├── FindCANNToolkit.cmake ├── FindTorch_npu.cmake └── ascend.cmake ├── dlinfer ├── __init__.py ├── framework │ ├── __init__.py │ ├── lmdeploy_ext │ │ ├── __init__.py │ │ ├── cudagraph │ │ │ ├── __init__.py │ │ │ ├── camb_cudagraph.py │ │ │ └── maca_cudagraph.py │ │ ├── device │ │ │ ├── __init__.py │ │ │ ├── ascend.py │ │ │ └── camb.py │ │ ├── dynamo │ │ │ └── graph_mode_patch.py │ │ └── quants │ │ │ ├── __init__.py │ │ │ └── ascend_awq.py │ └── transformers_ext │ │ ├── __init__.py │ │ ├── cogvlm.py │ │ ├── internlm2.py │ │ ├── internvl.py │ │ └── patch.py ├── graph │ ├── __init__.py │ ├── custom_op.py │ └── dicp │ │ ├── __init__.py │ │ ├── dynamo_bridge │ │ ├── __init__.py │ │ ├── compile.py │ │ ├── compile_fx.py │ │ ├── conversion.py │ │ ├── decompositions.py │ │ ├── graph.py │ │ ├── op_transformer.py │ │ ├── operator.py │ │ ├── pt_patch.py │ │ ├── torch_version.py │ │ └── utils.py │ │ └── vendor │ │ ├── AtbGraph │ │ ├── __init__.py │ │ ├── atb_op.py │ │ ├── codegen │ │ │ ├── __init__.py │ │ │ ├── atb.py │ │ │ ├── atb_graph.py │ │ │ ├── atb_infer_param.py │ │ │ ├── atb_op.py │ │ │ ├── load_and_run.py │ │ │ ├── runtime │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── dicp_model.cpp │ │ │ │ ├── dicp_model.h │ │ │ │ ├── model.cpp │ │ │ │ ├── model.h │ │ │ │ ├── ops │ │ │ │ │ ├── aclnn_ops │ │ │ │ │ │ ├── acl_nn_operation.cpp │ │ │ │ │ │ ├── acl_nn_operation.h │ │ │ │ │ │ ├── add_operation.cpp │ │ │ │ │ │ ├── add_operation.h │ │ │ │ │ │ ├── add_rms_norm_operation.cpp │ │ │ │ │ │ ├── add_rms_norm_operation.h │ │ │ │ │ │ ├── adds_operation.cpp │ │ │ │ │ │ ├── adds_operation.h │ │ │ │ │ │ ├── 
arange_operation.cpp │ │ │ │ │ │ ├── arange_operation.h │ │ │ │ │ │ ├── batch_matmul_operation.cpp │ │ │ │ │ │ ├── batch_matmul_operation.h │ │ │ │ │ │ ├── bincount_operation.cpp │ │ │ │ │ │ ├── bincount_operation.h │ │ │ │ │ │ ├── bitwise_not_operation.cpp │ │ │ │ │ │ ├── bitwise_not_operation.h │ │ │ │ │ │ ├── cast_operation.cpp │ │ │ │ │ │ ├── cast_operation.h │ │ │ │ │ │ ├── cat_operation.cpp │ │ │ │ │ │ ├── cat_operation.h │ │ │ │ │ │ ├── cumsum_operation.cpp │ │ │ │ │ │ ├── cumsum_operation.h │ │ │ │ │ │ ├── div_operation.cpp │ │ │ │ │ │ ├── div_operation.h │ │ │ │ │ │ ├── divs_operation.cpp │ │ │ │ │ │ ├── divs_operation.h │ │ │ │ │ │ ├── dynamic_quant_operation.cpp │ │ │ │ │ │ ├── dynamic_quant_operation.h │ │ │ │ │ │ ├── expand_operation.cpp │ │ │ │ │ │ ├── expand_operation.h │ │ │ │ │ │ ├── gather_operation.cpp │ │ │ │ │ │ ├── gather_operation.h │ │ │ │ │ │ ├── ge_scalar_operation.cpp │ │ │ │ │ │ ├── ge_scalar_operation.h │ │ │ │ │ │ ├── grouped_matmul_operation.cpp │ │ │ │ │ │ ├── grouped_matmul_operation.h │ │ │ │ │ │ ├── gt_scalar_operation.cpp │ │ │ │ │ │ ├── gt_scalar_operation.h │ │ │ │ │ │ ├── index_select_operation.cpp │ │ │ │ │ │ ├── index_select_operation.h │ │ │ │ │ │ ├── inplace_copy_operation.cpp │ │ │ │ │ │ ├── inplace_copy_operation.h │ │ │ │ │ │ ├── inplace_div_operation.cpp │ │ │ │ │ │ ├── inplace_div_operation.h │ │ │ │ │ │ ├── inplace_index_copy_operation.cpp │ │ │ │ │ │ ├── inplace_index_copy_operation.h │ │ │ │ │ │ ├── inplace_masked_fill_scalar_operation.cpp │ │ │ │ │ │ ├── inplace_masked_fill_scalar_operation.h │ │ │ │ │ │ ├── inplace_scatter_operation.cpp │ │ │ │ │ │ ├── inplace_scatter_operation.h │ │ │ │ │ │ ├── max_operation.cpp │ │ │ │ │ │ ├── max_operation.h │ │ │ │ │ │ ├── moe_finalize_routing_operation.cpp │ │ │ │ │ │ ├── moe_finalize_routing_operation.h │ │ │ │ │ │ ├── moe_gating_topk_softmax.cpp │ │ │ │ │ │ ├── moe_gating_topk_softmax.h │ │ │ │ │ │ ├── moe_init_routing_operation.cpp │ │ │ │ │ │ ├── 
moe_init_routing_operation.h │ │ │ │ │ │ ├── moe_token_permute_operation.cpp │ │ │ │ │ │ ├── moe_token_permute_operation.h │ │ │ │ │ │ ├── moe_token_unpermute_operation.cpp │ │ │ │ │ │ ├── moe_token_unpermute_operation.h │ │ │ │ │ │ ├── mul_operation.cpp │ │ │ │ │ │ ├── mul_operation.h │ │ │ │ │ │ ├── muls_operation.cpp │ │ │ │ │ │ ├── muls_operation.h │ │ │ │ │ │ ├── permute_operation.cpp │ │ │ │ │ │ ├── permute_operation.h │ │ │ │ │ │ ├── pow_tensor_scalar_operation.cpp │ │ │ │ │ │ ├── pow_tensor_scalar_operation.h │ │ │ │ │ │ ├── pow_tensor_tensor_operation.cpp │ │ │ │ │ │ ├── pow_tensor_tensor_operation.h │ │ │ │ │ │ ├── quant_matmul_operation.cpp │ │ │ │ │ │ ├── quant_matmul_operation.h │ │ │ │ │ │ ├── reciprocal_operation.cpp │ │ │ │ │ │ ├── reciprocal_operation.h │ │ │ │ │ │ ├── reduce_sum_operation.cpp │ │ │ │ │ │ ├── reduce_sum_operation.h │ │ │ │ │ │ ├── s_where_operation.cpp │ │ │ │ │ │ ├── s_where_operation.h │ │ │ │ │ │ ├── scatter_value_operation.cpp │ │ │ │ │ │ ├── scatter_value_operation.h │ │ │ │ │ │ ├── slice_operation.cpp │ │ │ │ │ │ ├── slice_operation.h │ │ │ │ │ │ ├── softmax_operation.cpp │ │ │ │ │ │ ├── softmax_operation.h │ │ │ │ │ │ ├── split_with_size_operation.cpp │ │ │ │ │ │ ├── split_with_size_operation.h │ │ │ │ │ │ ├── sub_operation.cpp │ │ │ │ │ │ ├── sub_operation.h │ │ │ │ │ │ ├── subs_operation.cpp │ │ │ │ │ │ ├── subs_operation.h │ │ │ │ │ │ ├── topk_operation.cpp │ │ │ │ │ │ └── topk_operation.h │ │ │ │ │ ├── atb_ops │ │ │ │ │ │ ├── activation.cpp │ │ │ │ │ │ ├── allreduce.cpp │ │ │ │ │ │ ├── atb_ops.h │ │ │ │ │ │ ├── concat.cpp │ │ │ │ │ │ ├── elewise.cpp │ │ │ │ │ │ ├── gather.cpp │ │ │ │ │ │ ├── linear.cpp │ │ │ │ │ │ ├── linear_parallel.cpp │ │ │ │ │ │ ├── paged_attention.cpp │ │ │ │ │ │ ├── reduce.cpp │ │ │ │ │ │ ├── reshape_and_cache.cpp │ │ │ │ │ │ ├── rms_norm.cpp │ │ │ │ │ │ ├── rope.cpp │ │ │ │ │ │ ├── self_attention.cpp │ │ │ │ │ │ ├── slice.cpp │ │ │ │ │ │ ├── softmax.cpp │ │ │ │ │ │ ├── sort.cpp │ │ │ │ │ │ ├── 
split.cpp │ │ │ │ │ │ ├── transdata.cpp │ │ │ │ │ │ └── transpose.cpp │ │ │ │ │ ├── custom_ops │ │ │ │ │ │ ├── masked_fill_scalar_operation.cpp │ │ │ │ │ │ ├── masked_fill_scalar_operation.h │ │ │ │ │ │ ├── new_empty_operation.cpp │ │ │ │ │ │ ├── new_empty_operation.h │ │ │ │ │ │ ├── prepare_moe_operation.cpp │ │ │ │ │ │ ├── prepare_moe_operation.h │ │ │ │ │ │ ├── renormalize_operation.cpp │ │ │ │ │ │ ├── renormalize_operation.h │ │ │ │ │ │ ├── reshape_operation.cpp │ │ │ │ │ │ ├── reshape_operation.h │ │ │ │ │ │ ├── scalar_tensor_operaion.cpp │ │ │ │ │ │ ├── scalar_tensor_operation.h │ │ │ │ │ │ ├── slice_scatter_operation.cpp │ │ │ │ │ │ ├── slice_scatter_operation.h │ │ │ │ │ │ ├── squeeze_operation.cpp │ │ │ │ │ │ ├── squeeze_operation.h │ │ │ │ │ │ ├── unsqueeze_operation.cpp │ │ │ │ │ │ ├── unsqueeze_operation.h │ │ │ │ │ │ ├── view_operation.cpp │ │ │ │ │ │ ├── view_operation.h │ │ │ │ │ │ ├── zeros_like_operation.cpp │ │ │ │ │ │ ├── zeros_like_operation.h │ │ │ │ │ │ ├── zeros_operation.cpp │ │ │ │ │ │ └── zeros_operation.h │ │ │ │ │ ├── operation_creator.cpp │ │ │ │ │ └── operation_creator.h │ │ │ │ ├── third_party │ │ │ │ │ ├── half │ │ │ │ │ │ └── include │ │ │ │ │ │ │ └── half.hpp │ │ │ │ │ ├── json │ │ │ │ │ │ └── single_include │ │ │ │ │ │ │ └── nlohmann │ │ │ │ │ │ │ ├── json.hpp │ │ │ │ │ │ │ └── json_fwd.hpp │ │ │ │ │ └── spdlog │ │ │ │ │ │ └── include │ │ │ │ │ │ └── spdlog │ │ │ │ │ │ ├── async.h │ │ │ │ │ │ ├── async_logger-inl.h │ │ │ │ │ │ ├── async_logger.h │ │ │ │ │ │ ├── cfg │ │ │ │ │ │ ├── argv.h │ │ │ │ │ │ ├── env.h │ │ │ │ │ │ ├── helpers-inl.h │ │ │ │ │ │ └── helpers.h │ │ │ │ │ │ ├── common-inl.h │ │ │ │ │ │ ├── common.h │ │ │ │ │ │ ├── details │ │ │ │ │ │ ├── backtracer-inl.h │ │ │ │ │ │ ├── backtracer.h │ │ │ │ │ │ ├── circular_q.h │ │ │ │ │ │ ├── console_globals.h │ │ │ │ │ │ ├── file_helper-inl.h │ │ │ │ │ │ ├── file_helper.h │ │ │ │ │ │ ├── fmt_helper.h │ │ │ │ │ │ ├── log_msg-inl.h │ │ │ │ │ │ ├── log_msg.h │ │ │ │ │ │ ├── 
log_msg_buffer-inl.h │ │ │ │ │ │ ├── log_msg_buffer.h │ │ │ │ │ │ ├── mpmc_blocking_q.h │ │ │ │ │ │ ├── null_mutex.h │ │ │ │ │ │ ├── os-inl.h │ │ │ │ │ │ ├── os.h │ │ │ │ │ │ ├── periodic_worker-inl.h │ │ │ │ │ │ ├── periodic_worker.h │ │ │ │ │ │ ├── registry-inl.h │ │ │ │ │ │ ├── registry.h │ │ │ │ │ │ ├── synchronous_factory.h │ │ │ │ │ │ ├── tcp_client-windows.h │ │ │ │ │ │ ├── tcp_client.h │ │ │ │ │ │ ├── thread_pool-inl.h │ │ │ │ │ │ ├── thread_pool.h │ │ │ │ │ │ ├── udp_client-windows.h │ │ │ │ │ │ ├── udp_client.h │ │ │ │ │ │ └── windows_include.h │ │ │ │ │ │ ├── fmt │ │ │ │ │ │ ├── bin_to_hex.h │ │ │ │ │ │ ├── bundled │ │ │ │ │ │ │ ├── args.h │ │ │ │ │ │ │ ├── chrono.h │ │ │ │ │ │ │ ├── color.h │ │ │ │ │ │ │ ├── compile.h │ │ │ │ │ │ │ ├── core.h │ │ │ │ │ │ │ ├── fmt.license.rst │ │ │ │ │ │ │ ├── format-inl.h │ │ │ │ │ │ │ ├── format.h │ │ │ │ │ │ │ ├── locale.h │ │ │ │ │ │ │ ├── os.h │ │ │ │ │ │ │ ├── ostream.h │ │ │ │ │ │ │ ├── printf.h │ │ │ │ │ │ │ ├── ranges.h │ │ │ │ │ │ │ ├── std.h │ │ │ │ │ │ │ └── xchar.h │ │ │ │ │ │ ├── chrono.h │ │ │ │ │ │ ├── compile.h │ │ │ │ │ │ ├── fmt.h │ │ │ │ │ │ ├── ostr.h │ │ │ │ │ │ ├── ranges.h │ │ │ │ │ │ ├── std.h │ │ │ │ │ │ └── xchar.h │ │ │ │ │ │ ├── formatter.h │ │ │ │ │ │ ├── fwd.h │ │ │ │ │ │ ├── logger-inl.h │ │ │ │ │ │ ├── logger.h │ │ │ │ │ │ ├── mdc.h │ │ │ │ │ │ ├── pattern_formatter-inl.h │ │ │ │ │ │ ├── pattern_formatter.h │ │ │ │ │ │ ├── sinks │ │ │ │ │ │ ├── android_sink.h │ │ │ │ │ │ ├── ansicolor_sink-inl.h │ │ │ │ │ │ ├── ansicolor_sink.h │ │ │ │ │ │ ├── base_sink-inl.h │ │ │ │ │ │ ├── base_sink.h │ │ │ │ │ │ ├── basic_file_sink-inl.h │ │ │ │ │ │ ├── basic_file_sink.h │ │ │ │ │ │ ├── callback_sink.h │ │ │ │ │ │ ├── daily_file_sink.h │ │ │ │ │ │ ├── dist_sink.h │ │ │ │ │ │ ├── dup_filter_sink.h │ │ │ │ │ │ ├── hourly_file_sink.h │ │ │ │ │ │ ├── kafka_sink.h │ │ │ │ │ │ ├── mongo_sink.h │ │ │ │ │ │ ├── msvc_sink.h │ │ │ │ │ │ ├── null_sink.h │ │ │ │ │ │ ├── ostream_sink.h │ │ │ │ │ │ ├── qt_sinks.h │ 
│ │ │ │ │ ├── ringbuffer_sink.h │ │ │ │ │ │ ├── rotating_file_sink-inl.h │ │ │ │ │ │ ├── rotating_file_sink.h │ │ │ │ │ │ ├── sink-inl.h │ │ │ │ │ │ ├── sink.h │ │ │ │ │ │ ├── stdout_color_sinks-inl.h │ │ │ │ │ │ ├── stdout_color_sinks.h │ │ │ │ │ │ ├── stdout_sinks-inl.h │ │ │ │ │ │ ├── stdout_sinks.h │ │ │ │ │ │ ├── syslog_sink.h │ │ │ │ │ │ ├── systemd_sink.h │ │ │ │ │ │ ├── tcp_sink.h │ │ │ │ │ │ ├── udp_sink.h │ │ │ │ │ │ ├── win_eventlog_sink.h │ │ │ │ │ │ ├── wincolor_sink-inl.h │ │ │ │ │ │ └── wincolor_sink.h │ │ │ │ │ │ ├── spdlog-inl.h │ │ │ │ │ │ ├── spdlog.h │ │ │ │ │ │ ├── stopwatch.h │ │ │ │ │ │ ├── tweakme.h │ │ │ │ │ │ └── version.h │ │ │ │ └── utils │ │ │ │ │ ├── common.cpp │ │ │ │ │ ├── common.h │ │ │ │ │ ├── config.cpp │ │ │ │ │ ├── config.h │ │ │ │ │ ├── global_dict.cpp │ │ │ │ │ ├── global_dict.h │ │ │ │ │ ├── log.h │ │ │ │ │ ├── misc.cpp │ │ │ │ │ ├── misc.h │ │ │ │ │ ├── operation_util.h │ │ │ │ │ ├── scalar.cpp │ │ │ │ │ ├── scalar.h │ │ │ │ │ ├── tensor_utils.cpp │ │ │ │ │ ├── tensor_utils.h │ │ │ │ │ ├── timer.h │ │ │ │ │ ├── workspace.cpp │ │ │ │ │ └── workspace.h │ │ │ └── utils.py │ │ ├── compile_job.py │ │ ├── config.py │ │ ├── conversion.py │ │ ├── ext_ops.py │ │ ├── infer_res_utils.py │ │ ├── opset_convert.py │ │ └── pattern_replacement.py │ │ ├── CMakeLists.txt │ │ └── __init__.py ├── ops │ ├── __init__.py │ └── llm.py ├── utils │ ├── __init__.py │ ├── config.py │ ├── registry.py │ └── type_annotation.py └── vendor │ ├── __init__.py │ ├── ascend │ ├── CMakeLists.txt │ ├── __init__.py │ ├── csrc │ │ ├── ascend_ops.hpp │ │ ├── flash_attention.cpp │ │ ├── init.cpp │ │ ├── moe_gating_topk_softmax.cpp │ │ ├── op_api_common.cpp │ │ ├── op_api_common.hpp │ │ ├── torch_npu_symbol_fix.cpp │ │ ├── torch_npu_utils.cpp │ │ └── torch_npu_utils.hpp │ ├── pytorch_patch.py │ ├── torch_npu_ops.py │ └── utils.py │ ├── camb │ ├── CMakeLists.txt │ ├── __init__.py │ ├── camb_ops.py │ └── pytorch_patch.py │ └── maca │ ├── CMakeLists.txt │ ├── __init__.py 
│ ├── context_flashattention.py │ ├── csrc │ ├── CMakeLists.txt │ ├── activation_kernels.cu │ ├── attention │ │ ├── attention_dtypes.h │ │ ├── attention_generic.cuh │ │ ├── attention_kernels.cu │ │ ├── attention_utils.cuh │ │ ├── dtype_bfloat16.cuh │ │ ├── dtype_float16.cuh │ │ ├── dtype_float32.cuh │ │ └── dtype_fp8.cuh │ ├── cache.h │ ├── cache_kernels.cu │ ├── cuda_compat.h │ ├── dispatch_utils.h │ ├── layernorm_kernels.cu │ ├── moe │ │ ├── moe_ops.h │ │ └── topk_softmax_kernels.cu │ ├── moe_align_block_size_kernels.cu │ ├── ops.h │ ├── pos_encoding_kernels.cu │ ├── pybind.cpp │ ├── quantization │ │ └── fp8 │ │ │ ├── amd │ │ │ ├── hip_float8.h │ │ │ ├── hip_float8_impl.h │ │ │ └── quant_utils.cuh │ │ │ ├── common.cu │ │ │ ├── fp8_marlin.cu │ │ │ └── nvidia │ │ │ └── quant_utils.cuh │ └── reduction_utils.cuh │ └── maca_ops.py ├── docs └── quant │ ├── ascend_kv_quant.md │ └── ascend_scales_offsets.py ├── pyproject.toml ├── requirements ├── ascend │ ├── build.txt │ ├── cann.txt │ ├── full.txt │ ├── runtime.txt │ └── torch.txt ├── camb │ ├── build.txt │ ├── full.txt │ ├── runtime.txt │ └── torch.txt └── maca │ ├── build.txt │ ├── full.txt │ ├── runtime.txt │ └── torch.txt ├── run_format.sh ├── scripts ├── build_wheel.sh └── build_wheel_allpy.sh ├── setup.py └── tests ├── readme.md └── test_lmdeploy ├── __init__.py ├── e2e ├── __init__.py ├── config.yaml ├── conftest.py ├── prompt_case.yaml ├── pytest.ini ├── test_model_tp1.py └── test_model_tp2.py ├── scripts └── test_model_tp2.sh └── utils ├── __init__.py ├── config_utils.py ├── pipeline_chat.py └── rule_condition_assert.py /.clang-format: -------------------------------------------------------------------------------- 1 | # This file is used by clang-format to autoformat dlinfer source code 2 | # 3 | # The clang-format is part of llvm toolchain. 4 | # It need to install llvm and clang to format source code style. 
5 | # 6 | # The basic usage is, 7 | # clang-format -i -style=file PATH/TO/SOURCE/CODE 8 | # 9 | # The -style=file implicit use ".clang-format" file located in one of 10 | # parent directory. 11 | # The -i means inplace change. 12 | # 13 | # The document of clang-format is 14 | # http://clang.llvm.org/docs/ClangFormat.html 15 | # http://clang.llvm.org/docs/ClangFormatStyleOptions.html 16 | --- 17 | Language: Cpp 18 | BasedOnStyle: Google 19 | IndentWidth: 4 20 | TabWidth: 4 21 | ContinuationIndentWidth: 4 22 | AccessModifierOffset: -4 # The private/protected/public has no indent in class 23 | Standard: Cpp11 24 | AllowAllParametersOfDeclarationOnNextLine: true 25 | BinPackParameters: true 26 | BinPackArguments: false 27 | BreakAfterAttributes: Leave 28 | ColumnLimit: 160 29 | DerivePointerAlignment: false 30 | PointerAlignment: Left 31 | ReferenceAlignment: Left 32 | InsertNewlineAtEOF: true 33 | SpacesBeforeTrailingComments: 2 34 | IncludeIsMainSourceRegex: (\.cu)$ 35 | IncludeCategories: 36 | - Regex: '^<.*\.h(pp)?>' 37 | Priority: 1 38 | - Regex: '^<.*' 39 | Priority: 2 40 | - Regex: '.*' 41 | Priority: 3 42 | ... 43 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # IMPORTANT: 2 | # This file is ONLY used to merge PRs. Approvals from people in this file are required for merging. 3 | # 4 | # WARNING: The last matching pattern takes the most precedence and OVERWRITES previous rules. 5 | # Please be very careful when adding new patterns. 
6 | 7 | # ---------- base ---------- 8 | * @jinminxi104 9 | 10 | # ---------- ci ---------- 11 | /cmake/ @jinminxi104 @CyCle1024 12 | 13 | # ---------- ci ---------- 14 | /tests/ @jinminxi104 @wugeshui 15 | -------------------------------------------------------------------------------- /.github/workflows/format.yaml: -------------------------------------------------------------------------------- 1 | name: dlinfer format ci 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | branches: 7 | - main 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | markdownlint: 14 | runs-on: ubuntu-latest 15 | if: github.repository == 'DeepLink-org/dlinfer' 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 16 21 | - name: Collect changed files 22 | uses: tj-actions/changed-files@v40 23 | id: changed-files 24 | with: 25 | files: '**/*.md' 26 | separator: ',' 27 | - name: MarkdownLint 28 | if: steps.changed-files.outputs.any_changed == 'true' 29 | uses: DavidAnson/markdownlint-cli2-action@v14 30 | with: 31 | globs: ${{ steps.changed-files.outputs.all_changed_files }} 32 | separator: ',' 33 | 34 | clang-format: 35 | needs: markdownlint 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | - uses: cpp-linter/cpp-linter-action@v2 40 | id: cpp-lint 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.CI_TOKEN}} 43 | with: 44 | style: file 45 | ignore: 'dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/*' 46 | tidy-checks: '-*' # disable clang tidy at this stage 47 | version: 17 48 | - name: Fail test 49 | if: steps.cpp-lint.outputs.checks-failed > 0 50 | run: echo "Some files failed the linting checks!" 
&& exit 1 51 | 52 | python-black: 53 | needs: markdownlint 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/checkout@v4 57 | - uses: psf/black@stable 58 | env: 59 | GITHUB_TOKEN: ${{ secrets.CI_TOKEN}} 60 | with: # see: https://black.readthedocs.io/en/stable/getting_started.html 61 | version: "~= 24.3.0" 62 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(dlinfer LANGUAGES CXX) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | 8 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 9 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) 10 | 11 | # the default CMAKE_BUILD_TYPE is Release 12 | if(NOT CMAKE_BUILD_TYPE) 13 | set(CMAKE_BUILD_TYPE "Release") 14 | endif() 15 | 16 | set(DEVICE "" CACHE STRING "device string, default empty string") 17 | string(TOLOWER "${DEVICE}" DEVICE) 18 | 19 | list(APPEND SUPPORTED_DEVICE "ascend" "maca" "camb") 20 | 21 | if(NOT DEVICE) 22 | message(FATAL_ERROR "Please specify variable DEVICE of dlinfer!") 23 | elseif(NOT DEVICE IN_LIST SUPPORTED_DEVICE) 24 | message(FATAL_ERROR "Device ${DEVICE} is not supported! 
Supported devices: ${SUPPORTED_DEVICE}") 25 | endif() 26 | 27 | add_subdirectory(dlinfer/vendor/${DEVICE}) 28 | add_subdirectory(dlinfer/graph/dicp/vendor) 29 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT_cn.md: -------------------------------------------------------------------------------- 1 | # 参与者公约 2 | 3 | ## 我们的保证 4 | 5 | 为了促进一个开放透明且友好的环境,我们作为贡献者和维护者保证:无论年龄、种族、民族、性别认同和表达(方式)、体型、身体健全与否、经验水平、国籍、个人表现、宗教或性别取向,参与者在我们项目和社区中都免于骚扰。 6 | 7 | ## 我们的标准 8 | 9 | 有助于创造正面环境的行为包括但不限于: 10 | * 使用友好和包容性语言 11 | * 尊重不同的观点和经历 12 | * 耐心地接受建设性批评 13 | * 关注对社区最有利的事情 14 | * 友善对待其他社区成员 15 | 16 | 身为参与者不能接受的行为包括但不限于: 17 | * 使用与性有关的言语或是图像,以及不受欢迎的性骚扰 18 | * 捣乱/煽动/造谣的行为或进行侮辱/贬损的评论,人身攻击及政治攻击 19 | * 公开或私下的骚扰 20 | * 未经许可地发布他人的个人资料,例如住址或是电子地址 21 | * 其他可以被合理地认定为不恰当或者违反职业操守的行为 22 | 23 | ## 我们的责任 24 | 25 | 项目维护者有责任为「可接受的行为」标准做出诠释,以及对已发生的不被接受的行为采取恰当且公平的纠正措施。 26 | 27 | 项目维护者有权利及责任去删除、编辑、拒绝与本行为标准有所违背的评论(comments)、提交(commits)、代码、wiki 编辑、问题(issues)和其他贡献,以及项目维护者可暂时或永久性的禁止任何他们认为有不适当、威胁、冒犯、有害行为的贡献者。 28 | 29 | ## 使用范围 30 | 31 | 当一个人代表该项目或是其社区时,本行为标准适用于其项目平台和公共平台。 32 | 33 | 代表项目或是社区的情况,举例来说包括使用官方项目的电子邮件地址、通过官方的社区媒体账号发布或线上或线下事件中担任指定代表。 34 | 35 | 该项目的呈现方式可由其项目维护者进行进一步的定义及解释。 36 | 37 | ## 强制执行 38 | 39 | 可以通过peizhilin@pjlab.org.cn,来联系项目团队来举报滥用、骚扰或其他不被接受的行为。 40 | 41 | 任何维护团队认为有必要且适合的所有投诉都将进行审查及调查,并做出相对应的回应。项目小组有对事件回报者有保密的义务。具体执行的方针近一步细节可能会单独公布。 42 | 43 | 没有切实地遵守或是执行本行为标准的项目维护人员,可能会因项目领导人或是其他成员的决定,暂时或是永久地取消其参与资格。 44 | 45 | ## 来源 46 | 47 | 本行为标准改编自[贡献者公约][主页],版本 1.4 48 | 可在此观看https://www.contributor-covenant.org/zh-cn/version/1/4/code-of-conduct.html 49 | 50 | [主页]: https://www.contributor-covenant.org 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, DeepLink 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the 
following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /assets/dlinfer_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/assets/dlinfer_arch.png -------------------------------------------------------------------------------- /cmake/FindATB.cmake: -------------------------------------------------------------------------------- 1 | include(FindPackageHandleStandardArgs) 2 | 3 | if (DEFINED ENV{ATB_HOME_PATH}) 4 | set(ATB_HOME_PATH $ENV{ATB_HOME_PATH} 5 | CACHE STRING "atb default home") 6 | else() 7 | set(ATB_HOME_PATH "/usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_0" 8 | CACHE STRING "atb toolkit default home") 9 | endif() 10 | 11 | # Include directories. 12 | find_path(ATB_INCLUDE_DIRS 13 | NAMES atb/atb_infer.h 14 | PATHS ${ATB_HOME_PATH}/include 15 | ) 16 | 17 | # Library dependencies. 18 | find_library(ATB_LIBRARY 19 | NAMES atb 20 | PATHS ${ATB_HOME_PATH}/lib 21 | ) 22 | set(ATB_LIBRARIES ${ATB_LIBRARY}) 23 | 24 | #TODO (chenchiyu): construct modern cmake target for ATB 25 | message(STATUS "Found ATB: ATB_LIBRARIES: ${ATB_LIBRARIES}, ATB_INCLUDE_DIRS: ${ATB_INCLUDE_DIRS}") 26 | find_package_handle_standard_args(ATB DEFAULT_MSG ATB_LIBRARIES ATB_INCLUDE_DIRS) 27 | -------------------------------------------------------------------------------- /cmake/FindCANNToolkit.cmake: -------------------------------------------------------------------------------- 1 | include(FindPackageHandleStandardArgs) 2 | 3 | if (DEFINED ENV{ASCEND_TOOLKIT_HOME}) 4 | set(ASCEND_TOOLKIT_HOME $ENV{ASCEND_TOOLKIT_HOME} 5 | CACHE STRING "ascend toolkit default home") 6 | else() 7 | set(ASCEND_TOOLKIT_HOME "/usr/local/Ascend/ascend-toolkit/latest" 8 | CACHE STRING "ascend toolkit default home") 9 | endif() 10 | 11 | # Include directories. 
12 | find_path(CANN_INCLUDE_DIRS 13 | NAMES acl/acl.h acl/acl_rt.h hccl/hccl.h 14 | PATHS ${ASCEND_TOOLKIT_HOME}/include 15 | ) 16 | 17 | # Library dependencies. 18 | find_library(HCCL_LIB 19 | NAMES hccl 20 | PATHS ${ASCEND_TOOLKIT_HOME}/lib64 21 | ) 22 | if (HCCL_LIB) 23 | list(APPEND CANN_LIBRARY ${HCCL_LIB}) 24 | else() 25 | message(FATAL_ERROR "libhccl.so not found") 26 | endif() 27 | 28 | find_library(OPAPI_LIB 29 | NAMES opapi 30 | PATHS ${ASCEND_TOOLKIT_HOME}/lib64 31 | ) 32 | if (OPAPI_LIB) 33 | list(APPEND CANN_LIBRARY ${OPAPI_LIB}) 34 | else() 35 | message(FATAL_ERROR "libopapi.so not found") 36 | endif() 37 | 38 | find_library(ASCENDCL_LIB 39 | NAMES ascendcl 40 | PATHS ${ASCEND_TOOLKIT_HOME}/lib64 41 | ) 42 | if (ASCENDCL_LIB) 43 | list(APPEND CANN_LIBRARY ${ASCENDCL_LIB}) 44 | else() 45 | message(FATAL_ERROR "libascendcl.so not found") 46 | endif() 47 | 48 | set(CANN_LIBRARIES ${CANN_LIBRARY}) 49 | 50 | #TODO (chenchiyu): construct modern cmake target for CANNToolkit 51 | message(STATUS "Found CANN Toolkit: CANN_LIBRARIES: ${CANN_LIBRARIES}, CANN_INCLUDE_DIRS: ${CANN_INCLUDE_DIRS}") 52 | find_package_handle_standard_args(CANNToolkit DEFAULT_MSG CANN_LIBRARIES CANN_INCLUDE_DIRS) 53 | -------------------------------------------------------------------------------- /cmake/FindTorch_npu.cmake: -------------------------------------------------------------------------------- 1 | include(FindPackageHandleStandardArgs) 2 | 3 | # Include directories. 4 | find_path(TORCH_NPU_INCLUDE_DIRS NAMES torch_npu/csrc/include/ops.h) 5 | 6 | # Library dependencies. 
7 | find_library(TORCH_NPU_LIBRARY NAMES torch_npu npu_profiler) 8 | 9 | if (CMAKE_SYSTEM_NAME STREQUAL "Linux") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILENAME__='\"$$(notdir $$(abspath $$<))\"'") 11 | endif() 12 | set(TORCH_NPU_LIBRARIES ${TORCH_NPU_LIBRARY}) 13 | 14 | # torch/csrc/python_headers depends Python.h 15 | find_package(Python COMPONENTS Interpreter Development) 16 | 17 | #TODO (chenchiyu): construct modern cmake target for Torch_npu 18 | message(STATUS "Found Torch_npu: TORCH_NPU_LIBRARY: ${TORCH_NPU_LIBRARY}, TORCH_NPU_INCLUDE_DIRS: ${TORCH_NPU_INCLUDE_DIRS}") 19 | find_package_handle_standard_args(Torch_npu DEFAULT_MSG TORCH_NPU_LIBRARY TORCH_NPU_INCLUDE_DIRS) 20 | -------------------------------------------------------------------------------- /cmake/ascend.cmake: -------------------------------------------------------------------------------- 1 | execute_process( 2 | COMMAND python -c "from torch.utils import cmake_prefix_path; \ 3 | print(cmake_prefix_path + '/Torch', end='')" 4 | OUTPUT_VARIABLE Torch_DIR 5 | ) 6 | 7 | execute_process( 8 | COMMAND python -c "from importlib.metadata import distribution; \ 9 | print(str(distribution('torch_npu').locate_file('torch_npu')), end='')" 10 | OUTPUT_VARIABLE Torch_npu_ROOT 11 | ) 12 | 13 | execute_process( 14 | COMMAND python -c "import torch; \ 15 | print('1' if torch.compiled_with_cxx11_abi() else '0', end='')" 16 | OUTPUT_VARIABLE _GLIBCXX_USE_CXX11_ABI 17 | ) 18 | 19 | execute_process( 20 | COMMAND python -c "import torch; from packaging import version; \ 21 | torch_version = version.parse(torch.__version__).base_version; \ 22 | print('1' if version.parse(torch_version) > version.parse('2.3.1') else '0', end='')" 23 | OUTPUT_VARIABLE Torch_npu_VERSION_HIGHER_THAN_231 24 | ) 25 | 26 | find_package(Torch REQUIRED) 27 | find_package(Torch_npu REQUIRED) 28 | find_package(CANNToolkit REQUIRED) 29 | find_package(ATB) 30 | 
-------------------------------------------------------------------------------- /dlinfer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import dlinfer.vendor as vendor 3 | 4 | vendor.vendor_torch_init() 5 | __version__ = "0.2.0" 6 | -------------------------------------------------------------------------------- /dlinfer/framework/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | -------------------------------------------------------------------------------- /dlinfer/framework/lmdeploy_ext/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import dlinfer.framework.transformers_ext 3 | from . import quants 4 | from . import cudagraph 5 | from . import device 6 | -------------------------------------------------------------------------------- /dlinfer/framework/lmdeploy_ext/cudagraph/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import importlib 3 | from functools import lru_cache 4 | from dlinfer.vendor import vendor_name 5 | 6 | 7 | graph_vendor = ["maca", "camb"] 8 | 9 | 10 | @lru_cache(1) 11 | def import_vendor_module(vendor_name_str): 12 | if vendor_name_str in graph_vendor: 13 | importlib.import_module(f".{vendor_name_str}_cudagraph", __package__) 14 | 15 | 16 | def vendor_graph_init(): 17 | import_vendor_module(vendor_name) 18 | 19 | 20 | vendor_graph_init() 21 | -------------------------------------------------------------------------------- /dlinfer/framework/lmdeploy_ext/device/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 
2 | import importlib 3 | from functools import lru_cache 4 | from dlinfer.vendor import vendor_name 5 | 6 | 7 | vendor = ["camb", "ascend"] 8 | 9 | 10 | @lru_cache(1) 11 | def import_vendor_module(vendor_name_str): 12 | if vendor_name_str in vendor: 13 | importlib.import_module(f".{vendor_name_str}", __package__) 14 | 15 | 16 | def vendor_device_init(): 17 | import_vendor_module(vendor_name) 18 | 19 | 20 | vendor_device_init() 21 | -------------------------------------------------------------------------------- /dlinfer/framework/lmdeploy_ext/quants/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import importlib 3 | from functools import lru_cache 4 | from dlinfer.vendor import vendor_name 5 | 6 | 7 | awq_vendor = ["ascend"] 8 | 9 | 10 | @lru_cache(1) 11 | def import_vendor_module(vendor_name_str): 12 | if vendor_name_str in awq_vendor: 13 | importlib.import_module(f".{vendor_name_str}_awq", __package__) 14 | 15 | 16 | def vendor_quant_init(): 17 | import_vendor_module(vendor_name) 18 | 19 | 20 | vendor_quant_init() 21 | -------------------------------------------------------------------------------- /dlinfer/framework/transformers_ext/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 
2 | import importlib 3 | import os, sys 4 | import typing 5 | from typing import Any, Dict, List, Optional, Union 6 | import transformers 7 | from .patch import apply_model_patches 8 | 9 | 10 | def patched_get_class_in_module(*args, **kwargs) -> typing.Type: 11 | ret_class = transformers_get_class_in_module(*args, **kwargs) 12 | apply_model_patches(importlib.import_module(ret_class.__module__)) 13 | return ret_class 14 | 15 | 16 | transformers_get_class_in_module = transformers.dynamic_module_utils.get_class_in_module 17 | transformers.dynamic_module_utils.get_class_in_module = patched_get_class_in_module 18 | -------------------------------------------------------------------------------- /dlinfer/framework/transformers_ext/cogvlm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import dlinfer.ops as ext_ops 4 | 5 | 6 | def PatchedAttention_forward(self, x: "tensor(B, L, D)") -> "tensor(B, L, D)": 7 | B, L, H = x.shape 8 | qkv = self.query_key_value(x) 9 | qkv = qkv.reshape(B, L, 3, H).permute(2, 0, 1, 3) # 3, B, L, H 10 | q, k, v = qkv[0], qkv[1], qkv[2] 11 | 12 | out = ext_ops.prefill_attention( 13 | q, 14 | k, 15 | v, 16 | None, 17 | None, 18 | L, 19 | self.num_heads, 20 | self.num_heads, 21 | [], 22 | attn_output=q, 23 | ) 24 | output = self.dense(out.view(B, L, -1)) 25 | return output 26 | -------------------------------------------------------------------------------- /dlinfer/framework/transformers_ext/internvl.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 
2 | import torch 3 | import dlinfer.ops as ext_ops 4 | 5 | 6 | def InternAttention_naive_attn(self, x): 7 | B, N, C = x.shape 8 | qkv = self.qkv(x).reshape(B, N, 3, C).permute(2, 0, 1, 3) 9 | q, k, v = qkv.unbind(0) 10 | if self.qk_normalization: 11 | q = self.q_norm(q) 12 | k = self.k_norm(k) 13 | 14 | attn_output = ext_ops.prefill_attention( 15 | q, 16 | k, 17 | v, 18 | None, 19 | None, 20 | N, 21 | self.num_heads, 22 | self.num_heads, 23 | [], 24 | attn_output=q, 25 | ) 26 | 27 | x = self.proj(attn_output.reshape(B, N, C)) 28 | x = self.proj_drop(x) 29 | return x 30 | 31 | 32 | def InternRMSNorm_forward(self, hidden_states): 33 | return ext_ops.rms_norm(hidden_states, self.weight, self.variance_epsilon) 34 | -------------------------------------------------------------------------------- /dlinfer/framework/transformers_ext/patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import transformers 3 | import inspect 4 | 5 | 6 | def apply_model_patches(module): 7 | if module.__name__.endswith(".modeling_internlm2"): 8 | from . import internlm2 9 | 10 | module.InternLM2RMSNorm.forward = ( 11 | internlm2.modeling_internlm2_InternLM2RMSNorm_forward 12 | ) 13 | module.InternLM2Attention.forward = ( 14 | internlm2.modeling_internlm2_InternLM2Attention_forward 15 | ) 16 | module.InternLM2ForCausalLM.prepare_inputs_for_generation = ( 17 | internlm2.modeling_internlm2_InternLM2ForCausalLM_prepare_inputs_for_generation 18 | ) 19 | transformers.cache_utils.DynamicCache.update = ( 20 | internlm2.transformers_cache_utils_dynamiccache_update 21 | ) 22 | elif module.__name__.endswith(".modeling_internvl_chat"): 23 | from . 
# Copyright (c) 2024, DeepLink. All rights reserved.
from dlinfer.utils.config import Config


# Global dlinfer graph-mode switch.  Graph (dicp) compilation is disabled by
# default; integrations (e.g. the lmdeploy extension) flip this flag at setup.
config = Config(enable_graph_mode=False)
class DeviceCompileJob(metaclass=ABCMeta):
    """Abstract description of a single device (vendor) compile job.

    Concrete backends implement :meth:`get_key` (a stable, hashable cache
    key for the job) and :meth:`get_compile_result` (the loaded, runnable
    artifact); ``DeviceKernelCache`` consumes both.

    Fix: the original wrote ``__metaclass__ = ABCMeta`` — Python 2 syntax
    that is inert on Python 3 — so ``@abstractmethod`` was never enforced
    and incomplete subclasses could be instantiated silently.  Declaring
    ``metaclass=ABCMeta`` restores enforcement.  The abstract methods also
    gain the missing ``self`` parameter to match how subclasses define and
    callers invoke them (``device_compile_job.get_key()``).
    """

    def __init__(self):
        pass

    @abstractmethod
    def get_key(self):
        """Return a hashable cache key uniquely identifying this job."""

    @abstractmethod
    def get_compile_result(self):
        """Compile (or load) and return the runnable kernel artifact."""
# Centralized torch-version detection for the dicp dynamo bridge.
import torch
from packaging import version

# base_version strips local/pre-release suffixes (e.g. "2.1.0+cu118" -> "2.1.0")
# so the startswith checks below see a clean release string.
torch_version = version.parse(torch.__version__).base_version

# Exactly one of these flags is set by the chain below; dicp only supports
# the listed torch release lines and refuses to import on anything else.
is_torch_200 = False
is_torch_210 = False
is_torch_220 = False
is_torch_231 = False
is_torch_251 = False

# NOTE(review): the first branch matches any "2.0*" prefix while the later
# ones pin "2.1." / "2.3.1" etc. — presumably intentional, but worth
# confirming against the versions CI actually tests.
if torch_version.startswith("2.0"):
    is_torch_200 = True
elif torch_version.startswith("2.1."):
    is_torch_210 = True
elif torch_version.startswith("2.2."):
    is_torch_220 = True
elif torch_version.startswith("2.3.1"):
    is_torch_231 = True
elif torch_version.startswith("2.5.1"):
    is_torch_251 = True
else:
    raise ValueError(f"unsupported dicp torch version: {torch.__version__}")

# Ordered feature gates used elsewhere in the bridge (e.g. compile.py picks
# the AsyncCompile import location based on is_torch_251_or_higher).
is_torch_210_or_higher = version.parse(torch_version) >= version.parse("2.1")
is_torch_220_or_higher = version.parse(torch_version) >= version.parse("2.2")
is_torch_231_or_higher = version.parse(torch_version) >= version.parse("2.3.1")
is_torch_251_or_higher = version.parse(torch_version) >= version.parse("2.5.1")
class AtbModel:
    """Thin Python wrapper around the DICPModel torch custom class.

    The custom class is registered by the C++ runtime (TORCH_LIBRARY in
    dicp_model.cpp) and executes a serialized ATB graph.
    """

    def __init__(self, model_path) -> None:
        # model_path: path to the serialized graph produced by codegen.
        self.model = torch.classes.DICPModel.DICPModel(model_path)

    @record_function("load_and_run")  # labels this call in torch profiler traces
    def run(self, inputs, outputs, param):
        # Out-variant execution: the runtime writes results into `outputs`
        # (see DICPModel::ExecuteOut in the C++ runtime); nothing is returned.
        self.model.execute_out(inputs, outputs, param)
${CMAKE_CURRENT_SOURCE_DIR}/ops/aclnn_ops/*.cpp 11 | ${CMAKE_CURRENT_SOURCE_DIR}/ops/atb_ops/*.cpp 12 | ${CMAKE_CURRENT_SOURCE_DIR}/ops/custom_ops/*.cpp 13 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 14 | ${CMAKE_CURRENT_SOURCE_DIR}/utils/*.cpp 15 | ) 16 | 17 | set(COMPILE_OPTIONS 18 | -Wno-unused-function 19 | -Wno-unused-variable 20 | -Wno-unused-parameter 21 | -Wno-attributes 22 | -D_GLIBCXX_USE_CXX11_ABI=0 23 | ) 24 | 25 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") 26 | 27 | add_library(dicp_model SHARED ${SOURCES}) 28 | 29 | target_include_directories( 30 | dicp_model PUBLIC 31 | ${THIRD_PARTY_DIR}/json/single_include 32 | ${THIRD_PARTY_DIR}/spdlog/include 33 | ${THIRD_PARTY_DIR}/half/include 34 | ${CMAKE_CURRENT_SOURCE_DIR} 35 | ${TORCH_NPU_INCLUDE_DIRS} 36 | ${CANN_INCLUDE_DIRS} 37 | ${CANN_INCLUDE_DIRS}/aclnn 38 | ${ATB_INCLUDE_DIRS} 39 | ) 40 | 41 | target_compile_options(dicp_model PRIVATE ${COMPILE_OPTIONS}) 42 | 43 | target_link_libraries(dicp_model PUBLIC 44 | Python::Python 45 | torch 46 | ${TORCH_NPU_LIBRARY} 47 | ${CANN_LIBRARY} 48 | ${ATB_LIBRARY} 49 | ) 50 | 51 | file(RELATIVE_PATH OUTPUT_LIB_RELATIVE_PATH "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/../") 52 | install( 53 | TARGETS dicp_model 54 | DESTINATION ${OUTPUT_LIB_RELATIVE_PATH} 55 | ) 56 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/dicp_model.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "dicp_model.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "model.h" 10 | #include "utils/log.h" 11 | #include "utils/misc.h" 12 | #include "utils/tensor_utils.h" 13 | 14 | using namespace dicp; 15 | 16 | DICPModel::DICPModel(const std::string& modelPath) : modelPath_(modelPath) { 17 | modelId_ = utils::GetNewModelId(); 18 | DICP_LOG(INFO) << "DICPModel create start, modelId:" << modelId_ << ", modelPath:" << modelPath_; 19 
| model_ = std::make_shared(std::to_string(modelId_), modelPath); 20 | 21 | atb::Context* rawContext = nullptr; 22 | auto st = atb::CreateContext(&rawContext); 23 | DICP_LOG_IF(st != atb::NO_ERROR, ERROR) << "create atb context failed!"; 24 | context_ = std::move(std::unique_ptr(rawContext, atb::DestroyContext)); 25 | } 26 | 27 | DICPModel::~DICPModel() { context_.reset(); }; 28 | 29 | void DICPModel::ExecuteOut(std::vector atInTensors, std::vector atOutTensors, const std::string& param) { 30 | context_->SetExecuteStream(utils::GetCurrentStream()); 31 | 32 | std::vector inTensors; 33 | tensor_utils::TransferAtTensor2AtbTensor(atInTensors, inTensors); 34 | 35 | std::vector outTensors; 36 | tensor_utils::TransferAtTensor2AtbTensor(atOutTensors, outTensors); 37 | 38 | model_->Execute(context_.get(), inTensors, outTensors, param); 39 | } 40 | 41 | TORCH_LIBRARY(DICPModel, m) { m.class_("DICPModel").def(torch::init()).def("execute_out", &DICPModel::ExecuteOut); } 42 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/dicp_model.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "model.h" 10 | 11 | class DICPModel : public torch::CustomClassHolder { 12 | public: 13 | DICPModel(const std::string& modelPath); 14 | ~DICPModel(); 15 | void ExecuteOut(std::vector atInTensors, std::vector atOutTensors, const std::string& param); 16 | 17 | private: 18 | std::string modelPath_; 19 | std::shared_ptr model_; 20 | int modelId_ = 0; 21 | std::shared_ptr context_; 22 | }; 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/acl_nn_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 
#include 6 | 7 | #include 8 | 9 | #include "atb/operation.h" 10 | #include "ops/operation_creator.h" 11 | #include "utils/log.h" 12 | 13 | namespace dicp { 14 | constexpr size_t SVECTOR_SIZE = 8; 15 | 16 | struct AclNnTensor { 17 | atb::Tensor atbTensor; 18 | aclTensor* tensor = nullptr; 19 | int CreateTensor(const std::string& opName); 20 | int InitTensor(void* executor, const std::string& opName, const size_t index, bool isInput); 21 | }; 22 | 23 | class AclNnOperation : public atb::Operation { 24 | public: 25 | explicit AclNnOperation(const std::string& name); 26 | ~AclNnOperation() override; 27 | std::string GetName() const override; 28 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 29 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 30 | 31 | protected: 32 | aclTensor* CreateAclTensor(const AclNnTensor& aclNnTensor); 33 | atb::Status UpdateAclTensorDataPtr(const atb::VariantPack& variantPack); 34 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 35 | int CreateAclTensors(const atb::VariantPack& variantPack); 36 | std::string opName_; 37 | atb::SVector aclInTensors_; 38 | atb::SVector aclOutTensors_; 39 | aclOpExecutor* aclExecutor_ = nullptr; 40 | 41 | private: 42 | virtual int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) = 0; 43 | virtual int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) = 0; 44 | }; 45 | } // namespace dicp 46 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/add_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnAddOperation : public AclNnOperation { 9 | 
#pragma once
#include "acl_nn_operation.h"

namespace dicp {

// Fused Add + RMSNorm aclnn operation (wraps the aclnnAddRmsNorm kernel
// family via the AclNnOperation workspace/execute hooks).
// NOTE(review): this header was recovered from a lossy extraction — the
// template arguments of atb::SVector (likely atb::TensorDesc) were stripped
// and must be restored against the original source before compiling.
class AclNnAddRmsNormOperation : public AclNnOperation {
public:
    // name: operation instance name; epsilon: RMSNorm stabilizer added to the
    // mean of squares before the reciprocal square root.
    explicit AclNnAddRmsNormOperation(const std::string& name, float epsilon);
    ~AclNnAddRmsNormOperation() override;
    atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override;
    uint32_t GetInputNum() const override;
    uint32_t GetOutputNum() const override;

private:
    float epsilon = 1e-5;  // default used when the constructor value is not applied
    // AclNnOperation hooks: compute workspace size / launch the kernel.
    int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override;
    int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override;
};

}  // namespace dicp
#include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnAddsOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnAddsOperation(const std::string& name, float value, float aplpha, const std::string& dtype); 11 | ~AclNnAddsOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar other_; 18 | DICPScalar alpha_; 19 | aclScalar* aclOther_ = nullptr; 20 | aclScalar* aclAlpha_ = nullptr; 21 | 22 | std::string dtype_; 23 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 24 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 25 | }; 26 | 27 | } // namespace dicp 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/arange_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnArangeOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnArangeOperation(const std::string& name, int64_t start, int64_t end, int64_t step, aclDataType dtype); 10 | ~AclNnArangeOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t start_; 17 | int64_t end_; 18 | int64_t step_; 19 | int64_t sizeArange_; 20 | aclDataType dtype_; 21 | aclScalar* aclStart_ = nullptr; 22 | aclScalar* aclEnd_ = nullptr; 23 | aclScalar* aclStep_ = nullptr; 24 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 25 | int CallAclExecute(uint8_t* workspace, uint64_t 
workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 26 | }; 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/batch_matmul_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnBatchMatMulOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnBatchMatMulOperation(const std::string& name, int8_t cubeMathType); 8 | ~AclNnBatchMatMulOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int8_t cubeMathType = 1; 15 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 16 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 17 | }; 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/bincount_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnBincountOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnBincountOperation(const std::string& name, int64_t minlength); 8 | ~AclNnBincountOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int64_t minlength_; 15 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 16 | int CallAclExecute(uint8_t* 
workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 17 | }; 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/bitwise_not_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnBitwiseNotOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnBitwiseNotOperation(const std::string& name); 8 | ~AclNnBitwiseNotOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/cast_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnCastOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnCastOperation(const std::string& name, aclDataType dtype); 8 | ~AclNnCastOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | aclDataType dtype_; 15 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 16 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* 
aclExecutor, aclrtStream stream) override; 17 | }; 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/cat_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnCatOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnCatOperation(const std::string& name, int32_t inputNum, int32_t concatDim); 8 | ~AclNnCatOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int32_t concatDim = -1; 15 | int32_t inputNum = -1; 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/cumsum_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnCumsumOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnCumsumOperation(const std::string& name, int64_t dim, aclDataType dtype); 8 | ~AclNnCumsumOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int64_t dim_; 15 | aclDataType dtype_; 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* 
workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/div_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class AclNnDivOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnDivOperation(const std::string& name); 10 | ~AclNnDivOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/divs_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class AclNnDivsOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnDivsOperation(const std::string& name, float divisor, const std::string& dtype); 10 | ~AclNnDivsOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | DICPScalar divisor_; 17 | aclScalar* aclDivisor_ = nullptr; 18 | int SetAclNnWorkspaceExecutor(uint64_t& 
workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/dynamic_quant_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnDynamicQuantOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnDynamicQuantOperation(const std::string& name); 8 | ~AclNnDynamicQuantOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/expand_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "acl_nn_operation.h" 6 | 7 | namespace dicp { 8 | class AclNnExpandOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnExpandOperation(const std::string& name, std::vector size); 11 | ~AclNnExpandOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | std::vector size_; 18 | aclIntArray* aclSize_ = nullptr; 19 | bool 
needUpdateSize_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/gather_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "acl_nn_operation.h" 6 | #include "utils/scalar.h" 7 | 8 | namespace dicp { 9 | class AclNnGatherOperation : public AclNnOperation { 10 | public: 11 | explicit AclNnGatherOperation(const std::string& name, int64_t dim); 12 | ~AclNnGatherOperation() override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | private: 18 | int64_t dim_; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/ge_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnGeScalarOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnGeScalarOperation(const std::string& name, float value, const std::string& dtype); 11 | ~AclNnGeScalarOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t 
GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar other_; 18 | aclScalar* aclOther_ = nullptr; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/grouped_matmul_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "acl_nn_operation.h" 7 | 8 | namespace dicp { 9 | 10 | class AclNnGroupedMatmulOperation : public AclNnOperation { 11 | public: 12 | explicit AclNnGroupedMatmulOperation(const std::string& name, int64_t splitItem); 13 | ~AclNnGroupedMatmulOperation() override; 14 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 15 | uint32_t GetInputNum() const override; 16 | uint32_t GetOutputNum() const override; 17 | 18 | private: 19 | int64_t splitItem = 2; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/gt_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnGtScalarOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnGtScalarOperation(const std::string& name, const std::string& value, const 
std::string& dtype); 11 | ~AclNnGtScalarOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar other_; 18 | aclScalar* aclOther_ = nullptr; 19 | bool need_update_value_; 20 | std::string value_; 21 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 22 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 23 | }; 24 | 25 | } // namespace dicp 26 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/index_select_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnIndexSelectOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnIndexSelectOperation(const std::string& name, int64_t dim); 10 | ~AclNnIndexSelectOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t dim_; 17 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 19 | }; 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_copy_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class 
AclNnInplaceCopyOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnInplaceCopyOperation(const std::string& name); 8 | ~AclNnInplaceCopyOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_div_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class AclNnInplaceDivOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnInplaceDivOperation(const std::string& name); 10 | ~AclNnInplaceDivOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_index_copy_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "acl_nn_operation.h" 6 | 7 | namespace dicp { 8 | class 
AclNnInplaceIndexCopyOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnInplaceIndexCopyOperation(const std::string& name, int64_t dim); 11 | ~AclNnInplaceIndexCopyOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | int64_t dim_; 18 | mutable std::vector indexVec_; 19 | mutable aclTensor* index_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_masked_fill_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class AclNnInplaceMaskedFillScalar : public AclNnOperation { 8 | public: 9 | explicit AclNnInplaceMaskedFillScalar(const std::string& name, float value, const std::string& dtype); 10 | ~AclNnInplaceMaskedFillScalar() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | DICPScalar value_; 17 | aclScalar* aclValue_ = nullptr; 18 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- 
/dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/inplace_scatter_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "acl_nn_operation.h" 6 | #include "utils/scalar.h" 7 | 8 | namespace dicp { 9 | class AclNnInplaceScatterOperation : public AclNnOperation { 10 | public: 11 | explicit AclNnInplaceScatterOperation(const std::string& name, int64_t dim, int64_t reduceType); 12 | ~AclNnInplaceScatterOperation() override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | private: 18 | int64_t dim_; 19 | int64_t reduceType_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/max_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnMaxOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnMaxOperation(const std::string& name); 8 | ~AclNnMaxOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | 
-------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_finalize_routing_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnMoeFinalizeRoutingOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnMoeFinalizeRoutingOperation(const std::string& name); 10 | ~AclNnMoeFinalizeRoutingOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_gating_topk_softmax.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnMoeGatingTopkSoftmaxOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnMoeGatingTopkSoftmaxOperation(const std::string& name, int64_t topk, int64_t renorm, bool outputSoftmaxResultFlag); 10 | ~AclNnMoeGatingTopkSoftmaxOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t topk_; 17 | int64_t renorm_; 18 | bool outputSoftmaxResultFlag_; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) 
override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_init_routing_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnMoeInitRoutingOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnMoeInitRoutingOperation(const std::string& name, int64_t numExperts); 10 | ~AclNnMoeInitRoutingOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | mutable int64_t activeNum_; 17 | int64_t numExperts_; 18 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_token_permute_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class MoeTokenPermuteOperation : public AclNnOperation { 8 | public: 9 | explicit MoeTokenPermuteOperation(const std::string& name); 10 | ~MoeTokenPermuteOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 
| 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/moe_token_unpermute_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class MoeTokenUnpermuteOperation : public AclNnOperation { 8 | public: 9 | explicit MoeTokenUnpermuteOperation(const std::string& name); 10 | ~MoeTokenUnpermuteOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/mul_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnMulOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnMulOperation(const std::string& name); 11 | ~AclNnMulOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | 
int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 19 | }; 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/muls_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnMulsOperation : public AclNnOperation { 9 | public: 10 | // value might be a SymInt type, we need to get the correct value at runtime. 11 | explicit AclNnMulsOperation(const std::string& name, const std::string& value, const std::string& dtype); 12 | ~AclNnMulsOperation() override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | private: 18 | DICPScalar other_; 19 | aclScalar* aclOther_ = nullptr; 20 | bool need_update_value_; 21 | std::string value_; 22 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 23 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 24 | }; 25 | 26 | } // namespace dicp 27 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/permute_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include "acl_nn_operation.h" 5 | 6 | namespace dicp { 7 | class AclNnPermuteOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnPermuteOperation(const std::string& name, std::vector dims); 10 | ~AclNnPermuteOperation() override; 11 | 
atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | std::vector dims_; 17 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 19 | }; 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/pow_tensor_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnPowTensorScalarOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnPowTensorScalarOperation(const std::string& name, float exponent, const std::string& dtype); 11 | ~AclNnPowTensorScalarOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar exponent_; 18 | aclScalar* aclExponent_; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/pow_tensor_tensor_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnPowTensorTensorOperation : public 
AclNnOperation { 8 | public: 9 | explicit AclNnPowTensorTensorOperation(const std::string& name); 10 | ~AclNnPowTensorTensorOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 17 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 18 | }; 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/quant_matmul_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnQuantMatmulOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnQuantMatmulOperation(const std::string& name, bool hasBias); 8 | ~AclNnQuantMatmulOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | bool hasBias_ = false; 15 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 16 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 17 | }; 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/reciprocal_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnReciprocalOperation : public AclNnOperation { 
6 | public: 7 | explicit AclNnReciprocalOperation(const std::string& name); 8 | ~AclNnReciprocalOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/reduce_sum_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "acl/acl.h" 6 | #include "acl_nn_operation.h" 7 | #include "utils/scalar.h" 8 | namespace dicp { 9 | 10 | class AclNnReduceSumOperation : public AclNnOperation { 11 | public: 12 | explicit AclNnReduceSumOperation(const std::string& name, const std::vector& dims, bool keepDim, const std::string& dtype); 13 | ~AclNnReduceSumOperation() override; 14 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 15 | uint32_t GetInputNum() const override; 16 | uint32_t GetOutputNum() const override; 17 | 18 | private: 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | 22 | private: 23 | std::vector dims_; 24 | aclIntArray* aclDims_ = nullptr; 25 | bool keepDim_; 26 | aclDataType dtype_; 27 | }; 28 | 29 | } // namespace dicp 30 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/s_where_operation.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "acl_nn_operation.h" 3 | 4 | namespace dicp { 5 | class AclNnSWhereOperation : public AclNnOperation { 6 | public: 7 | explicit AclNnSWhereOperation(const std::string& name); 8 | ~AclNnSWhereOperation() override; 9 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 10 | uint32_t GetInputNum() const override; 11 | uint32_t GetOutputNum() const override; 12 | 13 | private: 14 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 15 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 16 | }; 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/scatter_value_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnScatterValueOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnScatterValueOperation(const std::string& name, int64_t dim, float value, const std::string& value_dtype, int64_t reduce); 11 | ~AclNnScatterValueOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | int64_t dim_; 18 | int64_t reduce_; 19 | DICPScalar value_; 20 | aclScalar* aclValue_ = nullptr; 21 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 22 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 23 | }; 24 | 25 | } // namespace dicp 26 | 
-------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/slice_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnSliceOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnSliceOperation(const std::string& name, int64_t dim, int64_t start, int64_t end, int64_t step); 10 | ~AclNnSliceOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t dim_; 17 | int64_t start_; 18 | int64_t end_; 19 | int64_t step_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/softmax_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnSoftmaxOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnSoftmaxOperation(const std::string& name, int64_t dim); 10 | ~AclNnSoftmaxOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t dim_; 17 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* 
aclExecutor, aclrtStream stream) override; 19 | }; 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/split_with_size_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include "acl_nn_operation.h" 5 | 6 | namespace dicp { 7 | class AclNnSplitWithSizeOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnSplitWithSizeOperation(const std::string& name, int64_t splitDim, std::vector splitSizes); 10 | ~AclNnSplitWithSizeOperation() override; 11 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t splitDim_; 17 | std::vector splitSizes_; 18 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/sub_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnSubOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnSubOperation(const std::string& name, float aplpha, const std::string& dtype); 11 | ~AclNnSubOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | DICPScalar alpha_; 18 | aclScalar* 
aclAlpha_ = nullptr; 19 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 20 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/subs_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | 8 | class AclNnSubsOperation : public AclNnOperation { 9 | public: 10 | explicit AclNnSubsOperation(const std::string& name, float value, float aplpha, const std::string& dtype); 11 | ~AclNnSubsOperation() override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 18 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 19 | 20 | private: 21 | DICPScalar other_; 22 | DICPScalar alpha_; 23 | aclScalar* aclOther_ = nullptr; 24 | aclScalar* aclAlpha_ = nullptr; 25 | }; 26 | 27 | } // namespace dicp 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/aclnn_ops/topk_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class AclNnTopkOperation : public AclNnOperation { 8 | public: 9 | explicit AclNnTopkOperation(const std::string& name, int64_t k, int64_t dim); 10 | ~AclNnTopkOperation() override; 11 | atb::Status InferShape(const atb::SVector& 
inTensorDescs, atb::SVector& outTensorDescs) const override; 12 | uint32_t GetInputNum() const override; 13 | uint32_t GetOutputNum() const override; 14 | 15 | private: 16 | int64_t k_; 17 | int64_t dim_; 18 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 19 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 20 | }; 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/activation.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* ActivationOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::ActivationParam param; 7 | if (paramJson.contains("activationType")) { 8 | auto value = paramJson["activationType"].get(); 9 | param.activationType = static_cast(value); 10 | } 11 | if (paramJson.contains("scale")) { 12 | param.scale = paramJson["scale"].get(); 13 | } 14 | if (paramJson.contains("dim")) { 15 | param.dim = paramJson["dim"].get(); 16 | } 17 | DICP_LOG(INFO) << "ActivationParam: activationType: " << param.activationType << " scale:" << param.scale << " dim:" << param.dim; 18 | atb::Operation* op = nullptr; 19 | CREATE_OPERATION_NO_RETURN(param, &op); 20 | return op; 21 | } 22 | 23 | REGISTER_ATB_OPERATION("ActivationOperation", ActivationOperationCreate); 24 | 25 | } // namespace dicp 26 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/allreduce.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | namespace dicp { 3 | 4 | atb::Operation* AllReduceOperationCreate(const nlohmann::json& paramJson) { 5 | atb::infer::AllReduceParam param; 6 | if (paramJson.contains("rank")) { 7 
| param.rank = paramJson["rank"].get(); 8 | } 9 | if (paramJson.contains("rankSize")) { 10 | param.rankSize = paramJson["rankSize"].get(); 11 | } 12 | if (paramJson.contains("rankRoot")) { 13 | param.rankRoot = paramJson["rankRoot"].get(); 14 | } 15 | if (paramJson.contains("allReduceType")) { 16 | param.allReduceType = paramJson["allReduceType"].get(); 17 | } 18 | if (paramJson.contains("backend")) { 19 | param.backend = paramJson["backend"].get(); 20 | } 21 | if (paramJson.contains("commMode")) { 22 | auto tmp = paramJson["commMode"].get(); 23 | param.commMode = static_cast(tmp); 24 | } 25 | if (paramJson.contains("commDomain")) { 26 | param.commDomain = paramJson["commDomain"].get(); 27 | } 28 | if (paramJson.contains("rankTableFile")) { 29 | param.rankTableFile = paramJson["rankTableFile"].get(); 30 | } 31 | DICP_LOG(INFO) << "AllReduceParam: rank:" << param.rank << ", rankSize:" << param.rankSize << ", backend:" << param.backend << ", allReduceType" 32 | << param.allReduceType << ". 
commDomain" << param.commDomain << ", rankTableFile" << param.rankTableFile; 33 | atb::Operation* op = nullptr; 34 | 35 | CREATE_OPERATION_NO_RETURN(param, &op); 36 | return op; 37 | } 38 | 39 | REGISTER_ATB_OPERATION("AllReduceOperation", AllReduceOperationCreate); 40 | 41 | } // namespace dicp 42 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/atb_ops.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "atb/infer_op_params.h" 6 | #include "atb/operation.h" 7 | #include "ops/operation_creator.h" 8 | #include "utils/log.h" 9 | #include "utils/operation_util.h" 10 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/concat.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | [[maybe_unused]] atb::Operation* ConcatOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::ConcatParam param; 7 | if (paramJson.contains("concatDim")) { 8 | param.concatDim = paramJson["concatDim"].get(); 9 | } 10 | DICP_LOG(INFO) << "ConcatParam: concatDIm: " << param.concatDim; 11 | atb::Operation* op = nullptr; 12 | CREATE_OPERATION_NO_RETURN(param, &op); 13 | return op; 14 | } 15 | 16 | REGISTER_ATB_OPERATION("ConcatOperation", ConcatOperationCreate); 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/elewise.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* ElewiseOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::ElewiseParam param; 7 | if 
(paramJson.contains("elewiseType")) { 8 | auto tmp = paramJson["elewiseType"].get(); 9 | param.elewiseType = static_cast(tmp); 10 | } 11 | if (paramJson.contains("quantParam")) { 12 | auto quantJson = paramJson["quantParam"]; 13 | atb::infer::ElewiseParam::QuantParam quantParam; 14 | if (quantJson.contains("inputScale")) { 15 | quantParam.inputScale = quantJson["inputScale"].get(); 16 | } 17 | if (quantJson.contains("inputOffset")) { 18 | quantParam.inputOffset = quantJson["inputOffset"].get(); 19 | } 20 | param.quantParam = quantParam; 21 | } 22 | if (paramJson.contains("mulsParam")) { 23 | auto mulsJson = paramJson["mulsParam"]; 24 | atb::infer::ElewiseParam::MulsParam mulsParam; 25 | if (mulsJson.contains("varAttr")) { 26 | mulsParam.varAttr = mulsJson["varAttr"].get(); 27 | } 28 | param.mulsParam = mulsParam; 29 | } 30 | if (paramJson.contains("outTensorType")) { 31 | auto tmp = paramJson["outTensorType"].get(); 32 | param.outTensorType = static_cast(tmp); 33 | } 34 | DICP_LOG(INFO) << "ElewiseParam: elewiseType:" << param.elewiseType << ", outTensorType:" << param.outTensorType; 35 | atb::Operation* op = nullptr; 36 | ; 37 | CREATE_OPERATION_NO_RETURN(param, &op); 38 | return op; 39 | } 40 | 41 | REGISTER_ATB_OPERATION("ElewiseOperation", ElewiseOperationCreate); 42 | 43 | } // namespace dicp 44 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/gather.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* GatherOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::GatherParam param; 7 | if (paramJson.contains("axis")) { 8 | param.axis = paramJson["axis"].get(); 9 | } 10 | DICP_LOG(INFO) << "GatherParam: axis: " << param.axis; 11 | atb::Operation* op = nullptr; 12 | CREATE_OPERATION_NO_RETURN(param, &op); 13 | return op; 14 | } 15 | 16 | 
REGISTER_ATB_OPERATION("GatherOperation", GatherOperationCreate); 17 | 18 | } // namespace dicp 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/linear.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* LinearOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::LinearParam param; 7 | if (paramJson.contains("transposeA")) { 8 | param.transposeA = paramJson["transposeA"].get(); 9 | } 10 | if (paramJson.contains("transposeB")) { 11 | param.transposeB = paramJson["transposeB"].get(); 12 | } 13 | if (paramJson.contains("hasBias")) { 14 | param.hasBias = paramJson["hasBias"].get(); 15 | } 16 | if (paramJson.contains("outDataType")) { 17 | param.outDataType = aclDataType(paramJson["outDataType"].get()); 18 | } 19 | DICP_LOG(INFO) << "LinearParam transposeA:" << param.transposeA << ", transposeB:" << param.transposeB << ", hasBias:" << param.hasBias 20 | << ", outDataType:" << param.outDataType; 21 | atb::Operation* op = nullptr; 22 | ; 23 | CREATE_OPERATION_NO_RETURN(param, &op); 24 | return op; 25 | } 26 | 27 | REGISTER_ATB_OPERATION("LinearOperation", LinearOperationCreate); 28 | 29 | } // namespace dicp 30 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/linear_parallel.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | namespace dicp { 3 | 4 | atb::Operation* LinearParallelOperationCreate(const nlohmann::json& paramJson) { 5 | atb::infer::LinearParallelParam param; 6 | if (paramJson.contains("rank")) { 7 | param.rank = paramJson["rank"].get(); 8 | } 9 | if (paramJson.contains("rankSize")) { 10 | param.rankSize = paramJson["rankSize"].get(); 11 | } 12 | if (paramJson.contains("rankRoot")) { 
13 | param.rankRoot = paramJson["rankRoot"].get(); 14 | } 15 | if (paramJson.contains("hasResidual")) { 16 | param.hasResidual = paramJson["hasResidual"].get(); 17 | } 18 | if (paramJson.contains("parallelType")) { 19 | auto type = paramJson["parallelType"].get(); 20 | param.type = static_cast(type); 21 | } 22 | if (paramJson.contains("backend")) { 23 | param.backend = paramJson["backend"].get(); 24 | } 25 | if (paramJson.contains("commDomain")) { 26 | param.commDomain = paramJson["commDomain"].get(); 27 | } 28 | if (paramJson.contains("commMode")) { 29 | auto mode = paramJson["commMode"].get(); 30 | param.commMode = static_cast(mode); 31 | } 32 | if (paramJson.contains("rankTableFile")) { 33 | param.rankTableFile = paramJson["rankTableFile"].get(); 34 | } 35 | DICP_LOG(INFO) << "LinearParallelParam: rank:" << param.rank << ", rankSize:" << param.rankSize << ", outDataType:" << param.outDataType 36 | << " backend:" << param.backend << ", commDomain:" << param.commDomain << ", commMode:" << param.commMode << ", rankTableFile" 37 | << param.rankTableFile; 38 | atb::Operation* op = nullptr; 39 | 40 | CREATE_OPERATION_NO_RETURN(param, &op); 41 | return op; 42 | } 43 | 44 | REGISTER_ATB_OPERATION("LinearParallelOperation", LinearParallelOperationCreate); 45 | 46 | } // namespace dicp 47 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/paged_attention.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* PagedAttentionOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::PagedAttentionParam param; 7 | if (paramJson.contains("headNum")) { 8 | param.headNum = paramJson["headNum"].get(); 9 | } 10 | if (paramJson.contains("qkScale")) { 11 | param.qkScale = paramJson["qkScale"].get(); 12 | } 13 | if (paramJson.contains("kvHeadNum")) { 14 | param.kvHeadNum = 
paramJson["kvHeadNum"].get(); 15 | } 16 | if (paramJson.contains("maskType")) { 17 | auto value = paramJson["maskType"].get(); 18 | param.maskType = static_cast(value); 19 | } 20 | if (paramJson.contains("mlaVHeadSize")) { 21 | param.mlaVHeadSize = paramJson["mlaVHeadSize"].get(); 22 | } 23 | DICP_LOG(INFO) << "PagedAttentionParam: headNum" << param.headNum << " kvHeadNum: " << param.kvHeadNum << " qkScale: " << param.qkScale 24 | << " maskType: " << param.maskType; 25 | atb::Operation* op = nullptr; 26 | CREATE_OPERATION_NO_RETURN(param, &op); 27 | return op; 28 | } 29 | 30 | REGISTER_ATB_OPERATION("PagedAttentionOperation", PagedAttentionOperationCreate); 31 | 32 | } // namespace dicp 33 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/reduce.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | #include "utils/common.h" 3 | 4 | namespace dicp { 5 | 6 | atb::Operation* ReduceOperationCreate(const nlohmann::json& paramJson) { 7 | atb::infer::ReduceParam param; 8 | if (paramJson.contains("reduceType")) { 9 | auto type = paramJson["reduceType"].get(); 10 | param.reduceType = static_cast(type); 11 | } 12 | if (paramJson.contains("axis")) { 13 | auto axis = paramJson["axis"].get>(); 14 | param.axis.resize(axis.size()); 15 | for (size_t i = 0; i < axis.size(); ++i) { 16 | param.axis[i] = axis[i]; 17 | } 18 | } 19 | DICP_LOG(INFO) << "ReduceParam: reduceType: " << param.reduceType << ", axis:" << svectorToString(param.axis); 20 | atb::Operation* op = nullptr; 21 | CREATE_OPERATION_NO_RETURN(param, &op); 22 | return op; 23 | } 24 | 25 | REGISTER_ATB_OPERATION("ReduceOperation", ReduceOperationCreate); 26 | 27 | } // namespace dicp 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/reshape_and_cache.cpp: 
-------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | inline atb::Operation* ReshapeAndCacheOperationCreate([[maybe_unused]] const nlohmann::json& paramJson) { 6 | atb::infer::ReshapeAndCacheParam param; 7 | if (paramJson.contains("KvCacheCfg")) { 8 | auto value = paramJson["KvCacheCfg"].get(); 9 | param.kvCacheCfg = static_cast(value); 10 | } 11 | DICP_LOG(INFO) << "ReshapeAndCacheParam: {}"; 12 | atb::Operation* op = nullptr; 13 | CREATE_OPERATION_NO_RETURN(param, &op); 14 | return op; 15 | } 16 | 17 | REGISTER_ATB_OPERATION("ReshapeAndCacheOperation", ReshapeAndCacheOperationCreate); 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/rope.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* RopeOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::RopeParam param; 7 | if (paramJson.contains("rotaryCoeff")) { 8 | param.rotaryCoeff = paramJson["rotaryCoeff"].get(); 9 | } 10 | if (paramJson.contains("cosFormat")) { 11 | param.cosFormat = paramJson["cosFormat"].get(); 12 | } 13 | DICP_LOG(INFO) << "RopeParam: rotaryCoeff:" << param.rotaryCoeff << ", cosFormat:" << param.cosFormat; 14 | atb::Operation* op = nullptr; 15 | CREATE_OPERATION_NO_RETURN(param, &op); 16 | return op; 17 | } 18 | 19 | REGISTER_ATB_OPERATION("RopeOperation", RopeOperationCreate); 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/slice.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | #include "utils/common.h" 3 | 4 | namespace dicp { 5 | 6 | atb::Operation* 
SliceOperationCreate(const nlohmann::json& paramJson) { 7 | atb::infer::SliceParam param; 8 | if (paramJson.contains("offsets")) { 9 | auto tmp = paramJson["offsets"].get>(); 10 | param.offsets.resize(tmp.size()); 11 | for (size_t i = 0; i < tmp.size(); ++i) { 12 | param.offsets[i] = tmp[i]; 13 | } 14 | } 15 | if (paramJson.contains("size")) { 16 | auto tmp = paramJson["size"].get>(); 17 | param.size.resize(tmp.size()); 18 | for (size_t i = 0; i < tmp.size(); ++i) { 19 | param.size[i] = tmp[i]; 20 | } 21 | } 22 | 23 | DICP_LOG(INFO) << "SliceParam: offsets:" << svectorToString(param.offsets) << ", size:" << svectorToString(param.size); 24 | atb::Operation* op = nullptr; 25 | 26 | CREATE_OPERATION_NO_RETURN(param, &op); 27 | return op; 28 | } 29 | 30 | REGISTER_ATB_OPERATION("SliceOperation", SliceOperationCreate); 31 | 32 | } // namespace dicp 33 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/softmax.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* SoftmaxOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::SoftmaxParam param; 7 | if (paramJson.contains("axes")) { 8 | auto tmp = paramJson["axes"].get>(); 9 | param.axes.resize(tmp.size()); 10 | for (size_t i = 0; i < tmp.size(); ++i) { 11 | param.axes[i] = tmp[i]; 12 | } 13 | } 14 | DICP_LOG(INFO) << "SoftmaxParam: axes.size:" << param.axes.size() << " axes0: " << param.axes[0]; 15 | atb::Operation* op = nullptr; 16 | CREATE_OPERATION_NO_RETURN(param, &op); 17 | return op; 18 | } 19 | 20 | REGISTER_ATB_OPERATION("SoftmaxOperation", SoftmaxOperationCreate); 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/sort.cpp: 
-------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* SortOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::SortParam param; 7 | if (paramJson.contains("num")) { 8 | auto tmp = paramJson["num"].get(); 9 | param.num.resize(1); 10 | param.num[0] = tmp; 11 | } 12 | DICP_LOG(INFO) << "SortParam: topk:" << param.num[0]; 13 | atb::Operation* op = nullptr; 14 | CREATE_OPERATION_NO_RETURN(param, &op); 15 | return op; 16 | } 17 | 18 | REGISTER_ATB_OPERATION("SortOperation", SortOperationCreate); 19 | 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/split.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* SplitOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::SplitParam param; 7 | if (paramJson.contains("splitDim")) { 8 | param.splitDim = paramJson["splitDim"].get(); 9 | } 10 | if (paramJson.contains("splitNum")) { 11 | param.splitNum = paramJson["splitNum"].get(); 12 | } 13 | DICP_LOG(INFO) << "SplitParam: splitDim: " << param.splitDim << " splitNum: " << param.splitNum; 14 | atb::Operation* op = nullptr; 15 | CREATE_OPERATION_NO_RETURN(param, &op); 16 | return op; 17 | } 18 | 19 | REGISTER_ATB_OPERATION("SplitOperation", SplitOperationCreate); 20 | 21 | } // namespace dicp 22 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/transdata.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* TransdataOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::TransdataParam param; 7 | if 
(paramJson.contains("transdataType")) { 8 | auto value = paramJson["transdataType"].get(); 9 | param.transdataType = static_cast(value); 10 | } 11 | DICP_LOG(INFO) << "TransdataParam: transdataType: " << param.transdataType; 12 | atb::Operation* op = nullptr; 13 | CREATE_OPERATION_NO_RETURN(param, &op); 14 | return op; 15 | } 16 | 17 | REGISTER_ATB_OPERATION("TransdataOperation", TransdataOperationCreate); 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/atb_ops/transpose.cpp: -------------------------------------------------------------------------------- 1 | #include "atb_ops.h" 2 | 3 | namespace dicp { 4 | 5 | atb::Operation* TransposeOperationCreate(const nlohmann::json& paramJson) { 6 | atb::infer::TransposeParam param; 7 | if (paramJson.contains("perm")) { 8 | auto tmp = paramJson["perm"].get>(); 9 | param.perm.resize(tmp.size()); 10 | for (unsigned int i = 0; i < tmp.size(); ++i) { 11 | param.perm[i] = tmp[i]; 12 | } 13 | } 14 | DICP_LOG(INFO) << "TransposeParam: perm: " << param.perm; 15 | atb::Operation* op = nullptr; 16 | CREATE_OPERATION_NO_RETURN(param, &op); 17 | return op; 18 | } 19 | 20 | REGISTER_ATB_OPERATION("TransposeOperation", TransposeOperationCreate); 21 | 22 | } // namespace dicp 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/masked_fill_scalar_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | #include "utils/scalar.h" 5 | 6 | namespace dicp { 7 | class MaskedFillScalarOperation : public atb::Operation { 8 | public: 9 | explicit MaskedFillScalarOperation(const std::string& name, float value, const std::string& dtype); 10 | ~MaskedFillScalarOperation() override; 11 | std::string GetName() const 
override; 12 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 13 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 17 | 18 | private: 19 | aclTensor* CreateAclTensor(const AclNnTensor& aclNnTensor); 20 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 21 | int CreateAclTensors(const atb::VariantPack& variantPack); 22 | 23 | private: 24 | std::string opName_; 25 | DICPScalar value_; 26 | aclScalar* aclValue_ = nullptr; 27 | DICPScalar one_; 28 | aclScalar* aclOne_ = nullptr; 29 | aclOpExecutor* aclMulsExecutor_ = nullptr; 30 | aclOpExecutor* aclInplaceMaskedFillScalarExecutor_ = nullptr; 31 | uint64_t mulsWorkspaceSize_ = 0; 32 | uint64_t inplaceMaskedFillScalarWorkspaceSize_ = 0; 33 | 34 | private: 35 | atb::SVector aclInTensors_; 36 | atb::SVector aclOutTensors_; 37 | }; 38 | 39 | } // namespace dicp 40 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/new_empty_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class NewEmptyOperation : public AclNnOperation { 8 | public: 9 | explicit NewEmptyOperation(const std::string& name, const std::vector& size); 10 | ~NewEmptyOperation() override; 11 | std::string GetName() const override; 12 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 13 | uint32_t GetInputNum() const override; 14 | uint32_t GetOutputNum() const override; 15 | 16 | private: 17 | std::string opName_; 18 | 
std::vector size_; 19 | std::unordered_map dynamic_size_; 20 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 21 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 22 | }; 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/prepare_moe_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "ops/aclnn_ops/acl_nn_operation.h" 6 | 7 | namespace dicp { 8 | 9 | class PrepareMoeOperation : public atb::Operation { 10 | public: 11 | explicit PrepareMoeOperation(const std::string& name, int64_t numExperts); 12 | ~PrepareMoeOperation() override; 13 | 14 | std::string GetName() const override; 15 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 16 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 17 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 18 | uint32_t GetInputNum() const override; 19 | uint32_t GetOutputNum() const override; 20 | 21 | protected: 22 | std::string opName_; 23 | int64_t numExperts_; 24 | int64_t topk_; 25 | int64_t seqLength_; 26 | 27 | aclOpExecutor* aclArangeExecutor_ = nullptr; 28 | aclOpExecutor* aclPermuteExecutor_ = nullptr; 29 | aclOpExecutor* aclBincountExecutor_ = nullptr; 30 | aclOpExecutor* aclCumsumExecutor_ = nullptr; 31 | 32 | uint64_t arangeWorkspaceSize_ = 0; 33 | uint64_t permuteWorkspaceSize_ = 0; 34 | uint64_t bincountWorkspaceSize_ = 0; 35 | uint64_t cumsumWorkspaceSize_ = 0; 36 | 37 | private: 38 | atb::SVector aclInTensors_; 39 | atb::SVector aclOutTensors_; 40 | 41 | aclScalar* aclStart_ = nullptr; 42 | aclScalar* aclEnd_ = 
nullptr; 43 | aclScalar* aclStep_ = nullptr; 44 | 45 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 46 | int CreateAclTensors(const atb::VariantPack& variantPack); 47 | }; 48 | 49 | } // namespace dicp 50 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/renormalize_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class RenormalizeOperation : public atb::Operation { 8 | public: 9 | explicit RenormalizeOperation(const std::string& name, int64_t dim); 10 | ~RenormalizeOperation() override; 11 | 12 | std::string GetName() const override; 13 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 14 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 15 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 16 | uint32_t GetInputNum() const override; 17 | uint32_t GetOutputNum() const override; 18 | 19 | protected: 20 | std::string opName_; 21 | int64_t dim_; 22 | 23 | aclOpExecutor* aclReduceSumExecutor_ = nullptr; 24 | aclOpExecutor* aclDivExecutor_ = nullptr; 25 | 26 | uint64_t reduceSumWorkspaceSize_ = 0; 27 | uint64_t divWorkspaceSize_ = 0; 28 | 29 | private: 30 | atb::SVector aclInTensors_; 31 | atb::SVector aclOutTensors_; 32 | 33 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 34 | int CreateAclTensors(const atb::VariantPack& variantPack); 35 | }; 36 | 37 | } // namespace dicp 38 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/reshape_operation.cpp: 
-------------------------------------------------------------------------------- 1 | #include "reshape_operation.h" 2 | 3 | #include "utils/log.h" 4 | 5 | namespace dicp { 6 | 7 | const int NUM1 = 1; 8 | 9 | ReshapeOperation::ReshapeOperation(const std::string& name) : opName_(name) {} 10 | 11 | std::string ReshapeOperation::GetName() const { return opName_; } 12 | 13 | atb::Status ReshapeOperation::Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) { return atb::NO_ERROR; } 14 | 15 | atb::Status ReshapeOperation::Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) { 16 | return atb::NO_ERROR; 17 | } 18 | 19 | uint32_t ReshapeOperation::GetInputNum() const { return NUM1; } 20 | 21 | uint32_t ReshapeOperation::GetOutputNum() const { return NUM1; } 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/reshape_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "atb/operation.h" 6 | 7 | namespace dicp { 8 | 9 | class ReshapeOperation : public atb::Operation { 10 | public: 11 | explicit ReshapeOperation(const std::string& name); 12 | ~ReshapeOperation(){}; 13 | std::string GetName() const override; 14 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 15 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 16 | uint32_t GetInputNum() const override; 17 | uint32_t GetOutputNum() const override; 18 | 19 | protected: 20 | std::string opName_; 21 | }; 22 | 23 | } // namespace dicp 24 | -------------------------------------------------------------------------------- 
/dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/scalar_tensor_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ops/aclnn_ops/acl_nn_operation.h" 8 | #include "utils/scalar.h" 9 | 10 | namespace dicp { 11 | 12 | class ScalarTensorOperation : public atb::Operation { 13 | public: 14 | explicit ScalarTensorOperation(const std::string& name, float value, const std::string& dtype); 15 | ~ScalarTensorOperation(); 16 | std::string GetName() const override; 17 | atb::Status Setup(const atb::VariantPack& variantPack, uint64_t& workspaceSize, atb::Context* context) override; 18 | atb::Status Execute(const atb::VariantPack& variantPack, uint8_t* workspace, uint64_t workspaceSize, atb::Context* context) override; 19 | uint32_t GetInputNum() const override; 20 | uint32_t GetOutputNum() const override; 21 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 22 | 23 | private: 24 | aclTensor* CreateAclTensor(const AclNnTensor& aclNnTensor); 25 | AclNnTensor CreateTensor(atb::Tensor atbTensor); 26 | int CreateAclTensors(const atb::VariantPack& variantPack); 27 | 28 | private: 29 | std::string opName_; 30 | DICPScalar value_; 31 | DICPScalar zero_; 32 | DICPScalar alpha_; 33 | aclScalar* aclValue_ = nullptr; 34 | aclScalar* aclZero_ = nullptr; 35 | aclScalar* aclAlpha_ = nullptr; 36 | aclOpExecutor* aclZeroExecutor_ = nullptr; 37 | aclOpExecutor* aclAddsExecutor_ = nullptr; 38 | uint64_t mulsWorkspaceSize_ = 0; 39 | uint64_t addsWorkspaceSize_ = 0; 40 | atb::SVector aclOutTensors_; 41 | }; 42 | 43 | } // namespace dicp 44 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/slice_scatter_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 
2 | 3 | #include 4 | #include 5 | 6 | #include "ops/aclnn_ops/acl_nn_operation.h" 7 | 8 | namespace dicp { 9 | 10 | class SliceScatterOperation : public AclNnOperation { 11 | public: 12 | explicit SliceScatterOperation(const std::string& name, int64_t dim, int64_t start, int64_t end, int64_t step); 13 | ~SliceScatterOperation() override; 14 | 15 | std::string GetName() const override; 16 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 17 | uint32_t GetInputNum() const override; 18 | uint32_t GetOutputNum() const override; 19 | 20 | protected: 21 | std::string opName_; 22 | int64_t dim_, start_, end_, step_; 23 | mutable std::vector beginVec_, endVec_, stridesVec_, axesVec_; 24 | mutable aclIntArray *beginArray_, *endArray_, *stridesArray_, *axesArray_; 25 | 26 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 27 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 28 | }; 29 | 30 | } // namespace dicp 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/squeeze_operation.cpp: -------------------------------------------------------------------------------- 1 | #include "squeeze_operation.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "ops/operation_creator.h" 7 | 8 | namespace dicp { 9 | 10 | SqueezeOperation::SqueezeOperation(const std::string& name, std::vector squeezeDim) : ReshapeOperation(name), squeezeDim_(std::move(squeezeDim)) {} 11 | 12 | atb::Status SqueezeOperation::InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const { 13 | DICP_LOG(INFO) << "SqueezeOperation: " << opName_ << " infer shape start"; 14 | outTensorDescs.at(0).format = inTensorDescs.at(0).format; 15 | outTensorDescs.at(0).dtype = inTensorDescs.at(0).dtype; 16 | 17 | auto& oldShape = inTensorDescs.at(0).shape; 18 | 
std::vector dimValues(oldShape.dims, oldShape.dims + oldShape.dimNum); 19 | for (const auto& d : squeezeDim_) { 20 | int offset = d < 0 ? d + oldShape.dimNum : d; 21 | dimValues.erase(dimValues.begin() + offset); 22 | } 23 | outTensorDescs.at(0).shape.dimNum = dimValues.size(); 24 | std::copy(dimValues.begin(), dimValues.end(), outTensorDescs.at(0).shape.dims); 25 | 26 | DICP_LOG(INFO) << "SqueezeOperation: " << opName_ << " infer shape end, out shape: " << atbDimsToString(outTensorDescs.at(0).shape); 27 | return atb::NO_ERROR; 28 | } 29 | 30 | atb::Operation* CustomSqueezeOperationCreate(const nlohmann::json& paramJson) { 31 | std::string opName; 32 | std::vector squeezeDim; 33 | if (paramJson.contains("name")) { 34 | opName = paramJson["name"].get(); 35 | } 36 | if (paramJson.contains("squeezeDim")) { 37 | squeezeDim = std::move(paramJson["squeezeDim"].get>()); 38 | } 39 | DICP_LOG(INFO) << "CustomSqueezeOperation: name: " << opName << " squeezeDim:" << vectorToString(squeezeDim); 40 | atb::Operation* op = new SqueezeOperation(opName, squeezeDim); 41 | return op; 42 | } 43 | 44 | REGISTER_OPERATION(CustomSqueezeOperation, CustomSqueezeOperationCreate); 45 | 46 | } // namespace dicp 47 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/squeeze_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "atb/operation.h" 9 | #include "reshape_operation.h" 10 | #include "utils/common.h" 11 | #include "utils/log.h" 12 | namespace dicp { 13 | 14 | class SqueezeOperation : public ReshapeOperation { 15 | public: 16 | explicit SqueezeOperation(const std::string& name, std::vector squeezeDim); 17 | ~SqueezeOperation(){}; 18 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 19 | 20 | private: 21 | 
std::vector squeezeDim_; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/unsqueeze_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "atb/operation.h" 9 | #include "reshape_operation.h" 10 | #include "utils/common.h" 11 | #include "utils/log.h" 12 | namespace dicp { 13 | 14 | class UnsqueezeOperation : public ReshapeOperation { 15 | public: 16 | explicit UnsqueezeOperation(const std::string& name, std::vector unsqueezeDim); 17 | ~UnsqueezeOperation(){}; 18 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 19 | 20 | private: 21 | std::vector unsqueezeDim_; 22 | }; 23 | 24 | } // namespace dicp 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/view_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "atb/operation.h" 10 | #include "reshape_operation.h" 11 | #include "utils/common.h" 12 | #include "utils/log.h" 13 | namespace dicp { 14 | 15 | class ViewOperation : public ReshapeOperation { 16 | public: 17 | explicit ViewOperation(const std::string& name, std::vector viewShape); 18 | ~ViewOperation(){}; 19 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 20 | 21 | private: 22 | std::vector shape_; 23 | bool needInferDim_; 24 | int inferDim_; 25 | int otherProd_ = 1; 26 | }; 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- 
/dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/zeros_like_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class ZerosLikeOperation : public AclNnOperation { 8 | public: 9 | explicit ZerosLikeOperation(const std::string& name); 10 | ~ZerosLikeOperation() override; 11 | 12 | std::string GetName() const override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | protected: 18 | std::string opName_; 19 | std::vector size_; 20 | aclDataType dtype_; 21 | 22 | int SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 23 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 24 | }; 25 | 26 | } // namespace dicp 27 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/custom_ops/zeros_operation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ops/aclnn_ops/acl_nn_operation.h" 4 | 5 | namespace dicp { 6 | 7 | class ZerosOperation : public AclNnOperation { 8 | public: 9 | explicit ZerosOperation(const std::string& name, const std::vector& size, aclDataType dtype); 10 | ~ZerosOperation() override; 11 | 12 | std::string GetName() const override; 13 | atb::Status InferShape(const atb::SVector& inTensorDescs, atb::SVector& outTensorDescs) const override; 14 | uint32_t GetInputNum() const override; 15 | uint32_t GetOutputNum() const override; 16 | 17 | protected: 18 | std::string opName_; 19 | std::vector size_; 20 | std::unordered_map dynamic_size_; 21 | bool has_dynamic_size_; 22 | aclDataType dtype_; 23 | 24 | int 
SetAclNnWorkspaceExecutor(uint64_t& workspaceSize) override; 25 | int CallAclExecute(uint8_t* workspace, uint64_t workspaceSize, aclOpExecutor* aclExecutor, aclrtStream stream) override; 26 | }; 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/operation_creator.cpp: -------------------------------------------------------------------------------- 1 | #include "ops/operation_creator.h" 2 | 3 | #include "utils/log.h" 4 | 5 | namespace dicp { 6 | 7 | std::unordered_map& getGlobalFuncMap() { 8 | static std::unordered_map funcMap; 9 | return funcMap; 10 | } 11 | 12 | atb::Operation* CreateOperation(const std::string& opName, const nlohmann::json& paramJson) { 13 | auto g_funcMap = getGlobalFuncMap(); 14 | auto it = g_funcMap.find(opName); 15 | if (it == g_funcMap.end()) { 16 | DICP_LOG(ERROR) << "not support opName:" << opName; 17 | return nullptr; 18 | } 19 | 20 | try { 21 | return it->second(paramJson); 22 | } catch (const std::exception& e) { 23 | DICP_LOG(ERROR) << opName << " parse json fail, error:" << e.what(); 24 | } 25 | return nullptr; 26 | } 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/ops/operation_creator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "atb/operation.h" 10 | 11 | namespace dicp { 12 | 13 | using OperationCreateFunc = std::function; 14 | 15 | std::unordered_map& getGlobalFuncMap(); 16 | 17 | struct RegisterOp { 18 | RegisterOp(const std::string& name, OperationCreateFunc func) { getGlobalFuncMap()[name] = func; } 19 | }; 20 | 21 | #define CONCATENATE_DETAIL(x, y) x##y 22 | #define CONCATENATE(x, y) CONCATENATE_DETAIL(x, y) 23 | #define MAKE_UNIQUE_NAME(prefix) 
CONCATENATE(prefix, __COUNTER__) 24 | 25 | #define REGISTER_OPERATION(OpName, CreateFunc) static RegisterOp reg##OpName(#OpName, CreateFunc); 26 | #define REGISTER_ATB_OPERATION(OpNameStr, CreateFunc) static RegisterOp MAKE_UNIQUE_NAME(reg_)(OpNameStr, CreateFunc); 27 | 28 | atb::Operation* CreateOperation(const std::string& opName, const nlohmann::json& paramJson); 29 | 30 | } // namespace dicp 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/cfg/argv.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | #include 6 | #include 7 | 8 | // 9 | // Init log levels using each argv entry that starts with "SPDLOG_LEVEL=" 10 | // 11 | // set all loggers to debug level: 12 | // example.exe "SPDLOG_LEVEL=debug" 13 | 14 | // set logger1 to trace level 15 | // example.exe "SPDLOG_LEVEL=logger1=trace" 16 | 17 | // turn off all logging except for logger1 and logger2: 18 | // example.exe "SPDLOG_LEVEL=off,logger1=debug,logger2=info" 19 | 20 | namespace spdlog { 21 | namespace cfg { 22 | 23 | // search for SPDLOG_LEVEL= in the args and use it to init the levels 24 | inline void load_argv_levels(int argc, const char** argv) { 25 | const std::string spdlog_level_prefix = "SPDLOG_LEVEL="; 26 | for (int i = 1; i < argc; i++) { 27 | std::string arg = argv[i]; 28 | if (arg.find(spdlog_level_prefix) == 0) { 29 | auto levels_string = arg.substr(spdlog_level_prefix.size()); 30 | helpers::load_levels(levels_string); 31 | } 32 | } 33 | } 34 | 35 | inline void load_argv_levels(int argc, char** argv) { load_argv_levels(argc, const_cast(argv)); } 36 | 37 | } // namespace cfg 38 | } // namespace spdlog 39 | 
-------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/cfg/env.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | #include 6 | #include 7 | #include 8 | 9 | // 10 | // Init levels and patterns from env variables SPDLOG_LEVEL 11 | // Inspired from Rust's "env_logger" crate (https://crates.io/crates/env_logger). 12 | // Note - fallback to "info" level on unrecognized levels 13 | // 14 | // Examples: 15 | // 16 | // set global level to debug: 17 | // export SPDLOG_LEVEL=debug 18 | // 19 | // turn off all logging except for logger1: 20 | // export SPDLOG_LEVEL="*=off,logger1=debug" 21 | // 22 | 23 | // turn off all logging except for logger1 and logger2: 24 | // export SPDLOG_LEVEL="off,logger1=debug,logger2=info" 25 | 26 | namespace spdlog { 27 | namespace cfg { 28 | inline void load_env_levels() { 29 | auto env_val = details::os::getenv("SPDLOG_LEVEL"); 30 | if (!env_val.empty()) { 31 | helpers::load_levels(env_val); 32 | } 33 | } 34 | 35 | } // namespace cfg 36 | } // namespace spdlog 37 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/cfg/helpers.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace spdlog { 11 | namespace cfg { 12 | namespace helpers { 13 | // 14 | // Init levels from given string 15 | // 16 | // Examples: 17 | // 18 | // set global level to debug: "debug" 19 | // turn off all logging except for logger1: "off,logger1=debug" 20 | // turn off all logging except for logger1 and logger2: "off,logger1=debug,logger2=info" 21 | // 22 | SPDLOG_API void load_levels(const std::string& txt); 23 | } // namespace helpers 24 | 25 | } // namespace cfg 26 | } // namespace spdlog 27 | 28 | #ifdef SPDLOG_HEADER_ONLY 29 | #include "helpers-inl.h" 30 | #endif // SPDLOG_HEADER_ONLY 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/backtracer.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | // Store log messages in circular buffer. 14 | // Useful for storing debug data in case of error/warning happens. 15 | 16 | namespace spdlog { 17 | namespace details { 18 | class SPDLOG_API backtracer { 19 | mutable std::mutex mutex_; 20 | std::atomic enabled_{false}; 21 | circular_q messages_; 22 | 23 | public: 24 | backtracer() = default; 25 | backtracer(const backtracer& other); 26 | 27 | backtracer(backtracer&& other) SPDLOG_NOEXCEPT; 28 | backtracer& operator=(backtracer other); 29 | 30 | void enable(size_t size); 31 | void disable(); 32 | bool enabled() const; 33 | void push_back(const log_msg& msg); 34 | bool empty() const; 35 | 36 | // pop all items in the q and apply the given fun on each of them. 
37 | void foreach_pop(std::function fun); 38 | }; 39 | 40 | } // namespace details 41 | } // namespace spdlog 42 | 43 | #ifdef SPDLOG_HEADER_ONLY 44 | #include "backtracer-inl.h" 45 | #endif 46 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/console_globals.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace spdlog { 11 | namespace details { 12 | 13 | struct console_mutex { 14 | using mutex_t = std::mutex; 15 | static mutex_t& mutex() { 16 | static mutex_t s_mutex; 17 | return s_mutex; 18 | } 19 | }; 20 | 21 | struct console_nullmutex { 22 | using mutex_t = null_mutex; 23 | static mutex_t& mutex() { 24 | static mutex_t s_mutex; 25 | return s_mutex; 26 | } 27 | }; 28 | } // namespace details 29 | } // namespace spdlog 30 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/file_helper.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace spdlog { 11 | namespace details { 12 | 13 | // Helper class for file sinks. 14 | // When failing to open a file, retry several times(5) with a delay interval(10 ms). 15 | // Throw spdlog_ex exception on errors. 
16 | 17 | class SPDLOG_API file_helper { 18 | public: 19 | file_helper() = default; 20 | explicit file_helper(const file_event_handlers& event_handlers); 21 | 22 | file_helper(const file_helper&) = delete; 23 | file_helper& operator=(const file_helper&) = delete; 24 | ~file_helper(); 25 | 26 | void open(const filename_t& fname, bool truncate = false); 27 | void reopen(bool truncate); 28 | void flush(); 29 | void sync(); 30 | void close(); 31 | void write(const memory_buf_t& buf); 32 | size_t size() const; 33 | const filename_t& filename() const; 34 | 35 | // 36 | // return file path and its extension: 37 | // 38 | // "mylog.txt" => ("mylog", ".txt") 39 | // "mylog" => ("mylog", "") 40 | // "mylog." => ("mylog.", "") 41 | // "/dir1/dir2/mylog.txt" => ("/dir1/dir2/mylog", ".txt") 42 | // 43 | // the starting dot in filenames is ignored (hidden files): 44 | // 45 | // ".mylog" => (".mylog". "") 46 | // "my_folder/.mylog" => ("my_folder/.mylog", "") 47 | // "my_folder/.mylog.txt" => ("my_folder/.mylog", ".txt") 48 | static std::tuple split_by_extension(const filename_t& fname); 49 | 50 | private: 51 | const int open_tries_ = 5; 52 | const unsigned int open_interval_ = 10; 53 | std::FILE* fd_{nullptr}; 54 | filename_t filename_; 55 | file_event_handlers event_handlers_; 56 | }; 57 | } // namespace details 58 | } // namespace spdlog 59 | 60 | #ifdef SPDLOG_HEADER_ONLY 61 | #include "file_helper-inl.h" 62 | #endif 63 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/log_msg-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | 12 | namespace spdlog { 13 | namespace details { 14 | 15 | SPDLOG_INLINE log_msg::log_msg(spdlog::log_clock::time_point log_time, spdlog::source_loc loc, string_view_t a_logger_name, spdlog::level::level_enum lvl, 16 | spdlog::string_view_t msg) 17 | : logger_name(a_logger_name), 18 | level(lvl), 19 | time(log_time) 20 | #ifndef SPDLOG_NO_THREAD_ID 21 | , 22 | thread_id(os::thread_id()) 23 | #endif 24 | , 25 | source(loc), 26 | payload(msg) { 27 | } 28 | 29 | SPDLOG_INLINE log_msg::log_msg(spdlog::source_loc loc, string_view_t a_logger_name, spdlog::level::level_enum lvl, spdlog::string_view_t msg) 30 | : log_msg(os::now(), loc, a_logger_name, lvl, msg) {} 31 | 32 | SPDLOG_INLINE log_msg::log_msg(string_view_t a_logger_name, spdlog::level::level_enum lvl, spdlog::string_view_t msg) 33 | : log_msg(os::now(), source_loc{}, a_logger_name, lvl, msg) {} 34 | 35 | } // namespace details 36 | } // namespace spdlog 37 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/log_msg.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace spdlog { 11 | namespace details { 12 | struct SPDLOG_API log_msg { 13 | log_msg() = default; 14 | log_msg(log_clock::time_point log_time, source_loc loc, string_view_t logger_name, level::level_enum lvl, string_view_t msg); 15 | log_msg(source_loc loc, string_view_t logger_name, level::level_enum lvl, string_view_t msg); 16 | log_msg(string_view_t logger_name, level::level_enum lvl, string_view_t msg); 17 | log_msg(const log_msg& other) = default; 18 | log_msg& operator=(const log_msg& other) = default; 19 | 20 | string_view_t logger_name; 21 | level::level_enum level{level::off}; 22 | log_clock::time_point time; 23 | size_t thread_id{0}; 24 | 25 | // wrapping the formatted text with color (updated by pattern_formatter). 26 | mutable size_t color_range_start{0}; 27 | mutable size_t color_range_end{0}; 28 | 29 | source_loc source; 30 | string_view_t payload; 31 | }; 32 | } // namespace details 33 | } // namespace spdlog 34 | 35 | #ifdef SPDLOG_HEADER_ONLY 36 | #include "log_msg-inl.h" 37 | #endif 38 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/log_msg_buffer-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | namespace spdlog { 11 | namespace details { 12 | 13 | SPDLOG_INLINE log_msg_buffer::log_msg_buffer(const log_msg& orig_msg) : log_msg{orig_msg} { 14 | buffer.append(logger_name.begin(), logger_name.end()); 15 | buffer.append(payload.begin(), payload.end()); 16 | update_string_views(); 17 | } 18 | 19 | SPDLOG_INLINE log_msg_buffer::log_msg_buffer(const log_msg_buffer& other) : log_msg{other} { 20 | buffer.append(logger_name.begin(), logger_name.end()); 21 | buffer.append(payload.begin(), payload.end()); 22 | update_string_views(); 23 | } 24 | 25 | SPDLOG_INLINE log_msg_buffer::log_msg_buffer(log_msg_buffer&& other) SPDLOG_NOEXCEPT : log_msg{other}, buffer{std::move(other.buffer)} { 26 | update_string_views(); 27 | } 28 | 29 | SPDLOG_INLINE log_msg_buffer& log_msg_buffer::operator=(const log_msg_buffer& other) { 30 | log_msg::operator=(other); 31 | buffer.clear(); 32 | buffer.append(other.buffer.data(), other.buffer.data() + other.buffer.size()); 33 | update_string_views(); 34 | return *this; 35 | } 36 | 37 | SPDLOG_INLINE log_msg_buffer& log_msg_buffer::operator=(log_msg_buffer&& other) SPDLOG_NOEXCEPT { 38 | log_msg::operator=(other); 39 | buffer = std::move(other.buffer); 40 | update_string_views(); 41 | return *this; 42 | } 43 | 44 | SPDLOG_INLINE void log_msg_buffer::update_string_views() { 45 | logger_name = string_view_t{buffer.data(), logger_name.size()}; 46 | payload = string_view_t{buffer.data() + logger_name.size(), payload.size()}; 47 | } 48 | 49 | } // namespace details 50 | } // namespace spdlog 51 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/log_msg_buffer.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 
2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | namespace spdlog { 9 | namespace details { 10 | 11 | // Extend log_msg with internal buffer to store its payload. 12 | // This is needed since log_msg holds string_views that points to stack data. 13 | 14 | class SPDLOG_API log_msg_buffer : public log_msg { 15 | memory_buf_t buffer; 16 | void update_string_views(); 17 | 18 | public: 19 | log_msg_buffer() = default; 20 | explicit log_msg_buffer(const log_msg& orig_msg); 21 | log_msg_buffer(const log_msg_buffer& other); 22 | log_msg_buffer(log_msg_buffer&& other) SPDLOG_NOEXCEPT; 23 | log_msg_buffer& operator=(const log_msg_buffer& other); 24 | log_msg_buffer& operator=(log_msg_buffer&& other) SPDLOG_NOEXCEPT; 25 | }; 26 | 27 | } // namespace details 28 | } // namespace spdlog 29 | 30 | #ifdef SPDLOG_HEADER_ONLY 31 | #include "log_msg_buffer-inl.h" 32 | #endif 33 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
// Distributed under the MIT License (http://opensource.org/licenses/MIT)

#pragma once

#include <atomic>
#include <utility>
// null, no cost dummy "mutex" and dummy "atomic" int

namespace spdlog {
namespace details {
// Drop-in for std::mutex in single-threaded sink variants (*_st sinks).
struct null_mutex {
    void lock() const {}
    void unlock() const {}
};

// Drop-in for std::atomic<int> with no synchronization; the memory_order
// parameters are accepted and ignored to match the std::atomic interface.
struct null_atomic_int {
    int value;
    null_atomic_int() = default;

    explicit null_atomic_int(int new_value) : value(new_value) {}

    int load(std::memory_order = std::memory_order_relaxed) const { return value; }

    void store(int new_value, std::memory_order = std::memory_order_relaxed) { value = new_value; }

    int exchange(int new_value, std::memory_order = std::memory_order_relaxed) {
        std::swap(new_value, value);
        return new_value;  // return value before the call
    }
};

}  // namespace details
}  // namespace spdlog
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | namespace spdlog { 11 | namespace details { 12 | 13 | // stop the worker thread and join it 14 | SPDLOG_INLINE periodic_worker::~periodic_worker() { 15 | if (worker_thread_.joinable()) { 16 | { 17 | std::lock_guard lock(mutex_); 18 | active_ = false; 19 | } 20 | cv_.notify_one(); 21 | worker_thread_.join(); 22 | } 23 | } 24 | 25 | } // namespace details 26 | } // namespace spdlog 27 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/periodic_worker.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | // periodic worker thread - periodically executes the given callback function. 7 | // 8 | // RAII over the owned thread: 9 | // creates the thread on construction. 10 | // stops and joins the thread on destruction (if the thread is executing a callback, wait for it 11 | // to finish first). 
12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | namespace spdlog { 19 | namespace details { 20 | 21 | class SPDLOG_API periodic_worker { 22 | public: 23 | template 24 | periodic_worker(const std::function& callback_fun, std::chrono::duration interval) { 25 | active_ = (interval > std::chrono::duration::zero()); 26 | if (!active_) { 27 | return; 28 | } 29 | 30 | worker_thread_ = std::thread([this, callback_fun, interval]() { 31 | for (;;) { 32 | std::unique_lock lock(this->mutex_); 33 | if (this->cv_.wait_for(lock, interval, [this] { return !this->active_; })) { 34 | return; // active_ == false, so exit this thread 35 | } 36 | callback_fun(); 37 | } 38 | }); 39 | } 40 | std::thread& get_thread() { return worker_thread_; } 41 | periodic_worker(const periodic_worker&) = delete; 42 | periodic_worker& operator=(const periodic_worker&) = delete; 43 | // stop the worker thread and join it 44 | ~periodic_worker(); 45 | 46 | private: 47 | bool active_; 48 | std::thread worker_thread_; 49 | std::mutex mutex_; 50 | std::condition_variable cv_; 51 | }; 52 | } // namespace details 53 | } // namespace spdlog 54 | 55 | #ifdef SPDLOG_HEADER_ONLY 56 | #include "periodic_worker-inl.h" 57 | #endif 58 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/synchronous_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include "registry.h" 7 | 8 | namespace spdlog { 9 | 10 | // Default logger factory- creates synchronous loggers 11 | class logger; 12 | 13 | struct synchronous_factory { 14 | template 15 | static std::shared_ptr create(std::string logger_name, SinkArgs&&... 
args) { 16 | auto sink = std::make_shared(std::forward(args)...); 17 | auto new_logger = std::make_shared(std::move(logger_name), std::move(sink)); 18 | details::registry::instance().initialize_logger(new_logger); 19 | return new_logger; 20 | } 21 | }; 22 | } // namespace spdlog 23 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/details/windows_include.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef NOMINMAX 4 | #define NOMINMAX // prevent windows redefining min/max 5 | #endif 6 | 7 | #ifndef WIN32_LEAN_AND_MEAN 8 | #define WIN32_LEAN_AND_MEAN 9 | #endif 10 | 11 | #include 12 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/bundled/fmt.license.rst: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 - present, Victor Zverovich 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | --- Optional exception to the license --- 23 | 24 | As an exception, if, as a result of your compiling your source code, portions 25 | of this Software are embedded into a machine-executable object form of such 26 | source code, you may redistribute such embedded portions in such object form 27 | without including the above copyright and permission notices. 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/bundled/locale.h: -------------------------------------------------------------------------------- 1 | #include "xchar.h" 2 | #warning fmt/locale.h is deprecated, include fmt/format.h or fmt/xchar.h instead 3 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/chrono.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's chrono support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/compile.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's compile-time support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/fmt.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016-2018 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // 9 | // Include a bundled header-only copy of fmtlib or an external one. 10 | // By default spdlog include its own copy. 
11 | // 12 | #include 13 | 14 | #if defined(SPDLOG_USE_STD_FORMAT) // SPDLOG_USE_STD_FORMAT is defined - use std::format 15 | #include 16 | #elif !defined(SPDLOG_FMT_EXTERNAL) 17 | #if !defined(SPDLOG_COMPILED_LIB) && !defined(FMT_HEADER_ONLY) 18 | #define FMT_HEADER_ONLY 19 | #endif 20 | #ifndef FMT_USE_WINDOWS_H 21 | #define FMT_USE_WINDOWS_H 0 22 | #endif 23 | 24 | #include 25 | #include 26 | 27 | #else // SPDLOG_FMT_EXTERNAL is defined - use external fmtlib 28 | #include 29 | #include 30 | #endif 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/ostr.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's ostream support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/ranges.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's ranges support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/std.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's std support (for formatting e.g. 9 | // std::filesystem::path, std::thread::id, std::monostate, std::variant, ...) 10 | // 11 | #include 12 | 13 | #if !defined(SPDLOG_USE_STD_FORMAT) 14 | #if !defined(SPDLOG_FMT_EXTERNAL) 15 | #ifdef SPDLOG_HEADER_ONLY 16 | #ifndef FMT_HEADER_ONLY 17 | #define FMT_HEADER_ONLY 18 | #endif 19 | #endif 20 | #include 21 | #else 22 | #include 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fmt/xchar.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's xchar support 9 | // 10 | #include 11 | 12 | #if !defined(SPDLOG_USE_STD_FORMAT) 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifdef SPDLOG_HEADER_ONLY 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #endif 19 | #include 20 | #else 21 | #include 22 | #endif 23 | #endif 24 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | namespace spdlog { 10 | 11 | class formatter { 12 | public: 13 | virtual ~formatter() = default; 14 | virtual void format(const details::log_msg& msg, memory_buf_t& dest) = 0; 15 | virtual std::unique_ptr clone() const = 0; 16 | }; 17 | } // namespace spdlog 18 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/fwd.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
// Copyright(c) 2015-present, Gabi Melman & spdlog contributors.
// Distributed under the MIT License (http://opensource.org/licenses/MIT)

#pragma once

// Forward declarations only — lets headers name spdlog types without pulling
// in their full definitions.

namespace spdlog {

class logger;
class formatter;

namespace level {
enum level_enum : int;
}  // namespace level

namespace sinks {
class sink;
}  // namespace sinks

}  // namespace spdlog
21 | 22 | namespace spdlog { 23 | class SPDLOG_API mdc { 24 | public: 25 | using mdc_map_t = std::map; 26 | 27 | static void put(const std::string& key, const std::string& value) { get_context()[key] = value; } 28 | 29 | static std::string get(const std::string& key) { 30 | auto& context = get_context(); 31 | auto it = context.find(key); 32 | if (it != context.end()) { 33 | return it->second; 34 | } 35 | return ""; 36 | } 37 | 38 | static void remove(const std::string& key) { get_context().erase(key); } 39 | 40 | static void clear() { get_context().clear(); } 41 | 42 | static mdc_map_t& get_context() { 43 | static thread_local mdc_map_t context; 44 | return context; 45 | } 46 | }; 47 | 48 | } // namespace spdlog 49 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/base_sink-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | template 17 | SPDLOG_INLINE spdlog::sinks::base_sink::base_sink() : formatter_{details::make_unique()} {} 18 | 19 | template 20 | SPDLOG_INLINE spdlog::sinks::base_sink::base_sink(std::unique_ptr formatter) : formatter_{std::move(formatter)} {} 21 | 22 | template 23 | void SPDLOG_INLINE spdlog::sinks::base_sink::log(const details::log_msg& msg) { 24 | std::lock_guard lock(mutex_); 25 | sink_it_(msg); 26 | } 27 | 28 | template 29 | void SPDLOG_INLINE spdlog::sinks::base_sink::flush() { 30 | std::lock_guard lock(mutex_); 31 | flush_(); 32 | } 33 | 34 | template 35 | void SPDLOG_INLINE spdlog::sinks::base_sink::set_pattern(const std::string& pattern) { 36 | std::lock_guard lock(mutex_); 37 | set_pattern_(pattern); 38 | } 39 | 40 | template 41 | void SPDLOG_INLINE spdlog::sinks::base_sink::set_formatter(std::unique_ptr sink_formatter) { 42 | std::lock_guard lock(mutex_); 43 | set_formatter_(std::move(sink_formatter)); 44 | } 45 | 46 | template 47 | void SPDLOG_INLINE spdlog::sinks::base_sink::set_pattern_(const std::string& pattern) { 48 | set_formatter_(details::make_unique(pattern)); 49 | } 50 | 51 | template 52 | void SPDLOG_INLINE spdlog::sinks::base_sink::set_formatter_(std::unique_ptr sink_formatter) { 53 | formatter_ = std::move(sink_formatter); 54 | } 55 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | // 6 | // base sink templated over a mutex (either dummy or real) 7 | // concrete implementation should override the sink_it_() and flush_() methods. 8 | // locking is taken care of in this class - no locking needed by the 9 | // implementers.. 10 | // 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | namespace spdlog { 17 | namespace sinks { 18 | template 19 | class SPDLOG_API base_sink : public sink { 20 | public: 21 | base_sink(); 22 | explicit base_sink(std::unique_ptr formatter); 23 | ~base_sink() override = default; 24 | 25 | base_sink(const base_sink&) = delete; 26 | base_sink(base_sink&&) = delete; 27 | 28 | base_sink& operator=(const base_sink&) = delete; 29 | base_sink& operator=(base_sink&&) = delete; 30 | 31 | void log(const details::log_msg& msg) final override; 32 | void flush() final override; 33 | void set_pattern(const std::string& pattern) final override; 34 | void set_formatter(std::unique_ptr sink_formatter) final override; 35 | 36 | protected: 37 | // sink formatter 38 | std::unique_ptr formatter_; 39 | Mutex mutex_; 40 | 41 | virtual void sink_it_(const details::log_msg& msg) = 0; 42 | virtual void flush_() = 0; 43 | virtual void set_pattern_(const std::string& pattern); 44 | virtual void set_formatter_(std::unique_ptr sink_formatter); 45 | }; 46 | } // namespace sinks 47 | } // namespace spdlog 48 | 49 | #ifdef SPDLOG_HEADER_ONLY 50 | #include "base_sink-inl.h" 51 | #endif 52 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/basic_file_sink-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace spdlog { 14 | namespace sinks { 15 | 16 | template 17 | SPDLOG_INLINE basic_file_sink::basic_file_sink(const filename_t& filename, bool truncate, const file_event_handlers& event_handlers) 18 | : file_helper_{event_handlers} { 19 | file_helper_.open(filename, truncate); 20 | } 21 | 22 | template 23 | SPDLOG_INLINE const filename_t& basic_file_sink::filename() const { 24 | return file_helper_.filename(); 25 | } 26 | 27 | template 28 | SPDLOG_INLINE void basic_file_sink::sink_it_(const details::log_msg& msg) { 29 | memory_buf_t formatted; 30 | base_sink::formatter_->format(msg, formatted); 31 | file_helper_.write(formatted); 32 | } 33 | 34 | template 35 | SPDLOG_INLINE void basic_file_sink::flush_() { 36 | file_helper_.flush(); 37 | } 38 | 39 | } // namespace sinks 40 | } // namespace spdlog 41 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/callback_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace spdlog { 14 | 15 | // callbacks type 16 | typedef std::function custom_log_callback; 17 | 18 | namespace sinks { 19 | /* 20 | * Trivial callback sink, gets a callback function and calls it on each log 21 | */ 22 | template 23 | class callback_sink final : public base_sink { 24 | public: 25 | explicit callback_sink(const custom_log_callback& callback) : callback_{callback} {} 26 | 27 | protected: 28 | void sink_it_(const details::log_msg& msg) override { callback_(msg); } 29 | void flush_() override{}; 30 | 31 | private: 32 | custom_log_callback callback_; 33 | }; 34 | 35 | using callback_sink_mt = callback_sink; 36 | using callback_sink_st = callback_sink; 37 | 38 | } // namespace sinks 39 | 40 | // 41 | // factory functions 42 | // 43 | template 44 | inline std::shared_ptr callback_logger_mt(const std::string& logger_name, const custom_log_callback& callback) { 45 | return Factory::template create(logger_name, callback); 46 | } 47 | 48 | template 49 | inline std::shared_ptr callback_logger_st(const std::string& logger_name, const custom_log_callback& callback) { 50 | return Factory::template create(logger_name, callback); 51 | } 52 | 53 | } // namespace spdlog 54 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | namespace spdlog { 13 | namespace sinks { 14 | 15 | template 16 | class null_sink : public base_sink { 17 | protected: 18 | void sink_it_(const details::log_msg&) override {} 19 | void flush_() override {} 20 | }; 21 | 22 | using null_sink_mt = null_sink; 23 | using null_sink_st = null_sink; 24 | 25 | } // namespace sinks 26 | 27 | template 28 | inline std::shared_ptr null_logger_mt(const std::string& logger_name) { 29 | auto null_logger = Factory::template create(logger_name); 30 | null_logger->set_level(level::off); 31 | return null_logger; 32 | } 33 | 34 | template 35 | inline std::shared_ptr null_logger_st(const std::string& logger_name) { 36 | auto null_logger = Factory::template create(logger_name); 37 | null_logger->set_level(level::off); 38 | return null_logger; 39 | } 40 | 41 | } // namespace spdlog 42 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace spdlog { 13 | namespace sinks { 14 | template 15 | class ostream_sink final : public base_sink { 16 | public: 17 | explicit ostream_sink(std::ostream& os, bool force_flush = false) : ostream_(os), force_flush_(force_flush) {} 18 | ostream_sink(const ostream_sink&) = delete; 19 | ostream_sink& operator=(const ostream_sink&) = delete; 20 | 21 | protected: 22 | void sink_it_(const details::log_msg& msg) override { 23 | memory_buf_t formatted; 24 | base_sink::formatter_->format(msg, formatted); 25 | ostream_.write(formatted.data(), static_cast(formatted.size())); 26 | if (force_flush_) { 27 | ostream_.flush(); 28 | } 29 | } 30 | 31 | void flush_() override { ostream_.flush(); } 32 | 33 | std::ostream& ostream_; 34 | bool force_flush_; 35 | }; 36 | 37 | using ostream_sink_mt = ostream_sink; 38 | using ostream_sink_st = ostream_sink; 39 | 40 | } // namespace sinks 41 | } // namespace spdlog 42 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/sink-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | 12 | SPDLOG_INLINE bool spdlog::sinks::sink::should_log(spdlog::level::level_enum msg_level) const { return msg_level >= level_.load(std::memory_order_relaxed); } 13 | 14 | SPDLOG_INLINE void spdlog::sinks::sink::set_level(level::level_enum log_level) { level_.store(log_level, std::memory_order_relaxed); } 15 | 16 | SPDLOG_INLINE spdlog::level::level_enum spdlog::sinks::sink::level() const { 17 | return static_cast(level_.load(std::memory_order_relaxed)); 18 | } 19 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | namespace spdlog { 10 | 11 | namespace sinks { 12 | class SPDLOG_API sink { 13 | public: 14 | virtual ~sink() = default; 15 | virtual void log(const details::log_msg& msg) = 0; 16 | virtual void flush() = 0; 17 | virtual void set_pattern(const std::string& pattern) = 0; 18 | virtual void set_formatter(std::unique_ptr sink_formatter) = 0; 19 | 20 | void set_level(level::level_enum log_level); 21 | level::level_enum level() const; 22 | bool should_log(level::level_enum msg_level) const; 23 | 24 | protected: 25 | // sink log level - default is all 26 | level_t level_{level::trace}; 27 | }; 28 | 29 | } // namespace sinks 30 | } // namespace spdlog 31 | 32 | #ifdef SPDLOG_HEADER_ONLY 33 | #include "sink-inl.h" 34 | #endif 35 | -------------------------------------------------------------------------------- 
/dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/stdout_color_sinks-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifndef SPDLOG_HEADER_ONLY 7 | #include 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace spdlog { 14 | 15 | template 16 | SPDLOG_INLINE std::shared_ptr stdout_color_mt(const std::string& logger_name, color_mode mode) { 17 | return Factory::template create(logger_name, mode); 18 | } 19 | 20 | template 21 | SPDLOG_INLINE std::shared_ptr stdout_color_st(const std::string& logger_name, color_mode mode) { 22 | return Factory::template create(logger_name, mode); 23 | } 24 | 25 | template 26 | SPDLOG_INLINE std::shared_ptr stderr_color_mt(const std::string& logger_name, color_mode mode) { 27 | return Factory::template create(logger_name, mode); 28 | } 29 | 30 | template 31 | SPDLOG_INLINE std::shared_ptr stderr_color_st(const std::string& logger_name, color_mode mode) { 32 | return Factory::template create(logger_name, mode); 33 | } 34 | } // namespace spdlog 35 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/stdout_color_sinks.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #ifdef _WIN32 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | #include 13 | 14 | namespace spdlog { 15 | namespace sinks { 16 | #ifdef _WIN32 17 | using stdout_color_sink_mt = wincolor_stdout_sink_mt; 18 | using stdout_color_sink_st = wincolor_stdout_sink_st; 19 | using stderr_color_sink_mt = wincolor_stderr_sink_mt; 20 | using stderr_color_sink_st = wincolor_stderr_sink_st; 21 | #else 22 | using stdout_color_sink_mt = ansicolor_stdout_sink_mt; 23 | using stdout_color_sink_st = ansicolor_stdout_sink_st; 24 | using stderr_color_sink_mt = ansicolor_stderr_sink_mt; 25 | using stderr_color_sink_st = ansicolor_stderr_sink_st; 26 | #endif 27 | } // namespace sinks 28 | 29 | template 30 | std::shared_ptr stdout_color_mt(const std::string& logger_name, color_mode mode = color_mode::automatic); 31 | 32 | template 33 | std::shared_ptr stdout_color_st(const std::string& logger_name, color_mode mode = color_mode::automatic); 34 | 35 | template 36 | std::shared_ptr stderr_color_mt(const std::string& logger_name, color_mode mode = color_mode::automatic); 37 | 38 | template 39 | std::shared_ptr stderr_color_st(const std::string& logger_name, color_mode mode = color_mode::automatic); 40 | 41 | } // namespace spdlog 42 | 43 | #ifdef SPDLOG_HEADER_ONLY 44 | #include "stdout_color_sinks-inl.h" 45 | #endif 46 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/sinks/udp_sink.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | #ifdef _WIN32 10 | #include 11 | #else 12 | #include 13 | #endif 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | // Simple udp client sink 21 | // Sends formatted log via udp 22 | 23 | namespace spdlog { 24 | namespace sinks { 25 | 26 | struct udp_sink_config { 27 | std::string server_host; 28 | uint16_t server_port; 29 | 30 | udp_sink_config(std::string host, uint16_t port) : server_host{std::move(host)}, server_port{port} {} 31 | }; 32 | 33 | template 34 | class udp_sink : public spdlog::sinks::base_sink { 35 | public: 36 | // host can be hostname or ip address 37 | explicit udp_sink(udp_sink_config sink_config) : client_{sink_config.server_host, sink_config.server_port} {} 38 | 39 | ~udp_sink() override = default; 40 | 41 | protected: 42 | void sink_it_(const spdlog::details::log_msg& msg) override { 43 | spdlog::memory_buf_t formatted; 44 | spdlog::sinks::base_sink::formatter_->format(msg, formatted); 45 | client_.send(formatted.data(), formatted.size()); 46 | } 47 | 48 | void flush_() override {} 49 | details::udp_client client_; 50 | }; 51 | 52 | using udp_sink_mt = udp_sink; 53 | using udp_sink_st = udp_sink; 54 | 55 | } // namespace sinks 56 | 57 | // 58 | // factory functions 59 | // 60 | template 61 | inline std::shared_ptr udp_logger_mt(const std::string& logger_name, sinks::udp_sink_config skin_config) { 62 | return Factory::template create(logger_name, skin_config); 63 | } 64 | 65 | } // namespace spdlog 66 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/spdlog/include/spdlog/version.h: -------------------------------------------------------------------------------- 1 | // Copyright(c) 2015-present, Gabi Melman & spdlog contributors. 
2 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 3 | 4 | #pragma once 5 | 6 | #define SPDLOG_VER_MAJOR 1 7 | #define SPDLOG_VER_MINOR 14 8 | #define SPDLOG_VER_PATCH 1 9 | 10 | #define SPDLOG_TO_VERSION(major, minor, patch) (major * 10000 + minor * 100 + patch) 11 | #define SPDLOG_VERSION SPDLOG_TO_VERSION(SPDLOG_VER_MAJOR, SPDLOG_VER_MINOR, SPDLOG_VER_PATCH) 12 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/common.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/common.h" 2 | 3 | namespace dicp { 4 | 5 | std::string atbDimsToString(const atb::Dims& d) { 6 | std::ostringstream oss; 7 | oss << "["; 8 | for (uint64_t i = 0; i < d.dimNum; ++i) { 9 | oss << d.dims[i]; 10 | if (i < d.dimNum - 1) { 11 | oss << ", "; 12 | } 13 | } 14 | oss << "]"; 15 | return oss.str(); 16 | } 17 | 18 | aclDataType get_acl_dtype(const std::string& dtype) { 19 | if (dtype == "INT64") { 20 | return ACL_INT64; 21 | } else if (dtype == "INT32") { 22 | return ACL_INT32; 23 | } else if (dtype == "FLOAT") { 24 | return ACL_FLOAT; 25 | } else if (dtype == "FLOAT16") { 26 | return ACL_FLOAT16; 27 | } else if (dtype == "BF16") { 28 | return ACL_BF16; 29 | } else { 30 | throw std::invalid_argument("Unsupported dtype: " + dtype); 31 | } 32 | } 33 | 34 | } // namespace dicp 35 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/config.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/config.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "utils/log.h" 7 | 8 | namespace dicp { 9 | 10 | constexpr int GB_1 = 1024 * 1024 * 1024; 11 | 12 | Config::Config() { 13 | const char* envBufferSize = std::getenv("DICP_WORKSPACE_BUFFER_SIZE"); 14 | if (envBufferSize) { 15 | workspaceBufferSize_ = 
std::stoull(envBufferSize); 16 | } else { 17 | workspaceBufferSize_ = 1 * GB_1; 18 | } 19 | } 20 | 21 | uint64_t Config::WorkspaceBufferSize() { return workspaceBufferSize_; } 22 | 23 | Config& GetConfig() { 24 | static Config config; 25 | return config; 26 | } 27 | 28 | } // namespace dicp 29 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace dicp { 6 | 7 | class Config { 8 | public: 9 | Config(); 10 | ~Config(){}; 11 | uint64_t WorkspaceBufferSize(); 12 | 13 | private: 14 | uint64_t workspaceBufferSize_; 15 | }; 16 | 17 | Config& GetConfig(); 18 | 19 | } // namespace dicp 20 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/global_dict.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/global_dict.h" 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include "utils/config.h" 9 | #include "utils/log.h" 10 | #include "utils/tensor_utils.h" 11 | 12 | namespace dicp { 13 | 14 | GlobalDict::GlobalDict() = default; 15 | 16 | void GlobalDict::Register(const std::string& key) { 17 | current_key_ = key; 18 | data_.try_emplace(key); 19 | } 20 | 21 | std::unordered_map& GlobalDict::GetData() { 22 | if (current_key_.empty() || !data_.count(current_key_)) { 23 | throw std::runtime_error("Invalid GlobalDict access"); 24 | } 25 | return data_.at(current_key_); 26 | } 27 | 28 | GlobalDict& GetGlobalDict_() { 29 | static GlobalDict global_dict; 30 | return global_dict; 31 | } 32 | 33 | void GlobalDict::Set(const std::string& key) { 34 | if (current_key_.empty() || !data_.count(current_key_)) { 35 | throw std::runtime_error("Invalid GlobalDict access"); 36 | } 37 | current_key_ = key; 38 | } 39 | 40 | 
void RegisterToGlobalDict(const std::string& key) { 41 | auto& global_dict = GetGlobalDict_(); 42 | global_dict.Register(key); 43 | } 44 | 45 | void SetGlobalDict(const std::string& key) { 46 | auto& global_dict = GetGlobalDict_(); 47 | global_dict.Set(key); 48 | } 49 | 50 | std::unordered_map& GetGlobalDictData() { return GetGlobalDict_().GetData(); } 51 | 52 | } // namespace dicp 53 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/global_dict.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace dicp { 10 | 11 | class GlobalDict { 12 | public: 13 | GlobalDict(); 14 | ~GlobalDict(){}; 15 | void Register(const std::string& key); 16 | void Set(const std::string& key); 17 | std::unordered_map& GetData(); 18 | 19 | private: 20 | std::string current_key_; 21 | std::unordered_map> data_; 22 | }; 23 | 24 | void RegisterToGlobalDict(const std::string& key); 25 | void SetGlobalDict(const std::string& key); 26 | GlobalDict& GetGlobalDict_(); 27 | 28 | std::unordered_map& GetGlobalDictData(); 29 | 30 | } // namespace dicp 31 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/misc.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/misc.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "utils/log.h" 7 | 8 | namespace dicp { 9 | namespace utils { 10 | 11 | void* GetCurrentStream() { 12 | int32_t devId = 0; 13 | aclrtGetDevice(&devId); 14 | void* stream = c10_npu::getCurrentNPUStream(devId).stream(); 15 | DICP_LOG_IF(stream == nullptr, ERROR) << "get current stream failed!"; 16 | return stream; 17 | } 18 | 19 | int GetNewModelId() { 20 | static int modelId = 0; 21 | return modelId++; 22 | } 23 | 24 | } // namespace utils 25 
| } // namespace dicp 26 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/misc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace dicp { 4 | namespace utils { 5 | 6 | void* GetCurrentStream(); 7 | int GetNewModelId(); 8 | 9 | } // namespace utils 10 | } // namespace dicp 11 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/operation_util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | namespace dicp { 5 | 6 | #define CREATE_OPERATION(param, operation) \ 7 | do { \ 8 | atb::Status atbStatus = atb::CreateOperation(param, operation); \ 9 | if (atbStatus != atb::NO_ERROR) { \ 10 | return atbStatus; \ 11 | } \ 12 | } while (0) 13 | 14 | #define CREATE_OPERATION_NO_RETURN(param, operation) \ 15 | do { \ 16 | atb::Status atbStatus = atb::CreateOperation(param, operation); \ 17 | if (atbStatus != atb::NO_ERROR) { \ 18 | } \ 19 | } while (0) 20 | } // namespace dicp 21 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/tensor_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace dicp { 9 | namespace tensor_utils { 10 | 11 | template 12 | struct aclDataTypeMap; 13 | 14 | template <> 15 | struct aclDataTypeMap { 16 | using type = float16_t; 17 | }; 18 | template <> 19 | struct aclDataTypeMap { 20 | using type = int64_t; 21 | }; 22 | template <> 23 | struct aclDataTypeMap { 24 | using type = int32_t; 25 | }; 26 | template <> 27 | struct aclDataTypeMap { 28 | using type = int8_t; 29 | }; 30 | 31 | std::string TensorToString(const atb::Tensor& tensor); 32 
| std::string TensorDescToString(const atb::TensorDesc& tensorDesc); 33 | 34 | atb::Tensor AtTensor2Tensor(const at::Tensor& atTensor); 35 | at::Tensor CreateAtTensorFromTensorDesc(const atb::TensorDesc& tensorDesc); 36 | int64_t TransferAtTensor2AtbTensor(std::vector& atTensors, std::vector& atbTensors); 37 | 38 | template 39 | void copyAndPrint(const atb::Tensor tensor, int64_t tensorSize); 40 | int64_t DumpTensor(const atb::Tensor& tensor); 41 | 42 | } // namespace tensor_utils 43 | } // namespace dicp 44 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/timer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace dicp { 8 | 9 | class Timer { 10 | public: 11 | Timer() : startTimePoint(), endTimePoint(), isRunning(false) {} 12 | 13 | void start() { 14 | startTimePoint = std::chrono::high_resolution_clock::now(); 15 | isRunning = true; 16 | } 17 | 18 | void stop() { 19 | if (isRunning) { 20 | endTimePoint = std::chrono::high_resolution_clock::now(); 21 | isRunning = false; 22 | } 23 | } 24 | 25 | double ElapsedMicroSecond() const { 26 | if (isRunning) { 27 | auto currentTime = std::chrono::high_resolution_clock::now(); 28 | return std::chrono::duration_cast(currentTime - startTimePoint).count(); 29 | } else { 30 | return std::chrono::duration_cast(endTimePoint - startTimePoint).count(); 31 | } 32 | } 33 | 34 | double ElapsedSecond() const { return ElapsedMicroSecond() / 1e6; } 35 | 36 | void reset() { 37 | startTimePoint = std::chrono::time_point(); 38 | endTimePoint = std::chrono::time_point(); 39 | isRunning = false; 40 | } 41 | 42 | private: 43 | std::chrono::time_point startTimePoint; 44 | std::chrono::time_point endTimePoint; 45 | bool isRunning; 46 | }; 47 | 48 | } // namespace dicp 49 | 
-------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/workspace.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/workspace.h" 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "utils/config.h" 8 | #include "utils/log.h" 9 | #include "utils/tensor_utils.h" 10 | 11 | namespace dicp { 12 | 13 | constexpr int KB_1 = 1024; 14 | constexpr int MB_1 = 1024 * 1024; 15 | constexpr int GB_1 = 1024 * 1024 * 1024; 16 | 17 | Workspace::Workspace() { 18 | bufferSize_ = GetConfig().WorkspaceBufferSize(); 19 | 20 | DICP_LOG(INFO) << "Workspace init, bufferSize:" << bufferSize_; 21 | if (bufferSize_ > 0) { 22 | atTensor_ = CreateAtTensor(bufferSize_); 23 | buffer_ = atTensor_.data_ptr(); 24 | } 25 | } 26 | 27 | void* Workspace::GetWorkspaceBuffer(uint64_t bufferSize) { 28 | if (bufferSize <= bufferSize_) { 29 | DICP_LOG(INFO) << "GetWorkspaceBuffer bufferSize:" << bufferSize << "<= bufferSize_:" << bufferSize_; 30 | return atTensor_.data_ptr(); 31 | } 32 | 33 | if (aclrtSynchronizeDevice() != 0) { 34 | return nullptr; 35 | } 36 | 37 | atTensor_.reset(); 38 | atTensor_ = CreateAtTensor(bufferSize); 39 | bufferSize_ = atTensor_.numel(); 40 | DICP_LOG(INFO) << "Workspace new bufferSize:" << bufferSize; 41 | buffer_ = atTensor_.data_ptr(); 42 | return atTensor_.data_ptr(); 43 | } 44 | 45 | torch::Tensor Workspace::CreateAtTensor(uint64_t bufferSize) { 46 | atb::TensorDesc tensorDesc; 47 | tensorDesc.dtype = ACL_UINT8; 48 | tensorDesc.format = ACL_FORMAT_ND; 49 | 50 | tensorDesc.shape.dimNum = 2; 51 | tensorDesc.shape.dims[0] = KB_1; 52 | tensorDesc.shape.dims[1] = (bufferSize + KB_1 - 1) / KB_1; 53 | 54 | return tensor_utils::CreateAtTensorFromTensorDesc(tensorDesc); 55 | } 56 | 57 | void* GetWorkspaceBuffer(uint64_t bufferSize) { 58 | static Workspace workspace; 59 | return workspace.GetWorkspaceBuffer(bufferSize); 60 | } 61 | 62 | } // 
namespace dicp 63 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/utils/workspace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace dicp { 9 | 10 | class Workspace { 11 | public: 12 | Workspace(); 13 | ~Workspace(){}; 14 | void* GetWorkspaceBuffer(uint64_t bufferSize); 15 | 16 | private: 17 | torch::Tensor CreateAtTensor(uint64_t bufferSize); 18 | 19 | private: 20 | void* buffer_ = nullptr; 21 | uint64_t bufferSize_ = 0; 22 | torch::Tensor atTensor_; 23 | }; 24 | 25 | void* GetWorkspaceBuffer(uint64_t bufferSize); 26 | 27 | } // namespace dicp 28 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/AtbGraph/config.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from dlinfer.graph.dicp.dynamo_bridge.decompositions import ( 4 | get_decompositions, 5 | register_decomposition_for_dicp, 6 | ) 7 | 8 | 9 | aten = torch.ops.aten 10 | 11 | 12 | @register_decomposition_for_dicp(aten.select.int) 13 | def select_int(tensor, dim, index): 14 | if ( 15 | not isinstance(tensor.shape[0], torch.SymInt) 16 | and tensor.shape[0] == 1 17 | and dim == 0 18 | and index == 0 19 | ): 20 | view_shape = [-1 if isinstance(x, torch.SymInt) else x for x in tensor.shape] 21 | del view_shape[0] 22 | return tensor.view(view_shape) 23 | slice_res = aten.slice.Tensor(tensor, dim, index, index + 1, 1) 24 | return slice_res.squeeze(dim) 25 | 26 | 27 | def get_decomp(): 28 | return get_decompositions( 29 | [ 30 | aten.count_nonzero.default, 31 | aten.select.int, 32 | ] 33 | ) 34 | 35 | 36 | decomp = get_decomp() 37 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | if (DEVICE STREQUAL "ascend") 2 | add_subdirectory(AtbGraph/codegen/runtime) 3 | endif() 4 | -------------------------------------------------------------------------------- /dlinfer/graph/dicp/vendor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/dlinfer/graph/dicp/vendor/__init__.py -------------------------------------------------------------------------------- /dlinfer/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | from .llm import * 3 | -------------------------------------------------------------------------------- /dlinfer/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | -------------------------------------------------------------------------------- /dlinfer/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | class Config: 3 | def __init__(self, **kwargs): 4 | self._config = kwargs 5 | 6 | def __getattr__(self, name): 7 | if name in self._config: 8 | return self._config[name] 9 | raise AttributeError(f"{type(self).__name__} object has no attribute '{name}'") 10 | 11 | def __setattr__(self, name, value): 12 | if name == "_config": 13 | super().__setattr__(name, value) 14 | else: 15 | self._config[name] = value 16 | 17 | def __repr__(self): 18 | return repr(self._config) 19 | -------------------------------------------------------------------------------- /dlinfer/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. 
All rights reserved. 2 | # decorator usage 3 | def register_ops(registry): 4 | def wrapped_func(ops_func): 5 | registry[ops_func.__name__] = ops_func 6 | return ops_func 7 | 8 | return wrapped_func 9 | -------------------------------------------------------------------------------- /dlinfer/utils/type_annotation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | from torch import Tensor 3 | from typing import Optional, Sequence, Union, Any, Tuple, Callable, Dict 4 | -------------------------------------------------------------------------------- /dlinfer/vendor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | import importlib 3 | from pathlib import Path 4 | from functools import lru_cache 5 | import yaml 6 | import torch 7 | 8 | 9 | vendor_ops_registry = dict() 10 | vendor_is_initialized = False 11 | vendor_name_file = Path(__file__).parent / "vendor.yaml" 12 | linear_w8a8_scale_type = torch.Tensor 13 | dynamic_quant_scale_type = torch.Tensor 14 | 15 | 16 | with open(str(vendor_name_file), "r") as f: 17 | config = yaml.safe_load(f) 18 | vendor_name = config["vendor"] 19 | dispatch_key = config["dispatch_key"] 20 | 21 | 22 | @lru_cache(1) 23 | def import_vendor_module(vendor_name_str): 24 | return importlib.import_module(f".{vendor_name_str}", __package__) 25 | 26 | 27 | def vendor_torch_init(): 28 | import_vendor_module(vendor_name) 29 | global vendor_is_initialized 30 | vendor_is_initialized = True 31 | global linear_w8a8_scale_type, dynamic_quant_scale_type 32 | linear_w8a8_scale_type = torch.Tensor if vendor_name in ["ascend"] else float 33 | dynamic_quant_scale_type = torch.Tensor if vendor_name in ["ascend"] else float 34 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(ascend_extension) 3 | 4 | include(ascend) 5 | 6 | set(CSRC_FILES 7 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/init.cpp 8 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/flash_attention.cpp 9 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/moe_gating_topk_softmax.cpp 10 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/op_api_common.cpp 11 | ${CMAKE_CURRENT_SOURCE_DIR}/csrc/torch_npu_utils.cpp 12 | ) 13 | 14 | if("${Torch_npu_VERSION_HIGHER_THAN_231}" STREQUAL "1") 15 | list(APPEND CSRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/csrc/torch_npu_symbol_fix.cpp) 16 | endif() 17 | 18 | add_library( 19 | ${PROJECT_NAME} SHARED 20 | ${CSRC_FILES} 21 | ) 22 | 23 | set_target_properties( 24 | ${PROJECT_NAME} PROPERTIES 25 | PREFIX "" 26 | ) 27 | 28 | target_compile_definitions( 29 | ${PROJECT_NAME} PUBLIC 30 | GLIBCXX_USE_CXX11_ABI=${_GLIBCXX_USE_CXX11_ABI} 31 | ) 32 | 33 | target_include_directories( 34 | ${PROJECT_NAME} PUBLIC 35 | ${CMAKE_CURRENT_SOURCE_DIR} 36 | ${TORCH_NPU_INCLUDE_DIRS} 37 | ${CANN_INCLUDE_DIRS} 38 | ) 39 | 40 | target_link_libraries( 41 | ${PROJECT_NAME} PRIVATE 42 | Python::Python 43 | torch 44 | ${TORCH_NPU_LIBRARY} 45 | ${CANN_LIBRARY} 46 | ) 47 | 48 | file(RELATIVE_PATH OUTPUT_LIB_RELATIVE_PATH "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") 49 | install( 50 | TARGETS ${PROJECT_NAME} 51 | DESTINATION ${OUTPUT_LIB_RELATIVE_PATH} 52 | ) 53 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | from pathlib import Path 3 | 4 | import torch 5 | from . 
import pytorch_patch, torch_npu_ops 6 | 7 | torch.ops.load_library(str(Path(__file__).parent / "ascend_extension.so")) 8 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/csrc/init.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024, DeepLink. All rights reserved. 2 | #include 3 | 4 | #include "ascend_ops.hpp" 5 | 6 | namespace { 7 | 8 | TORCH_LIBRARY(npu_ext, m) { 9 | m.def( 10 | "npu_prompt_flash_attention_out(Tensor query, Tensor key, Tensor value, Tensor(a!) attn_output, *, " 11 | "Tensor? padding_mask=None, Tensor? atten_mask=None, int[]? actual_seq_lengths=None, int num_heads=1, " 12 | "float scale_value=1.0, int pre_tokens=2147473647, int next_tokens=0, " 13 | "str input_layout=\"BSH\", int num_key_value_heads=0) -> Tensor(a!)"); 14 | m.def( 15 | "npu_incre_flash_attention_v4_out(Tensor query, Tensor key, Tensor value, Tensor(a!) attn_output, *, " 16 | "Tensor? padding_mask=None, Tensor? atten_mask=None, int[]? actual_seq_lengths=None, " 17 | "Tensor? antiquant_scale=None, Tensor? antiquant_offset=None, Tensor? block_table=None, " 18 | "Tensor? dequant_scale1=None, Tensor? quant_scale1=None, Tensor? dequant_scale2=None, Tensor? quant_scale2=None, " 19 | "Tensor? quant_offset2=None, Tensor? kv_padding_size=None, int num_heads=1, float scale_value=1.0, " 20 | "str input_layout=\"BSH\", int num_key_value_heads=0, int block_size=0, int inner_precise=1) -> Tensor(a!)"); 21 | m.def( 22 | "npu_moe_gating_topk_softmax(Tensor x, Tensor? finished_opt, int topk, Tensor(a!) y_out," 23 | "Tensor(b!) 
expert_idx_out, Tensor row_idx_out) -> (Tensor(a!), Tensor(b!))"); 24 | } 25 | 26 | } // namespace 27 | 28 | namespace { 29 | 30 | TORCH_LIBRARY_IMPL(npu_ext, PrivateUse1, m) { 31 | m.impl("npu_prompt_flash_attention_out", TORCH_FN(dlinfer::ascend::npu_prompt_flash_attention_out)); 32 | m.impl("npu_incre_flash_attention_v4_out", TORCH_FN(dlinfer::ascend::npu_incre_flash_attention_v4_out)); 33 | m.impl("npu_moe_gating_topk_softmax", TORCH_FN(dlinfer::ascend::npu_moe_gating_topk_softmax)); 34 | } 35 | 36 | } // namespace 37 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/csrc/moe_gating_topk_softmax.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024, DeepLink. All rights reserved. 2 | #include 3 | 4 | #include 5 | 6 | #include "ascend_ops.hpp" 7 | #include "op_api_common.hpp" 8 | 9 | namespace dlinfer { 10 | 11 | namespace ascend { 12 | 13 | ::std::tuple npu_moe_gating_topk_softmax(const at::Tensor& x, const c10::optional& finished_opt, int64_t topk, 14 | at::Tensor& y_out, at::Tensor& expert_idx_out, at::Tensor& row_idx_out) { 15 | EXEC_NPU_NO_FORMAT_CHECK_CMD(aclnnMoeGatingTopKSoftmax, x, finished_opt, topk, y_out, expert_idx_out, row_idx_out); 16 | return std::tie(y_out, expert_idx_out); 17 | } 18 | 19 | } // namespace ascend 20 | 21 | } // namespace dlinfer 22 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/csrc/op_api_common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024, DeepLink. All rights reserved. 
2 | #include "op_api_common.hpp" 3 | 4 | void* GetOpApiFuncAddrFromFeatureLib(const char* api_name) { 5 | GET_OP_API_FUNC_FROM_FEATURE_LIB(ops_infer_handler, "libaclnn_ops_infer.so"); 6 | GET_OP_API_FUNC_FROM_FEATURE_LIB(ops_train_handler, "libaclnn_ops_train.so"); 7 | GET_OP_API_FUNC_FROM_FEATURE_LIB(adv_infer_handler, "libaclnn_adv_infer.so"); 8 | GET_OP_API_FUNC_FROM_FEATURE_LIB(adv_train_handler, "libaclnn_adv_train.so"); 9 | GET_OP_API_FUNC_FROM_FEATURE_LIB(dvpp_handler, "libacl_dvpp_op.so"); 10 | GET_OP_API_FUNC_FROM_FEATURE_LIB(sparse_handler, "libaclsparse.so"); 11 | GET_OP_API_FUNC_FROM_FEATURE_LIB(optim_handler, "libacloptim.so"); 12 | GET_OP_API_FUNC_FROM_FEATURE_LIB(fft_handler, "libaclfft.so"); 13 | GET_OP_API_FUNC_FROM_FEATURE_LIB(rand_handler, "libaclrand.so"); 14 | return nullptr; 15 | } 16 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/csrc/torch_npu_symbol_fix.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "acl/acl.h" 7 | #include "acl/acl_rt.h" 8 | 9 | namespace c10_npu { 10 | namespace acl { 11 | 12 | // These functions are reimplemented to handle the missing symbol issue in 13 | // torch-npu >= 2.3.1. If these functions are called, it indicates an environment 14 | // setup issue and the program should terminate 15 | 16 | aclError AclrtPeekAtLastError(aclrtLastErrLevel flag) { 17 | throw std::runtime_error( 18 | "Dlinfer AclrtPeekAtLastError should not be called. " 19 | "Please check your environment setup."); 20 | return ACL_ERROR; 21 | } 22 | } // namespace acl 23 | 24 | bool checkUceErrAndRepair() { 25 | throw std::runtime_error( 26 | "Dlinfer checkUceErrAndRepair should not be called. 
" 27 | "Please check your environment setup."); 28 | return false; 29 | } 30 | 31 | } // namespace c10_npu 32 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/pytorch_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | from packaging import version 3 | 4 | import torch 5 | import torch_npu 6 | 7 | origin_torch_compile = torch.compile 8 | from torch_npu.contrib import transfer_to_npu 9 | 10 | torch.compile = origin_torch_compile 11 | 12 | if version.parse(torch.__version__) >= version.parse("2.2.0"): 13 | from importlib import import_module 14 | 15 | target_module_str = "torch.utils._triton" 16 | target_module = import_module(target_module_str) 17 | func_str = "has_triton" 18 | 19 | def has_triton(): 20 | return False 21 | 22 | setattr(target_module, func_str, has_triton) 23 | -------------------------------------------------------------------------------- /dlinfer/vendor/ascend/utils.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | import torch 3 | 4 | 5 | class SocVersion: 6 | Ascend310P: str = "Ascend310P" 7 | Ascend910B: str = "Ascend910B" 8 | 9 | @classmethod 10 | @lru_cache(maxsize=1) 11 | def device_name(cls) -> str: 12 | return torch.npu.get_device_name() 13 | 14 | @classmethod 15 | def is_Ascend310P(cls) -> bool: 16 | return cls.device_name().startswith(cls.Ascend310P) 17 | 18 | @classmethod 19 | def is_Ascend910B(cls) -> bool: 20 | return cls.device_name().startswith(cls.Ascend910B) 21 | -------------------------------------------------------------------------------- /dlinfer/vendor/camb/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/dlinfer/vendor/camb/CMakeLists.txt 
-------------------------------------------------------------------------------- /dlinfer/vendor/camb/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from . import pytorch_patch, camb_ops 4 | 5 | # TODO. weitao: camb torch-mlu-ops-v1.2.0 per_token_smooth_quantize need smooth_vec 6 | SMOOTH_VEC = torch.ones(8192, dtype=torch.float32, device="mlu") 7 | 8 | 9 | def next_power_of_2(n: int): 10 | """Return the smallest power of 2 greater than or equal to n.""" 11 | n -= 1 12 | n |= n >> 1 13 | n |= n >> 2 14 | n |= n >> 4 15 | n |= n >> 8 16 | n |= n >> 16 17 | n |= n >> 32 18 | n += 1 19 | return n 20 | 21 | 22 | def update_smooth(length): 23 | global SMOOTH_VEC 24 | if length > SMOOTH_VEC.shape[0]: 25 | SMOOTH_VEC = torch.ones( 26 | next_power_of_2(length), dtype=torch.float32, device="mlu" 27 | ) 28 | return SMOOTH_VEC 29 | -------------------------------------------------------------------------------- /dlinfer/vendor/camb/pytorch_patch.py: -------------------------------------------------------------------------------- 1 | from torch_mlu.utils.gpu_migration import migration 2 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/__init__.py: -------------------------------------------------------------------------------- 1 | from .maca_ops import * 2 | 3 | device_str = "cuda" 4 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/attention/attention_dtypes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "attention_generic.cuh" 4 | #include "dtype_bfloat16.cuh" 5 | #include "dtype_float16.cuh" 6 | #include "dtype_float32.cuh" 7 | #include "dtype_fp8.cuh" 8 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/attention/attention_generic.cuh: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from 3 | * https://github.com/NVIDIA/FasterTransformer/blob/release/v5.3_tag/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h 4 | * Copyright (c) 2023, The vLLM team. 5 | * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | #pragma once 20 | 21 | #include 22 | 23 | namespace vllm { 24 | 25 | // A vector type to store Q, K, V elements. 26 | template 27 | struct Vec {}; 28 | 29 | // A vector type to store FP32 accumulators. 30 | template 31 | struct FloatVec {}; 32 | 33 | // Template vector operations. 
34 | template 35 | inline __device__ Acc mul(A a, B b); 36 | 37 | template 38 | inline __device__ float sum(T v); 39 | 40 | template 41 | inline __device__ float dot(T a, T b) { 42 | return sum(mul(a, b)); 43 | } 44 | 45 | template 46 | inline __device__ float dot(T a, T b) { 47 | return sum(mul(a, b)); 48 | } 49 | 50 | template 51 | inline __device__ void zero(T& dst) { 52 | constexpr int WORDS = sizeof(T) / 4; 53 | union { 54 | T raw; 55 | uint32_t words[WORDS]; 56 | } tmp; 57 | 58 | #pragma unroll 59 | for (int ii = 0; ii < WORDS; ++ii) { 60 | tmp.words[ii] = 0u; 61 | } 62 | dst = tmp.raw; 63 | } 64 | 65 | } // namespace vllm 66 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/attention/dtype_fp8.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "attention_generic.cuh" 4 | 5 | #include 6 | #ifdef ENABLE_FP8 7 | #ifndef USE_ROCM 8 | #include 9 | #endif // USE_ROCM 10 | #endif // ENABLE_FP8 11 | 12 | namespace vllm { 13 | 14 | enum class Fp8KVCacheDataType { 15 | kAuto = 0, 16 | kFp8E4M3 = 1, 17 | kFp8E5M2 = 2, 18 | }; 19 | 20 | // fp8 vector types for quantization of kv cache 21 | template <> 22 | struct Vec { 23 | using Type = uint8_t; 24 | }; 25 | 26 | template <> 27 | struct Vec { 28 | using Type = uint16_t; 29 | }; 30 | 31 | template <> 32 | struct Vec { 33 | using Type = uint32_t; 34 | }; 35 | 36 | template <> 37 | struct Vec { 38 | using Type = uint2; 39 | }; 40 | 41 | } // namespace vllm 42 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/cache.h: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | void swap_blocks(torch::Tensor& src, torch::Tensor& dst, const torch::Tensor& block_mapping); 10 | 11 | // Note: the key_caches and value_caches vectors are constant but 12 | // not the Tensors they contain. The vectors need to be const refs 13 | // in order to satisfy pytorch's C++ operator registration code. 14 | void copy_blocks(std::vector const& key_caches, std::vector const& value_caches, const torch::Tensor& block_mapping); 15 | 16 | void reshape_and_cache(torch::Tensor& key, torch::Tensor& value, torch::Tensor& key_cache, torch::Tensor& value_cache, torch::Tensor& slot_mapping, 17 | const std::string& kv_cache_dtype, const double k_scale, const double v_scale); 18 | 19 | void reshape_and_cache_new(torch::Tensor& key, torch::Tensor& value, torch::Tensor& key_cache, torch::Tensor& value_cache, torch::Tensor& slot_mapping, 20 | const std::string& kv_cache_dtype, const double kv_scale, const double v_scale); 21 | 22 | void reshape_and_cache_flash(torch::Tensor& key, torch::Tensor& value, torch::Tensor& key_cache, torch::Tensor& value_cache, torch::Tensor& slot_mapping, 23 | const std::string& kv_cache_dtype, const double k_scale, const double v_scale); 24 | 25 | // Just for unittest 26 | void convert_fp8(torch::Tensor& dst_cache, torch::Tensor& src_cache, const double scale, const std::string& kv_cache_dtype); 27 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/cuda_compat.h: -------------------------------------------------------------------------------- 1 | // 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights Reserved. 
2 | #pragma once 3 | 4 | #ifdef USE_ROCM 5 | #include 6 | #endif 7 | 8 | #ifndef USE_ROCM 9 | #define WARP_SIZE 32 10 | #else 11 | #define WARP_SIZE warpSize 12 | #endif 13 | 14 | #ifndef USE_ROCM 15 | #define VLLM_LDG(arg) __ldg(arg) 16 | #else 17 | #define VLLM_LDG(arg) *(arg) 18 | #endif 19 | 20 | #ifndef USE_ROCM 21 | #define VLLM_SHFL_XOR_SYNC(var, lane_mask) __shfl_xor_sync(uint32_t(-1), var, lane_mask) 22 | #define VLLM_SHFL_XOR_SYNC_WIDTH(var, lane_mask, width) __shfl_xor_sync(uint32_t(-1), var, lane_mask, width) 23 | #else 24 | #define VLLM_SHFL_XOR_SYNC(var, lane_mask) __shfl_xor(var, lane_mask) 25 | #define VLLM_SHFL_XOR_SYNC_WIDTH(var, lane_mask, width) __shfl_xor(var, lane_mask, width) 26 | #endif 27 | 28 | #ifndef USE_ROCM 29 | #define VLLM_SHFL_SYNC(var, src_lane) __shfl_sync(uint32_t(-1), var, src_lane) 30 | #else 31 | #define VLLM_SHFL_SYNC(var, src_lane) __shfl(var, src_lane) 32 | #endif 33 | 34 | #ifndef USE_ROCM 35 | #define VLLM_SHFL_DOWN_SYNC(var, lane_delta) __shfl_down_sync(uint32_t(-1), var, lane_delta) 36 | #else 37 | #define VLLM_SHFL_DOWN_SYNC(var, lane_delta) __shfl_down(var, lane_delta) 38 | #endif 39 | 40 | #ifndef USE_ROCM 41 | #define VLLM_DevFuncAttribute_SET_MaxDynamicSharedMemorySize(FUNC, VAL) cudaFuncSetAttribute(FUNC, cudaFuncAttributeMaxDynamicSharedMemorySize, VAL) 42 | #else 43 | #define VLLM_DevFuncAttribute_SET_MaxDynamicSharedMemorySize(FUNC, VAL) hipFuncSetAttribute(FUNC, hipFuncAttributeMaxDynamicSharedMemorySize, VAL) 44 | #endif 45 | 46 | #define MXWARP_SIZE 64 47 | #ifndef USE_ROCM 48 | #define MXVLLM_SHFL_SYNC(var, src_lane) __shfl_sync(uint64_t(-1), var, src_lane) 49 | #else 50 | #define MXVLLM_SHFL_SYNC(var, src_lane) __shfl(var, src_lane) 51 | #endif 52 | 53 | #ifndef USE_ROCM 54 | #define MXVLLM_SHFL_XOR_SYNC(var, lane_mask) __shfl_xor_sync(uint64_t(-1), var, lane_mask) 55 | #else 56 | #define MXVLLM_SHFL_XOR_SYNC(var, lane_mask) __shfl_xor(var, lane_mask) 57 | #endif 58 | 
-------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/dispatch_utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Adapted from 3 | * https://github.com/pytorch/pytorch/blob/v2.0.1/aten/src/ATen/Dispatch.h 4 | */ 5 | #pragma once 6 | 7 | #include 8 | 9 | #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...) \ 10 | AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \ 11 | AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__) \ 12 | AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) 13 | 14 | #define VLLM_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__)) 15 | 16 | #define VLLM_DISPATCH_CASE_FLOATING_AND_BYTE_TYPES(...) \ 17 | AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \ 18 | AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__) \ 19 | AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) \ 20 | AT_DISPATCH_CASE(at::ScalarType::Byte, __VA_ARGS__) 21 | 22 | #define VLLM_DISPATCH_FLOATING_AND_BYTE_TYPES(TYPE, NAME, ...) AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_AND_BYTE_TYPES(__VA_ARGS__)) 23 | 24 | #define VLLM_DISPATCH_CASE_INTEGRAL_TYPES(...) \ 25 | AT_DISPATCH_CASE(at::ScalarType::Byte, __VA_ARGS__) \ 26 | AT_DISPATCH_CASE(at::ScalarType::Char, __VA_ARGS__) \ 27 | AT_DISPATCH_CASE(at::ScalarType::Short, __VA_ARGS__) \ 28 | AT_DISPATCH_CASE(at::ScalarType::Int, __VA_ARGS__) \ 29 | AT_DISPATCH_CASE(at::ScalarType::Long, __VA_ARGS__) 30 | 31 | #define VLLM_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...) 
AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_INTEGRAL_TYPES(__VA_ARGS__)) 32 | -------------------------------------------------------------------------------- /dlinfer/vendor/maca/csrc/moe/moe_ops.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void topk_softmax(torch::Tensor& topk_weights, torch::Tensor& topk_indices, torch::Tensor& token_expert_indices, torch::Tensor& gating_output); 6 | -------------------------------------------------------------------------------- /docs/quant/ascend_kv_quant.md: -------------------------------------------------------------------------------- 1 | 2 | # KV Cache量化 3 | 4 | 目前在华为Atlas 800T A2设备,由于算子功能限制,在算子模式下,仅支持离线量化。 5 | 6 | ## KV Cache量化前提 7 | 8 | - **依赖** 9 | 10 | ```shell 11 | torch==2.1.0 12 | torchvision==0.16.0 13 | torch-npu==2.1.0.post6 14 | ``` 15 | 16 | - **工具** 17 | 18 | ```shell 19 | amct_pytorch==0.22.2(Ascend-cann-amct_8.0.RC2) 20 | ``` 21 | 22 | ## KV Cache量化示例 23 | 24 | 在当前目录执行如下命令,得到量化因子记录文件,用户根据实际情况修改示例程序中的model_path(VL模型需要用其语言模型的权重)和dataset_path,并根据模型结构修改quant_layers。 25 | 26 | ```python 27 | python3 ascend_scales_offsets.py 28 | ``` 29 | 30 | 推理成功后,在当前目录会生成量化日志文件./amct_log/amct_pytorch.log和./outputs文件夹,该文件夹内包含以下内容: 31 | 32 | - **config.json**:量化配置文件,描述了如何对模型中的每一层进行量化。 33 | - **record.txt**:量化因子记录文件。 34 | 35 | 用户在使用lmdeploy时,通过环境变量ASCEND_QUANT_RECORD_FILE指定量化因子路径,并通过参数quant_policy=8,即可使用量化因子记录文件完成推理。 36 | 示例代码如下: 37 | 38 | ```python 39 | import lmdeploy 40 | from lmdeploy import PytorchEngineConfig 41 | if __name__ == "__main__": 42 | pipe = lmdeploy.pipeline("/path_to_model", 43 | backend_config = PytorchEngineConfig(tp=1, 44 | cache_max_entry_count=0.4, device_type="ascend", 45 | eager_mode=True, quant_policy=8)) 46 | question = ["Shanghai is", "Please introduce China", "How are you?"] 47 | response = pipe(question, request_output_len=256, do_preprocess=False) 48 | for idx, r in enumerate(response): 49 | print(f"Q: 
{question[idx]}") 50 | print(f"A: {r.text}") 51 | print() 52 | ``` 53 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "pyyaml", 4 | "scikit-build", 5 | "setuptools==69.5.1", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | -------------------------------------------------------------------------------- /requirements/ascend/build.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | ninja 3 | setuptools==69.5.1 4 | wheel 5 | scikit-build 6 | cmake>=3.18 7 | -r torch.txt 8 | -------------------------------------------------------------------------------- /requirements/ascend/cann.txt: -------------------------------------------------------------------------------- 1 | decorator 2 | attrs 3 | psutil 4 | absl-py 5 | cloudpickle 6 | ml-dtypes 7 | scipy 8 | tornado 9 | -------------------------------------------------------------------------------- /requirements/ascend/full.txt: -------------------------------------------------------------------------------- 1 | -r build.txt 2 | -r runtime.txt 3 | -------------------------------------------------------------------------------- /requirements/ascend/runtime.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | -r torch.txt 3 | -r cann.txt 4 | -------------------------------------------------------------------------------- /requirements/ascend/torch.txt: -------------------------------------------------------------------------------- 1 | torch==2.3.1 2 | torchvision==0.18.1 3 | torch-npu==2.3.1 4 | numpy<2.0.0 5 | pyyaml 6 | -------------------------------------------------------------------------------- /requirements/camb/build.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | setuptools==69.5.1 3 | 
wheel 4 | scikit-build 5 | cmake>=3.18 6 | -r torch.txt -------------------------------------------------------------------------------- /requirements/camb/full.txt: -------------------------------------------------------------------------------- 1 | -r build.txt 2 | -r runtime.txt -------------------------------------------------------------------------------- /requirements/camb/runtime.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | torch-mlu 3 | torch-mlu-ops 4 | -r torch.txt -------------------------------------------------------------------------------- /requirements/camb/torch.txt: -------------------------------------------------------------------------------- 1 | numpy<2.0.0 2 | pyyaml 3 | torch==2.4.0 -------------------------------------------------------------------------------- /requirements/maca/build.txt: -------------------------------------------------------------------------------- 1 | ninja 2 | setuptools 3 | wheel 4 | scikit-build 5 | cmake>=3.18 6 | -r torch.txt 7 | -------------------------------------------------------------------------------- /requirements/maca/full.txt: -------------------------------------------------------------------------------- 1 | -r build.txt 2 | -r runtime.txt 3 | -------------------------------------------------------------------------------- /requirements/maca/runtime.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | -r torch.txt 3 | -------------------------------------------------------------------------------- /requirements/maca/torch.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | -------------------------------------------------------------------------------- /run_format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | cd "$(dirname 
"$0")" || exit 1 6 | 7 | extract_first_version() { 8 | version_pattern="\b[0-9]+(\.[0-9]+)+\b" 9 | if [[ $1 =~ $version_pattern ]]; then 10 | echo "${BASH_REMATCH[0]}" 11 | else 12 | echo "Unknown" 13 | fi 14 | } 15 | 16 | get_cmd_version() { 17 | cmd=$1 18 | extract_first_version "$($cmd --version 2>&1 | head -n 1)" 19 | } 20 | 21 | check_cmd_version() { 22 | cmd=$1 23 | required_version=$2 24 | required_version_regex=^${required_version//x/[0-9]+} 25 | command -v "$cmd" >/dev/null || (echo "$cmd not found" && exit 1) 26 | current_version=$(get_cmd_version "$cmd") 27 | if [[ $current_version =~ $required_version_regex ]]; then 28 | echo "$cmd $required_version found, version: $current_version" 29 | else 30 | echo "WARNING! GitHub Actions CI uses $cmd $required_version, current version: $current_version" 31 | fi 32 | } 33 | 34 | # format all C/C++ files in current git repository with clang-format 35 | check_cmd_version clang-format 17.x 36 | git ls-files |\ 37 | grep -E '^.+\.(c|h|cpp|cc|cxx|hpp|hh|hxx)$' | 38 | grep -Ev "dlinfer/graph/dicp/vendor/AtbGraph/codegen/runtime/third_party/" | 39 | xargs clang-format -i --style=file 40 | 41 | check_cmd_version black 24.x 42 | # format all Python files in current git repository with black 43 | # now only for dipu 44 | git ls-files |\ 45 | grep -E '^.+\.py$' |\ 46 | xargs black 47 | -------------------------------------------------------------------------------- /scripts/build_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | REPO_ROOT=$(cd $(dirname $(dirname $0)); pwd) 5 | pip install -U build 6 | rm -rf ${REPO_ROOT}/_skbuild ${REPO_ROOT}/dlinfer*.egg* 7 | export DEVICE=${DEVICE:-ascend} 8 | python -m build \ 9 | -C="--build-option=--plat-name" \ 10 | -C="--build-option=manylinux2014_$(uname -m)" \ 11 | -v -w . 
12 | -------------------------------------------------------------------------------- /scripts/build_wheel_allpy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | eval "$(conda shell.bash hook)" 5 | 6 | REPO_ROOT=$(cd $(dirname $(dirname $0)); pwd) 7 | cd ${REPO_ROOT} 8 | 9 | PY_VERSION_LIST=("3.8" "3.9" "3.10") 10 | for PY_VERSION in ${PY_VERSION_LIST[@]}; do 11 | echo start building wheels for python${PY_VERSION} 12 | PY_VERSION_NAME=${PY_VERSION/./} 13 | ENV_NAME=dlinfer_build_py${PY_VERSION_NAME} 14 | conda env remove -n ${ENV_NAME} -y 15 | conda create -n ${ENV_NAME} python=${PY_VERSION} -y 16 | conda activate ${ENV_NAME} 17 | pip install -U build 18 | bash ${REPO_ROOT}/scripts/build_wheel.sh 19 | conda deactivate 20 | conda env remove -n ${ENV_NAME} -y 21 | echo end building wheels for python${PY_VERSION} 22 | done 23 | -------------------------------------------------------------------------------- /tests/readme.md: -------------------------------------------------------------------------------- 1 | # ReadMe for test model for your self 2 | 3 | ## How to add model for CI 4 | 5 | 1. 将模型权重等下载到ci机器的/data2/share_data目录(如/data2/share_data/llama_model_data/llama-2-7b-chat-hf). 6 | 2. 在config.yml中的pytorch_chat_model下添加上述模型文件夹. 7 | 3. 如果该模型的 `tp` > 1,需要在 `config.yml` 中的 `tp_config` 下面添加 8 | "模型名:tp_num"(如 Mixtral-8x7B-Instruct-v0.1: 2)。 9 | 10 | ## How to run test locally 11 | 12 | 1. 修改config.yml中对应的模型路径和log_path 13 | 14 | 2. `export DLINFER_TEST_DIR=/path/to/dlinfer/tests` 15 | 16 | 3. 
运行 17 | 18 | ```bash 19 | #!/bin/bash 20 | cd /path/to/tests 21 | #run tp=1 model on lmdeploy 22 | pytest ./ -m 'lmdeploy' -s -x --alluredir=allure-results --clean-alluredir 23 | #run tp=2 chat_model on lmdeploy 24 | python ./test_lmdeploy/e2e/test_model_tp2.py --model_type=chat --device_type=ascend 25 | #run tp=2 vl_model on lmdeploy 26 | python ./test_lmdeploy/e2e/test_model_tp2.py --model_type=vl --device_type=ascend 27 | ``` 28 | -------------------------------------------------------------------------------- /tests/test_lmdeploy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/tests/test_lmdeploy/__init__.py -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/tests/test_lmdeploy/e2e/__init__.py -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/config.yaml: -------------------------------------------------------------------------------- 1 | tp_config: 2 | Mixtral-8x7B-Instruct-v0.1: 2 3 | InternVL2-26B: 2 4 | cogvlm2-llama3-chat-19B: 2 5 | 6 | graph_config: 7 | internlm3-8b-instruct: True 8 | Meta-Llama-3-8B-Instruct: True 9 | Mixtral-8x7B-Instruct-v0.1: True 10 | Qwen2.5-7B-Instruct: True 11 | Qwen2-VL-7B-Instruct: True 12 | InternVL2-2B: True 13 | InternVL2-26B: True 14 | 15 | pytorch_chat_model: 16 | - internlm_model/internlm3-8b-instruct 17 | - llama_model/Meta-Llama-3.1-8B-Instruct 18 | - mixtral_model/Mixtral-8x7B-Instruct-v0.1 19 | - qwen_model/Qwen2.5-7B-Instruct 20 | 21 | pytorch_vl_model: 22 | - internvl_model/InternVL2-26B 23 | - internvl_model/InternVL2-2B 24 | # - cogvlm_model_data/cogvlm-chat 25 | # - 
cogvlm_model_data/cogvlm2-llama3-chat-19B 26 | - qwen_model/Qwen2-VL-7B-Instruct 27 | -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, DeepLink. All rights reserved. 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | 4 | import pytest 5 | 6 | from test_lmdeploy.utils.config_utils import get_config, get_case_config 7 | 8 | 9 | @pytest.fixture(scope="session") 10 | def config(): 11 | return get_config() 12 | 13 | 14 | @pytest.fixture(scope="class", autouse=True) 15 | def common_case_config(): 16 | return get_case_config() 17 | -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/prompt_case.yaml: -------------------------------------------------------------------------------- 1 | identity: 2 | - 你好,你叫什么名字#hi, what's your name: 3 | chinese_poem_case: 4 | - 给我一首中文诗,需要添加标点符号,请用中文回答Give me a Chinese poem in Chinese: 5 | - contain: 6 | - "," 7 | - "。" 8 | - poem 9 | - poetry 10 | - \n 11 | - len_g: 12 | 5 13 | code_testcase: 14 | - 使用python编写一个int数组的冒泡排序代码: 15 | - contain: 16 | - def 17 | - bubble 18 | - 冒泡 19 | - code 20 | - python 21 | - llama2: 22 | - contain: 23 | - def 24 | - bubble 25 | - 冒泡 26 | - code 27 | - python 28 | - assist 29 | - however -------------------------------------------------------------------------------- /tests/test_lmdeploy/e2e/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | python_files = test*_*.py # test file 3 | python_classes = Test* # test class 4 | python_functions = test_* # test function 5 | pytest_runtest_call.tryfirst = True 6 | filterwarnings = ignore::UserWarning 7 | reruns = 2 8 | reruns_delay = 10 9 | -------------------------------------------------------------------------------- /tests/test_lmdeploy/scripts/test_model_tp2.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 获取DLINFER_TEST_DIR环境变量 4 | if [ -z "$DLINFER_TEST_DIR" ]; then 5 | echo "DLINFER_TEST_DIR environment variable is not set" 6 | exit 1 7 | fi 8 | echo "DLINFER_TEST_DIR: $DLINFER_TEST_DIR" 9 | 10 | # 获取chat模型列表 11 | echo "Getting chat model list..." 12 | chat_model_tp2_list=$(python -c "from test_lmdeploy.utils.config_utils import get_torch_model_list; print(' '.join(get_torch_model_list(tp_num=2)))") 13 | echo "chat_model_tp2_list: $chat_model_tp2_list" 14 | 15 | # 遍历chat模型列表 16 | for model_case in $chat_model_tp2_list; do 17 | python $DLINFER_TEST_DIR/test_lmdeploy/e2e/test_model_tp2.py --model_case="$model_case" --model_type=chat --device_type=ascend 18 | if [ $? -ne 0 ]; then 19 | echo "The test for chat model $model_case failed. Exiting." 20 | exit 1 21 | fi 22 | done 23 | 24 | # 获取vl模型列表 25 | echo "Getting vl model list..." 26 | vl_model_tp2_list=$(python -c "from test_lmdeploy.utils.config_utils import get_torch_model_list; print(' '.join(get_torch_model_list(tp_num=2, model_type='vl_model')))") 27 | echo "vl_model_tp2_list: $vl_model_tp2_list" 28 | 29 | for model_case in $vl_model_tp2_list; do 30 | python $DLINFER_TEST_DIR/test_lmdeploy/e2e/test_model_tp2.py --model_case="$model_case" --model_type=vl --device_type=ascend 31 | if [ $? -ne 0 ]; then 32 | echo "The test for vl model $model_case failed. Exiting." 33 | exit 1 34 | fi 35 | done -------------------------------------------------------------------------------- /tests/test_lmdeploy/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepLink-org/dlinfer/64eed2662b73b264b61a9539fa0fca77ba0003bb/tests/test_lmdeploy/utils/__init__.py --------------------------------------------------------------------------------