├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cli_demo.py
├── demo.py
├── include
    └── kernel.hpp
├── inference_test.cpp
├── kernel
    ├── __init__.py
    ├── bind.cpp
    ├── ckernel.py
    ├── kernel.cpp
    ├── logits_processor.py
    ├── pykernel.py
    ├── pykernel_no_past.py
    ├── pykernel_no_past_new.py
    ├── pykernel_no_past_old.py
    ├── pykernel_with_past.py
    └── setup.py
├── onnx_export
    ├── change_onnx2.py
    ├── export2onnx.py
    ├── export2onnx_v2.py
    ├── export2onnx_v2_no_cache.py
    ├── export_compare_data.py
    ├── export_test.py
    ├── export_test_v2.py
    ├── export_test_v3.py
    ├── modeling_chatglm.py
    ├── run_onnx_cpu.py
    ├── run_onnx_cuda.py
    ├── run_onnx_trt.py
    └── utils.py
├── requirements.txt
└── tensorrt_export
    ├── compare.py
    ├── onnx2trt_no_cache.py
    ├── onnx2trt_with_cache.py
    ├── onnx_trt_compare.sh
    ├── onnx_trt_compare_fp16.py
    ├── onnx_trt_compare_fp32.py
    ├── onnx_trt_compare_runing.py
    ├── read_trt_profile.py
    ├── test.py
    ├── test_data_inputs.py
    ├── test_data_outputs.py
    ├── trt_check_no_past.py
    └── trt_check_with_past.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/.gitignore


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/CMakeLists.txt


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/README.md


--------------------------------------------------------------------------------
/cli_demo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/cli_demo.py


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/demo.py


--------------------------------------------------------------------------------
/include/kernel.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/include/kernel.hpp


--------------------------------------------------------------------------------
/inference_test.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/inference_test.cpp


--------------------------------------------------------------------------------
/kernel/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/__init__.py


--------------------------------------------------------------------------------
/kernel/bind.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/bind.cpp


--------------------------------------------------------------------------------
/kernel/ckernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/ckernel.py


--------------------------------------------------------------------------------
/kernel/kernel.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/kernel.cpp


--------------------------------------------------------------------------------
/kernel/logits_processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/logits_processor.py


--------------------------------------------------------------------------------
/kernel/pykernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/pykernel.py


--------------------------------------------------------------------------------
/kernel/pykernel_no_past.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/pykernel_no_past.py


--------------------------------------------------------------------------------
/kernel/pykernel_no_past_new.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/pykernel_no_past_new.py


--------------------------------------------------------------------------------
/kernel/pykernel_no_past_old.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/pykernel_no_past_old.py


--------------------------------------------------------------------------------
/kernel/pykernel_with_past.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/pykernel_with_past.py


--------------------------------------------------------------------------------
/kernel/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/kernel/setup.py


--------------------------------------------------------------------------------
/onnx_export/change_onnx2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/change_onnx2.py


--------------------------------------------------------------------------------
/onnx_export/export2onnx.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/export2onnx.py


--------------------------------------------------------------------------------
/onnx_export/export2onnx_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/export2onnx_v2.py


--------------------------------------------------------------------------------
/onnx_export/export2onnx_v2_no_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/export2onnx_v2_no_cache.py


--------------------------------------------------------------------------------
/onnx_export/export_compare_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/export_compare_data.py


--------------------------------------------------------------------------------
/onnx_export/export_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/export_test.py


--------------------------------------------------------------------------------
/onnx_export/export_test_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/export_test_v2.py


--------------------------------------------------------------------------------
/onnx_export/export_test_v3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/export_test_v3.py


--------------------------------------------------------------------------------
/onnx_export/modeling_chatglm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/modeling_chatglm.py


--------------------------------------------------------------------------------
/onnx_export/run_onnx_cpu.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/run_onnx_cpu.py


--------------------------------------------------------------------------------
/onnx_export/run_onnx_cuda.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/run_onnx_cuda.py


--------------------------------------------------------------------------------
/onnx_export/run_onnx_trt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/run_onnx_trt.py


--------------------------------------------------------------------------------
/onnx_export/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/onnx_export/utils.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/requirements.txt


--------------------------------------------------------------------------------
/tensorrt_export/compare.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/compare.py


--------------------------------------------------------------------------------
/tensorrt_export/onnx2trt_no_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/onnx2trt_no_cache.py


--------------------------------------------------------------------------------
/tensorrt_export/onnx2trt_with_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/onnx2trt_with_cache.py


--------------------------------------------------------------------------------
/tensorrt_export/onnx_trt_compare.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/onnx_trt_compare.sh


--------------------------------------------------------------------------------
/tensorrt_export/onnx_trt_compare_fp16.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/onnx_trt_compare_fp16.py


--------------------------------------------------------------------------------
/tensorrt_export/onnx_trt_compare_fp32.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/onnx_trt_compare_fp32.py


--------------------------------------------------------------------------------
/tensorrt_export/onnx_trt_compare_runing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/onnx_trt_compare_runing.py


--------------------------------------------------------------------------------
/tensorrt_export/read_trt_profile.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/read_trt_profile.py


--------------------------------------------------------------------------------
/tensorrt_export/test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/test.py


--------------------------------------------------------------------------------
/tensorrt_export/test_data_inputs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/test_data_inputs.py


--------------------------------------------------------------------------------
/tensorrt_export/test_data_outputs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/test_data_outputs.py


--------------------------------------------------------------------------------
/tensorrt_export/trt_check_no_past.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/trt_check_no_past.py


--------------------------------------------------------------------------------
/tensorrt_export/trt_check_with_past.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tlntin/ChatGLM2-6B-TensorRT/HEAD/tensorrt_export/trt_check_with_past.py


--------------------------------------------------------------------------------