├── LICENSE
├── README.md
├── Triton大模型部署.pdf
├── app.py
├── end_to_end_grpc_client.py
├── img
    ├── content.jpg
    └── face.jpg
├── langchain_chatglm3.py
├── langchain_chatglm3_triton.py
├── requirements.txt
├── service
    ├── __init__.py
    ├── chatglm_service.py
    ├── chatglm_triton_service.py
    ├── chatglm_trtllm_service.py
    ├── config.py
    ├── knowledge_service.py
    └── utils.py
├── tensorrt_llm
    ├── __init__.py
    ├── build.py
    ├── process.py
    ├── quantize.py
    ├── requirements.txt
    ├── run_chat_trt.py
    ├── run_hf.py
    ├── see_chatglm3_model.py
    ├── smoothquant.py
    ├── utils.py
    ├── visualize.py
    └── weight.py
├── triton_inference_server
    └── model_repo
    │   ├── ensemble
    │       └── config.pbtxt
    │   ├── postprocessing
    │       ├── 1
    │       │   ├── __pycache__
    │       │   │   └── model.cpython-310.pyc
    │       │   └── model.py
    │       └── config.pbtxt
    │   ├── preprocessing
    │       ├── 1
    │       │   ├── __pycache__
    │       │   │   └── model.cpython-310.pyc
    │       │   └── model.py
    │       └── config.pbtxt
    │   ├── tensorrt_llm
    │       └── config.pbtxt
    │   └── tensorrt_llm_bls
    │       ├── 1
    │           ├── __pycache__
    │           │   └── model.cpython-310.pyc
    │           └── model.py
    │       └── config.pbtxt
└── vLLM
    ├── chatglm3_quant_awq.py
    ├── client.py
    ├── langchang_chatglm3_vllm.py
    ├── model_repo
        └── vllm_model
        │   ├── 1
        │       └── model.json
        │   └── config.pbtxt
    ├── offline_chatglm3.py
    ├── prompts.txt
    └── results.txt


/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/README.md


--------------------------------------------------------------------------------
/Triton大模型部署.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/Triton大模型部署.pdf


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/app.py


--------------------------------------------------------------------------------
/end_to_end_grpc_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/end_to_end_grpc_client.py


--------------------------------------------------------------------------------
/img/content.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/img/content.jpg


--------------------------------------------------------------------------------
/img/face.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/img/face.jpg


--------------------------------------------------------------------------------
/langchain_chatglm3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/langchain_chatglm3.py


--------------------------------------------------------------------------------
/langchain_chatglm3_triton.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/langchain_chatglm3_triton.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/requirements.txt


--------------------------------------------------------------------------------
/service/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/__init__.py


--------------------------------------------------------------------------------
/service/chatglm_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/chatglm_service.py


--------------------------------------------------------------------------------
/service/chatglm_triton_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/chatglm_triton_service.py


--------------------------------------------------------------------------------
/service/chatglm_trtllm_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/chatglm_trtllm_service.py


--------------------------------------------------------------------------------
/service/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/config.py


--------------------------------------------------------------------------------
/service/knowledge_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/knowledge_service.py


--------------------------------------------------------------------------------
/service/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/utils.py


--------------------------------------------------------------------------------
/tensorrt_llm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tensorrt_llm/build.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/build.py


--------------------------------------------------------------------------------
/tensorrt_llm/process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/process.py


--------------------------------------------------------------------------------
/tensorrt_llm/quantize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/quantize.py


--------------------------------------------------------------------------------
/tensorrt_llm/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/requirements.txt


--------------------------------------------------------------------------------
/tensorrt_llm/run_chat_trt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/run_chat_trt.py


--------------------------------------------------------------------------------
/tensorrt_llm/run_hf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/run_hf.py


--------------------------------------------------------------------------------
/tensorrt_llm/see_chatglm3_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/see_chatglm3_model.py


--------------------------------------------------------------------------------
/tensorrt_llm/smoothquant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/smoothquant.py


--------------------------------------------------------------------------------
/tensorrt_llm/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/utils.py


--------------------------------------------------------------------------------
/tensorrt_llm/visualize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/visualize.py


--------------------------------------------------------------------------------
/tensorrt_llm/weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/weight.py


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/ensemble/config.pbtxt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/ensemble/config.pbtxt


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/postprocessing/1/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/postprocessing/1/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/postprocessing/1/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/postprocessing/1/model.py


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/postprocessing/config.pbtxt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/postprocessing/config.pbtxt


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/preprocessing/1/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/preprocessing/1/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/preprocessing/1/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/preprocessing/1/model.py


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/preprocessing/config.pbtxt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/preprocessing/config.pbtxt


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/tensorrt_llm/config.pbtxt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/tensorrt_llm/config.pbtxt


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/tensorrt_llm_bls/1/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/tensorrt_llm_bls/1/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/tensorrt_llm_bls/1/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/tensorrt_llm_bls/1/model.py


--------------------------------------------------------------------------------
/triton_inference_server/model_repo/tensorrt_llm_bls/config.pbtxt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/tensorrt_llm_bls/config.pbtxt


--------------------------------------------------------------------------------
/vLLM/chatglm3_quant_awq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/chatglm3_quant_awq.py


--------------------------------------------------------------------------------
/vLLM/client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/client.py


--------------------------------------------------------------------------------
/vLLM/langchang_chatglm3_vllm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/langchang_chatglm3_vllm.py


--------------------------------------------------------------------------------
/vLLM/model_repo/vllm_model/1/model.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/model_repo/vllm_model/1/model.json


--------------------------------------------------------------------------------
/vLLM/model_repo/vllm_model/config.pbtxt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/model_repo/vllm_model/config.pbtxt


--------------------------------------------------------------------------------
/vLLM/offline_chatglm3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/offline_chatglm3.py


--------------------------------------------------------------------------------
/vLLM/prompts.txt:
--------------------------------------------------------------------------------
1 | 你好
2 | 你能干什么


--------------------------------------------------------------------------------
/vLLM/results.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/results.txt


--------------------------------------------------------------------------------