├── LICENSE ├── README.md ├── Triton大模型部署.pdf ├── app.py ├── end_to_end_grpc_client.py ├── img ├── content.jpg └── face.jpg ├── langchain_chatglm3.py ├── langchain_chatglm3_triton.py ├── requirements.txt ├── service ├── __init__.py ├── chatglm_service.py ├── chatglm_triton_service.py ├── chatglm_trtllm_service.py ├── config.py ├── knowledge_service.py └── utils.py ├── tensorrt_llm ├── __init__.py ├── build.py ├── process.py ├── quantize.py ├── requirements.txt ├── run_chat_trt.py ├── run_hf.py ├── see_chatglm3_model.py ├── smoothquant.py ├── utils.py ├── visualize.py └── weight.py ├── triton_inference_server └── model_repo │ ├── ensemble │ └── config.pbtxt │ ├── postprocessing │ ├── 1 │ │ ├── __pycache__ │ │ │ └── model.cpython-310.pyc │ │ └── model.py │ └── config.pbtxt │ ├── preprocessing │ ├── 1 │ │ ├── __pycache__ │ │ │ └── model.cpython-310.pyc │ │ └── model.py │ └── config.pbtxt │ ├── tensorrt_llm │ └── config.pbtxt │ └── tensorrt_llm_bls │ ├── 1 │ ├── __pycache__ │ │ └── model.cpython-310.pyc │ └── model.py │ └── config.pbtxt └── vLLM ├── chatglm3_quant_awq.py ├── client.py ├── langchang_chatglm3_vllm.py ├── model_repo └── vllm_model │ ├── 1 │ └── model.json │ └── config.pbtxt ├── offline_chatglm3.py ├── prompts.txt └── results.txt /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/README.md -------------------------------------------------------------------------------- /Triton大模型部署.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/Triton大模型部署.pdf -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/app.py -------------------------------------------------------------------------------- /end_to_end_grpc_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/end_to_end_grpc_client.py -------------------------------------------------------------------------------- /img/content.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/img/content.jpg -------------------------------------------------------------------------------- /img/face.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/img/face.jpg -------------------------------------------------------------------------------- /langchain_chatglm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/langchain_chatglm3.py -------------------------------------------------------------------------------- /langchain_chatglm3_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/langchain_chatglm3_triton.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/requirements.txt -------------------------------------------------------------------------------- /service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/__init__.py -------------------------------------------------------------------------------- /service/chatglm_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/chatglm_service.py -------------------------------------------------------------------------------- /service/chatglm_triton_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/chatglm_triton_service.py -------------------------------------------------------------------------------- /service/chatglm_trtllm_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/chatglm_trtllm_service.py -------------------------------------------------------------------------------- /service/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/config.py -------------------------------------------------------------------------------- /service/knowledge_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/knowledge_service.py -------------------------------------------------------------------------------- /service/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/service/utils.py -------------------------------------------------------------------------------- /tensorrt_llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tensorrt_llm/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/build.py -------------------------------------------------------------------------------- /tensorrt_llm/process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/process.py -------------------------------------------------------------------------------- /tensorrt_llm/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/quantize.py -------------------------------------------------------------------------------- /tensorrt_llm/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/requirements.txt -------------------------------------------------------------------------------- /tensorrt_llm/run_chat_trt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/run_chat_trt.py -------------------------------------------------------------------------------- /tensorrt_llm/run_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/run_hf.py -------------------------------------------------------------------------------- /tensorrt_llm/see_chatglm3_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/see_chatglm3_model.py -------------------------------------------------------------------------------- /tensorrt_llm/smoothquant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/smoothquant.py -------------------------------------------------------------------------------- /tensorrt_llm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/utils.py -------------------------------------------------------------------------------- /tensorrt_llm/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/visualize.py -------------------------------------------------------------------------------- /tensorrt_llm/weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/tensorrt_llm/weight.py -------------------------------------------------------------------------------- /triton_inference_server/model_repo/ensemble/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/ensemble/config.pbtxt -------------------------------------------------------------------------------- /triton_inference_server/model_repo/postprocessing/1/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/postprocessing/1/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /triton_inference_server/model_repo/postprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/postprocessing/1/model.py -------------------------------------------------------------------------------- /triton_inference_server/model_repo/postprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/postprocessing/config.pbtxt -------------------------------------------------------------------------------- /triton_inference_server/model_repo/preprocessing/1/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/preprocessing/1/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /triton_inference_server/model_repo/preprocessing/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/preprocessing/1/model.py -------------------------------------------------------------------------------- /triton_inference_server/model_repo/preprocessing/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/preprocessing/config.pbtxt -------------------------------------------------------------------------------- /triton_inference_server/model_repo/tensorrt_llm/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/tensorrt_llm/config.pbtxt -------------------------------------------------------------------------------- /triton_inference_server/model_repo/tensorrt_llm_bls/1/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/tensorrt_llm_bls/1/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /triton_inference_server/model_repo/tensorrt_llm_bls/1/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/tensorrt_llm_bls/1/model.py -------------------------------------------------------------------------------- /triton_inference_server/model_repo/tensorrt_llm_bls/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/triton_inference_server/model_repo/tensorrt_llm_bls/config.pbtxt -------------------------------------------------------------------------------- /vLLM/chatglm3_quant_awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/chatglm3_quant_awq.py -------------------------------------------------------------------------------- /vLLM/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/client.py -------------------------------------------------------------------------------- /vLLM/langchang_chatglm3_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/langchang_chatglm3_vllm.py -------------------------------------------------------------------------------- /vLLM/model_repo/vllm_model/1/model.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/model_repo/vllm_model/1/model.json -------------------------------------------------------------------------------- /vLLM/model_repo/vllm_model/config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/model_repo/vllm_model/config.pbtxt -------------------------------------------------------------------------------- /vLLM/offline_chatglm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/offline_chatglm3.py -------------------------------------------------------------------------------- /vLLM/prompts.txt: -------------------------------------------------------------------------------- 1 | 你好 2 | 你能干什么 -------------------------------------------------------------------------------- /vLLM/results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/TensorRT-LLM-ChatGLM3/HEAD/vLLM/results.txt --------------------------------------------------------------------------------