├── .clang-format ├── .github └── ISSUE_TEMPLATE │ ├── config.yml │ └── issue.md ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── deps.cmake ├── grpc_serving.cmake ├── install.cmake ├── llm.cmake ├── pplllmserving-config.cmake.in ├── sentencepiece.cmake └── xxhash.cmake ├── docs └── llama_guide.md ├── samples └── integration-cuda │ ├── CMakeLists.txt │ └── main.cc ├── src ├── backends │ └── cuda │ │ ├── post_processor.cc │ │ ├── post_processor.h │ │ ├── resource_manager.cc │ │ └── resource_manager.h ├── common │ ├── config.cc │ ├── config.h │ ├── connection.h │ ├── post_processor.h │ ├── profiler.cc │ ├── profiler.h │ ├── request.h │ ├── resource.h │ └── response.h ├── engine │ ├── llm_engine.cc │ └── llm_engine.h ├── generated │ └── onnx │ │ ├── v23.4 │ │ ├── llm.grpc.pb.cc │ │ ├── llm.grpc.pb.h │ │ ├── llm.pb.cc │ │ ├── llm.pb.h │ │ ├── sentencepiece.pb.cc │ │ ├── sentencepiece.pb.h │ │ ├── sentencepiece_model.pb.cc │ │ └── sentencepiece_model.pb.h │ │ └── v3.1.0 │ │ ├── sentencepiece.pb.cc │ │ ├── sentencepiece.pb.h │ │ ├── sentencepiece.proto │ │ ├── sentencepiece_model.pb.cc │ │ ├── sentencepiece_model.pb.h │ │ └── sentencepiece_model.proto ├── generator │ ├── llm_generator.cc │ └── llm_generator.h ├── onnx │ └── onnx.proto ├── serving │ └── grpc │ │ ├── grpc_server.cc │ │ ├── grpc_server.h │ │ └── proto │ │ └── llm.proto ├── tokenizer │ ├── models │ │ ├── baichuan │ │ │ └── baichuan_tokenizer.h │ │ ├── internlm │ │ │ └── internlm_tokenizer.h │ │ ├── llama │ │ │ └── llama_tokenizer.h │ │ └── llama3 │ │ │ ├── llama3_tokenizer.h │ │ │ └── tokenizer_config.json │ ├── tokenizer.h │ ├── tokenizer_factory.h │ ├── tokenizer_impl.h │ ├── tokenizer_impl_hf.h │ └── tokenizer_impl_sp.h └── utils │ ├── index_manager.h │ ├── mpsc_request_scheduler.h │ ├── prefix_cache_manager.h │ ├── utils.cc │ └── utils.h ├── test ├── CMakeLists.txt └── test_prefix_cache_mgr.cc └── tools ├── CMakeLists.txt ├── backtrace.h ├── benchmark_prefix_cache_offline.cc ├── client_pressure.cc ├── client_qps_measure.cc ├── client_qps_measure_token_in_out.cc ├── client_sample.cc ├── client_sample_token_in_out.cc ├── llm_server.cc ├── offline_inference.cc ├── samples_1024.json ├── samples_2048.json ├── samples_4096.json ├── samples_8192.json ├── simple_flags.cc └── simple_flags.h /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/.clang-format -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/.github/ISSUE_TEMPLATE/issue.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .vscode/ 3 | deps/ 4 | ppl-build/ -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/README.md -------------------------------------------------------------------------------- /cmake/deps.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/cmake/deps.cmake -------------------------------------------------------------------------------- /cmake/grpc_serving.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/cmake/grpc_serving.cmake -------------------------------------------------------------------------------- /cmake/install.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/cmake/install.cmake -------------------------------------------------------------------------------- /cmake/llm.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/cmake/llm.cmake -------------------------------------------------------------------------------- /cmake/pplllmserving-config.cmake.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/cmake/pplllmserving-config.cmake.in -------------------------------------------------------------------------------- /cmake/sentencepiece.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/cmake/sentencepiece.cmake -------------------------------------------------------------------------------- /cmake/xxhash.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/cmake/xxhash.cmake -------------------------------------------------------------------------------- /docs/llama_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/docs/llama_guide.md -------------------------------------------------------------------------------- /samples/integration-cuda/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/samples/integration-cuda/CMakeLists.txt -------------------------------------------------------------------------------- /samples/integration-cuda/main.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/samples/integration-cuda/main.cc -------------------------------------------------------------------------------- /src/backends/cuda/post_processor.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/backends/cuda/post_processor.cc -------------------------------------------------------------------------------- /src/backends/cuda/post_processor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/backends/cuda/post_processor.h -------------------------------------------------------------------------------- /src/backends/cuda/resource_manager.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/backends/cuda/resource_manager.cc -------------------------------------------------------------------------------- /src/backends/cuda/resource_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/backends/cuda/resource_manager.h -------------------------------------------------------------------------------- /src/common/config.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/config.cc -------------------------------------------------------------------------------- /src/common/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/config.h -------------------------------------------------------------------------------- /src/common/connection.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/connection.h -------------------------------------------------------------------------------- /src/common/post_processor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/post_processor.h -------------------------------------------------------------------------------- /src/common/profiler.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/profiler.cc -------------------------------------------------------------------------------- /src/common/profiler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/profiler.h -------------------------------------------------------------------------------- /src/common/request.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/request.h -------------------------------------------------------------------------------- /src/common/resource.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/resource.h -------------------------------------------------------------------------------- /src/common/response.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/common/response.h -------------------------------------------------------------------------------- /src/engine/llm_engine.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/engine/llm_engine.cc -------------------------------------------------------------------------------- /src/engine/llm_engine.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/engine/llm_engine.h -------------------------------------------------------------------------------- /src/generated/onnx/v23.4/llm.grpc.pb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v23.4/llm.grpc.pb.cc -------------------------------------------------------------------------------- /src/generated/onnx/v23.4/llm.grpc.pb.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v23.4/llm.grpc.pb.h -------------------------------------------------------------------------------- /src/generated/onnx/v23.4/llm.pb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v23.4/llm.pb.cc -------------------------------------------------------------------------------- /src/generated/onnx/v23.4/llm.pb.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v23.4/llm.pb.h -------------------------------------------------------------------------------- /src/generated/onnx/v23.4/sentencepiece.pb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v23.4/sentencepiece.pb.cc -------------------------------------------------------------------------------- /src/generated/onnx/v23.4/sentencepiece.pb.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v23.4/sentencepiece.pb.h -------------------------------------------------------------------------------- /src/generated/onnx/v23.4/sentencepiece_model.pb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v23.4/sentencepiece_model.pb.cc -------------------------------------------------------------------------------- /src/generated/onnx/v23.4/sentencepiece_model.pb.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v23.4/sentencepiece_model.pb.h -------------------------------------------------------------------------------- /src/generated/onnx/v3.1.0/sentencepiece.pb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v3.1.0/sentencepiece.pb.cc -------------------------------------------------------------------------------- /src/generated/onnx/v3.1.0/sentencepiece.pb.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v3.1.0/sentencepiece.pb.h -------------------------------------------------------------------------------- /src/generated/onnx/v3.1.0/sentencepiece.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v3.1.0/sentencepiece.proto -------------------------------------------------------------------------------- /src/generated/onnx/v3.1.0/sentencepiece_model.pb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v3.1.0/sentencepiece_model.pb.cc -------------------------------------------------------------------------------- /src/generated/onnx/v3.1.0/sentencepiece_model.pb.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v3.1.0/sentencepiece_model.pb.h -------------------------------------------------------------------------------- /src/generated/onnx/v3.1.0/sentencepiece_model.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generated/onnx/v3.1.0/sentencepiece_model.proto -------------------------------------------------------------------------------- /src/generator/llm_generator.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generator/llm_generator.cc -------------------------------------------------------------------------------- /src/generator/llm_generator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/generator/llm_generator.h -------------------------------------------------------------------------------- /src/onnx/onnx.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/onnx/onnx.proto -------------------------------------------------------------------------------- /src/serving/grpc/grpc_server.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/serving/grpc/grpc_server.cc -------------------------------------------------------------------------------- /src/serving/grpc/grpc_server.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/serving/grpc/grpc_server.h -------------------------------------------------------------------------------- /src/serving/grpc/proto/llm.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/serving/grpc/proto/llm.proto -------------------------------------------------------------------------------- /src/tokenizer/models/baichuan/baichuan_tokenizer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/models/baichuan/baichuan_tokenizer.h -------------------------------------------------------------------------------- /src/tokenizer/models/internlm/internlm_tokenizer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/models/internlm/internlm_tokenizer.h -------------------------------------------------------------------------------- /src/tokenizer/models/llama/llama_tokenizer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/models/llama/llama_tokenizer.h -------------------------------------------------------------------------------- /src/tokenizer/models/llama3/llama3_tokenizer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/models/llama3/llama3_tokenizer.h -------------------------------------------------------------------------------- /src/tokenizer/models/llama3/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/models/llama3/tokenizer_config.json -------------------------------------------------------------------------------- /src/tokenizer/tokenizer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/tokenizer.h -------------------------------------------------------------------------------- /src/tokenizer/tokenizer_factory.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/tokenizer_factory.h -------------------------------------------------------------------------------- /src/tokenizer/tokenizer_impl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/tokenizer_impl.h -------------------------------------------------------------------------------- /src/tokenizer/tokenizer_impl_hf.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/tokenizer_impl_hf.h -------------------------------------------------------------------------------- /src/tokenizer/tokenizer_impl_sp.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/tokenizer/tokenizer_impl_sp.h -------------------------------------------------------------------------------- /src/utils/index_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/utils/index_manager.h -------------------------------------------------------------------------------- /src/utils/mpsc_request_scheduler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/utils/mpsc_request_scheduler.h -------------------------------------------------------------------------------- /src/utils/prefix_cache_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/utils/prefix_cache_manager.h -------------------------------------------------------------------------------- /src/utils/utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/utils/utils.cc -------------------------------------------------------------------------------- /src/utils/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/src/utils/utils.h -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/test/CMakeLists.txt -------------------------------------------------------------------------------- /test/test_prefix_cache_mgr.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/test/test_prefix_cache_mgr.cc -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/CMakeLists.txt -------------------------------------------------------------------------------- /tools/backtrace.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/backtrace.h -------------------------------------------------------------------------------- /tools/benchmark_prefix_cache_offline.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/benchmark_prefix_cache_offline.cc -------------------------------------------------------------------------------- /tools/client_pressure.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/client_pressure.cc -------------------------------------------------------------------------------- /tools/client_qps_measure.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/client_qps_measure.cc -------------------------------------------------------------------------------- /tools/client_qps_measure_token_in_out.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/client_qps_measure_token_in_out.cc -------------------------------------------------------------------------------- /tools/client_sample.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/client_sample.cc -------------------------------------------------------------------------------- /tools/client_sample_token_in_out.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/client_sample_token_in_out.cc -------------------------------------------------------------------------------- /tools/llm_server.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/llm_server.cc -------------------------------------------------------------------------------- /tools/offline_inference.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/offline_inference.cc -------------------------------------------------------------------------------- /tools/samples_1024.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/samples_1024.json -------------------------------------------------------------------------------- /tools/samples_2048.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/samples_2048.json -------------------------------------------------------------------------------- /tools/samples_4096.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/samples_4096.json -------------------------------------------------------------------------------- /tools/samples_8192.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/samples_8192.json -------------------------------------------------------------------------------- /tools/simple_flags.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/simple_flags.cc -------------------------------------------------------------------------------- /tools/simple_flags.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenPPL/ppl.llm.serving/HEAD/tools/simple_flags.h --------------------------------------------------------------------------------