├── .gitattributes ├── .gitignore ├── LEGAL.md ├── LICENSE ├── README.md ├── flood ├── README.md ├── benchmark │ ├── bench_flood.py │ ├── ops │ │ ├── bench_fp8_scaled_mm.py │ │ ├── bench_rms_norm.py │ │ ├── bench_seg_attn.py │ │ ├── bench_seg_la.py │ │ ├── bench_silu.py │ │ └── bench_update_cache.py │ └── vllm │ │ ├── backend_request_func.py │ │ ├── bench_throughput.py │ │ ├── bench_vllm.py │ │ ├── benchmark_serving.py │ │ ├── benchmark_utils.py │ │ ├── vllm_client.sh │ │ └── vllm_serve.sh ├── build.sh ├── csrc │ ├── activation │ │ ├── activation_kernels.cu │ │ └── activation_kernels.h │ ├── cache │ │ ├── cache.cu │ │ └── cache.h │ ├── cuda_type_utils.h │ ├── flood.cpp │ ├── layernorm │ │ ├── reduction.cuh │ │ ├── rmsnorm.cu │ │ └── rmsnorm.h │ ├── moe │ │ ├── moe_align.cu │ │ ├── moe_op.h │ │ ├── moe_sum.cu │ │ └── topk_softmax_kernels.cu │ ├── quantize │ │ ├── fp8_quant.cu │ │ ├── fp8_quant.cuh │ │ └── fp8_quant.h │ ├── rope │ │ ├── rope.cu │ │ └── rope.h │ └── vec_types.h ├── example │ ├── dist_example.py │ ├── ling_example.py │ ├── lookahead_example.py │ ├── multimodal_example.py │ ├── simple_example.py │ └── stream_example.py ├── figures │ └── segcache.png ├── flood │ ├── __init__.py │ ├── facade │ │ ├── __init__.py │ │ ├── dist_llm.py │ │ └── llm.py │ ├── layers │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── embedding.py │ │ ├── linear.py │ │ ├── moe.py │ │ ├── rope.py │ │ ├── sampler.py │ │ └── sync.py │ ├── models │ │ ├── __init__.py │ │ ├── configuration_bailing.py │ │ ├── configuration_bailing_moe.py │ │ ├── configuration_bailing_moe_linear.py │ │ ├── configuration_bailing_moe_linear_v2.py │ │ ├── configuration_bailing_moe_v2.py │ │ ├── configuration_deepseek.py │ │ ├── modeling_bailing.py │ │ ├── modeling_bailing_moe.py │ │ ├── modeling_bailing_moe_linear.py │ │ ├── modeling_bailing_moe_linear_v2.py │ │ ├── modeling_bailing_moe_v2.py │ │ ├── modeling_deepseek.py │ │ ├── modeling_deepseekv2.py │ │ ├── modeling_deepseekv3.py │ │ ├── modeling_llama.py │ │ ├── modeling_qwen2.py │ │ ├── modeling_qwen3.py │ │ ├── modeling_qwen3_moe.py │ │ ├── scaffold.py │ │ └── tokenization_bailing.py │ ├── ops │ │ ├── __init__.py │ │ ├── activation.py │ │ ├── cache.py │ │ ├── draft.py │ │ ├── gemm.py │ │ ├── norm.py │ │ ├── quantization.py │ │ ├── rope.py │ │ ├── sample.py │ │ ├── seg_attn.py │ │ ├── seg_la.py │ │ └── seg_mla.py │ ├── pipe.sh │ └── utils │ │ ├── __init__.py │ │ ├── batch.py │ │ ├── benchmark.py │ │ ├── cache.py │ │ ├── reader.py │ │ ├── request.py │ │ ├── sampling.py │ │ └── speculative.py ├── pyproject.toml ├── requirements.txt ├── service │ ├── bench_service.py │ ├── launch_server.py │ └── send_http_request.py ├── setup.py └── test │ ├── test_batch_mla.py │ ├── test_block_fp8.py │ ├── test_draft.py │ ├── test_fp8_quant.py │ ├── test_fuse_moe.py │ ├── test_hf_model.py │ ├── test_int8_gemm.py │ ├── test_qknorm_rope.py │ ├── test_quant_and_update_cache.py │ ├── test_rms.py │ ├── test_rope.py │ ├── test_sample.py │ ├── test_seg_attn.py │ ├── test_seg_la.py │ ├── test_seg_mla.py │ ├── test_silu.py │ ├── test_update_cache.py │ ├── test_vllm_model.py │ ├── test_yarn.py │ └── tests.sh ├── ipad ├── README.md ├── examples │ ├── glm_example.py │ ├── llama_example.py │ ├── opt_example.py │ └── preprocess.py ├── ipad │ ├── __init__.py │ ├── common │ │ ├── __init__.py │ │ ├── distill_worker.py │ │ └── sparse_module.py │ └── models │ │ ├── __init__.py │ │ ├── glm │ │ ├── __init__.py │ │ ├── configuration_glm.py │ │ ├── glm_trainer.py │ │ ├── modeling_glm.py │ │ ├── predict.py │ │ ├── preprocess.py │ │ └── tokenization_glm.py │ │ ├── llama │ │ ├── __init__.py │ │ ├── configuration_llama.py │ │ ├── llama_trainer.py │ │ ├── modeling_llama.py │ │ ├── predict.py │ │ └── preprocess.py │ │ └── opt │ │ ├── __init__.py │ │ ├── configuration_opt.py │ │ ├── datasets.py │ │ ├── modeling_opt.py │ │ ├── opt_trainer.py │ │ ├── predict.py │ │ └── rename_state_dict.py └── setup.py ├── lookahead ├── README.md ├── benchmarks │ ├── benchmark.py │ ├── chatglm_benchmark.py │ ├── codellama_benchmark.py │ ├── glm_benchmark.py │ ├── llama_benchmark.py │ ├── pia_lantency.py │ ├── preprocess_sample.py │ ├── trie_benchmark.py │ └── vllm_latency.py ├── datasets │ └── dataset.py ├── examples │ ├── baichuan2_13b_example.py │ ├── baichuan2_7b_batch_example.py │ ├── baichuan2_7b_example.py │ ├── baichuan_13b_example.py │ ├── baichuan_7b_example.py │ ├── bloom_example.py │ ├── chatglm3_example.py │ ├── chatglm_example.py │ ├── codellama_example.py │ ├── glm_batch_example.py │ ├── glm_example.py │ ├── gpt2_example.py │ ├── gptj_example.py │ ├── internlm_example.py │ ├── llama_batch_example.py │ ├── llama_example.py │ ├── llama_flash_example.py │ ├── llama_stream_example.py │ ├── local_path.py │ ├── mistral_example.py │ ├── mixtral_example.py │ ├── mixtral_quant_example.py │ ├── opt_batch_example.py │ ├── opt_example.py │ ├── qwen1.5_example.py │ ├── qwen1.5_quant_example.py │ ├── qwen_example.py │ └── qwen_quant_example.py ├── figures │ ├── dynamic.gif │ ├── flow.png │ ├── glm_la_off.gif │ ├── glm_la_on.gif │ ├── llama_la_off.gif │ ├── llama_la_on.gif │ ├── trie_construct.gif │ └── trie_retrieve.gif ├── lookahead │ ├── __init__.py │ ├── common │ │ ├── __init__.py │ │ ├── lookahead_cache.py │ │ ├── lookahead_generation_utils.py │ │ ├── pretrained_model.py │ │ └── pretrained_model_batch.py │ ├── csrc │ │ ├── __init__.py │ │ └── triton │ │ │ ├── __init__.py │ │ │ └── rms_norm.py │ └── models │ │ ├── __init__.py │ │ ├── baichuan2_13b │ │ ├── __init__.py │ │ ├── configuration_baichuan.py │ │ ├── generation_utils.py │ │ ├── handler.py │ │ ├── modeling_baichuan.py │ │ ├── quantizer.py │ │ └── tokenization_baichuan.py │ │ ├── baichuan2_7b │ │ ├── __init__.py │ │ ├── configuration_baichuan.py │ │ ├── generation_utils.py │ │ ├── modeling_baichuan.py │ │ ├── modeling_baichuan_batch.py │ │ ├── quantizer.py │ │ └── tokenization_baichuan.py │ │ ├── baichuan_13b │ │ ├── __init__.py │ │ ├── configuration_baichuan.py │ │ ├── generation_utils.py │ │ ├── handler.py │ │ ├── modeling_baichuan.py │ │ ├── quantizer.py │ │ └── tokenization_baichuan.py │ │ ├── baichuan_7b │ │ ├── __init__.py │ │ ├── configuration_baichuan.py │ │ ├── handler.py │ │ ├── modeling_baichuan.py │ │ └── tokenization_baichuan.py │ │ ├── bloom │ │ ├── __init__.py │ │ └── modeling_bloom.py │ │ ├── chatglm │ │ ├── __init__.py │ │ ├── configuration_chatglm.py │ │ ├── modeling_chatglm.py │ │ └── tokenization_chatglm.py │ │ ├── chatglm3 │ │ ├── __init__.py │ │ ├── configuration_chatglm.py │ │ ├── modeling_chatglm.py │ │ └── tokenization_chatglm.py │ │ ├── glm │ │ ├── __init__.py │ │ ├── configuration_glm.py │ │ ├── modeling_glm.py │ │ ├── modeling_glm_batch.py │ │ └── tokenization_glm.py │ │ ├── gpt2 │ │ ├── __init__.py │ │ └── modeling_gpt2.py │ │ ├── gptj │ │ ├── __init__.py │ │ └── modeling_gptj.py │ │ ├── internlm │ │ ├── __init__.py │ │ ├── configuration_internlm.py │ │ ├── modeling_internlm2.py │ │ └── tokenization_internlm.py │ │ ├── llama │ │ ├── __init__.py │ │ ├── modeling_llama.py │ │ ├── modeling_llama_batch.py │ │ ├── modeling_llama_flash.py │ │ └── modeling_llama_fuse.py │ │ ├── mistral │ │ ├── __init__.py │ │ ├── configuration_mistral.py │ │ └── modeling_mistral.py │ │ ├── mixtral │ │ ├── __init__.py │ │ ├── configuration_mixtral.py │ │ └── modeling_mixtral.py │ │ ├── opt │ │ ├── __init__.py │ │ ├── modeling_opt.py │ │ └── modeling_opt_batch.py │ │ ├── qwen │ │ ├── __init__.py │ │ ├── configuration_qwen.py │ │ ├── modeling_qwen.py │ │ ├── qwen_generation_utils.py │ │ └── tokenization_qwen.py │ │ └── qwen2 │ │ ├── __init__.py │ │ ├── configuration_qwen2.py │ │ └── modeling_qwen2.py ├── requirements.txt ├── scripts │ └── tests.sh ├── setup.py └── tests │ ├── test_lookahead_cache.py │ └── test_triton_rms_norm.py └── pre-push /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/.gitignore -------------------------------------------------------------------------------- /LEGAL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/LEGAL.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/README.md -------------------------------------------------------------------------------- /flood/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/README.md -------------------------------------------------------------------------------- /flood/benchmark/bench_flood.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/bench_flood.py -------------------------------------------------------------------------------- /flood/benchmark/ops/bench_fp8_scaled_mm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/ops/bench_fp8_scaled_mm.py -------------------------------------------------------------------------------- /flood/benchmark/ops/bench_rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/ops/bench_rms_norm.py -------------------------------------------------------------------------------- /flood/benchmark/ops/bench_seg_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/ops/bench_seg_attn.py -------------------------------------------------------------------------------- /flood/benchmark/ops/bench_seg_la.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/ops/bench_seg_la.py -------------------------------------------------------------------------------- /flood/benchmark/ops/bench_silu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/ops/bench_silu.py -------------------------------------------------------------------------------- /flood/benchmark/ops/bench_update_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/ops/bench_update_cache.py -------------------------------------------------------------------------------- /flood/benchmark/vllm/backend_request_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/vllm/backend_request_func.py -------------------------------------------------------------------------------- /flood/benchmark/vllm/bench_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/vllm/bench_throughput.py -------------------------------------------------------------------------------- /flood/benchmark/vllm/bench_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/vllm/bench_vllm.py -------------------------------------------------------------------------------- /flood/benchmark/vllm/benchmark_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/vllm/benchmark_serving.py -------------------------------------------------------------------------------- /flood/benchmark/vllm/benchmark_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/vllm/benchmark_utils.py -------------------------------------------------------------------------------- /flood/benchmark/vllm/vllm_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/vllm/vllm_client.sh -------------------------------------------------------------------------------- /flood/benchmark/vllm/vllm_serve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/benchmark/vllm/vllm_serve.sh -------------------------------------------------------------------------------- /flood/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/build.sh -------------------------------------------------------------------------------- /flood/csrc/activation/activation_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/activation/activation_kernels.cu -------------------------------------------------------------------------------- /flood/csrc/activation/activation_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/activation/activation_kernels.h -------------------------------------------------------------------------------- /flood/csrc/cache/cache.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/cache/cache.cu -------------------------------------------------------------------------------- /flood/csrc/cache/cache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/cache/cache.h -------------------------------------------------------------------------------- /flood/csrc/cuda_type_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/cuda_type_utils.h -------------------------------------------------------------------------------- /flood/csrc/flood.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/flood.cpp -------------------------------------------------------------------------------- /flood/csrc/layernorm/reduction.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/layernorm/reduction.cuh -------------------------------------------------------------------------------- /flood/csrc/layernorm/rmsnorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/layernorm/rmsnorm.cu -------------------------------------------------------------------------------- /flood/csrc/layernorm/rmsnorm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/layernorm/rmsnorm.h -------------------------------------------------------------------------------- /flood/csrc/moe/moe_align.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/moe/moe_align.cu -------------------------------------------------------------------------------- /flood/csrc/moe/moe_op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/moe/moe_op.h -------------------------------------------------------------------------------- /flood/csrc/moe/moe_sum.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/moe/moe_sum.cu -------------------------------------------------------------------------------- /flood/csrc/moe/topk_softmax_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/moe/topk_softmax_kernels.cu -------------------------------------------------------------------------------- /flood/csrc/quantize/fp8_quant.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/quantize/fp8_quant.cu -------------------------------------------------------------------------------- /flood/csrc/quantize/fp8_quant.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/quantize/fp8_quant.cuh -------------------------------------------------------------------------------- /flood/csrc/quantize/fp8_quant.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/quantize/fp8_quant.h -------------------------------------------------------------------------------- /flood/csrc/rope/rope.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/rope/rope.cu -------------------------------------------------------------------------------- /flood/csrc/rope/rope.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/rope/rope.h -------------------------------------------------------------------------------- /flood/csrc/vec_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/csrc/vec_types.h -------------------------------------------------------------------------------- /flood/example/dist_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/example/dist_example.py -------------------------------------------------------------------------------- /flood/example/ling_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/example/ling_example.py -------------------------------------------------------------------------------- /flood/example/lookahead_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/example/lookahead_example.py -------------------------------------------------------------------------------- /flood/example/multimodal_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/example/multimodal_example.py -------------------------------------------------------------------------------- /flood/example/simple_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/example/simple_example.py -------------------------------------------------------------------------------- /flood/example/stream_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/example/stream_example.py -------------------------------------------------------------------------------- /flood/figures/segcache.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/figures/segcache.png -------------------------------------------------------------------------------- /flood/flood/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flood/flood/facade/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flood/flood/facade/dist_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/facade/dist_llm.py -------------------------------------------------------------------------------- /flood/flood/facade/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/facade/llm.py -------------------------------------------------------------------------------- /flood/flood/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flood/flood/layers/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/layers/attention.py -------------------------------------------------------------------------------- /flood/flood/layers/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/layers/embedding.py -------------------------------------------------------------------------------- /flood/flood/layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/layers/linear.py -------------------------------------------------------------------------------- /flood/flood/layers/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/layers/moe.py -------------------------------------------------------------------------------- /flood/flood/layers/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/layers/rope.py -------------------------------------------------------------------------------- /flood/flood/layers/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/layers/sampler.py -------------------------------------------------------------------------------- /flood/flood/layers/sync.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/layers/sync.py -------------------------------------------------------------------------------- /flood/flood/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/__init__.py -------------------------------------------------------------------------------- /flood/flood/models/configuration_bailing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/configuration_bailing.py -------------------------------------------------------------------------------- /flood/flood/models/configuration_bailing_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/configuration_bailing_moe.py -------------------------------------------------------------------------------- /flood/flood/models/configuration_bailing_moe_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/configuration_bailing_moe_linear.py -------------------------------------------------------------------------------- /flood/flood/models/configuration_bailing_moe_linear_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/configuration_bailing_moe_linear_v2.py -------------------------------------------------------------------------------- /flood/flood/models/configuration_bailing_moe_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/configuration_bailing_moe_v2.py -------------------------------------------------------------------------------- /flood/flood/models/configuration_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/configuration_deepseek.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_bailing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_bailing.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_bailing_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_bailing_moe.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_bailing_moe_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_bailing_moe_linear.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_bailing_moe_linear_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_bailing_moe_linear_v2.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_bailing_moe_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_bailing_moe_v2.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_deepseek.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_deepseekv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_deepseekv2.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_deepseekv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_deepseekv3.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_llama.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_qwen2.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_qwen3.py -------------------------------------------------------------------------------- /flood/flood/models/modeling_qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/modeling_qwen3_moe.py -------------------------------------------------------------------------------- /flood/flood/models/scaffold.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/scaffold.py -------------------------------------------------------------------------------- /flood/flood/models/tokenization_bailing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/models/tokenization_bailing.py -------------------------------------------------------------------------------- /flood/flood/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flood/flood/ops/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/activation.py -------------------------------------------------------------------------------- /flood/flood/ops/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/cache.py -------------------------------------------------------------------------------- /flood/flood/ops/draft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/draft.py -------------------------------------------------------------------------------- /flood/flood/ops/gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/gemm.py -------------------------------------------------------------------------------- /flood/flood/ops/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/norm.py -------------------------------------------------------------------------------- /flood/flood/ops/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/quantization.py -------------------------------------------------------------------------------- /flood/flood/ops/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/rope.py -------------------------------------------------------------------------------- /flood/flood/ops/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/sample.py -------------------------------------------------------------------------------- /flood/flood/ops/seg_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/seg_attn.py -------------------------------------------------------------------------------- /flood/flood/ops/seg_la.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/seg_la.py -------------------------------------------------------------------------------- /flood/flood/ops/seg_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/ops/seg_mla.py -------------------------------------------------------------------------------- /flood/flood/pipe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/pipe.sh -------------------------------------------------------------------------------- /flood/flood/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flood/flood/utils/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/utils/batch.py -------------------------------------------------------------------------------- /flood/flood/utils/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/utils/benchmark.py -------------------------------------------------------------------------------- /flood/flood/utils/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/utils/cache.py -------------------------------------------------------------------------------- /flood/flood/utils/reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/utils/reader.py -------------------------------------------------------------------------------- /flood/flood/utils/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/utils/request.py -------------------------------------------------------------------------------- /flood/flood/utils/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/utils/sampling.py -------------------------------------------------------------------------------- /flood/flood/utils/speculative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/flood/utils/speculative.py -------------------------------------------------------------------------------- /flood/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/pyproject.toml -------------------------------------------------------------------------------- /flood/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | transformers >= 4.54.0 -------------------------------------------------------------------------------- /flood/service/bench_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/service/bench_service.py -------------------------------------------------------------------------------- /flood/service/launch_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/service/launch_server.py -------------------------------------------------------------------------------- /flood/service/send_http_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/service/send_http_request.py -------------------------------------------------------------------------------- /flood/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/setup.py -------------------------------------------------------------------------------- /flood/test/test_batch_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_batch_mla.py -------------------------------------------------------------------------------- /flood/test/test_block_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_block_fp8.py -------------------------------------------------------------------------------- /flood/test/test_draft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_draft.py -------------------------------------------------------------------------------- /flood/test/test_fp8_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_fp8_quant.py -------------------------------------------------------------------------------- /flood/test/test_fuse_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_fuse_moe.py -------------------------------------------------------------------------------- /flood/test/test_hf_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_hf_model.py -------------------------------------------------------------------------------- /flood/test/test_int8_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_int8_gemm.py -------------------------------------------------------------------------------- /flood/test/test_qknorm_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_qknorm_rope.py -------------------------------------------------------------------------------- /flood/test/test_quant_and_update_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_quant_and_update_cache.py -------------------------------------------------------------------------------- /flood/test/test_rms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_rms.py -------------------------------------------------------------------------------- /flood/test/test_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_rope.py -------------------------------------------------------------------------------- /flood/test/test_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_sample.py -------------------------------------------------------------------------------- /flood/test/test_seg_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_seg_attn.py -------------------------------------------------------------------------------- /flood/test/test_seg_la.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_seg_la.py -------------------------------------------------------------------------------- /flood/test/test_seg_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_seg_mla.py -------------------------------------------------------------------------------- /flood/test/test_silu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_silu.py -------------------------------------------------------------------------------- /flood/test/test_update_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_update_cache.py -------------------------------------------------------------------------------- /flood/test/test_vllm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_vllm_model.py -------------------------------------------------------------------------------- /flood/test/test_yarn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/test_yarn.py -------------------------------------------------------------------------------- /flood/test/tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/flood/test/tests.sh -------------------------------------------------------------------------------- /ipad/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/README.md -------------------------------------------------------------------------------- /ipad/examples/glm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/examples/glm_example.py -------------------------------------------------------------------------------- /ipad/examples/llama_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/examples/llama_example.py -------------------------------------------------------------------------------- /ipad/examples/opt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/examples/opt_example.py -------------------------------------------------------------------------------- /ipad/examples/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/examples/preprocess.py -------------------------------------------------------------------------------- /ipad/ipad/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ipad/ipad/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/common/__init__.py -------------------------------------------------------------------------------- /ipad/ipad/common/distill_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/common/distill_worker.py -------------------------------------------------------------------------------- /ipad/ipad/common/sparse_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/common/sparse_module.py -------------------------------------------------------------------------------- /ipad/ipad/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/__init__.py -------------------------------------------------------------------------------- /ipad/ipad/models/glm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/glm/__init__.py -------------------------------------------------------------------------------- /ipad/ipad/models/glm/configuration_glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/glm/configuration_glm.py -------------------------------------------------------------------------------- /ipad/ipad/models/glm/glm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/glm/glm_trainer.py -------------------------------------------------------------------------------- /ipad/ipad/models/glm/modeling_glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/glm/modeling_glm.py -------------------------------------------------------------------------------- /ipad/ipad/models/glm/predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/glm/predict.py -------------------------------------------------------------------------------- /ipad/ipad/models/glm/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/glm/preprocess.py -------------------------------------------------------------------------------- /ipad/ipad/models/glm/tokenization_glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/glm/tokenization_glm.py -------------------------------------------------------------------------------- /ipad/ipad/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/llama/__init__.py -------------------------------------------------------------------------------- /ipad/ipad/models/llama/configuration_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/llama/configuration_llama.py -------------------------------------------------------------------------------- /ipad/ipad/models/llama/llama_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/llama/llama_trainer.py -------------------------------------------------------------------------------- /ipad/ipad/models/llama/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/llama/modeling_llama.py -------------------------------------------------------------------------------- /ipad/ipad/models/llama/predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/llama/predict.py -------------------------------------------------------------------------------- /ipad/ipad/models/llama/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/llama/preprocess.py -------------------------------------------------------------------------------- /ipad/ipad/models/opt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/opt/__init__.py -------------------------------------------------------------------------------- /ipad/ipad/models/opt/configuration_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/opt/configuration_opt.py -------------------------------------------------------------------------------- /ipad/ipad/models/opt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/opt/datasets.py -------------------------------------------------------------------------------- /ipad/ipad/models/opt/modeling_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/opt/modeling_opt.py -------------------------------------------------------------------------------- /ipad/ipad/models/opt/opt_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/opt/opt_trainer.py -------------------------------------------------------------------------------- /ipad/ipad/models/opt/predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/opt/predict.py -------------------------------------------------------------------------------- /ipad/ipad/models/opt/rename_state_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/ipad/models/opt/rename_state_dict.py -------------------------------------------------------------------------------- /ipad/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/ipad/setup.py -------------------------------------------------------------------------------- /lookahead/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/README.md -------------------------------------------------------------------------------- /lookahead/benchmarks/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/benchmark.py -------------------------------------------------------------------------------- /lookahead/benchmarks/chatglm_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/chatglm_benchmark.py -------------------------------------------------------------------------------- /lookahead/benchmarks/codellama_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/codellama_benchmark.py -------------------------------------------------------------------------------- /lookahead/benchmarks/glm_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/glm_benchmark.py -------------------------------------------------------------------------------- /lookahead/benchmarks/llama_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/llama_benchmark.py -------------------------------------------------------------------------------- /lookahead/benchmarks/pia_lantency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/pia_lantency.py -------------------------------------------------------------------------------- /lookahead/benchmarks/preprocess_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/preprocess_sample.py -------------------------------------------------------------------------------- /lookahead/benchmarks/trie_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/trie_benchmark.py -------------------------------------------------------------------------------- /lookahead/benchmarks/vllm_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/benchmarks/vllm_latency.py -------------------------------------------------------------------------------- /lookahead/datasets/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/datasets/dataset.py -------------------------------------------------------------------------------- /lookahead/examples/baichuan2_13b_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/baichuan2_13b_example.py -------------------------------------------------------------------------------- /lookahead/examples/baichuan2_7b_batch_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/baichuan2_7b_batch_example.py -------------------------------------------------------------------------------- /lookahead/examples/baichuan2_7b_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/baichuan2_7b_example.py -------------------------------------------------------------------------------- /lookahead/examples/baichuan_13b_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/baichuan_13b_example.py -------------------------------------------------------------------------------- /lookahead/examples/baichuan_7b_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/baichuan_7b_example.py -------------------------------------------------------------------------------- /lookahead/examples/bloom_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/bloom_example.py -------------------------------------------------------------------------------- /lookahead/examples/chatglm3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/chatglm3_example.py -------------------------------------------------------------------------------- /lookahead/examples/chatglm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/chatglm_example.py -------------------------------------------------------------------------------- /lookahead/examples/codellama_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/codellama_example.py -------------------------------------------------------------------------------- /lookahead/examples/glm_batch_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/glm_batch_example.py -------------------------------------------------------------------------------- /lookahead/examples/glm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/glm_example.py -------------------------------------------------------------------------------- /lookahead/examples/gpt2_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/gpt2_example.py -------------------------------------------------------------------------------- /lookahead/examples/gptj_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/gptj_example.py -------------------------------------------------------------------------------- /lookahead/examples/internlm_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/internlm_example.py -------------------------------------------------------------------------------- /lookahead/examples/llama_batch_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/llama_batch_example.py -------------------------------------------------------------------------------- /lookahead/examples/llama_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/llama_example.py -------------------------------------------------------------------------------- /lookahead/examples/llama_flash_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/llama_flash_example.py -------------------------------------------------------------------------------- /lookahead/examples/llama_stream_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/llama_stream_example.py -------------------------------------------------------------------------------- /lookahead/examples/local_path.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/local_path.py -------------------------------------------------------------------------------- /lookahead/examples/mistral_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/mistral_example.py -------------------------------------------------------------------------------- /lookahead/examples/mixtral_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/mixtral_example.py -------------------------------------------------------------------------------- /lookahead/examples/mixtral_quant_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/mixtral_quant_example.py -------------------------------------------------------------------------------- /lookahead/examples/opt_batch_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/opt_batch_example.py -------------------------------------------------------------------------------- /lookahead/examples/opt_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/opt_example.py -------------------------------------------------------------------------------- /lookahead/examples/qwen1.5_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/qwen1.5_example.py -------------------------------------------------------------------------------- /lookahead/examples/qwen1.5_quant_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/qwen1.5_quant_example.py -------------------------------------------------------------------------------- /lookahead/examples/qwen_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/qwen_example.py -------------------------------------------------------------------------------- /lookahead/examples/qwen_quant_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/examples/qwen_quant_example.py -------------------------------------------------------------------------------- /lookahead/figures/dynamic.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/figures/dynamic.gif -------------------------------------------------------------------------------- /lookahead/figures/flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/figures/flow.png -------------------------------------------------------------------------------- /lookahead/figures/glm_la_off.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/figures/glm_la_off.gif -------------------------------------------------------------------------------- /lookahead/figures/glm_la_on.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/figures/glm_la_on.gif -------------------------------------------------------------------------------- /lookahead/figures/llama_la_off.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/figures/llama_la_off.gif -------------------------------------------------------------------------------- /lookahead/figures/llama_la_on.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/figures/llama_la_on.gif -------------------------------------------------------------------------------- /lookahead/figures/trie_construct.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/figures/trie_construct.gif -------------------------------------------------------------------------------- /lookahead/figures/trie_retrieve.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/figures/trie_retrieve.gif -------------------------------------------------------------------------------- /lookahead/lookahead/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/common/lookahead_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/common/lookahead_cache.py -------------------------------------------------------------------------------- /lookahead/lookahead/common/lookahead_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/common/lookahead_generation_utils.py -------------------------------------------------------------------------------- /lookahead/lookahead/common/pretrained_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/common/pretrained_model.py -------------------------------------------------------------------------------- /lookahead/lookahead/common/pretrained_model_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/common/pretrained_model_batch.py -------------------------------------------------------------------------------- /lookahead/lookahead/csrc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/csrc/__init__.py -------------------------------------------------------------------------------- /lookahead/lookahead/csrc/triton/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/csrc/triton/__init__.py -------------------------------------------------------------------------------- /lookahead/lookahead/csrc/triton/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/csrc/triton/rms_norm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_13b/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_13b/__init__.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_13b/configuration_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_13b/configuration_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_13b/generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_13b/generation_utils.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_13b/handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_13b/handler.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_13b/modeling_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_13b/modeling_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_13b/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_13b/quantizer.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_13b/tokenization_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_13b/tokenization_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_7b/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_7b/__init__.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_7b/configuration_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_7b/configuration_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_7b/generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_7b/generation_utils.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_7b/modeling_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_7b/modeling_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_7b/modeling_baichuan_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_7b/modeling_baichuan_batch.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_7b/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_7b/quantizer.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan2_7b/tokenization_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan2_7b/tokenization_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_13b/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_13b/__init__.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_13b/configuration_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_13b/configuration_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_13b/generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_13b/generation_utils.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_13b/handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_13b/handler.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_13b/modeling_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_13b/modeling_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_13b/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_13b/quantizer.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_13b/tokenization_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_13b/tokenization_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_7b/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_7b/__init__.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_7b/configuration_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_7b/configuration_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_7b/handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_7b/handler.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_7b/modeling_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_7b/modeling_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/baichuan_7b/tokenization_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/baichuan_7b/tokenization_baichuan.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/bloom/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/bloom/modeling_bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/bloom/modeling_bloom.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/chatglm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/chatglm/configuration_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/chatglm/configuration_chatglm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/chatglm/modeling_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/chatglm/modeling_chatglm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/chatglm/tokenization_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/chatglm/tokenization_chatglm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/chatglm3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/chatglm3/configuration_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/chatglm3/configuration_chatglm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/chatglm3/modeling_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/chatglm3/modeling_chatglm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/chatglm3/tokenization_chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/chatglm3/tokenization_chatglm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/glm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/glm/__init__.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/glm/configuration_glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/glm/configuration_glm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/glm/modeling_glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/glm/modeling_glm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/glm/modeling_glm_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/glm/modeling_glm_batch.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/glm/tokenization_glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/glm/tokenization_glm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/gpt2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/gpt2/modeling_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/gpt2/modeling_gpt2.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/gptj/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/gptj/modeling_gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/gptj/modeling_gptj.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/internlm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/internlm/configuration_internlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/internlm/configuration_internlm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/internlm/modeling_internlm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/internlm/modeling_internlm2.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/internlm/tokenization_internlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/internlm/tokenization_internlm.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/llama/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/llama/modeling_llama.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/llama/modeling_llama_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/llama/modeling_llama_batch.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/llama/modeling_llama_flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/llama/modeling_llama_flash.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/llama/modeling_llama_fuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/llama/modeling_llama_fuse.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/mistral/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/mistral/configuration_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/mistral/configuration_mistral.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/mistral/modeling_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/mistral/modeling_mistral.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/mixtral/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/mixtral/configuration_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/mixtral/configuration_mixtral.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/mixtral/modeling_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/mixtral/modeling_mixtral.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/opt/modeling_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/opt/modeling_opt.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/opt/modeling_opt_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/opt/modeling_opt_batch.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/qwen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/qwen/configuration_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/qwen/configuration_qwen.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/qwen/modeling_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/qwen/modeling_qwen.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/qwen/qwen_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/qwen/qwen_generation_utils.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/qwen/tokenization_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/qwen/tokenization_qwen.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lookahead/lookahead/models/qwen2/configuration_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/qwen2/configuration_qwen2.py -------------------------------------------------------------------------------- /lookahead/lookahead/models/qwen2/modeling_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/lookahead/models/qwen2/modeling_qwen2.py -------------------------------------------------------------------------------- /lookahead/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | transformers==4.30.2 -------------------------------------------------------------------------------- /lookahead/scripts/tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/scripts/tests.sh -------------------------------------------------------------------------------- /lookahead/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/setup.py -------------------------------------------------------------------------------- /lookahead/tests/test_lookahead_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/tests/test_lookahead_cache.py -------------------------------------------------------------------------------- /lookahead/tests/test_triton_rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/lookahead/tests/test_triton_rms_norm.py -------------------------------------------------------------------------------- /pre-push: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alipay/PainlessInferenceAcceleration/HEAD/pre-push --------------------------------------------------------------------------------