├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── README.md ├── cmake └── Modules │ └── FindNCCL.cmake ├── scripts ├── converter.py ├── decode_output.py ├── encode_input.py └── lib │ ├── converter_lib.py │ └── gpt_token_encoder.py └── src ├── CMakeLists.txt ├── csrc ├── CMakeLists.txt ├── kernel │ ├── CMakeLists.txt │ ├── activation_types.h │ ├── activations.cuh │ ├── addbias.cu │ ├── addbias.h │ ├── count_nan.cu │ ├── count_nan.h │ ├── embedding.cu │ ├── embedding.h │ ├── findmax.cu │ ├── findmax.h │ ├── fused_activ_multiply.cu │ ├── fused_activ_multiply.h │ ├── fused_addbias_activ.cu │ ├── fused_addbias_activ.h │ ├── fused_context_stage_attention.cu │ ├── fused_context_stage_attention.h │ ├── fused_decoding_stage_attention.cu │ ├── fused_decoding_stage_attention.h │ ├── fused_decoding_stage_attention_mha.cu │ ├── gather_last_tokens.cu │ ├── gather_last_tokens.h │ ├── kvcache_mgmt.cu │ ├── kvcache_mgmt.h │ ├── layernorm.cu │ ├── layernorm.h │ ├── reduction.cuh │ ├── rmsnorm.cu │ ├── rmsnorm.h │ ├── rotary_posi_embedding.cu │ ├── rotary_posi_embedding.h │ ├── softmax.cu │ ├── softmax.h │ ├── unfused_attention.cu │ ├── unfused_attention.h │ ├── xformers_attention.cu │ └── xformers_attention.h ├── layer │ ├── CMakeLists.txt │ ├── attention.cc │ ├── attention.h │ ├── ffn.cc │ ├── ffn.h │ ├── gated_ffn.cc │ └── gated_ffn.h ├── model │ ├── CMakeLists.txt │ └── gpt │ │ ├── CMakeLists.txt │ │ ├── gpt.cc │ │ ├── gpt.h │ │ ├── gpt2 │ │ ├── CMakeLists.txt │ │ ├── gpt2op.cc │ │ └── gpt2op.h │ │ ├── gpt_base.h │ │ ├── gpt_hyper_param.h │ │ ├── gpt_pagedattn_param.h │ │ ├── gpt_parallelism_param.h │ │ ├── gpt_weight.cc │ │ ├── gpt_weight.h │ │ ├── gptop_base.cc │ │ ├── gptop_base.h │ │ ├── llama2 │ │ ├── CMakeLists.txt │ │ ├── llama2op.cc │ │ └── llama2op.h │ │ └── opt │ │ ├── CMakeLists.txt │ │ ├── optop.cc │ │ └── optop.h ├── pybinding.cc └── util │ ├── CMakeLists.txt │ ├── cublas_wrapper.cc │ ├── cublas_wrapper.h │ ├── cuda_utils.h │ ├── debug_utils.h │ ├── nccl_utils.cc │ ├── nccl_utils.h │ ├── py_block_migration.cc │ ├── py_block_migration.h │ ├── py_nccl.cc │ ├── py_nccl.h │ ├── py_swapping.cc │ ├── py_swapping.h │ ├── st_datatypes.h │ └── torch_utils.h ├── examples ├── CMakeLists.txt ├── benchmark_all_input_same.cc ├── lib │ ├── common_gpt_hyper_params.h │ ├── inference_batch.cc │ ├── inference_batch.h │ ├── simple_vocab_decoder.h │ ├── st_args.cc │ ├── st_args.h │ └── utils.h └── run_gpt.cc └── unittest ├── CMakeLists.txt ├── kernel ├── CMakeLists.txt ├── addbias.cc ├── attention_ref.cc ├── attention_ref.h ├── findmax.cc ├── fused_activ_multiply.cc ├── fused_addbias_activ.cc ├── fused_attention.cc ├── kvcache_mgmt_ref.cc ├── kvcache_mgmt_ref.h ├── layernorm.cc ├── rmsnorm.cc ├── rotary_posi_embedding.cc ├── rotary_posi_embedding_ref.cc ├── rotary_posi_embedding_ref.h └── softmax.cc ├── layer ├── CMakeLists.txt ├── attention_ref.cc ├── attention_ref.h ├── attention_utils.h ├── parallel_attention.cc └── parallel_ffn.cc ├── model └── CMakeLists.txt ├── unittest_torch_utils.h ├── unittest_utils.h └── util ├── CMakeLists.txt └── cublas_wrapper.cc /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | __pycache__ 3 | 4 | /temp/** 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/.gitmodules -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/README.md -------------------------------------------------------------------------------- /cmake/Modules/FindNCCL.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/cmake/Modules/FindNCCL.cmake -------------------------------------------------------------------------------- /scripts/converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/scripts/converter.py -------------------------------------------------------------------------------- /scripts/decode_output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/scripts/decode_output.py -------------------------------------------------------------------------------- /scripts/encode_input.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/scripts/encode_input.py -------------------------------------------------------------------------------- /scripts/lib/converter_lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/scripts/lib/converter_lib.py -------------------------------------------------------------------------------- /scripts/lib/gpt_token_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/scripts/lib/gpt_token_encoder.py -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/kernel/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/kernel/activation_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/activation_types.h -------------------------------------------------------------------------------- /src/csrc/kernel/activations.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/activations.cuh -------------------------------------------------------------------------------- /src/csrc/kernel/addbias.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/addbias.cu -------------------------------------------------------------------------------- /src/csrc/kernel/addbias.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/addbias.h -------------------------------------------------------------------------------- /src/csrc/kernel/count_nan.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/count_nan.cu -------------------------------------------------------------------------------- /src/csrc/kernel/count_nan.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/count_nan.h -------------------------------------------------------------------------------- /src/csrc/kernel/embedding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/embedding.cu -------------------------------------------------------------------------------- /src/csrc/kernel/embedding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/embedding.h -------------------------------------------------------------------------------- /src/csrc/kernel/findmax.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/findmax.cu -------------------------------------------------------------------------------- /src/csrc/kernel/findmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/findmax.h -------------------------------------------------------------------------------- /src/csrc/kernel/fused_activ_multiply.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_activ_multiply.cu -------------------------------------------------------------------------------- /src/csrc/kernel/fused_activ_multiply.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_activ_multiply.h -------------------------------------------------------------------------------- /src/csrc/kernel/fused_addbias_activ.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_addbias_activ.cu -------------------------------------------------------------------------------- /src/csrc/kernel/fused_addbias_activ.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_addbias_activ.h -------------------------------------------------------------------------------- /src/csrc/kernel/fused_context_stage_attention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_context_stage_attention.cu -------------------------------------------------------------------------------- /src/csrc/kernel/fused_context_stage_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_context_stage_attention.h -------------------------------------------------------------------------------- /src/csrc/kernel/fused_decoding_stage_attention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_decoding_stage_attention.cu -------------------------------------------------------------------------------- /src/csrc/kernel/fused_decoding_stage_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_decoding_stage_attention.h -------------------------------------------------------------------------------- /src/csrc/kernel/fused_decoding_stage_attention_mha.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/fused_decoding_stage_attention_mha.cu -------------------------------------------------------------------------------- /src/csrc/kernel/gather_last_tokens.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/gather_last_tokens.cu -------------------------------------------------------------------------------- /src/csrc/kernel/gather_last_tokens.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/gather_last_tokens.h -------------------------------------------------------------------------------- /src/csrc/kernel/kvcache_mgmt.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/kvcache_mgmt.cu -------------------------------------------------------------------------------- /src/csrc/kernel/kvcache_mgmt.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/kvcache_mgmt.h -------------------------------------------------------------------------------- /src/csrc/kernel/layernorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/layernorm.cu -------------------------------------------------------------------------------- /src/csrc/kernel/layernorm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/layernorm.h -------------------------------------------------------------------------------- /src/csrc/kernel/reduction.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/reduction.cuh -------------------------------------------------------------------------------- /src/csrc/kernel/rmsnorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/rmsnorm.cu -------------------------------------------------------------------------------- /src/csrc/kernel/rmsnorm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/rmsnorm.h -------------------------------------------------------------------------------- /src/csrc/kernel/rotary_posi_embedding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/rotary_posi_embedding.cu -------------------------------------------------------------------------------- /src/csrc/kernel/rotary_posi_embedding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/rotary_posi_embedding.h -------------------------------------------------------------------------------- /src/csrc/kernel/softmax.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/softmax.cu -------------------------------------------------------------------------------- /src/csrc/kernel/softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/softmax.h -------------------------------------------------------------------------------- /src/csrc/kernel/unfused_attention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/unfused_attention.cu -------------------------------------------------------------------------------- /src/csrc/kernel/unfused_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/unfused_attention.h -------------------------------------------------------------------------------- /src/csrc/kernel/xformers_attention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/xformers_attention.cu -------------------------------------------------------------------------------- /src/csrc/kernel/xformers_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/kernel/xformers_attention.h -------------------------------------------------------------------------------- /src/csrc/layer/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/layer/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/layer/attention.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/layer/attention.cc -------------------------------------------------------------------------------- /src/csrc/layer/attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/layer/attention.h -------------------------------------------------------------------------------- /src/csrc/layer/ffn.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/layer/ffn.cc -------------------------------------------------------------------------------- /src/csrc/layer/ffn.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/layer/ffn.h -------------------------------------------------------------------------------- /src/csrc/layer/gated_ffn.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/layer/gated_ffn.cc -------------------------------------------------------------------------------- /src/csrc/layer/gated_ffn.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/layer/gated_ffn.h -------------------------------------------------------------------------------- /src/csrc/model/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(gpt) -------------------------------------------------------------------------------- /src/csrc/model/gpt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt.cc -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt2/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt2/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt2/gpt2op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt2/gpt2op.cc -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt2/gpt2op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt2/gpt2op.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt_base.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt_base.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt_hyper_param.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt_hyper_param.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt_pagedattn_param.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt_pagedattn_param.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt_parallelism_param.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt_parallelism_param.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt_weight.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt_weight.cc -------------------------------------------------------------------------------- /src/csrc/model/gpt/gpt_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gpt_weight.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/gptop_base.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gptop_base.cc -------------------------------------------------------------------------------- /src/csrc/model/gpt/gptop_base.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/gptop_base.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/llama2/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/llama2/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/model/gpt/llama2/llama2op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/llama2/llama2op.cc -------------------------------------------------------------------------------- /src/csrc/model/gpt/llama2/llama2op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/llama2/llama2op.h -------------------------------------------------------------------------------- /src/csrc/model/gpt/opt/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/opt/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/model/gpt/opt/optop.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/opt/optop.cc -------------------------------------------------------------------------------- /src/csrc/model/gpt/opt/optop.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/model/gpt/opt/optop.h -------------------------------------------------------------------------------- /src/csrc/pybinding.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/pybinding.cc -------------------------------------------------------------------------------- /src/csrc/util/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/CMakeLists.txt -------------------------------------------------------------------------------- /src/csrc/util/cublas_wrapper.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/cublas_wrapper.cc -------------------------------------------------------------------------------- /src/csrc/util/cublas_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/cublas_wrapper.h -------------------------------------------------------------------------------- /src/csrc/util/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/cuda_utils.h -------------------------------------------------------------------------------- /src/csrc/util/debug_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/debug_utils.h -------------------------------------------------------------------------------- /src/csrc/util/nccl_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/nccl_utils.cc -------------------------------------------------------------------------------- /src/csrc/util/nccl_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/nccl_utils.h -------------------------------------------------------------------------------- /src/csrc/util/py_block_migration.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/py_block_migration.cc -------------------------------------------------------------------------------- /src/csrc/util/py_block_migration.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/py_block_migration.h -------------------------------------------------------------------------------- /src/csrc/util/py_nccl.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/py_nccl.cc -------------------------------------------------------------------------------- /src/csrc/util/py_nccl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/py_nccl.h -------------------------------------------------------------------------------- /src/csrc/util/py_swapping.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/py_swapping.cc -------------------------------------------------------------------------------- /src/csrc/util/py_swapping.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/py_swapping.h -------------------------------------------------------------------------------- /src/csrc/util/st_datatypes.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/st_datatypes.h -------------------------------------------------------------------------------- /src/csrc/util/torch_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/csrc/util/torch_utils.h -------------------------------------------------------------------------------- /src/examples/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/CMakeLists.txt -------------------------------------------------------------------------------- /src/examples/benchmark_all_input_same.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/benchmark_all_input_same.cc -------------------------------------------------------------------------------- /src/examples/lib/common_gpt_hyper_params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/lib/common_gpt_hyper_params.h -------------------------------------------------------------------------------- /src/examples/lib/inference_batch.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/lib/inference_batch.cc -------------------------------------------------------------------------------- /src/examples/lib/inference_batch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/lib/inference_batch.h -------------------------------------------------------------------------------- /src/examples/lib/simple_vocab_decoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/lib/simple_vocab_decoder.h -------------------------------------------------------------------------------- /src/examples/lib/st_args.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/lib/st_args.cc -------------------------------------------------------------------------------- /src/examples/lib/st_args.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/lib/st_args.h -------------------------------------------------------------------------------- /src/examples/lib/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/lib/utils.h -------------------------------------------------------------------------------- /src/examples/run_gpt.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/examples/run_gpt.cc -------------------------------------------------------------------------------- /src/unittest/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/CMakeLists.txt -------------------------------------------------------------------------------- /src/unittest/kernel/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/CMakeLists.txt -------------------------------------------------------------------------------- /src/unittest/kernel/addbias.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/addbias.cc -------------------------------------------------------------------------------- /src/unittest/kernel/attention_ref.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/attention_ref.cc -------------------------------------------------------------------------------- /src/unittest/kernel/attention_ref.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/attention_ref.h -------------------------------------------------------------------------------- /src/unittest/kernel/findmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/findmax.cc -------------------------------------------------------------------------------- /src/unittest/kernel/fused_activ_multiply.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/fused_activ_multiply.cc -------------------------------------------------------------------------------- /src/unittest/kernel/fused_addbias_activ.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/fused_addbias_activ.cc -------------------------------------------------------------------------------- /src/unittest/kernel/fused_attention.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/fused_attention.cc -------------------------------------------------------------------------------- /src/unittest/kernel/kvcache_mgmt_ref.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/kvcache_mgmt_ref.cc -------------------------------------------------------------------------------- /src/unittest/kernel/kvcache_mgmt_ref.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/kvcache_mgmt_ref.h -------------------------------------------------------------------------------- /src/unittest/kernel/layernorm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/layernorm.cc -------------------------------------------------------------------------------- /src/unittest/kernel/rmsnorm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/rmsnorm.cc -------------------------------------------------------------------------------- /src/unittest/kernel/rotary_posi_embedding.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/rotary_posi_embedding.cc -------------------------------------------------------------------------------- /src/unittest/kernel/rotary_posi_embedding_ref.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/rotary_posi_embedding_ref.cc -------------------------------------------------------------------------------- /src/unittest/kernel/rotary_posi_embedding_ref.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/rotary_posi_embedding_ref.h -------------------------------------------------------------------------------- /src/unittest/kernel/softmax.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/kernel/softmax.cc -------------------------------------------------------------------------------- /src/unittest/layer/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/layer/CMakeLists.txt -------------------------------------------------------------------------------- /src/unittest/layer/attention_ref.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/layer/attention_ref.cc -------------------------------------------------------------------------------- /src/unittest/layer/attention_ref.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/layer/attention_ref.h -------------------------------------------------------------------------------- /src/unittest/layer/attention_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/layer/attention_utils.h -------------------------------------------------------------------------------- /src/unittest/layer/parallel_attention.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/layer/parallel_attention.cc -------------------------------------------------------------------------------- /src/unittest/layer/parallel_ffn.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/layer/parallel_ffn.cc -------------------------------------------------------------------------------- /src/unittest/model/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/unittest/unittest_torch_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/unittest_torch_utils.h -------------------------------------------------------------------------------- /src/unittest/unittest_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/unittest_utils.h -------------------------------------------------------------------------------- /src/unittest/util/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/util/CMakeLists.txt -------------------------------------------------------------------------------- /src/unittest/util/cublas_wrapper.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LLMServe/SwiftTransformer/HEAD/src/unittest/util/cublas_wrapper.cc --------------------------------------------------------------------------------