├── CMakeLists.txt ├── README.md ├── examples ├── CMakeLists.txt ├── README.md └── cpp │ ├── CMakeLists.txt │ ├── attention │ ├── CMakeLists.txt │ ├── context_attn_example.cpp │ └── self_attn_example.cpp │ ├── decoder │ ├── CMakeLists.txt │ ├── context_decoder_example.cpp │ └── self_decoder_example.cpp │ └── ffn │ ├── CMakeLists.txt │ └── ffn_example.cpp ├── llama2-7b-tokenizer.bin ├── src ├── CMakeLists.txt ├── kernels │ ├── CMakeLists.txt │ ├── act_kernel.cu │ ├── act_kernel.h │ ├── add_residual.cu │ ├── add_residual.h │ ├── attn_softmax_kernel.cu │ ├── attn_softmax_kernel.h │ ├── build_casual_mask.cu │ ├── build_casual_mask.h │ ├── cal_paddingoffset.cu │ ├── cal_paddingoffset.h │ ├── concat_past_kv.cu │ ├── concat_past_kv.h │ ├── cublas_utils.cc │ ├── cublas_utils.h │ ├── fused_addresidual_norm.cu │ ├── fused_addresidual_norm.h │ ├── fused_decoder_self_attention.cu │ ├── fused_decoder_self_attention.h │ ├── fused_transpose_and_remv_pad.cu │ ├── fused_transpose_and_remv_pad.h │ ├── input_embedding.cu │ ├── input_embedding.h │ ├── linear.cu │ ├── linear.h │ ├── qkv_bias_and_RoPE.cu │ ├── qkv_bias_and_RoPE.h │ ├── repeat_kv.cu │ ├── repeat_kv.h │ ├── rmsnorm_kernel.cu │ ├── rmsnorm_kernel.h │ ├── sampling.cu │ ├── sampling.h │ ├── topK.cu │ ├── topK.h │ ├── topK_bk.cu │ └── topK_bk.h ├── layers │ ├── CMakeLists.txt │ ├── attention │ │ ├── CMakeLists.txt │ │ ├── context_attention.cpp │ │ ├── context_attention.h │ │ ├── masked_self_attention.cpp │ │ └── masked_self_attention.h │ ├── decoder │ │ ├── CMakeLists.txt │ │ ├── context_decoder.cpp │ │ ├── context_decoder.h │ │ ├── self_decoder.cpp │ │ └── self_decoder.h │ └── ffn │ │ ├── CMakeLists.txt │ │ ├── ffn.cpp │ │ └── ffn.h ├── memory │ └── allocator │ │ ├── base_allocator.h │ │ └── cuda_allocator.h ├── models │ ├── CMakeLists.txt │ ├── basemodel.h │ ├── common_params.h │ ├── llama │ │ ├── llama.cpp │ │ ├── llama.h │ │ └── llama_params.h │ └── tokenizer.h ├── utils │ ├── CMakeLists.txt │ ├── cuda_debug_utils.cuh │ ├── debug_utils.h │ ├── macro.h │ ├── model_utils.h │ ├── params.h │ ├── string_utils.h │ ├── tensor.h │ ├── vectorize_utils.h │ ├── weight_utils.cu │ └── weight_utils.h └── weights │ ├── CMakeLists.txt │ ├── base_weights.h │ ├── llama │ ├── CMakeLists.txt │ ├── attention_weights.h │ ├── embedding_weights.h │ ├── ffn_weights.h │ ├── layer_weights.cc │ ├── layer_weights.h │ ├── llama_weights.cc │ ├── llama_weights.h │ └── norm_weights.h │ └── weight.h ├── tests ├── CMakeLists.txt └── unittests │ ├── CMakeLists.txt │ ├── test_act.cu │ ├── test_bias_and_RoPE.cu │ ├── test_bmm.cu │ ├── test_cal_paddingoffset.cu │ ├── test_casual_mask.cu │ ├── test_concat_kv.cu │ ├── test_data_compare.cu │ ├── test_fused_addresidual_norm.cu │ ├── test_fused_decoder_attention.cu │ ├── test_fused_trans_remv_pad.cu │ ├── test_input_embedding.cu │ ├── test_linear.cu │ ├── test_mask_softmax.cu │ ├── test_repeat_kv.cu │ ├── test_residual.cu │ ├── test_rmsnorm.cu │ ├── test_sampling.cu │ └── test_topk.cu ├── tools ├── 1.png ├── HF_llama_run_script.py ├── README.md ├── convert_downloaded_llama_weights.py └── weights_convert.py └── user_entry.cpp /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/README.md -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(cpp) 2 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/README.md -------------------------------------------------------------------------------- /examples/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/attention/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/attention/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/attention/context_attn_example.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/attention/context_attn_example.cpp -------------------------------------------------------------------------------- /examples/cpp/attention/self_attn_example.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/attention/self_attn_example.cpp -------------------------------------------------------------------------------- /examples/cpp/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/decoder/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/decoder/context_decoder_example.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/decoder/context_decoder_example.cpp -------------------------------------------------------------------------------- /examples/cpp/decoder/self_decoder_example.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/decoder/self_decoder_example.cpp -------------------------------------------------------------------------------- /examples/cpp/ffn/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/ffn/CMakeLists.txt -------------------------------------------------------------------------------- /examples/cpp/ffn/ffn_example.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/examples/cpp/ffn/ffn_example.cpp -------------------------------------------------------------------------------- /llama2-7b-tokenizer.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/llama2-7b-tokenizer.bin -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/CMakeLists.txt -------------------------------------------------------------------------------- /src/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/CMakeLists.txt -------------------------------------------------------------------------------- /src/kernels/act_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/act_kernel.cu -------------------------------------------------------------------------------- /src/kernels/act_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/act_kernel.h -------------------------------------------------------------------------------- /src/kernels/add_residual.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/add_residual.cu -------------------------------------------------------------------------------- /src/kernels/add_residual.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/add_residual.h -------------------------------------------------------------------------------- /src/kernels/attn_softmax_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/attn_softmax_kernel.cu -------------------------------------------------------------------------------- /src/kernels/attn_softmax_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/attn_softmax_kernel.h -------------------------------------------------------------------------------- /src/kernels/build_casual_mask.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/build_casual_mask.cu -------------------------------------------------------------------------------- /src/kernels/build_casual_mask.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/build_casual_mask.h -------------------------------------------------------------------------------- /src/kernels/cal_paddingoffset.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/cal_paddingoffset.cu -------------------------------------------------------------------------------- /src/kernels/cal_paddingoffset.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/cal_paddingoffset.h -------------------------------------------------------------------------------- /src/kernels/concat_past_kv.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/concat_past_kv.cu -------------------------------------------------------------------------------- /src/kernels/concat_past_kv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/concat_past_kv.h -------------------------------------------------------------------------------- /src/kernels/cublas_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/cublas_utils.cc -------------------------------------------------------------------------------- /src/kernels/cublas_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/cublas_utils.h -------------------------------------------------------------------------------- /src/kernels/fused_addresidual_norm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/fused_addresidual_norm.cu -------------------------------------------------------------------------------- /src/kernels/fused_addresidual_norm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/fused_addresidual_norm.h -------------------------------------------------------------------------------- /src/kernels/fused_decoder_self_attention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/fused_decoder_self_attention.cu -------------------------------------------------------------------------------- /src/kernels/fused_decoder_self_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/fused_decoder_self_attention.h -------------------------------------------------------------------------------- /src/kernels/fused_transpose_and_remv_pad.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/fused_transpose_and_remv_pad.cu -------------------------------------------------------------------------------- /src/kernels/fused_transpose_and_remv_pad.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/fused_transpose_and_remv_pad.h -------------------------------------------------------------------------------- /src/kernels/input_embedding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/input_embedding.cu -------------------------------------------------------------------------------- /src/kernels/input_embedding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/input_embedding.h -------------------------------------------------------------------------------- /src/kernels/linear.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/linear.cu -------------------------------------------------------------------------------- /src/kernels/linear.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/linear.h -------------------------------------------------------------------------------- /src/kernels/qkv_bias_and_RoPE.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/qkv_bias_and_RoPE.cu -------------------------------------------------------------------------------- /src/kernels/qkv_bias_and_RoPE.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/qkv_bias_and_RoPE.h -------------------------------------------------------------------------------- /src/kernels/repeat_kv.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/repeat_kv.cu -------------------------------------------------------------------------------- /src/kernels/repeat_kv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/repeat_kv.h -------------------------------------------------------------------------------- /src/kernels/rmsnorm_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/rmsnorm_kernel.cu -------------------------------------------------------------------------------- /src/kernels/rmsnorm_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/rmsnorm_kernel.h -------------------------------------------------------------------------------- /src/kernels/sampling.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/sampling.cu -------------------------------------------------------------------------------- /src/kernels/sampling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/sampling.h -------------------------------------------------------------------------------- /src/kernels/topK.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/topK.cu -------------------------------------------------------------------------------- /src/kernels/topK.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/topK.h -------------------------------------------------------------------------------- /src/kernels/topK_bk.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/topK_bk.cu -------------------------------------------------------------------------------- /src/kernels/topK_bk.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/kernels/topK_bk.h -------------------------------------------------------------------------------- /src/layers/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/CMakeLists.txt -------------------------------------------------------------------------------- /src/layers/attention/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/attention/CMakeLists.txt -------------------------------------------------------------------------------- /src/layers/attention/context_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/attention/context_attention.cpp -------------------------------------------------------------------------------- /src/layers/attention/context_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/attention/context_attention.h -------------------------------------------------------------------------------- /src/layers/attention/masked_self_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/attention/masked_self_attention.cpp -------------------------------------------------------------------------------- /src/layers/attention/masked_self_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/attention/masked_self_attention.h -------------------------------------------------------------------------------- /src/layers/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/decoder/CMakeLists.txt -------------------------------------------------------------------------------- /src/layers/decoder/context_decoder.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/decoder/context_decoder.cpp -------------------------------------------------------------------------------- /src/layers/decoder/context_decoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/decoder/context_decoder.h -------------------------------------------------------------------------------- /src/layers/decoder/self_decoder.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/decoder/self_decoder.cpp -------------------------------------------------------------------------------- /src/layers/decoder/self_decoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/decoder/self_decoder.h -------------------------------------------------------------------------------- /src/layers/ffn/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/ffn/CMakeLists.txt -------------------------------------------------------------------------------- /src/layers/ffn/ffn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/ffn/ffn.cpp -------------------------------------------------------------------------------- /src/layers/ffn/ffn.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/layers/ffn/ffn.h -------------------------------------------------------------------------------- /src/memory/allocator/base_allocator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/memory/allocator/base_allocator.h -------------------------------------------------------------------------------- /src/memory/allocator/cuda_allocator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/memory/allocator/cuda_allocator.h -------------------------------------------------------------------------------- /src/models/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/models/CMakeLists.txt -------------------------------------------------------------------------------- /src/models/basemodel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/models/basemodel.h -------------------------------------------------------------------------------- /src/models/common_params.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/models/llama/llama.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/models/llama/llama.cpp -------------------------------------------------------------------------------- /src/models/llama/llama.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/models/llama/llama.h -------------------------------------------------------------------------------- /src/models/llama/llama_params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/models/llama/llama_params.h -------------------------------------------------------------------------------- /src/models/tokenizer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/models/tokenizer.h -------------------------------------------------------------------------------- /src/utils/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/CMakeLists.txt -------------------------------------------------------------------------------- /src/utils/cuda_debug_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/cuda_debug_utils.cuh -------------------------------------------------------------------------------- /src/utils/debug_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/debug_utils.h -------------------------------------------------------------------------------- /src/utils/macro.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/macro.h -------------------------------------------------------------------------------- /src/utils/model_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/model_utils.h -------------------------------------------------------------------------------- /src/utils/params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/params.h -------------------------------------------------------------------------------- /src/utils/string_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/string_utils.h -------------------------------------------------------------------------------- /src/utils/tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/tensor.h -------------------------------------------------------------------------------- /src/utils/vectorize_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/vectorize_utils.h -------------------------------------------------------------------------------- /src/utils/weight_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/weight_utils.cu -------------------------------------------------------------------------------- /src/utils/weight_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/utils/weight_utils.h -------------------------------------------------------------------------------- /src/weights/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(llama) 2 | -------------------------------------------------------------------------------- /src/weights/base_weights.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/base_weights.h -------------------------------------------------------------------------------- /src/weights/llama/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/llama/CMakeLists.txt -------------------------------------------------------------------------------- /src/weights/llama/attention_weights.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/llama/attention_weights.h -------------------------------------------------------------------------------- /src/weights/llama/embedding_weights.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/llama/embedding_weights.h -------------------------------------------------------------------------------- /src/weights/llama/ffn_weights.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/llama/ffn_weights.h -------------------------------------------------------------------------------- /src/weights/llama/layer_weights.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/llama/layer_weights.cc -------------------------------------------------------------------------------- /src/weights/llama/layer_weights.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/llama/layer_weights.h -------------------------------------------------------------------------------- /src/weights/llama/llama_weights.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/llama/llama_weights.cc -------------------------------------------------------------------------------- /src/weights/llama/llama_weights.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/llama/llama_weights.h -------------------------------------------------------------------------------- /src/weights/llama/norm_weights.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | template 3 | struct LayerNormWeight { 4 | T* gamma; 5 | }; -------------------------------------------------------------------------------- /src/weights/weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/src/weights/weight.h -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(unittests) -------------------------------------------------------------------------------- /tests/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/CMakeLists.txt -------------------------------------------------------------------------------- /tests/unittests/test_act.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_act.cu -------------------------------------------------------------------------------- /tests/unittests/test_bias_and_RoPE.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_bias_and_RoPE.cu -------------------------------------------------------------------------------- /tests/unittests/test_bmm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_bmm.cu -------------------------------------------------------------------------------- /tests/unittests/test_cal_paddingoffset.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_cal_paddingoffset.cu -------------------------------------------------------------------------------- /tests/unittests/test_casual_mask.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_casual_mask.cu -------------------------------------------------------------------------------- /tests/unittests/test_concat_kv.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_concat_kv.cu -------------------------------------------------------------------------------- /tests/unittests/test_data_compare.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_data_compare.cu -------------------------------------------------------------------------------- /tests/unittests/test_fused_addresidual_norm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_fused_addresidual_norm.cu -------------------------------------------------------------------------------- /tests/unittests/test_fused_decoder_attention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_fused_decoder_attention.cu -------------------------------------------------------------------------------- /tests/unittests/test_fused_trans_remv_pad.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_fused_trans_remv_pad.cu -------------------------------------------------------------------------------- /tests/unittests/test_input_embedding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_input_embedding.cu -------------------------------------------------------------------------------- /tests/unittests/test_linear.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_linear.cu -------------------------------------------------------------------------------- /tests/unittests/test_mask_softmax.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_mask_softmax.cu -------------------------------------------------------------------------------- /tests/unittests/test_repeat_kv.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_repeat_kv.cu -------------------------------------------------------------------------------- /tests/unittests/test_residual.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_residual.cu -------------------------------------------------------------------------------- /tests/unittests/test_rmsnorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_rmsnorm.cu -------------------------------------------------------------------------------- /tests/unittests/test_sampling.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_sampling.cu -------------------------------------------------------------------------------- /tests/unittests/test_topk.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tests/unittests/test_topk.cu -------------------------------------------------------------------------------- /tools/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tools/1.png -------------------------------------------------------------------------------- /tools/HF_llama_run_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tools/HF_llama_run_script.py -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tools/README.md -------------------------------------------------------------------------------- /tools/convert_downloaded_llama_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tools/convert_downloaded_llama_weights.py -------------------------------------------------------------------------------- /tools/weights_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/tools/weights_convert.py -------------------------------------------------------------------------------- /user_entry.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RussWong/LLM-engineering/HEAD/user_entry.cpp --------------------------------------------------------------------------------