├── .gitignore ├── README.md ├── csrc ├── adam │ ├── adam_kernel.cu │ └── interface.cpp ├── bias_dropout_add │ ├── bias_dropout_add.cu │ └── interface.cpp ├── bias_gelu │ ├── bias_gelu.cu │ └── interface.cpp ├── layernorm │ ├── interface.cpp │ ├── interface_gamma_beta.cpp │ ├── layernorm.cu │ └── layernorm_backward.cu ├── multi_tensor │ ├── interface.cpp │ ├── multi_tensor_apply.cuh │ └── multi_tensor_l2norm_kernel.cu ├── rmsnorm │ ├── interface.cpp │ ├── interface_gamma.cpp │ ├── rmsnorm.cu │ └── rmsnorm_backward.cu ├── rounding │ ├── fp32_to_bf16.cu │ └── interface.cpp ├── softmax_dropout │ ├── interface.cpp │ ├── softmax_dropout_kernel.cu │ └── softmax_fast.h ├── type_shim.h ├── util.h └── xentropy │ ├── interface.cpp │ └── xentropy_kernel.cu ├── fused_ops ├── __init__.py ├── layernorm_module │ ├── __init__.py │ └── fused_layer_norm.py ├── rmsnorm_module │ ├── __init__.py │ └── fused_rms_norm.py ├── softmax_dropout_module │ ├── __init__.py │ └── fused_softmax_dropout.py └── xentropy_module │ ├── __init__.py │ └── softmax_xentropy.py ├── setup.py └── test ├── test_bias_dropout_add.py ├── test_bias_gelu.py ├── test_ln.py └── test_softmax.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/README.md -------------------------------------------------------------------------------- /csrc/adam/adam_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/adam/adam_kernel.cu -------------------------------------------------------------------------------- /csrc/adam/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/adam/interface.cpp -------------------------------------------------------------------------------- /csrc/bias_dropout_add/bias_dropout_add.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/bias_dropout_add/bias_dropout_add.cu -------------------------------------------------------------------------------- /csrc/bias_dropout_add/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/bias_dropout_add/interface.cpp -------------------------------------------------------------------------------- /csrc/bias_gelu/bias_gelu.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/bias_gelu/bias_gelu.cu -------------------------------------------------------------------------------- /csrc/bias_gelu/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/bias_gelu/interface.cpp -------------------------------------------------------------------------------- /csrc/layernorm/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/layernorm/interface.cpp -------------------------------------------------------------------------------- /csrc/layernorm/interface_gamma_beta.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/layernorm/interface_gamma_beta.cpp -------------------------------------------------------------------------------- /csrc/layernorm/layernorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/layernorm/layernorm.cu -------------------------------------------------------------------------------- /csrc/layernorm/layernorm_backward.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/layernorm/layernorm_backward.cu -------------------------------------------------------------------------------- /csrc/multi_tensor/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/multi_tensor/interface.cpp -------------------------------------------------------------------------------- /csrc/multi_tensor/multi_tensor_apply.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/multi_tensor/multi_tensor_apply.cuh -------------------------------------------------------------------------------- /csrc/multi_tensor/multi_tensor_l2norm_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/multi_tensor/multi_tensor_l2norm_kernel.cu -------------------------------------------------------------------------------- /csrc/rmsnorm/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/rmsnorm/interface.cpp -------------------------------------------------------------------------------- /csrc/rmsnorm/interface_gamma.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/rmsnorm/interface_gamma.cpp -------------------------------------------------------------------------------- /csrc/rmsnorm/rmsnorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/rmsnorm/rmsnorm.cu -------------------------------------------------------------------------------- /csrc/rmsnorm/rmsnorm_backward.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/rmsnorm/rmsnorm_backward.cu -------------------------------------------------------------------------------- /csrc/rounding/fp32_to_bf16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/rounding/fp32_to_bf16.cu -------------------------------------------------------------------------------- /csrc/rounding/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/rounding/interface.cpp -------------------------------------------------------------------------------- /csrc/softmax_dropout/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/softmax_dropout/interface.cpp -------------------------------------------------------------------------------- /csrc/softmax_dropout/softmax_dropout_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/softmax_dropout/softmax_dropout_kernel.cu -------------------------------------------------------------------------------- /csrc/softmax_dropout/softmax_fast.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/softmax_dropout/softmax_fast.h -------------------------------------------------------------------------------- /csrc/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/type_shim.h -------------------------------------------------------------------------------- /csrc/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/util.h -------------------------------------------------------------------------------- /csrc/xentropy/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/xentropy/interface.cpp -------------------------------------------------------------------------------- /csrc/xentropy/xentropy_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/csrc/xentropy/xentropy_kernel.cu -------------------------------------------------------------------------------- /fused_ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/__init__.py -------------------------------------------------------------------------------- /fused_ops/layernorm_module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/layernorm_module/__init__.py -------------------------------------------------------------------------------- /fused_ops/layernorm_module/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/layernorm_module/fused_layer_norm.py -------------------------------------------------------------------------------- /fused_ops/rmsnorm_module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/rmsnorm_module/__init__.py -------------------------------------------------------------------------------- /fused_ops/rmsnorm_module/fused_rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/rmsnorm_module/fused_rms_norm.py -------------------------------------------------------------------------------- /fused_ops/softmax_dropout_module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/softmax_dropout_module/__init__.py -------------------------------------------------------------------------------- /fused_ops/softmax_dropout_module/fused_softmax_dropout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/softmax_dropout_module/fused_softmax_dropout.py -------------------------------------------------------------------------------- /fused_ops/xentropy_module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/xentropy_module/__init__.py -------------------------------------------------------------------------------- /fused_ops/xentropy_module/softmax_xentropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/fused_ops/xentropy_module/softmax_xentropy.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/setup.py -------------------------------------------------------------------------------- /test/test_bias_dropout_add.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/test/test_bias_dropout_add.py -------------------------------------------------------------------------------- /test/test_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/test/test_bias_gelu.py -------------------------------------------------------------------------------- /test/test_ln.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/test/test_ln.py -------------------------------------------------------------------------------- /test/test_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolinke/fused_ops/HEAD/test/test_softmax.py --------------------------------------------------------------------------------