├── .gitignore ├── KernelBench ├── level1 │ ├── 100_HingeLoss.py │ ├── 10_3D_tensor_matrix_multiplication.py │ ├── 11_4D_tensor_matrix_multiplication.py │ ├── 12_Matmul_with_diagonal_matrices_.py │ ├── 13_Matmul_for_symmetric_matrices.py │ ├── 14_Matmul_for_upper_triangular_matrices.py │ ├── 15_Matmul_for_lower_triangular_matrices.py │ ├── 16_Matmul_with_transposed_A.py │ ├── 17_Matmul_with_transposed_B.py │ ├── 18_Matmul_with_transposed_both.py │ ├── 19_ReLU.py │ ├── 1_Square_matrix_multiplication_.py │ ├── 20_LeakyReLU.py │ ├── 21_Sigmoid.py │ ├── 22_Tanh.py │ ├── 23_Softmax.py │ ├── 24_LogSoftmax.py │ ├── 25_Swish.py │ ├── 26_GELU_.py │ ├── 27_SELU_.py │ ├── 28_HardSigmoid.py │ ├── 29_Softplus.py │ ├── 2_Standard_matrix_multiplication_.py │ ├── 30_Softsign.py │ ├── 31_ELU.py │ ├── 32_HardTanh.py │ ├── 33_BatchNorm.py │ ├── 34_InstanceNorm.py │ ├── 35_GroupNorm_.py │ ├── 36_RMSNorm_.py │ ├── 37_FrobeniusNorm_.py │ ├── 38_L1Norm_.py │ ├── 39_L2Norm_.py │ ├── 3_Batched_matrix_multiplication.py │ ├── 40_LayerNorm.py │ ├── 41_Max_Pooling_1D.py │ ├── 42_Max_Pooling_2D.py │ ├── 43_Max_Pooling_3D.py │ ├── 44_Average_Pooling_1D.py │ ├── 45_Average_Pooling_2D.py │ ├── 46_Average_Pooling_3D.py │ ├── 47_Sum_reduction_over_a_dimension.py │ ├── 48_Mean_reduction_over_a_dimension.py │ ├── 49_Max_reduction_over_a_dimension.py │ ├── 4_Matrix_vector_multiplication_.py │ ├── 50_Product_reduction_over_a_dimension.py │ ├── 51_Argmax_over_a_dimension.py │ ├── 52_Argmin_over_a_dimension.py │ ├── 53_Min_reduction_over_a_dimension.py │ ├── 54_conv_standard_3D__square_input__square_kernel.py │ ├── 55_conv_standard_2D__asymmetric_input__square_kernel.py │ ├── 56_conv_standard_2D__asymmetric_input__asymmetric_kernel.py │ ├── 57_conv_transposed_2D__square_input__square_kernel.py │ ├── 58_conv_transposed_3D__asymmetric_input__asymmetric_kernel.py │ ├── 59_conv_standard_3D__asymmetric_input__square_kernel.py │ ├── 5_Matrix_scalar_multiplication.py │ ├── 60_conv_standard_3D__square_input__asymmetric_kernel.py │ ├── 61_conv_transposed_3D__square_input__square_kernel.py │ ├── 62_conv_standard_2D__square_input__asymmetric_kernel.py │ ├── 63_conv_standard_2D__square_input__square_kernel.py │ ├── 64_conv_transposed_1D.py │ ├── 65_conv_transposed_2D__square_input__asymmetric_kernel.py │ ├── 66_conv_standard_3D__asymmetric_input__asymmetric_kernel.py │ ├── 67_conv_standard_1D.py │ ├── 68_conv_transposed_3D__square_input__asymmetric_kernel.py │ ├── 69_conv_transposed_2D__asymmetric_input__asymmetric_kernel.py │ ├── 6_Matmul_with_large_K_dimension_.py │ ├── 70_conv_transposed_3D__asymmetric_input__square_kernel.py │ ├── 71_conv_transposed_2D__asymmetric_input__square_kernel.py │ ├── 72_conv_transposed_3D_asymmetric_input_asymmetric_kernel___strided_padded_grouped_.py │ ├── 73_conv_transposed_3D_asymmetric_input_square_kernel__strided_padded__grouped.py │ ├── 74_conv_transposed_1D_dilated.py │ ├── 75_conv_transposed_2D_asymmetric_input_asymmetric_kernel_strided__grouped____padded____dilated__.py │ ├── 76_conv_standard_1D_dilated_strided__.py │ ├── 77_conv_transposed_3D_square_input_square_kernel___padded____dilated____strided__.py │ ├── 78_conv_transposed_2D_asymmetric_input_asymmetric_kernel___padded__.py │ ├── 79_conv_transposed_1D_asymmetric_input_square_kernel___padded____strided____dilated__.py │ ├── 7_Matmul_with_small_K_dimension_.py │ ├── 80_conv_standard_2D_square_input_asymmetric_kernel___dilated____padded__.py │ ├── 81_conv_transposed_2D_asymmetric_input_square_kernel___dilated____padded____strided__.py │ ├── 
82_conv_depthwise_2D_square_input_square_kernel.py │ ├── 83_conv_depthwise_2D_square_input_asymmetric_kernel.py │ ├── 84_conv_depthwise_2D_asymmetric_input_square_kernel.py │ ├── 85_conv_depthwise_2D_asymmetric_input_asymmetric_kernel.py │ ├── 86_conv_depthwise_separable_2D.py │ ├── 87_conv_pointwise_2D.py │ ├── 88_MinGPTNewGelu.py │ ├── 89_cumsum.py │ ├── 8_Matmul_with_irregular_shapes_.py │ ├── 90_cumprod.py │ ├── 91_cumsum_reverse.py │ ├── 92_cumsum_exclusive.py │ ├── 93_masked_cumsum.py │ ├── 94_MSELoss.py │ ├── 95_CrossEntropyLoss.py │ ├── 96_HuberLoss.py │ ├── 97_CosineSimilarityLoss.py │ ├── 98_KLDivLoss.py │ ├── 99_TripletMarginLoss.py │ └── 9_Tall_skinny_matrix_multiplication_.py ├── level2 │ ├── 100_ConvTranspose3d_Clamp_Min_Divide.py │ ├── 10_ConvTranspose2d_MaxPool_Hardtanh_Mean_Tanh.py │ ├── 11_ConvTranspose2d_BatchNorm_Tanh_MaxPool_GroupNorm.py │ ├── 12_Gemm_Multiply_LeakyReLU.py │ ├── 13_ConvTranspose3d_Mean_Add_Softmax_Tanh_Scaling.py │ ├── 14_Gemm_Divide_Sum_Scaling.py │ ├── 15_ConvTranspose3d_BatchNorm_Subtract.py │ ├── 16_ConvTranspose2d_Mish_Add_Hardtanh_Scaling.py │ ├── 17_Conv2d_InstanceNorm_Divide.py │ ├── 18_Matmul_Sum_Max_AvgPool_LogSumExp_LogSumExp.py │ ├── 19_ConvTranspose2d_GELU_GroupNorm.py │ ├── 1_Conv2D_ReLU_BiasAdd.py │ ├── 20_ConvTranspose3d_Sum_ResidualAdd_Multiply_ResidualAdd.py │ ├── 21_Conv2d_Add_Scale_Sigmoid_GroupNorm.py │ ├── 22_Matmul_Scale_ResidualAdd_Clamp_LogSumExp_Mish.py │ ├── 23_Conv3d_GroupNorm_Mean.py │ ├── 24_Conv3d_Min_Softmax.py │ ├── 25_Conv2d_Min_Tanh_Tanh.py │ ├── 26_ConvTranspose3d_Add_HardSwish.py │ ├── 27_Conv3d_HardSwish_ReLU_Softmax_Mean.py │ ├── 28_BMM_InstanceNorm_Sum_ResidualAdd_Multiply.py │ ├── 29_Matmul_Mish_Mish.py │ ├── 2_ConvTranspose2d_BiasAdd_Clamp_Scaling_Clamp_Divide.py │ ├── 30_Gemm_GroupNorm_Hardtanh.py │ ├── 31_Conv2d_Min_Add_Multiply.py │ ├── 32_Conv2d_Scaling_Min.py │ ├── 33_Gemm_Scale_BatchNorm.py │ ├── 34_ConvTranspose3d_LayerNorm_GELU_Scaling.py │ ├── 35_Conv2d_Subtract_HardSwish_MaxPool_Mish.py │ ├── 36_ConvTranspose2d_Min_Sum_GELU_Add.py │ ├── 37_Matmul_Swish_Sum_GroupNorm.py │ ├── 38_ConvTranspose3d_AvgPool_Clamp_Softmax_Multiply.py │ ├── 39_Gemm_Scale_BatchNorm.py │ ├── 3_ConvTranspose3d_Sum_LayerNorm_AvgPool_GELU.py │ ├── 40_Matmul_Scaling_ResidualAdd.py │ ├── 41_Gemm_BatchNorm_GELU_GroupNorm_Mean_ReLU.py │ ├── 42_ConvTranspose2d_GlobalAvgPool_BiasAdd_LogSumExp_Sum_Multiply.py │ ├── 43_Conv3d_Max_LogSumExp_ReLU.py │ ├── 44_ConvTranspose2d_Multiply_GlobalAvgPool_GlobalAvgPool_Mean.py │ ├── 45_Gemm_Sigmoid_Sum_LogSumExp.py │ ├── 46_Conv2d_Subtract_Tanh_Subtract_AvgPool.py │ ├── 47_Conv3d_Mish_Tanh.py │ ├── 48_Conv3d_Scaling_Tanh_Multiply_Sigmoid.py │ ├── 49_ConvTranspose3d_Softmax_Sigmoid.py │ ├── 4_Conv2d_Mish_Mish.py │ ├── 50_ConvTranspose3d_Scaling_AvgPool_BiasAdd_Scaling.py │ ├── 51_Gemm_Subtract_GlobalAvgPool_LogSumExp_GELU_ResidualAdd.py │ ├── 52_Conv2d_Activation_BatchNorm.py │ ├── 53_Gemm_Scaling_Hardtanh_GELU.py │ ├── 54_Conv2d_Multiply_LeakyReLU_GELU.py │ ├── 55_Matmul_MaxPool_Sum_Scale.py │ ├── 56_Matmul_Sigmoid_Sum.py │ ├── 57_Conv2d_ReLU_HardSwish.py │ ├── 58_ConvTranspose3d_LogSumExp_HardSwish_Subtract_Clamp_Max.py │ ├── 59_Matmul_Swish_Scaling.py │ ├── 5_ConvTranspose2d_Subtract_Tanh.py │ ├── 60_ConvTranspose3d_Swish_GroupNorm_HardSwish.py │ ├── 61_ConvTranspose3d_ReLU_GroupNorm.py │ ├── 62_Matmul_GroupNorm_LeakyReLU_Sum.py │ ├── 63_Gemm_ReLU_Divide.py │ ├── 64_Gemm_LogSumExp_LeakyReLU_LeakyReLU_GELU_GELU.py │ ├── 65_Conv2d_AvgPool_Sigmoid_Sum.py │ ├── 66_Matmul_Dropout_Mean_Softmax.py │ ├── 
67_Conv2d_GELU_GlobalAvgPool.py │ ├── 68_Matmul_Min_Subtract.py │ ├── 69_Conv2d_HardSwish_ReLU.py │ ├── 6_Conv3d_Softmax_MaxPool_MaxPool.py │ ├── 70_Gemm_Sigmoid_Scaling_ResidualAdd.py │ ├── 71_Conv2d_Divide_LeakyReLU.py │ ├── 72_ConvTranspose3d_BatchNorm_AvgPool_AvgPool.py │ ├── 73_Conv2d_BatchNorm_Scaling.py │ ├── 74_ConvTranspose3d_LeakyReLU_Multiply_LeakyReLU_Max.py │ ├── 75_Gemm_GroupNorm_Min_BiasAdd.py │ ├── 76_Gemm_Add_ReLU.py │ ├── 77_ConvTranspose3d_Scale_BatchNorm_GlobalAvgPool.py │ ├── 78_ConvTranspose3d_Max_Max_Sum.py │ ├── 79_Conv3d_Multiply_InstanceNorm_Clamp_Multiply_Max.py │ ├── 7_Conv3d_ReLU_LeakyReLU_GELU_Sigmoid_BiasAdd.py │ ├── 80_Gemm_Max_Subtract_GELU.py │ ├── 81_Gemm_Swish_Divide_Clamp_Tanh_Clamp.py │ ├── 82_Conv2d_Tanh_Scaling_BiasAdd_Max.py │ ├── 83_Conv3d_GroupNorm_Min_Clamp_Dropout.py │ ├── 84_Gemm_BatchNorm_Scaling_Softmax.py │ ├── 85_Conv2d_GroupNorm_Scale_MaxPool_Clamp.py │ ├── 86_Matmul_Divide_GELU.py │ ├── 87_Conv2d_Subtract_Subtract_Mish.py │ ├── 88_Gemm_GroupNorm_Swish_Multiply_Swish.py │ ├── 89_ConvTranspose3d_MaxPool_Softmax_Subtract_Swish_Max.py │ ├── 8_Conv3d_Divide_Max_GlobalAvgPool_BiasAdd_Sum.py │ ├── 90_Conv3d_LeakyReLU_Sum_Clamp_GELU.py │ ├── 91_ConvTranspose2d_Softmax_BiasAdd_Scaling_Sigmoid.py │ ├── 92_Conv2d_GroupNorm_Tanh_HardSwish_ResidualAdd_LogSumExp.py │ ├── 93_ConvTranspose2d_Add_Min_GELU_Multiply.py │ ├── 94_Gemm_BiasAdd_Hardtanh_Mish_GroupNorm.py │ ├── 95_Matmul_Add_Swish_Tanh_GELU_Hardtanh.py │ ├── 96_ConvTranspose3d_Multiply_Max_GlobalAvgPool_Clamp.py │ ├── 97_Matmul_BatchNorm_BiasAdd_Divide_Swish.py │ ├── 98_Matmul_AvgPool_GELU_Scale_Max.py │ ├── 99_Matmul_GELU_Softmax.py │ └── 9_Matmul_Subtract_Multiply_ReLU.py ├── level3 │ ├── 10_ResNet101.py │ ├── 11_VGG16.py │ ├── 12_VGG19.py │ ├── 13_DenseNet121TransitionLayer.py │ ├── 14_DenseNet121DenseBlock.py │ ├── 15_DenseNet121.py │ ├── 16_DenseNet201.py │ ├── 17_SqueezeNetFireModule.py │ ├── 18_SqueezeNet.py │ ├── 19_MobileNetV1.py │ ├── 1_MLP.py │ ├── 20_MobileNetV2.py │ ├── 21_EfficientNetMBConv.py │ ├── 22_EfficientNetB0.py │ ├── 23_EfficientNetB1.py │ ├── 24_EfficientNetB2.py │ ├── 25_ShuffleNetUnit.py │ ├── 26_ShuffleNet.py │ ├── 27_RegNet.py │ ├── 28_VisionTransformer.py │ ├── 29_SwinMLP.py │ ├── 2_ShallowWideMLP.py │ ├── 30_SwinTransformerV2.py │ ├── 31_VisionAttention.py │ ├── 32_ConvolutionalVisionTransformer.py │ ├── 33_VanillaRNN.py │ ├── 34_VanillaRNNHidden.py │ ├── 35_LTSM.py │ ├── 36_LTSMHn.py │ ├── 37_LTSMCn.py │ ├── 38_LTSMBidirectional.py │ ├── 39_GRU.py │ ├── 3_DeepNarrowMLP.py │ ├── 40_GRUHidden.py │ ├── 41_GRUBirectional.py │ ├── 42_GRUBidirectionalHidden.py │ ├── 43_MinGPTCausalAttention.py │ ├── 44_MiniGPTBlock.py │ ├── 45_UNetSoftmax.py │ ├── 46_NetVladWithGhostClusters.py │ ├── 47_NetVladNoGhostClusters.py │ ├── 48_Mamba2ReturnY.py │ ├── 49_Mamba2ReturnFinalState.py │ ├── 4_LeNet5.py │ ├── 50_ReLUSelfAttention.py │ ├── 5_AlexNet.py │ ├── 6_GoogleNetInceptionModule.py │ ├── 7_GoogleNetInceptionV1.py │ ├── 8_ResNetBasicBlock.py │ └── 9_ResNet18.py └── level4 │ ├── 10_google-bigbird-roberta-base_bs1024_seq32.py │ ├── 11_google-electra-small-discriminator_bs1_seq511.py │ ├── 12_google-electra-small-discriminator_bs1024_seq32.py │ ├── 13_google-reformer-enwik8_bs32_seq256.py │ ├── 14_google-electra-small-discriminator_bs32_seq256.py │ ├── 15_google-reformer-enwik8_bs1024_seq32.py │ ├── 16_gpt2_bs1_seq1023.py │ ├── 17_facebook-bart-large_bs1024_seq32.py │ ├── 18_EleutherAI-gpt-neo-2p7B_bs512_seq32.py │ ├── 19_gpt2_bs1024_seq32.py │ ├── 
1_EleutherAI-gpt-neo-2p7B_bs32_seq256.py │ ├── 20_facebook-bart-large_bs32_seq256.py │ ├── 2_facebook-opt-1p3b_bs1_seq2047.py │ ├── 3_EleutherAI-gpt-neo-2p7B_bs1_seq2047.py │ ├── 4_facebook-opt-1p3b_bs32_seq256.py │ ├── 5_google-bigbird-roberta-base_bs1_seq4095.py │ ├── 6_facebook-bart-large_bs1_seq1023.py │ ├── 7_gpt2_bs32_seq256.py │ ├── 8_facebook-opt-1p3b_bs512_seq32.py │ └── 9_google-bigbird-roberta-base_bs32_seq256.py ├── LICENSE ├── README.md ├── assets └── figures │ ├── KernelBenchMascot.png │ └── KernelBenchWorkFlow.png ├── requirements.txt ├── results └── timing │ ├── A100-80GB_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── A100_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── A10G_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── B200_together │ ├── baseline_time_torch.json │ ├── baseline_time_torch_compile_cudagraphs.json │ ├── baseline_time_torch_compile_inductor_default.json │ ├── baseline_time_torch_compile_inductor_max-autotune-no-cudagraphs.json │ ├── baseline_time_torch_compile_inductor_max-autotune.json │ └── baseline_time_torch_compile_inductor_reduce-overhead.json │ ├── H100_together │ ├── baseline_time_torch.json │ ├── baseline_time_torch_compile_cudagraphs.json │ ├── baseline_time_torch_compile_inductor_default.json │ ├── baseline_time_torch_compile_inductor_max-autotune-no-cudagraphs.json │ ├── baseline_time_torch_compile_inductor_max-autotune.json │ └── baseline_time_torch_compile_inductor_reduce-overhead.json │ ├── L40S_matx3 │ ├── baseline_time_torch.json │ ├── baseline_time_torch_compile_cudagraphs.json │ ├── baseline_time_torch_compile_inductor_default.json │ ├── baseline_time_torch_compile_inductor_max-autotune-no-cudagraphs.json │ ├── baseline_time_torch_compile_inductor_max-autotune.json │ └── baseline_time_torch_compile_inductor_reduce-overhead.json │ ├── L40S_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── L4_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── README.md │ ├── T4_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ └── old │ ├── baseline_time_cleaned.json │ ├── baseline_time_matx2.json │ ├── baseline_time_matx3.json │ ├── baseline_time_torch_compile_cleaned.json │ ├── baseline_time_torch_compile_matx2.json │ └── baseline_time_torch_compile_matx3.json ├── scripts ├── benchmark_eval_analysis.py ├── debug_stddout.py ├── eval_from_generations.py ├── generate_and_eval_single_sample.py ├── generate_and_eval_single_sample_modal.py ├── generate_baseline_time.py ├── generate_baseline_time_modal.py ├── generate_samples.py ├── inspect_baseline.py ├── inspect_kernel_pytorch_profiler.py ├── inspect_triton.py ├── run_and_check.py ├── verify_bench.py └── verify_generation.py ├── setup.py └── src ├── __init__.py ├── analysis.py ├── compile.py ├── dataset.py ├── eval.py ├── frameworks.py ├── make_hf_dataset.py ├── prompt_constructor.py ├── prompts ├── README.md ├── cot │ ├── model_cot_fuse_gelu.py │ ├── model_cot_mnist2.py │ └── model_cot_tiled_matmul.py ├── few_shot │ ├── model_ex_add.py │ ├── model_ex_flash_attn.py │ ├── model_ex_fuse_gelu.py │ ├── model_ex_mnist2.py │ ├── model_ex_tiled_matmul.py │ ├── model_new_ex_add.py │ ├── model_new_ex_flash_attn.py │ ├── model_new_ex_fuse_gelu.py │ ├── model_new_ex_mnist2.py │ └── model_new_ex_tiled_matmul.py ├── hardware │ └── 
gpu_specs.py ├── model_ex_0.py ├── model_ex_1.py ├── model_ex_2.py ├── model_ex_add.py ├── model_new_ex_0.py ├── model_new_ex_1.py ├── model_new_ex_2.py └── model_new_ex_add.py ├── score.py ├── scratch ├── log.txt ├── model.py ├── model_new.py ├── prompt.txt └── test.py ├── unit_tests ├── test_dataset.py ├── test_score.py └── test_utils.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | **__pycache__/ 2 | scratch/ 3 | **/scratch/ 4 | *.egg-info/ 5 | **build/ 6 | **dist/ 7 | # **results/ 8 | results/* 9 | runs/* 10 | cache/* 11 | !results/timing/ 12 | .env 13 | -------------------------------------------------------------------------------- /KernelBench/level1/100_HingeLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Hinge Loss for binary classification tasks. 7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.mean(torch.clamp(1 - predictions * targets, min=0)) 16 | 17 | batch_size = 128 18 | input_shape = (1,) 19 | dim = 1 20 | 21 | def get_inputs(): 22 | return [torch.randn(batch_size, *input_shape), torch.randint(0, 2, (batch_size, 1)).float() * 2 - 1] 23 | 24 | def get_init_inputs(): 25 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/10_3D_tensor_matrix_multiplication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Performs 3D tensor-matrix multiplication. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs 3D tensor-matrix multiplication. 14 | 15 | Args: 16 | A (torch.Tensor): Input 3D tensor of shape (N, M, K). 17 | B (torch.Tensor): Input matrix of shape (K, L). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (N, M, L), resulting from the multiplication of A and B along the last dimension of A. 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | N = 16 25 | M = 1024 26 | K = 2048 27 | L = 768 28 | 29 | def get_inputs(): 30 | A = torch.randn(N, M, K) 31 | B = torch.randn(K, L) 32 | return [A, B] 33 | 34 | def get_init_inputs(): 35 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/11_4D_tensor_matrix_multiplication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Performs 4D tensor-matrix multiplication: 7 | C[b, i, j, k] = sum_l A[b, i, j, l] * B[l, k] 8 | 9 | Args: 10 | A (torch.Tensor): Input 4D tensor of shape (b, i, j, l) 11 | B (torch.Tensor): Input matrix of shape (l, k) 12 | 13 | Returns: 14 | torch.Tensor: Output 4D tensor of shape (b, i, j, k) 15 | """ 16 | def __init__(self): 17 | super(Model, self).__init__() 18 | 19 | def forward(self, A, B): 20 | """ 21 | Performs the 4D tensor-matrix multiplication. 
22 | 23 | Args: 24 | A (torch.Tensor): Input 4D tensor of shape (b, i, j, l) 25 | B (torch.Tensor): Input matrix of shape (l, k) 26 | 27 | Returns: 28 | torch.Tensor: Output 4D tensor of shape (b, i, j, k) 29 | """ 30 | return torch.einsum("bijl,lk->bijk", A, B) 31 | 32 | # Test code 33 | b = 16 34 | i = 256 35 | j = 512 36 | l = 256 37 | k = 768 38 | 39 | def get_inputs(): 40 | A = torch.randn(b, i, j, l) 41 | B = torch.randn(l, k) 42 | return [A, B] 43 | 44 | def get_init_inputs(): 45 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/12_Matmul_with_diagonal_matrices_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication of a diagonal matrix with another matrix. 7 | C = diag(A) * B 8 | """ 9 | def __init__(self): 10 | super(Model, self).__init__() 11 | 12 | def forward(self, A, B): 13 | """ 14 | Performs the matrix multiplication. 15 | 16 | Args: 17 | A (torch.Tensor): A 1D tensor representing the diagonal of the diagonal matrix. Shape: (N,). 18 | B (torch.Tensor): A 2D tensor representing the second matrix. Shape: (N, M). 19 | 20 | Returns: 21 | torch.Tensor: The result of the matrix multiplication. Shape: (N, M). 22 | """ 23 | return torch.diag(A) @ B 24 | 25 | M = 4096 26 | N = 4096 27 | 28 | def get_inputs(): 29 | A = torch.randn(N) 30 | B = torch.randn(N, M) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/13_Matmul_for_symmetric_matrices.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) with A and B being symmetric matrices. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs matrix multiplication of two symmetric matrices. 14 | 15 | Args: 16 | A (torch.Tensor): Input matrix A, shape (N, N), symmetric. 17 | B (torch.Tensor): Input matrix B, shape (N, N), symmetric. 18 | 19 | Returns: 20 | torch.Tensor: Output matrix C, shape (N, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | N = 4096 25 | 26 | def get_inputs(): 27 | """ 28 | Generates a pair of random symmetric matrices for testing. 29 | 30 | Returns: 31 | list: List containing two symmetric tensors A and B. 32 | """ 33 | A = torch.randn(N, N) 34 | A = (A + A.T) / 2 # Ensure symmetry 35 | B = torch.randn(N, N) 36 | B = (B + B.T) / 2 # Ensure symmetry 37 | return [A, B] 38 | 39 | def get_init_inputs(): 40 | """ 41 | No specific initialization inputs needed for this model. 42 | 43 | Returns: 44 | list: Empty list. 45 | """ 46 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/14_Matmul_for_upper_triangular_matrices.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs matrix multiplication (C = A * B) for upper triangular matrices. 
7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs matrix multiplication for upper triangular matrices. 14 | 15 | Args: 16 | A (torch.Tensor): Upper triangular matrix of shape (N, N). 17 | B (torch.Tensor): Upper triangular matrix of shape (N, N). 18 | 19 | Returns: 20 | torch.Tensor: The product of A and B, also an upper triangular matrix of shape (N, N). 21 | """ 22 | return torch.triu(torch.matmul(A, B)) 23 | 24 | N = 4096 25 | 26 | def get_inputs(): 27 | """ 28 | Generates upper triangular matrices for testing. 29 | 30 | Returns: 31 | list: A list containing two upper triangular matrices of shape (N, N). 32 | """ 33 | A = torch.triu(torch.randn(N, N)) 34 | B = torch.triu(torch.randn(N, N)) 35 | return [A, B] 36 | 37 | def get_init_inputs(): 38 | """ 39 | No specific initialization inputs are needed for this model. 40 | 41 | Returns: 42 | list: An empty list. 43 | """ 44 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/15_Matmul_for_lower_triangular_matrices.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication (C = A * B) where A and B are lower triangular matrices. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs matrix multiplication of lower triangular matrices A and B. 14 | 15 | Args: 16 | A (torch.Tensor): Lower triangular matrix of shape (N, N). 17 | B (torch.Tensor): Lower triangular matrix of shape (N, N). 18 | 19 | Returns: 20 | torch.Tensor: The result of matrix multiplication C of shape (N, N). 21 | """ 22 | return torch.tril(torch.matmul(A, B)) 23 | 24 | M = 4096 25 | 26 | def get_inputs(): 27 | A = torch.randn(M, M) 28 | B = torch.randn(M, M) 29 | A = torch.tril(A) 30 | B = torch.tril(B) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/16_Matmul_with_transposed_A.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 
21 | """ 22 | return torch.matmul(A.T, B) 23 | 24 | M = 1024 25 | K = 4096 26 | N = 2048 27 | 28 | def get_inputs(): 29 | A = torch.randn(K, M) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/17_Matmul_with_transposed_B.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 21 | """ 22 | return torch.matmul(A, B.T) 23 | 24 | M = 1024 25 | K = 4096 26 | N = 2048 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(N, K) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/18_Matmul_with_transposed_both.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 21 | """ 22 | return torch.matmul(A.T, B.T) 23 | 24 | M = 1024 25 | K = 4096 26 | N = 2048 27 | 28 | def get_inputs(): 29 | A = torch.randn(K, M) 30 | B = torch.randn(N, K) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/19_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a ReLU activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies ReLU activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with ReLU applied, same shape as input. 
20 | """ 21 | return torch.relu(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/1_Square_matrix_multiplication_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single square matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs the matrix multiplication. 14 | 15 | Args: 16 | A (torch.Tensor): Input matrix A of shape (N, N). 17 | B (torch.Tensor): Input matrix B of shape (N, N). 18 | 19 | Returns: 20 | torch.Tensor: Output matrix C of shape (N, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | N = 2048 25 | 26 | def get_inputs(): 27 | A = torch.randn(N, N) 28 | B = torch.randn(N, N) 29 | return [A, B] 30 | 31 | def get_init_inputs(): 32 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/20_LeakyReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a LeakyReLU activation. 7 | """ 8 | def __init__(self, negative_slope: float = 0.01): 9 | """ 10 | Initializes the LeakyReLU module. 11 | 12 | Args: 13 | negative_slope (float, optional): The negative slope of the activation function. Defaults to 0.01. 14 | """ 15 | super(Model, self).__init__() 16 | self.negative_slope = negative_slope 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies LeakyReLU activation to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of any shape. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with LeakyReLU applied, same shape as input. 27 | """ 28 | return torch.nn.functional.leaky_relu(x, negative_slope=self.negative_slope) 29 | 30 | batch_size = 16 31 | dim = 16384 32 | 33 | def get_inputs(): 34 | x = torch.randn(batch_size, dim) 35 | return [x] 36 | 37 | def get_init_inputs(): 38 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/21_Sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Sigmoid activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Sigmoid activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Sigmoid applied, same shape as input. 
20 | """ 21 | return torch.sigmoid(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/22_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Tanh activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Tanh activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Tanh applied, same shape as input. 20 | """ 21 | return torch.tanh(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/23_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Softmax activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Softmax activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, num_features). 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Softmax applied, same shape as input. 20 | """ 21 | return torch.softmax(x, dim=1) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/24_LogSoftmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a LogSoftmax activation. 7 | """ 8 | def __init__(self, dim: int = 1): 9 | super(Model, self).__init__() 10 | self.dim = dim 11 | 12 | def forward(self, x: torch.Tensor) -> torch.Tensor: 13 | """ 14 | Applies LogSoftmax activation to the input tensor. 15 | 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, dim). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor with LogSoftmax applied, same shape as input. 21 | """ 22 | return torch.log_softmax(x, dim=self.dim) 23 | 24 | batch_size = 16 25 | dim = 16384 26 | 27 | def get_inputs(): 28 | x = torch.randn(batch_size, dim) 29 | return [x] 30 | 31 | def get_init_inputs(): 32 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/25_Swish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Swish activation. 
7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Swish activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Swish applied, same shape as input. 20 | """ 21 | return x * torch.sigmoid(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/26_GELU_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a GELU activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies GELU activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with GELU applied, same shape as input. 20 | """ 21 | return torch.nn.functional.gelu(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/27_SELU_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a SELU activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies SELU activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with SELU applied, same shape as input. 20 | """ 21 | return torch.selu(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/28_HardSigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a HardSigmoid activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies HardSigmoid activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with HardSigmoid applied, same shape as input. 
20 | """ 21 | return torch.nn.functional.hardsigmoid(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/29_Softplus.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Softplus activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Softplus activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Softplus applied, same shape as input. 20 | """ 21 | return torch.nn.functional.softplus(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/2_Standard_matrix_multiplication_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 1024 25 | K = 4096 26 | N = 2048 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/30_Softsign.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Softsign activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Softsign activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Softsign applied, same shape as input. 20 | """ 21 | return x / (1 + torch.abs(x)) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/31_ELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | """ 7 | Simple model that performs an ELU activation. 
8 | """ 9 | def __init__(self, alpha: float = 1.0): 10 | """ 11 | Initializes the ELU model. 12 | 13 | Args: 14 | alpha (float, optional): The alpha parameter for the ELU function. Defaults to 1.0. 15 | """ 16 | super(Model, self).__init__() 17 | self.alpha = alpha 18 | 19 | def forward(self, x: torch.Tensor) -> torch.Tensor: 20 | """ 21 | Applies ELU activation to the input tensor. 22 | 23 | Args: 24 | x (torch.Tensor): Input tensor of any shape. 25 | 26 | Returns: 27 | torch.Tensor: Output tensor with ELU applied, same shape as input. 28 | """ 29 | return F.elu(x, alpha=self.alpha) 30 | 31 | batch_size = 16 32 | dim = 16384 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1.0] # Provide alpha value for initialization -------------------------------------------------------------------------------- /KernelBench/level1/32_HardTanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | """ 7 | Simple model that performs a HardTanh activation. 8 | """ 9 | def __init__(self): 10 | super(Model, self).__init__() 11 | 12 | def forward(self, x: torch.Tensor) -> torch.Tensor: 13 | """ 14 | Applies HardTanh activation to the input tensor. 15 | 16 | Args: 17 | x (torch.Tensor): Input tensor of any shape. 18 | 19 | Returns: 20 | torch.Tensor: Output tensor with HardTanh applied, same shape as input. 21 | """ 22 | return F.hardtanh(x, min_val=-1., max_val=1.) 23 | 24 | batch_size = 16 25 | dim = 16384 26 | 27 | def get_inputs(): 28 | x = torch.randn(batch_size, dim) 29 | return [x] 30 | 31 | def get_init_inputs(): 32 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/33_BatchNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Batch Normalization. 7 | """ 8 | def __init__(self, num_features: int): 9 | """ 10 | Initializes the BatchNorm layer. 11 | 12 | Args: 13 | num_features (int): Number of features in the input tensor. 14 | """ 15 | super(Model, self).__init__() 16 | self.bn = nn.BatchNorm2d(num_features=num_features) 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies Batch Normalization to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (batch_size, num_features, *). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with Batch Normalization applied, same shape as input. 27 | """ 28 | return self.bn(x) 29 | 30 | batch_size = 16 31 | features = 64 32 | dim1 = 256 33 | dim2 = 256 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, features, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [features] -------------------------------------------------------------------------------- /KernelBench/level1/34_InstanceNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Instance Normalization. 7 | """ 8 | def __init__(self, num_features: int): 9 | """ 10 | Initializes the InstanceNorm layer. 11 | 12 | Args: 13 | num_features (int): Number of features in the input tensor. 
14 | """ 15 | super(Model, self).__init__() 16 | self.inorm = nn.InstanceNorm2d(num_features=num_features) 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies Instance Normalization to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (batch_size, num_features, height, width). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with Instance Normalization applied, same shape as input. 27 | """ 28 | return self.inorm(x) 29 | 30 | batch_size = 16 31 | features = 64 32 | dim1 = 256 33 | dim2 = 256 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, features, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [features] -------------------------------------------------------------------------------- /KernelBench/level1/35_GroupNorm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Group Normalization. 7 | """ 8 | def __init__(self, num_features: int, num_groups: int): 9 | """ 10 | Initializes the GroupNorm layer. 11 | 12 | Args: 13 | num_features (int): Number of features in the input tensor. 14 | num_groups (int): Number of groups to divide the channels into. 15 | """ 16 | super(Model, self).__init__() 17 | self.gn = nn.GroupNorm(num_groups=num_groups, num_channels=num_features) 18 | 19 | def forward(self, x: torch.Tensor) -> torch.Tensor: 20 | """ 21 | Applies Group Normalization to the input tensor. 22 | 23 | Args: 24 | x (torch.Tensor): Input tensor of shape (batch_size, num_features, *). 25 | 26 | Returns: 27 | torch.Tensor: Output tensor with Group Normalization applied, same shape as input. 28 | """ 29 | return self.gn(x) 30 | 31 | batch_size = 16 32 | features = 64 33 | num_groups = 8 34 | dim1 = 256 35 | dim2 = 256 36 | 37 | def get_inputs(): 38 | x = torch.randn(batch_size, features, dim1, dim2) 39 | return [x] 40 | 41 | def get_init_inputs(): 42 | return [features, num_groups] # num_features -------------------------------------------------------------------------------- /KernelBench/level1/36_RMSNorm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs RMS Normalization. 7 | """ 8 | def __init__(self, num_features: int, eps: float = 1e-5): 9 | """ 10 | Initializes the RMSNorm layer. 11 | 12 | Args: 13 | num_features (int): Number of features in the input tensor. 14 | eps (float, optional): A small value added to the denominator to avoid division by zero. Defaults to 1e-5. 15 | """ 16 | super(Model, self).__init__() 17 | self.num_features = num_features 18 | self.eps = eps 19 | 20 | def forward(self, x: torch.Tensor) -> torch.Tensor: 21 | """ 22 | Applies RMS Normalization to the input tensor. 23 | 24 | Args: 25 | x (torch.Tensor): Input tensor of shape (batch_size, num_features, *). 26 | 27 | Returns: 28 | torch.Tensor: Output tensor with RMS Normalization applied, same shape as input. 
29 | """ 30 | # Calculate the RMS along the feature dimension 31 | rms = torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + self.eps) 32 | 33 | # Normalize the input by dividing by the RMS 34 | return x / rms 35 | 36 | batch_size = 16 37 | features = 64 38 | dim1 = 256 39 | dim2 = 256 40 | 41 | def get_inputs(): 42 | x = torch.randn(batch_size, features, dim1, dim2) 43 | return [x] 44 | 45 | def get_init_inputs(): 46 | return [features] -------------------------------------------------------------------------------- /KernelBench/level1/37_FrobeniusNorm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Frobenius norm normalization. 7 | """ 8 | def __init__(self): 9 | """ 10 | Initializes the Frobenius norm normalization layer. 11 | """ 12 | super(Model, self).__init__() 13 | 14 | def forward(self, x: torch.Tensor) -> torch.Tensor: 15 | """ 16 | Applies Frobenius norm normalization to the input tensor. 17 | 18 | Args: 19 | x (torch.Tensor): Input tensor of arbitrary shape. 20 | 21 | Returns: 22 | torch.Tensor: Output tensor with Frobenius norm normalization applied, same shape as input. 23 | """ 24 | norm = torch.norm(x, p='fro') 25 | return x / norm 26 | 27 | batch_size = 16 28 | features = 64 29 | dim1 = 256 30 | dim2 = 256 31 | 32 | def get_inputs(): 33 | x = torch.randn(batch_size, features, dim1, dim2) 34 | return [x] 35 | 36 | def get_init_inputs(): 37 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/38_L1Norm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs L1 normalization. 7 | """ 8 | def __init__(self): 9 | """ 10 | Initializes the L1 normalization layer. 11 | """ 12 | super(Model, self).__init__() 13 | 14 | def forward(self, x: torch.Tensor) -> torch.Tensor: 15 | """ 16 | Applies L1 normalization to the input tensor. 17 | 18 | Args: 19 | x (torch.Tensor): Input tensor of shape (..., dim, ...). 20 | 21 | Returns: 22 | torch.Tensor: Output tensor with L1 normalization applied, same shape as input. 23 | """ 24 | return x / torch.sum(torch.abs(x), dim=1, keepdim=True) 25 | 26 | batch_size = 16 27 | dim = 16384 28 | 29 | def get_inputs(): 30 | x = torch.randn(batch_size, dim) 31 | return [x] 32 | 33 | def get_init_inputs(): 34 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/39_L2Norm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs L2 normalization. 7 | """ 8 | def __init__(self): 9 | """ 10 | Initializes the L2Norm layer. 11 | 12 | Args: 13 | dim (int): Dimension along which to normalize. 14 | """ 15 | super(Model, self).__init__() 16 | 17 | def forward(self, x: torch.Tensor) -> torch.Tensor: 18 | """ 19 | Applies L2 normalization to the input tensor. 20 | 21 | Args: 22 | x (torch.Tensor): Input tensor of shape (*, dim, *). 23 | 24 | Returns: 25 | torch.Tensor: Output tensor with L2 normalization applied, same shape as input. 
26 | """ 27 | return x / torch.norm(x, p=2, dim=1, keepdim=True) 28 | 29 | batch_size = 16 30 | dim = 16384 31 | 32 | def get_inputs(): 33 | x = torch.randn(batch_size, dim) 34 | return [x] 35 | 36 | def get_init_inputs(): 37 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/3_Batched_matrix_multiplication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Performs batched matrix multiplication (C = A * B) where A, B, and C have the same batch dimension. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs batched matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (batch_size, m, k). 17 | B: Input tensor of shape (batch_size, k, n). 18 | 19 | Returns: 20 | C: Output tensor of shape (batch_size, m, n). 21 | """ 22 | return torch.bmm(A, B) 23 | 24 | batch_size = 128 25 | m = 128 26 | k = 256 27 | n = 512 28 | 29 | def get_inputs(): 30 | A = torch.randn(batch_size, m, k) 31 | B = torch.randn(batch_size, k, n) 32 | return [A, B] 33 | 34 | def get_init_inputs(): 35 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/40_LayerNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Layer Normalization. 7 | """ 8 | def __init__(self, normalized_shape: tuple): 9 | """ 10 | Initializes the LayerNorm layer. 11 | 12 | Args: 13 | normalized_shape (tuple): Shape of the input tensor to be normalized. 14 | """ 15 | super(Model, self).__init__() 16 | self.ln = nn.LayerNorm(normalized_shape=normalized_shape) 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies Layer Normalization to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (*, normalized_shape). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with Layer Normalization applied, same shape as input. 27 | """ 28 | return self.ln(x) 29 | 30 | batch_size = 16 31 | features = 64 32 | dim1 = 256 33 | dim2 = 256 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, features, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [(features, dim1, dim2)] -------------------------------------------------------------------------------- /KernelBench/level1/44_Average_Pooling_1D.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs 1D Average Pooling. 7 | """ 8 | def __init__(self, kernel_size: int, stride: int = 1, padding: int = 0): 9 | """ 10 | Initializes the 1D Average Pooling layer. 11 | 12 | Args: 13 | kernel_size (int): Size of the pooling window. 14 | stride (int, optional): Stride of the pooling operation. Defaults to 1. 15 | padding (int, optional): Padding applied to the input tensor. Defaults to 0. 16 | """ 17 | super(Model, self).__init__() 18 | self.avg_pool = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=padding) 19 | 20 | def forward(self, x: torch.Tensor) -> torch.Tensor: 21 | """ 22 | Applies 1D Average Pooling to the input tensor. 
23 | 24 | Args: 25 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, input_length). 26 | 27 | Returns: 28 | torch.Tensor: Output tensor with 1D Average Pooling applied, shape (batch_size, in_channels, output_length). 29 | """ 30 | return self.avg_pool(x) 31 | 32 | batch_size = 16 33 | in_channels = 32 34 | input_length = 128 35 | kernel_size = 4 36 | stride = 2 37 | padding = 1 38 | 39 | def get_inputs(): 40 | x = torch.randn(batch_size, in_channels, input_length) 41 | return [x] 42 | 43 | def get_init_inputs(): 44 | return [kernel_size, stride, padding] -------------------------------------------------------------------------------- /KernelBench/level1/45_Average_Pooling_2D.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs 2D Average Pooling. 7 | """ 8 | def __init__(self, kernel_size: int, stride: int = None, padding: int = 0): 9 | """ 10 | Initializes the Average Pooling layer. 11 | 12 | Args: 13 | kernel_size (int): Size of the pooling window. 14 | stride (int, optional): Stride of the pooling operation. Defaults to None (same as kernel_size). 15 | padding (int, optional): Padding applied to the input tensor. Defaults to 0. 16 | """ 17 | super(Model, self).__init__() 18 | self.avg_pool = nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding) 19 | 20 | def forward(self, x: torch.Tensor) -> torch.Tensor: 21 | """ 22 | Applies 2D Average Pooling to the input tensor. 23 | 24 | Args: 25 | x (torch.Tensor): Input tensor of shape (batch_size, channels, height, width). 26 | 27 | Returns: 28 | torch.Tensor: Output tensor with Average Pooling applied. 29 | """ 30 | return self.avg_pool(x) 31 | 32 | batch_size = 16 33 | channels = 64 34 | height = 256 35 | width = 256 36 | kernel_size = 3 37 | 38 | def get_inputs(): 39 | x = torch.randn(batch_size, channels, height, width) 40 | return [x] 41 | 42 | def get_init_inputs(): 43 | return [kernel_size] -------------------------------------------------------------------------------- /KernelBench/level1/47_Sum_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs sum reduction over a specified dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): Dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies sum reduction over the specified dimension. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (..., dim, ...). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor after sum reduction, shape (..., 1, ...). 
27 | """ 28 | return torch.sum(x, dim=self.dim, keepdim=True) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | reduce_dim = 1 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [reduce_dim] -------------------------------------------------------------------------------- /KernelBench/level1/48_Mean_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs mean reduction over a specific dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): The dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Reduces the input tensor along the specified dimension by taking the mean. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of arbitrary shape. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with reduced dimension. The shape of the output is the same as the input except for the reduced dimension which is removed. 27 | """ 28 | return torch.mean(x, dim=self.dim) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim1, dim2) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1] -------------------------------------------------------------------------------- /KernelBench/level1/49_Max_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Max reduction over a specific dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): The dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies Max reduction over the specified dimension to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor after Max reduction over the specified dimension. 27 | """ 28 | return torch.max(x, dim=self.dim)[0] 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim1, dim2) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1] # Example, change to desired dimension -------------------------------------------------------------------------------- /KernelBench/level1/4_Matrix_vector_multiplication_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs matrix-vector multiplication (C = A * B). 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix-vector multiplication. 14 | 15 | Args: 16 | A: Input matrix of shape (M, K). 17 | B: Input vector of shape (K, 1). 18 | 19 | Returns: 20 | Output vector of shape (M, 1). 
21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 256 25 | K = 131072 26 | 27 | def get_inputs(): 28 | A = torch.randn(M, K) 29 | B = torch.randn(K, 1) 30 | return [A, B] 31 | 32 | def get_init_inputs(): 33 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/50_Product_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs product reduction over a dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): Dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Performs product reduction over the specified dimension. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with product reduction applied. 27 | """ 28 | return torch.prod(x, dim=self.dim) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | reduction_dim = 1 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [reduction_dim] -------------------------------------------------------------------------------- /KernelBench/level1/51_Argmax_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Argmax over a specified dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to perform argmax. 11 | 12 | Args: 13 | dim (int): The dimension to perform argmax over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies argmax over the specified dimension to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with argmax applied, with the specified dimension removed. 27 | """ 28 | return torch.argmax(x, dim=self.dim) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim1, dim2) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1] -------------------------------------------------------------------------------- /KernelBench/level1/52_Argmin_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that finds the index of the minimum value along a specified dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to perform argmin on. 11 | 12 | Args: 13 | dim (int): Dimension along which to find the minimum value. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Finds the index of the minimum value along the specified dimension. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Tensor containing the indices of the minimum values along the specified dimension. 
27 | """ 28 | return torch.argmin(x, dim=self.dim) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | dim = 1 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [dim] -------------------------------------------------------------------------------- /KernelBench/level1/53_Min_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs min reduction over a specific dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): The dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies min reduction over the specified dimension to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor after min reduction over the specified dimension. 27 | """ 28 | return torch.min(x, dim=self.dim)[0] 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim1, dim2) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1] # Example, change to desired dimension -------------------------------------------------------------------------------- /KernelBench/level1/5_Matrix_scalar_multiplication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix-scalar multiplication (C = A * s) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, s: float) -> torch.Tensor: 12 | """ 13 | Performs matrix-scalar multiplication. 14 | 15 | Args: 16 | A: Input matrix of shape (M, N) 17 | s: Scalar value 18 | 19 | Returns: 20 | C: Resulting matrix of shape (M, N) 21 | """ 22 | return A * s 23 | 24 | M = 16384 25 | N = 4096 26 | 27 | def get_inputs(): 28 | A = torch.randn(M, N) 29 | s = 3.14 30 | return [A, s] 31 | 32 | def get_init_inputs(): 33 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/6_Matmul_with_large_K_dimension_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) with a large K dimension 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication of A and B. 
14 | 15 | Args: 16 | A: Input tensor of shape (M, K) 17 | B: Input tensor of shape (K, N) 18 | 19 | Returns: 20 | Output tensor of shape (M, N) 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 256 25 | N = 256 26 | K = 131072 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/7_Matmul_with_small_K_dimension_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) with a small K dimension 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 16384 25 | N = 16384 26 | K = 32 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/87_conv_pointwise_2D.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Performs a pointwise 2D convolution operation. 7 | 8 | Args: 9 | in_channels (int): Number of channels in the input tensor. 10 | out_channels (int): Number of channels produced by the convolution. 11 | bias (bool, optional): If `True`, adds a learnable bias to the output. Defaults to `False`. 12 | """ 13 | def __init__(self, in_channels: int, out_channels: int, bias: bool = False): 14 | super(Model, self).__init__() 15 | self.conv1d = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=bias) 16 | 17 | def forward(self, x: torch.Tensor) -> torch.Tensor: 18 | """ 19 | Performs the pointwise 2D convolution. 20 | 21 | Args: 22 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width). 23 | 24 | Returns: 25 | torch.Tensor: Output tensor of shape (batch_size, out_channels, height, width). 26 | """ 27 | return self.conv1d(x) 28 | 29 | # Test code 30 | batch_size = 16 31 | in_channels = 3 32 | out_channels = 64 33 | width = 256 34 | height = 256 35 | 36 | def get_inputs(): 37 | x = torch.randn(batch_size, in_channels, height, width) 38 | return [x] 39 | 40 | def get_init_inputs(): 41 | return [in_channels, out_channels] -------------------------------------------------------------------------------- /KernelBench/level1/88_MinGPTNewGelu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | # From https://github.com/karpathy/minGPT/blob/master/mingpt/model.py 7 | 8 | class Model(nn.Module): 9 | """ 10 | Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). 
11 | Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415 12 | """ 13 | def __init__(self): 14 | super(Model, self).__init__() 15 | 16 | def forward(self, x): 17 | return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) 18 | 19 | batch_size = 2000 20 | dim = 2000 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, dim)] 24 | 25 | def get_init_inputs(): 26 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/8_Matmul_with_irregular_shapes_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) with irregular shapes 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication of A and B. 14 | 15 | Args: 16 | A: Input tensor with shape (M, K). 17 | B: Input tensor with shape (K, N). 18 | 19 | Returns: 20 | C: Output tensor with shape (M, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 8205 25 | K = 2949 26 | N = 5921 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/90_cumprod.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a cumulative product operation along a specified dimension. 7 | 8 | Parameters: 9 | dim (int): The dimension along which to perform the cumulative product operation. 10 | """ 11 | 12 | def __init__(self, dim): 13 | """ 14 | Initialize the CumulativeProductModel. 15 | 16 | Args: 17 | dim (int): The dimension along which to perform the cumulative product. 18 | """ 19 | super(Model, self).__init__() 20 | self.dim = dim 21 | 22 | def forward(self, x): 23 | """ 24 | Forward pass, computing the cumulative product along the specified dimension. 25 | 26 | Args: 27 | x (torch.Tensor): Input tensor of shape (batch_size, *input_shape). 28 | 29 | Returns: 30 | torch.Tensor: Tensor of the same shape as `x` after applying cumulative product along `dim`. 31 | """ 32 | return torch.cumprod(x, dim=self.dim) 33 | 34 | # Define input dimensions and parameters 35 | batch_size = 128 36 | input_shape = (4000,) 37 | dim = 1 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, *input_shape)] 41 | 42 | def get_init_inputs(): 43 | return [dim] 44 | -------------------------------------------------------------------------------- /KernelBench/level1/91_cumsum_reverse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a reverse cumulative sum operation along a specified dimension. 7 | 8 | Parameters: 9 | dim (int): The dimension along which to perform the reverse cumulative sum. 
10 | """ 11 | 12 | def __init__(self, dim): 13 | super(Model, self).__init__() 14 | self.dim = dim 15 | 16 | def forward(self, x): 17 | return torch.cumsum(x.flip(self.dim), dim=self.dim).flip(self.dim) 18 | 19 | batch_size = 128 20 | input_shape = (4000,) 21 | dim = 1 22 | 23 | def get_inputs(): 24 | return [torch.randn(batch_size, *input_shape)] 25 | 26 | def get_init_inputs(): 27 | return [dim] 28 | -------------------------------------------------------------------------------- /KernelBench/level1/92_cumsum_exclusive.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs an exclusive cumulative sum (does not include the current element). 7 | 8 | Parameters: 9 | dim (int): The dimension along which to perform the exclusive cumulative sum. 10 | """ 11 | 12 | def __init__(self, dim): 13 | super(Model, self).__init__() 14 | self.dim = dim 15 | 16 | def forward(self, x): 17 | exclusive_cumsum = torch.cat((torch.zeros_like(x.select(self.dim, 0).unsqueeze(self.dim)), x), dim=self.dim)[:-1] 18 | return torch.cumsum(exclusive_cumsum, dim=self.dim) 19 | 20 | batch_size = 128 21 | input_shape = (4000,) 22 | dim = 1 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, *input_shape)] 26 | 27 | def get_init_inputs(): 28 | return [dim] 29 | -------------------------------------------------------------------------------- /KernelBench/level1/93_masked_cumsum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a masked cumulative sum, only summing elements that satisfy a condition. 7 | 8 | Parameters: 9 | dim (int): The dimension along which to perform the masked cumulative sum. 10 | """ 11 | 12 | def __init__(self, dim): 13 | super(Model, self).__init__() 14 | self.dim = dim 15 | 16 | def forward(self, x, mask): 17 | """ 18 | Args: 19 | x (torch.Tensor): Input tensor of shape (batch_size, *input_shape). 20 | mask (torch.Tensor): Boolean mask of the same shape as x. 21 | 22 | Returns: 23 | torch.Tensor: Cumulative sum of elements where mask is True. 24 | """ 25 | return torch.cumsum(x * mask, dim=self.dim) 26 | 27 | batch_size = 128 28 | input_shape = (4000,) 29 | dim = 1 30 | 31 | def get_inputs(): 32 | x = torch.randn(batch_size, *input_shape) 33 | mask = torch.randint(0, 2, x.shape).bool() # Random boolean mask 34 | return [x, mask] 35 | 36 | def get_init_inputs(): 37 | return [dim] 38 | -------------------------------------------------------------------------------- /KernelBench/level1/94_MSELoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes the Mean Squared Error loss for regression tasks. 
7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.mean((predictions - targets) ** 2) 16 | 17 | batch_size = 128 18 | input_shape = (4096, ) 19 | dim = 1 20 | 21 | def get_inputs(): 22 | return [torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape)] 23 | 24 | def get_init_inputs(): 25 | return [] 26 | -------------------------------------------------------------------------------- /KernelBench/level1/95_CrossEntropyLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Cross Entropy Loss for multi-class classification tasks. 7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.nn.functional.cross_entropy(predictions, targets) 16 | 17 | batch_size = 4096 18 | num_classes = 10 19 | input_shape = (num_classes, ) # Output for each class 20 | dim = 1 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, *input_shape), torch.randint(0, num_classes, (batch_size,))] 24 | 25 | def get_init_inputs(): 26 | return [] 27 | -------------------------------------------------------------------------------- /KernelBench/level1/96_HuberLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Smooth L1 (Huber) Loss for regression tasks. 7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.nn.functional.smooth_l1_loss(predictions, targets) 16 | 17 | batch_size = 128 18 | input_shape = (4096, ) 19 | dim = 1 20 | 21 | def get_inputs(): 22 | return [torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape)] 23 | 24 | def get_init_inputs(): 25 | return [] 26 | -------------------------------------------------------------------------------- /KernelBench/level1/97_CosineSimilarityLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Cosine Similarity Loss for comparing vectors. 7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | cosine_sim = torch.nn.functional.cosine_similarity(predictions, targets, dim=1) 16 | return torch.mean(1 - cosine_sim) 17 | 18 | batch_size = 128 19 | input_shape = (4096, ) 20 | dim = 1 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape)] 24 | 25 | def get_init_inputs(): 26 | return [] 27 | -------------------------------------------------------------------------------- /KernelBench/level1/98_KLDivLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Kullback-Leibler Divergence for comparing two distributions. 
7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.nn.functional.kl_div(torch.log(predictions), targets, reduction='batchmean') 16 | 17 | batch_size = 128 18 | input_shape = (4096, ) 19 | dim = 1 20 | 21 | def get_inputs(): 22 | return [torch.randn(batch_size, *input_shape).softmax(dim=-1), torch.randn(batch_size, *input_shape).softmax(dim=-1)] 23 | 24 | def get_init_inputs(): 25 | return [] 26 | -------------------------------------------------------------------------------- /KernelBench/level1/99_TripletMarginLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Triplet Margin Loss for metric learning tasks. 7 | 8 | Parameters: 9 | margin (float): The margin between the positive and negative samples. 10 | """ 11 | def __init__(self, margin=1.0): 12 | super(Model, self).__init__() 13 | self.loss_fn = torch.nn.TripletMarginLoss(margin=margin) 14 | 15 | def forward(self, anchor, positive, negative): 16 | return self.loss_fn(anchor, positive, negative) 17 | 18 | batch_size = 128 19 | input_shape = (4096, ) 20 | dim = 1 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape)] 24 | 25 | def get_init_inputs(): 26 | return [1.0] # Default margin 27 | -------------------------------------------------------------------------------- /KernelBench/level1/9_Tall_skinny_matrix_multiplication_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) where one of the matrices is tall and skinny (M >> N or N >> M) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs the matrix multiplication. 14 | 15 | Args: 16 | A (torch.Tensor): Input matrix of shape (M, K) or (K, M) where M >> N or N >> M. 17 | B (torch.Tensor): Input matrix of shape (K, N) or (N, K) where M >> N or N >> M. 18 | 19 | Returns: 20 | torch.Tensor: Output matrix of shape (M, N) or (N, M) 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 16384 25 | N = 16 26 | 27 | def get_inputs(): 28 | A = torch.randn(M, N) 29 | B = torch.randn(N, M) 30 | return [A, B] 31 | 32 | def get_init_inputs(): 33 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level2/100_ConvTranspose3d_Clamp_Min_Divide.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a transposed 3D convolution, clamps the output to a minimum value, 7 | and then divides the result by a constant. 
8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, min_value, divisor): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 12 | self.min_value = min_value 13 | self.divisor = divisor 14 | 15 | def forward(self, x): 16 | x = self.conv_transpose(x) 17 | x = torch.clamp(x, min=self.min_value) 18 | x = x / self.divisor 19 | return x 20 | 21 | batch_size = 16 22 | in_channels = 32 23 | out_channels = 16 24 | depth, height, width = 16, 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | min_value = -1.0 29 | divisor = 2.0 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, depth, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, min_value, divisor] -------------------------------------------------------------------------------- /KernelBench/level2/10_ConvTranspose2d_MaxPool_Hardtanh_Mean_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, followed by max pooling, hardtanh activation, mean operation, and tanh activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, maxpool_kernel_size, maxpool_stride, hardtanh_min, hardtanh_max): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.maxpool = nn.MaxPool2d(kernel_size=maxpool_kernel_size, stride=maxpool_stride) 12 | self.hardtanh = nn.Hardtanh(min_val=hardtanh_min, max_val=hardtanh_max) 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = self.maxpool(x) 17 | x = self.hardtanh(x) 18 | x = torch.mean(x, dim=(2, 3), keepdim=True) 19 | x = torch.tanh(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 32 24 | out_channels = 64 25 | height, width = 16, 16 26 | kernel_size = 4 27 | stride = 2 28 | padding = 1 29 | maxpool_kernel_size = 2 30 | maxpool_stride = 2 31 | hardtanh_min = -1 32 | hardtanh_max = 1 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding, maxpool_kernel_size, maxpool_stride, hardtanh_min, hardtanh_max] -------------------------------------------------------------------------------- /KernelBench/level2/11_ConvTranspose2d_BatchNorm_Tanh_MaxPool_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, batch normalization, tanh activation, max pooling, and group normalization. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups, num_groups): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.batch_norm = nn.BatchNorm2d(out_channels) 12 | self.tanh = nn.Tanh() 13 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 14 | self.group_norm = nn.GroupNorm(num_groups=num_groups, num_channels=out_channels) 15 | 16 | def forward(self, x): 17 | x = self.conv_transpose(x) 18 | x = self.batch_norm(x) 19 | x = self.tanh(x) 20 | x = self.max_pool(x) 21 | x = self.group_norm(x) 22 | return x 23 | 24 | batch_size = 128 25 | in_channels = 32 26 | out_channels = 64 27 | kernel_size = 4 28 | stride = 2 29 | padding = 1 30 | groups = 8 31 | num_groups = 4 32 | height, width = 32, 32 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding, groups, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/12_Gemm_Multiply_LeakyReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Gemm, multiplies the result, and applies LeakyReLU. 7 | """ 8 | def __init__(self, in_features, out_features, multiplier, negative_slope): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.multiplier = multiplier 12 | self.leaky_relu = nn.LeakyReLU(negative_slope) 13 | 14 | def forward(self, x): 15 | x = self.gemm(x) 16 | x = x * self.multiplier 17 | x = self.leaky_relu(x) 18 | return x 19 | 20 | batch_size = 128 21 | in_features = 1024 22 | out_features = 512 23 | multiplier = 2.0 24 | negative_slope = 0.1 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_features)] 28 | 29 | def get_init_inputs(): 30 | return [in_features, out_features, multiplier, negative_slope] -------------------------------------------------------------------------------- /KernelBench/level2/13_ConvTranspose3d_Mean_Add_Softmax_Tanh_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a series of operations: 7 | 1. Transposed 3D convolution 8 | 2. Mean pooling 9 | 3. Addition 10 | 4. Softmax 11 | 5. Tanh activation 12 | 6. 
Scaling 13 | """ 14 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias_shape, scaling_factor): 15 | super(Model, self).__init__() 16 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 17 | self.bias = nn.Parameter(torch.randn(bias_shape)) 18 | self.scaling_factor = scaling_factor 19 | 20 | def forward(self, x): 21 | x = self.conv_transpose(x) 22 | x = torch.mean(x, dim=1, keepdim=True) 23 | x = x + self.bias 24 | x = torch.softmax(x, dim=1) 25 | x = torch.tanh(x) 26 | x = x * self.scaling_factor 27 | return x 28 | 29 | batch_size = 16 30 | in_channels = 8 31 | out_channels = 16 32 | depth, height, width = 16, 32, 32 33 | kernel_size = 3 34 | stride = 2 35 | padding = 1 36 | bias_shape = (1, 1, 1, 1, 1) 37 | scaling_factor = 2.0 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, in_channels, depth, height, width)] 41 | 42 | def get_init_inputs(): 43 | return [in_channels, out_channels, kernel_size, stride, padding, bias_shape, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/14_Gemm_Divide_Sum_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication, division, summation, and scaling. 7 | """ 8 | def __init__(self, input_size, hidden_size, scaling_factor): 9 | super(Model, self).__init__() 10 | self.weight = nn.Parameter(torch.randn(hidden_size, input_size)) 11 | self.scaling_factor = scaling_factor 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, input_size). 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, hidden_size). 19 | """ 20 | x = torch.matmul(x, self.weight.T) # Gemm 21 | x = x / 2 # Divide 22 | x = torch.sum(x, dim=1, keepdim=True) # Sum 23 | x = x * self.scaling_factor # Scaling 24 | return x 25 | 26 | 27 | batch_size = 128 28 | input_size = 10 29 | hidden_size = 20 30 | scaling_factor = 1.5 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, input_size)] 34 | 35 | def get_init_inputs(): 36 | return [input_size, hidden_size, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/15_ConvTranspose3d_BatchNorm_Subtract.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A 3D convolutional transpose layer followed by Batch Normalization and subtraction. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=True): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=bias) 11 | self.batch_norm = nn.BatchNorm3d(out_channels) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = self.batch_norm(x) 16 | x = x - torch.mean(x, dim=(2, 3, 4), keepdim=True) # Subtract mean along spatial dimensions 17 | return x 18 | 19 | batch_size = 16 20 | in_channels = 16 21 | out_channels = 32 22 | depth, height, width = 16, 32, 32 23 | kernel_size = 3 24 | stride = 2 25 | padding = 1 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_channels, depth, height, width)] 29 | 30 | def get_init_inputs(): 31 | return [in_channels, out_channels, kernel_size, stride, padding] -------------------------------------------------------------------------------- /KernelBench/level2/16_ConvTranspose2d_Mish_Add_Hardtanh_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, applies Mish activation, adds a value, 7 | applies Hardtanh activation, and scales the output. 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, add_value, scale): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, output_padding) 12 | self.add_value = add_value 13 | self.scale = scale 14 | 15 | def forward(self, x): 16 | x = self.conv_transpose(x) 17 | x = torch.nn.functional.mish(x) # Mish activation 18 | x = x + self.add_value 19 | x = torch.nn.functional.hardtanh(x, min_val=-1, max_val=1) # Hardtanh activation 20 | x = x * self.scale # Scaling 21 | return x 22 | 23 | batch_size = 128 24 | in_channels = 32 25 | out_channels = 64 26 | height, width = 16, 16 27 | kernel_size = 4 28 | stride = 2 29 | padding = 1 30 | output_padding = 1 31 | add_value = 0.5 32 | scale = 2 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, add_value, scale] -------------------------------------------------------------------------------- /KernelBench/level2/17_Conv2d_InstanceNorm_Divide.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies Instance Normalization, and divides by a constant. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, divide_by): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.instance_norm = nn.InstanceNorm2d(out_channels) 12 | self.divide_by = divide_by 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = self.instance_norm(x) 17 | x = x / self.divide_by 18 | return x 19 | 20 | batch_size = 128 21 | in_channels = 3 22 | out_channels = 16 23 | height, width = 32, 32 24 | kernel_size = 3 25 | divide_by = 2.0 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_channels, height, width)] 29 | 30 | def get_init_inputs(): 31 | return [in_channels, out_channels, kernel_size, divide_by] -------------------------------------------------------------------------------- /KernelBench/level2/18_Matmul_Sum_Max_AvgPool_LogSumExp_LogSumExp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a sequence of operations: 7 | - Matrix multiplication 8 | - Summation 9 | - Max 10 | - Average pooling 11 | - LogSumExp 12 | - LogSumExp 13 | """ 14 | def __init__(self, in_features, out_features): 15 | super(Model, self).__init__() 16 | self.linear = nn.Linear(in_features, out_features) 17 | 18 | def forward(self, x): 19 | """ 20 | Args: 21 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 22 | Returns: 23 | torch.Tensor: Output tensor of shape (batch_size, 1). 24 | """ 25 | x = self.linear(x) # (batch_size, out_features) 26 | x = torch.sum(x, dim=1, keepdim=True) # (batch_size, 1) 27 | x = torch.max(x, dim=1, keepdim=True)[0] # (batch_size, 1) 28 | x = torch.mean(x, dim=1, keepdim=True) # (batch_size, 1) 29 | x = torch.logsumexp(x, dim=1, keepdim=True) # (batch_size, 1) 30 | x = torch.logsumexp(x, dim=1, keepdim=True) # (batch_size, 1) 31 | return x 32 | 33 | batch_size = 128 34 | in_features = 10 35 | out_features = 5 36 | 37 | def get_inputs(): 38 | return [torch.randn(batch_size, in_features)] 39 | 40 | def get_init_inputs(): 41 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/19_ConvTranspose2d_GELU_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, applies GELU, and normalizes with GroupNorm. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, groups, num_groups): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride) 11 | self.group_norm = nn.GroupNorm(num_groups=num_groups, num_channels=out_channels) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = torch.nn.functional.gelu(x) 16 | x = self.group_norm(x) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 32 21 | out_channels = 64 22 | height, width = 32, 32 23 | kernel_size = 4 24 | stride = 2 25 | groups = 8 26 | num_groups = 8 27 | 28 | def get_inputs(): 29 | return [torch.randn(batch_size, in_channels, height, width)] 30 | 31 | def get_init_inputs(): 32 | return [in_channels, out_channels, kernel_size, stride, groups, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/1_Conv2D_ReLU_BiasAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies ReLU, and adds a bias term. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = torch.relu(x) 16 | x = x + self.bias 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 3 21 | out_channels = 16 22 | height, width = 32, 32 23 | kernel_size = 3 24 | bias_shape = (out_channels, 1, 1) 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_channels, height, width)] 28 | 29 | def get_init_inputs(): 30 | return [in_channels, out_channels, kernel_size, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/20_ConvTranspose3d_Sum_ResidualAdd_Multiply_ResidualAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, followed by a sum, 7 | a residual add, a multiplication, and another residual add. 
8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | original_x = x.clone().detach() 17 | x = x + self.bias 18 | x = x + original_x 19 | x = x * original_x 20 | x = x + original_x 21 | return x 22 | 23 | batch_size = 16 24 | in_channels = 32 25 | out_channels = 64 26 | depth, height, width = 16, 32, 32 27 | kernel_size = 3 28 | stride = 2 29 | padding = 1 30 | output_padding = 1 31 | bias_shape = (out_channels, 1, 1, 1) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_channels, depth, height, width)] 35 | 36 | def get_init_inputs(): 37 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/21_Conv2d_Add_Scale_Sigmoid_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, adds a bias term, scales, applies sigmoid, and performs group normalization. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, num_groups, bias_shape, scale_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.scale = nn.Parameter(torch.randn(scale_shape)) 13 | self.group_norm = nn.GroupNorm(num_groups, out_channels) 14 | 15 | def forward(self, x): 16 | x = self.conv(x) 17 | x = x + self.bias 18 | x = x * self.scale 19 | x = torch.sigmoid(x) 20 | x = self.group_norm(x) 21 | return x 22 | 23 | batch_size = 128 24 | in_channels = 3 25 | out_channels = 16 26 | height, width = 32, 32 27 | kernel_size = 3 28 | num_groups = 8 29 | bias_shape = (out_channels, 1, 1) 30 | scale_shape = (out_channels, 1, 1) 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, height, width)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, num_groups, bias_shape, scale_shape] -------------------------------------------------------------------------------- /KernelBench/level2/22_Matmul_Scale_ResidualAdd_Clamp_LogSumExp_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication, scales the result, adds a residual connection, clamps the output, 7 | applies LogSumExp, and finally applies the Mish activation function. 8 | """ 9 | def __init__(self, input_size, hidden_size, scale_factor, clamp_min, clamp_max): 10 | super(Model, self).__init__() 11 | self.matmul = nn.Linear(input_size, hidden_size) 12 | self.scale_factor = scale_factor 13 | self.clamp_min = clamp_min 14 | self.clamp_max = clamp_max 15 | 16 | def forward(self, x): 17 | """ 18 | Args: 19 | x: Input tensor of shape (batch_size, input_size). 20 | 21 | Returns: 22 | Output tensor of shape (batch_size, hidden_size). 
23 | """ 24 | x = self.matmul(x) 25 | x = x * self.scale_factor 26 | x = x + x 27 | x = torch.clamp(x, self.clamp_min, self.clamp_max) 28 | x = torch.logsumexp(x, dim=1, keepdim=True) 29 | x = x * torch.nn.functional.mish(x) # Mish activation 30 | return x 31 | 32 | batch_size = 128 33 | input_size = 512 34 | hidden_size = 1024 35 | scale_factor = 2.0 36 | clamp_min = -10.0 37 | clamp_max = 10.0 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, input_size)] 41 | 42 | def get_init_inputs(): 43 | return [input_size, hidden_size, scale_factor, clamp_min, clamp_max] -------------------------------------------------------------------------------- /KernelBench/level2/23_Conv3d_GroupNorm_Mean.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies Group Normalization, computes the mean 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, num_groups): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.group_norm = nn.GroupNorm(num_groups, out_channels) 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W). 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, 1). 19 | """ 20 | x = self.conv(x) 21 | x = self.group_norm(x) 22 | x = x.mean(dim=[1, 2, 3, 4]) # Compute mean across all dimensions except batch 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | D, H, W = 16, 32, 32 29 | kernel_size = 3 30 | num_groups = 8 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, D, H, W)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/24_Conv3d_Min_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a 3D convolution, applies minimum operation along a specific dimension, 7 | and then applies softmax. 
8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, dim): 10 | super(Model, self).__init__() 11 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 12 | self.dim = dim 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W) 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_channels, H, W) 20 | """ 21 | x = self.conv(x) 22 | x = torch.min(x, dim=self.dim)[0] # Apply minimum along the specified dimension 23 | x = torch.softmax(x, dim=1) # Apply softmax along the channel dimension 24 | return x 25 | 26 | batch_size = 128 27 | in_channels = 3 28 | out_channels = 16 29 | D, H, W = 16, 32, 32 30 | kernel_size = 3 31 | dim = 2 # Dimension along which to apply minimum operation (e.g., depth) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_channels, D, H, W)] 35 | 36 | def get_init_inputs(): 37 | return [in_channels, out_channels, kernel_size, dim] -------------------------------------------------------------------------------- /KernelBench/level2/25_Conv2d_Min_Tanh_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, applies minimum operation, Tanh, and another Tanh. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | x = self.conv(x) 14 | x = torch.min(x, dim=1, keepdim=True)[0] # Apply minimum operation along the channel dimension 15 | x = torch.tanh(x) 16 | x = torch.tanh(x) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 3 21 | out_channels = 16 22 | height, width = 32, 32 23 | kernel_size = 3 24 | 25 | def get_inputs(): 26 | return [torch.randn(batch_size, in_channels, height, width)] 27 | 28 | def get_init_inputs(): 29 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/27_Conv3d_HardSwish_ReLU_Softmax_Mean.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a 3D convolution, applies HardSwish, ReLU, Softmax, and then calculates the mean. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias=True): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, bias=bias) 11 | 12 | def forward(self, x): 13 | x = self.conv(x) 14 | x = torch.nn.functional.hardswish(x) 15 | x = torch.relu(x) 16 | x = torch.softmax(x, dim=1) 17 | x = torch.mean(x, dim=[2, 3, 4]) 18 | return x 19 | 20 | batch_size = 128 21 | in_channels = 3 22 | out_channels = 16 23 | depth, height, width = 16, 32, 32 24 | kernel_size = 3 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_channels, depth, height, width)] 28 | 29 | def get_init_inputs(): 30 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/28_BMM_InstanceNorm_Sum_ResidualAdd_Multiply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a batch matrix multiplication, instance normalization, summation, residual addition, and multiplication. 7 | """ 8 | def __init__(self, in_features, out_features, eps=1e-5, momentum=0.1): 9 | super(Model, self).__init__() 10 | self.bmm = nn.Linear(in_features, out_features) 11 | self.instance_norm = nn.InstanceNorm2d(out_features, eps=eps, momentum=momentum) 12 | 13 | def forward(self, x, y): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 17 | y (torch.Tensor): Input tensor of shape (batch_size, out_features). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_features). 21 | """ 22 | x = self.bmm(x) 23 | x = self.instance_norm(x.unsqueeze(1).unsqueeze(1)).squeeze(1).squeeze(1) 24 | x = x + y 25 | x = x * y 26 | return x 27 | 28 | batch_size = 128 29 | in_features = 64 30 | out_features = 128 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_features), torch.randn(batch_size, out_features)] 34 | 35 | def get_init_inputs(): 36 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/29_Matmul_Mish_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies Mish, and applies Mish again. 7 | """ 8 | def __init__(self, in_features, out_features): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | 12 | def forward(self, x): 13 | x = self.linear(x) 14 | x = torch.nn.functional.mish(x) 15 | x = torch.nn.functional.mish(x) 16 | return x 17 | 18 | batch_size = 128 19 | in_features = 10 20 | out_features = 20 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, in_features)] 24 | 25 | def get_init_inputs(): 26 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/2_ConvTranspose2d_BiasAdd_Clamp_Scaling_Clamp_Divide.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, adds a bias term, clamps, scales, clamps, and divides. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape, scaling_factor): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.scaling_factor = scaling_factor 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = x + self.bias 17 | x = torch.clamp(x, min=0.0, max=1.0) 18 | x = x * self.scaling_factor 19 | x = torch.clamp(x, min=0.0, max=1.0) 20 | x = x / self.scaling_factor 21 | return x 22 | 23 | batch_size = 128 24 | in_channels = 3 25 | out_channels = 16 26 | height, width = 32, 32 27 | kernel_size = 3 28 | stride = 2 29 | padding = 1 30 | output_padding = 1 31 | bias_shape = (out_channels, 1, 1) 32 | scaling_factor = 2.0 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/30_Gemm_GroupNorm_Hardtanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a GEMM, applies Group Normalization, and then HardTanh. 7 | """ 8 | def __init__(self, in_features, out_features, num_groups, hardtanh_min, hardtanh_max): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.group_norm = nn.GroupNorm(num_groups, out_features) 12 | self.hardtanh = nn.Hardtanh(min_val=hardtanh_min, max_val=hardtanh_max) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_features). 20 | """ 21 | x = self.gemm(x) 22 | x = self.group_norm(x) 23 | x = self.hardtanh(x) 24 | return x 25 | 26 | batch_size = 128 27 | in_features = 1024 28 | out_features = 512 29 | num_groups = 8 30 | hardtanh_min = -2.0 31 | hardtanh_max = 2.0 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_features)] 35 | 36 | def get_init_inputs(): 37 | return [in_features, out_features, num_groups, hardtanh_min, hardtanh_max] -------------------------------------------------------------------------------- /KernelBench/level2/31_Conv2d_Min_Add_Multiply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, takes the minimum with a constant, adds a bias term, and multiplies by a scaling factor. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, constant_value, bias_shape, scaling_factor): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.constant_value = constant_value 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | self.scaling_factor = scaling_factor 14 | 15 | def forward(self, x): 16 | x = self.conv(x) 17 | x = torch.min(x, torch.tensor(self.constant_value)) 18 | x = x + self.bias 19 | x = x * self.scaling_factor 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | height, width = 32, 32 26 | kernel_size = 3 27 | constant_value = 0.5 28 | bias_shape = (out_channels, 1, 1) 29 | scaling_factor = 2.0 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, constant_value, bias_shape, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/32_Conv2d_Scaling_Min.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, scales the output, and then applies a minimum operation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, scale_factor): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.scale_factor = scale_factor 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width). 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, out_channels, height, width). 19 | """ 20 | x = self.conv(x) 21 | x = x * self.scale_factor 22 | x = torch.min(x, dim=1, keepdim=True)[0] # Minimum along channel dimension 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | height, width = 32, 32 29 | kernel_size = 3 30 | scale_factor = 2.0 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, height, width)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, scale_factor] -------------------------------------------------------------------------------- /KernelBench/level2/33_Gemm_Scale_BatchNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a GEMM (general matrix multiplication), applies scaling, 7 | and then batch normalization. 
8 | """ 9 | def __init__(self, in_features, out_features, scale_shape, eps=1e-5, momentum=0.1): 10 | super(Model, self).__init__() 11 | self.gemm = nn.Linear(in_features, out_features) 12 | self.scale = nn.Parameter(torch.randn(scale_shape)) 13 | self.bn = nn.BatchNorm1d(out_features, eps=eps, momentum=momentum) 14 | 15 | def forward(self, x): 16 | x = self.gemm(x) 17 | x = x * self.scale 18 | x = self.bn(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_features = 1024 23 | out_features = 512 24 | scale_shape = (out_features,) 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_features)] 28 | 29 | def get_init_inputs(): 30 | return [in_features, out_features, scale_shape] -------------------------------------------------------------------------------- /KernelBench/level2/35_Conv2d_Subtract_HardSwish_MaxPool_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, subtracts a value, applies HardSwish, MaxPool, and Mish activation functions. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, subtract_value, pool_kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.subtract_value = subtract_value 12 | self.pool = nn.MaxPool2d(pool_kernel_size) 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = x - self.subtract_value 17 | x = torch.nn.functional.hardswish(x) 18 | x = self.pool(x) 19 | x = torch.nn.functional.mish(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | height, width = 32, 32 26 | kernel_size = 3 27 | subtract_value = 0.5 28 | pool_kernel_size = 2 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, in_channels, height, width)] 32 | 33 | def get_init_inputs(): 34 | return [in_channels, out_channels, kernel_size, subtract_value, pool_kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/36_ConvTranspose2d_Min_Sum_GELU_Add.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a convolution transpose, minimum operation, sum operation, GELU activation and addition. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, output_padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = torch.min(x, dim=1, keepdim=True)[0] # Minimum operation along channel dimension 16 | x = torch.sum(x, dim=2, keepdim=True) # Sum operation along height dimension 17 | x = torch.nn.functional.gelu(x) # GELU activation 18 | x = x + self.bias 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | height, width = 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | output_padding = 1 29 | bias_shape = (out_channels, 1, 1) 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/37_Matmul_Swish_Sum_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a matrix multiplication, applies Swish activation, sums with a bias term, and normalizes with GroupNorm. 7 | """ 8 | def __init__(self, in_features, out_features, num_groups, bias_shape): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.group_norm = nn.GroupNorm(num_groups, out_features) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_features). 20 | """ 21 | x = self.matmul(x) 22 | x = torch.sigmoid(x) * x # Swish activation 23 | x = x + self.bias 24 | x = self.group_norm(x) 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 512 29 | out_features = 1024 30 | num_groups = 32 31 | bias_shape = (out_features,) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_features)] 35 | 36 | def get_init_inputs(): 37 | return [in_features, out_features, num_groups, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/39_Gemm_Scale_BatchNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, scales the result, and applies batch normalization. 
7 | """ 8 | def __init__(self, in_features, out_features, scale_shape, eps=1e-5, momentum=0.1): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.scale = nn.Parameter(torch.randn(scale_shape)) 12 | self.bn = nn.BatchNorm1d(out_features, eps=eps, momentum=momentum) 13 | 14 | def forward(self, x): 15 | x = self.gemm(x) 16 | x = x * self.scale 17 | x = self.bn(x) 18 | return x 19 | 20 | batch_size = 128 21 | in_features = 1024 22 | out_features = 512 23 | scale_shape = (out_features,) 24 | 25 | def get_inputs(): 26 | return [torch.randn(batch_size, in_features)] 27 | 28 | def get_init_inputs(): 29 | return [in_features, out_features, scale_shape] -------------------------------------------------------------------------------- /KernelBench/level2/40_Matmul_Scaling_ResidualAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a matrix multiplication, scaling, and residual addition. 7 | 8 | Args: 9 | in_features (int): Number of input features. 10 | out_features (int): Number of output features. 11 | scaling_factor (float): Scaling factor to apply after matrix multiplication. 12 | """ 13 | def __init__(self, in_features, out_features, scaling_factor): 14 | super(Model, self).__init__() 15 | self.matmul = nn.Linear(in_features, out_features) 16 | self.scaling_factor = scaling_factor 17 | 18 | def forward(self, x): 19 | """ 20 | Forward pass of the model. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor of shape (batch_size, out_features). 27 | """ 28 | x = self.matmul(x) 29 | original_x = x.clone().detach() 30 | x = x * self.scaling_factor 31 | x = x + original_x 32 | return x 33 | 34 | batch_size = 128 35 | in_features = 64 36 | out_features = 128 37 | scaling_factor = 0.5 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, in_features)] 41 | 42 | def get_init_inputs(): 43 | return [in_features, out_features, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/41_Gemm_BatchNorm_GELU_GroupNorm_Mean_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, BatchNorm, GELU, GroupNorm, Mean, and ReLU operations in sequence. 7 | """ 8 | def __init__(self, in_features, out_features, num_groups): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.batch_norm = nn.BatchNorm1d(out_features) 12 | self.group_norm = nn.GroupNorm(num_groups, out_features) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
20 | """ 21 | x = self.gemm(x) 22 | x = self.batch_norm(x) 23 | x = torch.nn.functional.gelu(x) 24 | x = self.group_norm(x) 25 | x = torch.mean(x, dim=1, keepdim=True) 26 | x = torch.relu(x) 27 | return x 28 | 29 | batch_size = 128 30 | in_features = 512 31 | out_features = 1024 32 | num_groups = 8 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_features)] 36 | 37 | def get_init_inputs(): 38 | return [in_features, out_features, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/42_ConvTranspose2d_GlobalAvgPool_BiasAdd_LogSumExp_Sum_Multiply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, global average pooling, adds a bias, applies log-sum-exp, sum, and multiplication. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = torch.mean(x, dim=(2, 3), keepdim=True) # Global average pooling 16 | x = x + self.bias 17 | x = torch.logsumexp(x, dim=1, keepdim=True) # Log-sum-exp 18 | x = torch.sum(x, dim=(2, 3)) # Sum 19 | x = x * 10.0 # Multiplication 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | height, width = 32, 32 26 | kernel_size = 3 27 | bias_shape = (out_channels, 1, 1) 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_channels, height, width)] 31 | 32 | def get_init_inputs(): 33 | return [in_channels, out_channels, kernel_size, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/43_Conv3d_Max_LogSumExp_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, max pooling, log sum exp, and ReLU activation. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.max_pool = nn.MaxPool3d(kernel_size=2, stride=2) 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x: Input tensor of shape (batch_size, in_channels, depth, height, width) 17 | Returns: 18 | Output tensor of shape (batch_size, out_channels, depth', height', width') 19 | """ 20 | x = self.conv(x) 21 | x = self.max_pool(x) 22 | x = torch.logsumexp(x, dim=1, keepdim=True) 23 | x = torch.relu(x) 24 | return x 25 | 26 | batch_size = 128 27 | in_channels = 3 28 | out_channels = 16 29 | depth, height, width = 16, 32, 32 30 | kernel_size = 3 31 | stride = 1 32 | padding = 1 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, depth, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding] -------------------------------------------------------------------------------- /KernelBench/level2/44_ConvTranspose2d_Multiply_GlobalAvgPool_GlobalAvgPool_Mean.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, multiplies by a scalar, applies global average pooling, 7 | another global average pooling 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, multiplier): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 12 | self.multiplier = multiplier 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = x * self.multiplier 17 | x = torch.mean(x, dim=[2, 3], keepdim=True) # First global average pooling 18 | x = torch.mean(x, dim=[2, 3], keepdim=True) # Second global average pooling 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | height, width = 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | output_padding = 1 29 | multiplier = 0.5 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, multiplier] -------------------------------------------------------------------------------- /KernelBench/level2/45_Gemm_Sigmoid_Sum_LogSumExp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication (Gemm), applies Sigmoid, sums the result, and calculates the LogSumExp. 
7 | """ 8 | def __init__(self, input_size, hidden_size, output_size): 9 | super(Model, self).__init__() 10 | self.linear1 = nn.Linear(input_size, hidden_size) 11 | self.linear2 = nn.Linear(hidden_size, output_size) 12 | 13 | def forward(self, x): 14 | x = self.linear1(x) 15 | x = torch.sigmoid(x) 16 | x = torch.sum(x, dim=1) 17 | x = torch.logsumexp(x, dim=0) 18 | return x 19 | 20 | batch_size = 128 21 | input_size = 10 22 | hidden_size = 20 23 | output_size = 5 24 | 25 | def get_inputs(): 26 | return [torch.randn(batch_size, input_size)] 27 | 28 | def get_init_inputs(): 29 | return [input_size, hidden_size, output_size] -------------------------------------------------------------------------------- /KernelBench/level2/46_Conv2d_Subtract_Tanh_Subtract_AvgPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, subtraction, tanh activation, subtraction and average pooling. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, subtract1_value, subtract2_value, kernel_size_pool): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.subtract1_value = subtract1_value 12 | self.subtract2_value = subtract2_value 13 | self.avgpool = nn.AvgPool2d(kernel_size_pool) 14 | 15 | def forward(self, x): 16 | x = self.conv(x) 17 | x = x - self.subtract1_value 18 | x = torch.tanh(x) 19 | x = x - self.subtract2_value 20 | x = self.avgpool(x) 21 | return x 22 | 23 | batch_size = 128 24 | in_channels = 3 25 | out_channels = 16 26 | height, width = 32, 32 27 | kernel_size = 3 28 | subtract1_value = 0.5 29 | subtract2_value = 0.2 30 | kernel_size_pool = 2 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, height, width)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, subtract1_value, subtract2_value, kernel_size_pool] -------------------------------------------------------------------------------- /KernelBench/level2/47_Conv3d_Mish_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies Mish activation, and then applies Tanh activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W). 16 | 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, out_channels, D', H', W'). 
19 | """ 20 | x = self.conv(x) 21 | x = torch.nn.functional.mish(x) 22 | x = torch.tanh(x) 23 | return x 24 | 25 | batch_size = 16 26 | in_channels = 3 27 | out_channels = 16 28 | D, H, W = 16, 32, 32 29 | kernel_size = 3 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, D, H, W)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/48_Conv3d_Scaling_Tanh_Multiply_Sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, scales the output, applies tanh, multiplies by a scaling factor, and applies sigmoid. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, scaling_factor, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.scaling_factor = nn.Parameter(torch.randn(bias_shape)) 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = x * self.scaling_factor 17 | x = torch.tanh(x) 18 | x = x * self.bias 19 | x = torch.sigmoid(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | depth, height, width = 16, 32, 32 26 | kernel_size = 3 27 | scaling_factor = 2 28 | bias_shape = (out_channels, 1, 1, 1) 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, in_channels, depth, height, width)] 32 | 33 | def get_init_inputs(): 34 | return [in_channels, out_channels, kernel_size, scaling_factor, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/49_ConvTranspose3d_Softmax_Sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, applies Softmax and Sigmoid. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias=True): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding, bias=bias) 11 | self.softmax = nn.Softmax(dim=1) 12 | self.sigmoid = nn.Sigmoid() 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_channels, D, H, W). 
21 | """ 22 | x = self.conv_transpose(x) 23 | x = self.softmax(x) 24 | x = self.sigmoid(x) 25 | return x 26 | 27 | batch_size = 16 28 | in_channels = 32 29 | out_channels = 64 30 | D, H, W = 16, 32, 32 31 | kernel_size = 3 32 | stride = 2 33 | padding = 1 34 | output_padding = 1 35 | 36 | def get_inputs(): 37 | return [torch.randn(batch_size, in_channels, D, H, W)] 38 | 39 | def get_init_inputs(): 40 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding] -------------------------------------------------------------------------------- /KernelBench/level2/4_Conv2d_Mish_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies Mish, and another Mish. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | x = self.conv(x) 14 | x = torch.nn.functional.mish(x) 15 | x = torch.nn.functional.mish(x) 16 | return x 17 | 18 | batch_size = 128 19 | in_channels = 3 20 | out_channels = 16 21 | height, width = 32, 32 22 | kernel_size = 3 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_channels, height, width)] 26 | 27 | def get_init_inputs(): 28 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/50_ConvTranspose3d_Scaling_AvgPool_BiasAdd_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, scaling, average pooling, bias addition, and scaling. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, scale1, scale2, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.scale1 = nn.Parameter(torch.tensor(scale1)) 12 | self.avg_pool = nn.AvgPool3d(kernel_size=2) 13 | self.bias = nn.Parameter(torch.randn(bias_shape)) 14 | self.scale2 = nn.Parameter(torch.tensor(scale2)) 15 | 16 | def forward(self, x): 17 | x = self.conv_transpose(x) 18 | x = x * self.scale1 19 | x = self.avg_pool(x) 20 | x = x + self.bias 21 | x = x * self.scale2 22 | return x 23 | 24 | batch_size = 128 25 | in_channels = 3 26 | out_channels = 16 27 | depth, height, width = 16, 32, 32 28 | kernel_size = 3 29 | stride = 2 30 | padding = 1 31 | scale1 = 0.5 32 | scale2 = 1.0 33 | bias_shape = (out_channels, 1, 1, 1) 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, in_channels, depth, height, width)] 37 | 38 | def get_init_inputs(): 39 | return [in_channels, out_channels, kernel_size, stride, padding, scale1, scale2, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/51_Gemm_Subtract_GlobalAvgPool_LogSumExp_GELU_ResidualAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a series of operations: Gemm, Subtract, GlobalAvgPool, LogSumExp, GELU, and ResidualAdd. 
7 | """ 8 | def __init__(self, in_features, out_features, bias=True): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features, bias=bias) 11 | self.subtract = nn.Parameter(torch.randn(out_features)) 12 | 13 | def forward(self, x): 14 | original_x = x.clone().detach() 15 | # Gemm 16 | x = self.gemm(x) 17 | 18 | # Subtract 19 | x = x - self.subtract 20 | 21 | # GlobalAvgPool 22 | x = torch.mean(x, dim=1, keepdim=True) 23 | 24 | # LogSumExp 25 | x = torch.logsumexp(x, dim=1, keepdim=True) 26 | 27 | # GELU 28 | x = torch.nn.functional.gelu(x) 29 | 30 | # ResidualAdd 31 | x = x + original_x 32 | 33 | return x 34 | 35 | batch_size = 128 36 | in_features = 1024 37 | out_features = 512 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, in_features)] 41 | 42 | def get_init_inputs(): 43 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/52_Conv2d_Activation_BatchNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies activation, and then applies Batch Normalization. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, eps=1e-5, momentum=0.1): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.bn = nn.BatchNorm2d(out_channels, eps=eps, momentum=momentum) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = torch.multiply(torch.tanh(torch.nn.functional.softplus(x)), x) 16 | x = self.bn(x) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 3 21 | out_channels = 16 22 | height, width = 32, 32 23 | kernel_size = 3 24 | 25 | def get_inputs(): 26 | return [torch.randn(batch_size, in_channels, height, width)] 27 | 28 | def get_init_inputs(): 29 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/53_Gemm_Scaling_Hardtanh_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, scaling, hardtanh, and GELU activation. 
7 | """ 8 | def __init__(self, in_features, out_features, scaling_factor, hardtanh_min, hardtanh_max): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.scaling_factor = scaling_factor 12 | self.hardtanh = nn.Hardtanh(min_val=hardtanh_min, max_val=hardtanh_max) 13 | self.gelu = nn.GELU() 14 | 15 | def forward(self, x): 16 | x = self.gemm(x) 17 | x = x * self.scaling_factor 18 | x = self.hardtanh(x) 19 | x = self.gelu(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_features = 1024 24 | out_features = 512 25 | scaling_factor = 0.5 26 | hardtanh_min = -2 27 | hardtanh_max = 2 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_features)] 31 | 32 | def get_init_inputs(): 33 | return [in_features, out_features, scaling_factor, hardtanh_min, hardtanh_max] -------------------------------------------------------------------------------- /KernelBench/level2/54_Conv2d_Multiply_LeakyReLU_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, multiplies by a learnable scalar, applies LeakyReLU, and then GELU. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, multiplier_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.multiplier = nn.Parameter(torch.randn(multiplier_shape)) 12 | self.leaky_relu = nn.LeakyReLU() 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = x * self.multiplier 17 | x = self.leaky_relu(x) 18 | x = torch.nn.functional.gelu(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | height, width = 32, 32 25 | kernel_size = 3 26 | multiplier_shape = (out_channels, 1, 1) 27 | 28 | def get_inputs(): 29 | return [torch.randn(batch_size, in_channels, height, width)] 30 | 31 | def get_init_inputs(): 32 | return [in_channels, out_channels, kernel_size, multiplier_shape] -------------------------------------------------------------------------------- /KernelBench/level2/55_Matmul_MaxPool_Sum_Scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs matrix multiplication, max pooling, sum, and scaling. 7 | """ 8 | def __init__(self, in_features, out_features, kernel_size, scale_factor): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.max_pool = nn.MaxPool1d(kernel_size) 12 | self.scale_factor = scale_factor 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
21 | """ 22 | x = self.matmul(x) 23 | x = self.max_pool(x.unsqueeze(1)).squeeze(1) 24 | x = torch.sum(x, dim=1) 25 | x = x * self.scale_factor 26 | return x 27 | 28 | batch_size = 128 29 | in_features = 10 30 | out_features = 5 31 | kernel_size = 2 32 | scale_factor = 0.5 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_features)] 36 | 37 | def get_init_inputs(): 38 | return [in_features, out_features, kernel_size, scale_factor] -------------------------------------------------------------------------------- /KernelBench/level2/56_Matmul_Sigmoid_Sum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies sigmoid, and sums the result. 7 | """ 8 | def __init__(self, input_size, hidden_size): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(input_size, hidden_size) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x: Input tensor of shape (batch_size, input_size). 16 | 17 | Returns: 18 | Output tensor of shape (batch_size, 1). 19 | """ 20 | x = self.linear(x) 21 | x = torch.sigmoid(x) 22 | x = torch.sum(x, dim=1, keepdim=True) 23 | return x 24 | 25 | batch_size = 128 26 | input_size = 10 27 | hidden_size = 20 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, input_size)] 31 | 32 | def get_init_inputs(): 33 | return [input_size, hidden_size] -------------------------------------------------------------------------------- /KernelBench/level2/57_Conv2d_ReLU_HardSwish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies ReLU, and applies HardSwish activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | x = self.conv(x) 14 | x = torch.relu(x) 15 | x = x * torch.clamp((x + 3) / 6, 0, 1) 16 | return x 17 | 18 | batch_size = 128 19 | in_channels = 3 20 | out_channels = 16 21 | height, width = 32, 32 22 | kernel_size = 3 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_channels, height, width)] 26 | 27 | def get_init_inputs(): 28 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/58_ConvTranspose3d_LogSumExp_HardSwish_Subtract_Clamp_Max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, LogSumExp, HardSwish, subtraction, clamp, and maximum operations. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = torch.logsumexp(x, dim=1, keepdim=True) 16 | x = x * torch.sigmoid(x + 3) / 6 17 | x = x - self.bias 18 | x = torch.clamp(x, min=-1, max=1) 19 | x = torch.max(x, dim=1, keepdim=True)[0] 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | depth, height, width = 16, 32, 32 26 | kernel_size = 3 27 | stride = 2 28 | padding = 1 29 | bias_shape = (out_channels, 1, 1, 1) 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, depth, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/59_Matmul_Swish_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies Swish activation, and scales the result. 7 | """ 8 | def __init__(self, in_features, out_features, scaling_factor): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.scaling_factor = scaling_factor 12 | 13 | def forward(self, x): 14 | x = self.matmul(x) 15 | x = x * torch.sigmoid(x) # Swish activation 16 | x = x * self.scaling_factor 17 | return x 18 | 19 | batch_size = 128 20 | in_features = 1024 21 | out_features = 512 22 | scaling_factor = 2.0 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_features)] 26 | 27 | def get_init_inputs(): 28 | return [in_features, out_features, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/5_ConvTranspose2d_Subtract_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, subtracts a bias term, and applies tanh activation. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias_shape, stride=2, padding=1, output_padding=1): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = x - self.bias 16 | x = torch.tanh(x) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 32 21 | out_channels = 16 22 | height, width = 16, 16 23 | kernel_size = 4 24 | bias_shape = (out_channels, 1, 1) 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_channels, height, width)] 28 | 29 | def get_init_inputs(): 30 | return [in_channels, out_channels, kernel_size, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/60_ConvTranspose3d_Swish_GroupNorm_HardSwish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, applies Swish activation, 7 | group normalization, and then HardSwish activation. 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups, eps, bias=True): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=bias) 12 | self.group_norm = nn.GroupNorm(num_groups=groups, num_channels=out_channels, eps=eps) 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = torch.sigmoid(x) * x # Swish activation 17 | x = self.group_norm(x) 18 | x = torch.nn.functional.hardswish(x) # HardSwish activation 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | depth, height, width = 16, 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | groups = 4 29 | eps = 1e-5 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, depth, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, groups, eps] -------------------------------------------------------------------------------- /KernelBench/level2/61_ConvTranspose3d_ReLU_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed 3D convolution, applies ReLU, and then applies group normalization. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, groups, bias=False): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, bias=bias) 11 | self.relu = nn.ReLU() 12 | self.group_norm = nn.GroupNorm(num_groups=groups, num_channels=out_channels) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_channels, D, H, W). 
21 | """ 22 | x = self.conv_transpose(x) 23 | x = self.relu(x) 24 | x = self.group_norm(x) 25 | return x 26 | 27 | batch_size = 16 28 | in_channels = 64 29 | out_channels = 128 30 | D, H, W = 8, 16, 16 31 | kernel_size = 3 32 | groups = 8 33 | bias = False 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, in_channels, D, H, W)] 37 | 38 | def get_init_inputs(): 39 | return [in_channels, out_channels, kernel_size, groups, bias] -------------------------------------------------------------------------------- /KernelBench/level2/62_Matmul_GroupNorm_LeakyReLU_Sum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a matrix multiplication, group normalization, leaky ReLU activation, and element-wise sum. 7 | """ 8 | def __init__(self, input_size, hidden_size, num_groups, eps=1e-5, negative_slope=0.01): 9 | super(Model, self).__init__() 10 | self.fc = nn.Linear(input_size, hidden_size) 11 | self.gn = nn.GroupNorm(num_groups=num_groups, num_channels=hidden_size, eps=eps) 12 | self.leaky_relu = nn.LeakyReLU(negative_slope=negative_slope) 13 | 14 | def forward(self, x): 15 | """ 16 | Performs the forward pass of the model. 17 | 18 | Args: 19 | x: Input tensor of shape (batch_size, input_size). 20 | 21 | Returns: 22 | Output tensor of shape (batch_size, hidden_size). 23 | """ 24 | x = self.fc(x) 25 | x = self.gn(x) 26 | x = self.leaky_relu(x) 27 | x = x + x 28 | return x 29 | 30 | 31 | batch_size = 128 32 | input_size = 512 33 | hidden_size = 256 34 | num_groups = 8 35 | 36 | def get_inputs(): 37 | return [torch.randn(batch_size, input_size)] 38 | 39 | def get_init_inputs(): 40 | return [input_size, hidden_size, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/63_Gemm_ReLU_Divide.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies ReLU, and divides by a constant. 7 | """ 8 | def __init__(self, in_features, out_features, divisor): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | self.divisor = divisor 12 | 13 | def forward(self, x): 14 | x = self.linear(x) 15 | x = torch.relu(x) 16 | x = x / self.divisor 17 | return x 18 | 19 | batch_size = 128 20 | in_features = 1024 21 | out_features = 512 22 | divisor = 2.0 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_features)] 26 | 27 | def get_init_inputs(): 28 | return [in_features, out_features, divisor] -------------------------------------------------------------------------------- /KernelBench/level2/64_Gemm_LogSumExp_LeakyReLU_LeakyReLU_GELU_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication (Gemm), followed by LogSumExp, LeakyReLU, 7 | LeakyReLU, GELU, and GELU activations. 
8 | """ 9 | def __init__(self, in_features, out_features, bias=True): 10 | super(Model, self).__init__() 11 | self.linear = nn.Linear(in_features, out_features, bias=bias) 12 | 13 | def forward(self, x): 14 | # Gemm 15 | x = self.linear(x) 16 | # LogSumExp 17 | x = torch.logsumexp(x, dim=1, keepdim=True) 18 | # LeakyReLU 19 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.01) 20 | # LeakyReLU 21 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.01) 22 | # GELU 23 | x = torch.nn.functional.gelu(x) 24 | # GELU 25 | x = torch.nn.functional.gelu(x) 26 | return x 27 | 28 | batch_size = 128 29 | in_features = 1024 30 | out_features = 512 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_features)] 34 | 35 | def get_init_inputs(): 36 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/65_Conv2d_AvgPool_Sigmoid_Sum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | This model performs a convolution, average pooling, applies sigmoid, and sums the result. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, pool_kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.avg_pool = nn.AvgPool2d(pool_kernel_size) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = self.avg_pool(x) 16 | x = torch.sigmoid(x) 17 | x = torch.sum(x, dim=[1,2,3]) # Sum over all spatial dimensions 18 | return x 19 | 20 | batch_size = 128 21 | in_channels = 3 22 | out_channels = 16 23 | height, width = 32, 32 24 | kernel_size = 3 25 | pool_kernel_size = 2 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_channels, height, width)] 29 | 30 | def get_init_inputs(): 31 | return [in_channels, out_channels, kernel_size, pool_kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/66_Matmul_Dropout_Mean_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs matrix multiplication, applies dropout, calculates the mean, and then applies softmax. 7 | """ 8 | def __init__(self, in_features, out_features, dropout_p): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.dropout = nn.Dropout(dropout_p) 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 17 | 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
20 | """ 21 | x = self.matmul(x) 22 | x = self.dropout(x) 23 | x = torch.mean(x, dim=1, keepdim=True) 24 | x = torch.softmax(x, dim=1) 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 100 29 | out_features = 50 30 | dropout_p = 0.2 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_features)] 34 | 35 | def get_init_inputs(): 36 | return [in_features, out_features, dropout_p] -------------------------------------------------------------------------------- /KernelBench/level2/67_Conv2d_GELU_GlobalAvgPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies GELU, and then performs global average pooling. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x: Input tensor of shape (batch_size, in_channels, height, width) 16 | Returns: 17 | Output tensor of shape (batch_size, out_channels) 18 | """ 19 | x = self.conv(x) 20 | x = torch.nn.functional.gelu(x) 21 | x = torch.nn.functional.adaptive_avg_pool2d(x, 1) 22 | x = x.squeeze(-1).squeeze(-1) 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | height, width = 32, 32 29 | kernel_size = 3 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/68_Matmul_Min_Subtract.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies minimum, and subtracts a constant. 7 | """ 8 | def __init__(self, in_features, out_features, constant): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | self.constant = nn.Parameter(torch.tensor(constant)) 12 | 13 | def forward(self, x): 14 | x = self.linear(x) 15 | x = torch.min(x, self.constant) 16 | x = x - self.constant 17 | return x 18 | 19 | batch_size = 128 20 | in_features = 10 21 | out_features = 5 22 | constant = 2.0 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_features)] 26 | 27 | def get_init_inputs(): 28 | return [in_features, out_features, constant] -------------------------------------------------------------------------------- /KernelBench/level2/69_Conv2d_HardSwish_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, applies HardSwish, and then ReLU. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width). 16 | 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, out_channels, height, width). 
19 | """ 20 | x = self.conv(x) 21 | x = torch.nn.functional.hardswish(x) 22 | x = torch.relu(x) 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | height, width = 32, 32 29 | kernel_size = 3 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/6_Conv3d_Softmax_MaxPool_MaxPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies Softmax, and performs two max pooling operations. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, pool_kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.pool1 = nn.MaxPool3d(pool_kernel_size) 12 | self.pool2 = nn.MaxPool3d(pool_kernel_size) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x: Input tensor of shape (batch_size, in_channels, depth, height, width) 18 | Returns: 19 | Output tensor of shape (batch_size, out_channels, depth', height', width') where depth', height', width' are the dimensions after pooling. 20 | """ 21 | x = self.conv(x) 22 | x = torch.softmax(x, dim=1) 23 | x = self.pool1(x) 24 | x = self.pool2(x) 25 | return x 26 | 27 | batch_size = 128 28 | in_channels = 3 29 | out_channels = 16 30 | depth, height, width = 16, 32, 32 31 | kernel_size = 3 32 | pool_kernel_size = 2 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, depth, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, pool_kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/70_Gemm_Sigmoid_Scaling_ResidualAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model implementing the pattern "Gemm_Sigmoid_Scaling_ResidualAdd". 7 | """ 8 | def __init__(self, input_size, hidden_size, scaling_factor): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(input_size, hidden_size) 11 | self.scaling_factor = scaling_factor 12 | 13 | def forward(self, x): 14 | """ 15 | Forward pass of the model. 16 | 17 | Args: 18 | x (torch.Tensor): Input tensor of shape (batch_size, input_size). 19 | 20 | Returns: 21 | torch.Tensor: Output tensor of shape (batch_size, hidden_size). 22 | """ 23 | x = self.gemm(x) 24 | original_x = x 25 | x = torch.sigmoid(x) 26 | x = x * self.scaling_factor 27 | x = x + original_x 28 | return x 29 | 30 | batch_size = 128 31 | input_size = 1024 32 | hidden_size = 512 33 | scaling_factor = 2.0 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, input_size)] 37 | 38 | def get_init_inputs(): 39 | return [input_size, hidden_size, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/71_Conv2d_Divide_LeakyReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, divides by a constant, and applies LeakyReLU. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, divisor): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.divisor = divisor 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = x / self.divisor 16 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.01) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 3 21 | out_channels = 16 22 | height, width = 32, 32 23 | kernel_size = 3 24 | divisor = 2 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_channels, height, width)] 28 | 29 | def get_init_inputs(): 30 | return [in_channels, out_channels, kernel_size, divisor] -------------------------------------------------------------------------------- /KernelBench/level2/72_ConvTranspose3d_BatchNorm_AvgPool_AvgPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a 3D transposed convolution, followed by batch normalization, 7 | two average pooling layers. 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias_shape): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 12 | self.batch_norm = nn.BatchNorm3d(out_channels) 13 | self.avg_pool1 = nn.AvgPool3d(kernel_size=2) 14 | self.avg_pool2 = nn.AvgPool3d(kernel_size=2) 15 | 16 | def forward(self, x): 17 | x = self.conv_transpose(x) 18 | x = self.batch_norm(x) 19 | x = self.avg_pool1(x) 20 | x = self.avg_pool2(x) 21 | return x 22 | 23 | 24 | batch_size = 128 25 | in_channels = 3 26 | out_channels = 16 27 | depth, height, width = 32, 32, 32 28 | kernel_size = 3 29 | stride = 2 30 | padding = 1 31 | bias_shape = (out_channels, 1, 1, 1) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_channels, depth, height, width)] 35 | 36 | def get_init_inputs(): 37 | return [in_channels, out_channels, kernel_size, stride, padding, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/73_Conv2d_BatchNorm_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies Batch Normalization, and scales the output. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, scaling_factor): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.bn = nn.BatchNorm2d(out_channels) 12 | self.scaling_factor = scaling_factor 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = self.bn(x) 17 | x = x * self.scaling_factor 18 | return x 19 | 20 | batch_size = 128 21 | in_channels = 3 22 | out_channels = 16 23 | height, width = 32, 32 24 | kernel_size = 3 25 | scaling_factor = 2.0 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_channels, height, width)] 29 | 30 | def get_init_inputs(): 31 | return [in_channels, out_channels, kernel_size, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/75_Gemm_GroupNorm_Min_BiasAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, Group Normalization, Minimum operation, and Bias addition. 7 | """ 8 | def __init__(self, in_features, out_features, num_groups, bias_shape): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.group_norm = nn.GroupNorm(num_groups, out_features) 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | 14 | def forward(self, x): 15 | x = self.gemm(x) 16 | x = self.group_norm(x) 17 | x = torch.min(x, dim=1, keepdim=True)[0] 18 | x = x + self.bias 19 | return x 20 | 21 | batch_size = 128 22 | in_features = 512 23 | out_features = 256 24 | num_groups = 8 25 | bias_shape = (1, out_features, 1, 1) 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_features)] 29 | 30 | def get_init_inputs(): 31 | return [in_features, out_features, num_groups, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/76_Gemm_Add_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, adds a bias term, and applies ReLU. 7 | """ 8 | def __init__(self, in_features, out_features, bias_shape): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features, bias=False) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor with shape (batch_size, in_features). 17 | Returns: 18 | torch.Tensor: Output tensor with shape (batch_size, out_features). 19 | """ 20 | x = self.gemm(x) 21 | x = x + self.bias 22 | x = torch.relu(x) 23 | return x 24 | 25 | batch_size = 128 26 | in_features = 1024 27 | out_features = 512 28 | bias_shape = (out_features,) 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, in_features)] 32 | 33 | def get_init_inputs(): 34 | return [in_features, out_features, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/77_ConvTranspose3d_Scale_BatchNorm_GlobalAvgPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, scales the output, applies batch normalization, 7 | and then performs global average pooling. 
8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, scale_factor, eps=1e-5, momentum=0.1): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size) 12 | self.scale_factor = scale_factor 13 | self.batch_norm = nn.BatchNorm3d(out_channels, eps=eps, momentum=momentum) 14 | self.global_avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) 15 | 16 | def forward(self, x): 17 | x = self.conv_transpose(x) 18 | x = x * self.scale_factor 19 | x = self.batch_norm(x) 20 | x = self.global_avg_pool(x) 21 | return x 22 | 23 | batch_size = 16 24 | in_channels = 64 25 | out_channels = 32 26 | depth, height, width = 16, 32, 32 27 | kernel_size = 3 28 | scale_factor = 2.0 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, in_channels, depth, height, width)] 32 | 33 | def get_init_inputs(): 34 | return [in_channels, out_channels, kernel_size, scale_factor] -------------------------------------------------------------------------------- /KernelBench/level2/78_ConvTranspose3d_Max_Max_Sum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, followed by two max pooling layers and a sum operation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.max_pool1 = nn.MaxPool3d(kernel_size=2) 12 | self.max_pool2 = nn.MaxPool3d(kernel_size=3) 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = self.max_pool1(x) 17 | x = self.max_pool2(x) 18 | x = torch.sum(x, dim=1, keepdim=True) 19 | return x 20 | 21 | batch_size = 16 22 | in_channels = 8 23 | out_channels = 16 24 | depth, height, width = 16, 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_channels, depth, height, width)] 31 | 32 | def get_init_inputs(): 33 | return [in_channels, out_channels, kernel_size, stride, padding] -------------------------------------------------------------------------------- /KernelBench/level2/79_Conv3d_Multiply_InstanceNorm_Clamp_Multiply_Max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A 3D convolutional layer followed by multiplication, instance normalization, clamping, multiplication, and a max operation. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, multiplier_shape, clamp_min, clamp_max): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.multiplier = nn.Parameter(torch.randn(multiplier_shape)) 12 | self.instance_norm = nn.InstanceNorm3d(out_channels) 13 | self.clamp_min = clamp_min 14 | self.clamp_max = clamp_max 15 | 16 | def forward(self, x): 17 | x = self.conv(x) 18 | x = x * self.multiplier 19 | x = self.instance_norm(x) 20 | x = torch.clamp(x, self.clamp_min, self.clamp_max) 21 | x = x * self.multiplier 22 | x = torch.max(x, dim=1)[0] 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | depth, height, width = 16, 32, 32 29 | kernel_size = 3 30 | multiplier_shape = (out_channels, 1, 1, 1) 31 | clamp_min = -1.0 32 | clamp_max = 1.0 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, depth, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, multiplier_shape, clamp_min, clamp_max] -------------------------------------------------------------------------------- /KernelBench/level2/7_Conv3d_ReLU_LeakyReLU_GELU_Sigmoid_BiasAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies ReLU, LeakyReLU, GELU, Sigmoid activations, and bias in sequence. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = torch.relu(x) 16 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.01) 17 | x = torch.nn.functional.gelu(x) 18 | x = torch.sigmoid(x) 19 | x = x + self.bias 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | depth, height, width = 16, 32, 32 26 | kernel_size = 3 27 | bias_shape = (out_channels, 1, 1, 1) 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_channels, depth, height, width)] 31 | 32 | def get_init_inputs(): 33 | return [in_channels, out_channels, kernel_size, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/80_Gemm_Max_Subtract_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, followed by a max operation, subtraction, and GELU activation. 
7 | """ 8 | def __init__(self, in_features, out_features, max_dim): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.max_dim = max_dim 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x: Input tensor of shape (batch_size, in_features) 17 | 18 | Returns: 19 | Output tensor of shape (batch_size, out_features) 20 | """ 21 | x = self.gemm(x) 22 | x = torch.max(x, dim=self.max_dim, keepdim=True).values 23 | x = x - x.mean(dim=1, keepdim=True) 24 | x = torch.nn.functional.gelu(x) 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 512 29 | out_features = 1024 30 | max_dim = 1 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_features)] 34 | 35 | def get_init_inputs(): 36 | return [in_features, out_features, max_dim] -------------------------------------------------------------------------------- /KernelBench/level2/81_Gemm_Swish_Divide_Clamp_Tanh_Clamp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a gemm, swish, divide, clamp, tanh, and clamp operations. 7 | """ 8 | def __init__(self, in_features, out_features, bias=True): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features, bias=bias) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 16 | Returns: 17 | torch.Tensor: Output tensor of shape (batch_size, out_features). 18 | """ 19 | x = self.gemm(x) 20 | x = x * torch.sigmoid(x) # Swish activation 21 | x = x / 2.0 22 | x = torch.clamp(x, min=-1.0, max=1.0) # Clamp between -1 and 1 23 | x = torch.tanh(x) # Tanh activation 24 | x = torch.clamp(x, min=-1.0, max=1.0) # Clamp between -1 and 1 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 1024 29 | out_features = 512 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_features)] 33 | 34 | def get_init_inputs(): 35 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/82_Conv2d_Tanh_Scaling_BiasAdd_Max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a convolution, applies tanh, scaling, adds a bias term, and then max-pools. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, scaling_factor, bias_shape, pool_kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.scaling_factor = scaling_factor 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | self.max_pool = nn.MaxPool2d(pool_kernel_size) 14 | 15 | def forward(self, x): 16 | # Convolution 17 | x = self.conv(x) 18 | # Tanh activation 19 | x = torch.tanh(x) 20 | # Scaling 21 | x = x * self.scaling_factor 22 | # Bias addition 23 | x = x + self.bias 24 | # Max-pooling 25 | x = self.max_pool(x) 26 | return x 27 | 28 | batch_size = 128 29 | in_channels = 3 30 | out_channels = 16 31 | height, width = 32, 32 32 | kernel_size = 3 33 | scaling_factor = 2.0 34 | bias_shape = (out_channels, 1, 1) 35 | pool_kernel_size = 2 36 | 37 | def get_inputs(): 38 | return [torch.randn(batch_size, in_channels, height, width)] 39 | 40 | def get_init_inputs(): 41 | return [in_channels, out_channels, kernel_size, scaling_factor, bias_shape, pool_kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/83_Conv3d_GroupNorm_Min_Clamp_Dropout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies Group Normalization, minimum, clamp, and dropout. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, groups, min_value, max_value, dropout_p): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.norm = nn.GroupNorm(groups, out_channels) 12 | self.dropout = nn.Dropout(dropout_p) 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = self.norm(x) 17 | x = torch.min(x, torch.tensor(min_value)) 18 | x = torch.clamp(x, min=min_value, max=max_value) 19 | x = self.dropout(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | depth, height, width = 16, 32, 32 26 | kernel_size = 3 27 | groups = 8 28 | min_value = 0.0 29 | max_value = 1.0 30 | dropout_p = 0.2 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, depth, height, width)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, groups, min_value, max_value, dropout_p] -------------------------------------------------------------------------------- /KernelBench/level2/84_Gemm_BatchNorm_Scaling_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication (Gemm), Batch Normalization, scaling, and Softmax. 7 | """ 8 | def __init__(self, in_features, out_features, bn_eps=1e-5, bn_momentum=0.1, scale_shape=(1,)): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.bn = nn.BatchNorm1d(out_features, eps=bn_eps, momentum=bn_momentum) 12 | self.scale = nn.Parameter(torch.ones(scale_shape)) 13 | self.softmax = nn.Softmax(dim=1) 14 | 15 | def forward(self, x): 16 | """ 17 | Args: 18 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
21 | """ 22 | x = self.gemm(x) 23 | x = self.bn(x) 24 | x = self.scale * x 25 | x = self.softmax(x) 26 | return x 27 | 28 | batch_size = 128 29 | in_features = 1024 30 | out_features = 512 31 | bn_eps = 1e-5 32 | bn_momentum = 0.1 33 | scale_shape = (1,) 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, in_features)] 37 | 38 | def get_init_inputs(): 39 | return [in_features, out_features, bn_eps, bn_momentum, scale_shape] -------------------------------------------------------------------------------- /KernelBench/level2/86_Matmul_Divide_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a matrix multiplication, divides by a scalar, and applies GELU activation. 7 | """ 8 | def __init__(self, input_size, output_size, divisor): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(input_size, output_size) 11 | self.divisor = divisor 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, input_size). 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, output_size). 19 | """ 20 | x = self.linear(x) 21 | x = x / self.divisor 22 | x = torch.nn.functional.gelu(x) 23 | return x 24 | 25 | batch_size = 128 26 | input_size = 512 27 | output_size = 1024 28 | divisor = 10.0 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, input_size)] 32 | 33 | def get_init_inputs(): 34 | return [input_size, output_size, divisor] -------------------------------------------------------------------------------- /KernelBench/level2/87_Conv2d_Subtract_Subtract_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, subtracts two values, applies Mish activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, subtract_value_1, subtract_value_2): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.subtract_value_1 = subtract_value_1 12 | self.subtract_value_2 = subtract_value_2 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = x - self.subtract_value_1 17 | x = x - self.subtract_value_2 18 | x = torch.nn.functional.mish(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | height, width = 32, 32 25 | kernel_size = 3 26 | subtract_value_1 = 0.5 27 | subtract_value_2 = 0.2 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_channels, height, width)] 31 | 32 | def get_init_inputs(): 33 | return [in_channels, out_channels, kernel_size, subtract_value_1, subtract_value_2] -------------------------------------------------------------------------------- /KernelBench/level2/88_Gemm_GroupNorm_Swish_Multiply_Swish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, GroupNorm, Swish, Multiply, and Swish operations. 
7 | """ 8 | def __init__(self, in_features, out_features, num_groups, multiply_weight_shape): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.group_norm = nn.GroupNorm(num_groups, out_features) 12 | self.multiply_weight = nn.Parameter(torch.randn(multiply_weight_shape)) 13 | 14 | def forward(self, x): 15 | # (batch_size, in_features) -> (batch_size, out_features) 16 | x = self.gemm(x) 17 | # (batch_size, out_features) -> (batch_size, out_features) 18 | x = self.group_norm(x) 19 | # (batch_size, out_features) -> (batch_size, out_features) 20 | x = x * torch.sigmoid(x) 21 | # (batch_size, out_features) -> (batch_size, out_features) 22 | x = x * self.multiply_weight 23 | # (batch_size, out_features) -> (batch_size, out_features) 24 | x = x * torch.sigmoid(x) 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 512 29 | out_features = 1024 30 | num_groups = 16 31 | multiply_weight_shape = (out_features,) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_features)] 35 | 36 | def get_init_inputs(): 37 | return [in_features, out_features, num_groups, multiply_weight_shape] -------------------------------------------------------------------------------- /KernelBench/level2/90_Conv3d_LeakyReLU_Sum_Clamp_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies LeakyReLU, sums with a tensor, clamps, and applies GELU activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, sum_tensor_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.sum_tensor = nn.Parameter(torch.randn(sum_tensor_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.2) 16 | x = x + self.sum_tensor 17 | x = torch.clamp(x, min=-1.0, max=1.0) 18 | x = torch.nn.functional.gelu(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | depth, height, width = 16, 32, 32 25 | kernel_size = 3 26 | sum_tensor_shape = (out_channels, 1, 1, 1) 27 | 28 | def get_inputs(): 29 | return [torch.randn(batch_size, in_channels, depth, height, width)] 30 | 31 | def get_init_inputs(): 32 | return [in_channels, out_channels, kernel_size, sum_tensor_shape] -------------------------------------------------------------------------------- /KernelBench/level2/91_ConvTranspose2d_Softmax_BiasAdd_Scaling_Sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, applies softmax, adds a bias term, scales the result, and applies sigmoid. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape, scaling_factor): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.scaling_factor = scaling_factor 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = torch.softmax(x, dim=1) 17 | x = x + self.bias 18 | x = x * self.scaling_factor 19 | x = torch.sigmoid(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 32 24 | out_channels = 64 25 | height, width = 16, 16 26 | kernel_size = 4 27 | stride = 2 28 | padding = 1 29 | output_padding = 1 30 | bias_shape = (out_channels, 1, 1) 31 | scaling_factor = 2.0 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_channels, height, width)] 35 | 36 | def get_init_inputs(): 37 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/92_Conv2d_GroupNorm_Tanh_HardSwish_ResidualAdd_LogSumExp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, applies Group Normalization, Tanh, HardSwish, 7 | Residual Addition, and LogSumExp. 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, groups, eps=1e-5): 10 | super(Model, self).__init__() 11 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 12 | self.group_norm = nn.GroupNorm(groups, out_channels, eps=eps) 13 | self.tanh = nn.Tanh() 14 | self.hard_swish = nn.Hardswish() 15 | 16 | def forward(self, x): 17 | # Convolution 18 | x_conv = self.conv(x) 19 | # Group Normalization 20 | x_norm = self.group_norm(x_conv) 21 | # Tanh 22 | x_tanh = self.tanh(x_norm) 23 | # HardSwish 24 | x_hard_swish = self.hard_swish(x_tanh) 25 | # Residual Addition 26 | x_res = x_conv + x_hard_swish 27 | # LogSumExp 28 | x_logsumexp = torch.logsumexp(x_res, dim=1, keepdim=True) 29 | return x_logsumexp 30 | 31 | batch_size = 128 32 | in_channels = 3 33 | out_channels = 16 34 | height, width = 32, 32 35 | kernel_size = 3 36 | groups = 8 37 | 38 | def get_inputs(): 39 | return [torch.randn(batch_size, in_channels, height, width)] 40 | 41 | def get_init_inputs(): 42 | return [in_channels, out_channels, kernel_size, groups] -------------------------------------------------------------------------------- /KernelBench/level2/93_ConvTranspose2d_Add_Min_GELU_Multiply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, adds a value, takes the minimum, applies GELU, and multiplies by a value. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, add_value, multiply_value): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride) 11 | self.add_value = add_value 12 | self.multiply_value = multiply_value 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = x + self.add_value 17 | x = torch.min(x, torch.tensor(0.0)) 18 | x = torch.nn.functional.gelu(x) 19 | x = x * self.multiply_value 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 32 24 | out_channels = 16 25 | height, width = 32, 32 26 | kernel_size = 4 27 | stride = 2 28 | add_value = 0.5 29 | multiply_value = 2.0 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, add_value, multiply_value] -------------------------------------------------------------------------------- /KernelBench/level2/94_Gemm_BiasAdd_Hardtanh_Mish_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a GEMM, BiasAdd, Hardtanh, Mish, and GroupNorm operations in sequence. 7 | """ 8 | def __init__(self, in_features, out_features, bias_shape, num_groups): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.hardtanh = nn.Hardtanh() 13 | self.mish = nn.Mish() 14 | self.groupnorm = nn.GroupNorm(num_groups=num_groups, num_channels=out_features) 15 | 16 | def forward(self, x): 17 | """ 18 | Args: 19 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 20 | Returns: 21 | torch.Tensor: Output tensor of shape (batch_size, out_features). 22 | """ 23 | x = self.gemm(x) 24 | x = x + self.bias 25 | x = self.hardtanh(x) 26 | x = self.mish(x) 27 | x = self.groupnorm(x) 28 | return x 29 | 30 | 31 | batch_size = 128 32 | in_features = 512 33 | out_features = 1024 34 | bias_shape = (out_features,) 35 | num_groups = 32 36 | 37 | def get_inputs(): 38 | return [torch.randn(batch_size, in_features)] 39 | 40 | def get_init_inputs(): 41 | return [in_features, out_features, bias_shape, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/95_Matmul_Add_Swish_Tanh_GELU_Hardtanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, adds a value, applies Swish, Tanh, GELU, and Hardtanh activation functions. 
7 | """ 8 | def __init__(self, in_features, out_features, add_value_shape): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.add_value = nn.Parameter(torch.randn(add_value_shape)) 12 | 13 | def forward(self, x): 14 | x = self.matmul(x) 15 | x = x + self.add_value 16 | x = torch.sigmoid(x) * x # Swish 17 | x = torch.tanh(x) 18 | x = torch.nn.functional.gelu(x) # GELU 19 | x = torch.nn.functional.hardtanh(x, min_val=-1, max_val=1) # Hardtanh 20 | return x 21 | 22 | batch_size = 128 23 | in_features = 1024 24 | out_features = 512 25 | add_value_shape = (out_features,) 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_features)] 29 | 30 | def get_init_inputs(): 31 | return [in_features, out_features, add_value_shape] -------------------------------------------------------------------------------- /KernelBench/level2/97_Matmul_BatchNorm_BiasAdd_Divide_Swish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication, batch normalization, bias addition, division, and Swish activation. 7 | """ 8 | def __init__(self, in_features, out_features, bn_eps=1e-5, bn_momentum=0.1, bias_shape=(1,), divide_value=1.0): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.bn = nn.BatchNorm1d(out_features, eps=bn_eps, momentum=bn_momentum) 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | self.divide_value = divide_value 14 | 15 | def forward(self, x): 16 | x = self.matmul(x) 17 | x = self.bn(x) 18 | x = x + self.bias 19 | x = x / self.divide_value 20 | x = x * torch.sigmoid(x) 21 | return x 22 | 23 | batch_size = 128 24 | in_features = 1024 25 | out_features = 512 26 | bn_eps = 1e-5 27 | bn_momentum = 0.1 28 | bias_shape = (1,) 29 | divide_value = 1.0 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_features)] 33 | 34 | def get_init_inputs(): 35 | return [in_features, out_features, bn_eps, bn_momentum, bias_shape, divide_value] -------------------------------------------------------------------------------- /KernelBench/level2/98_Matmul_AvgPool_GELU_Scale_Max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model implementing the pattern "Matmul_AvgPool_GELU_Scale_Max". 7 | """ 8 | def __init__(self, in_features, out_features, pool_kernel_size, scale_factor): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.avg_pool = nn.AvgPool1d(kernel_size=pool_kernel_size) 12 | self.scale_factor = scale_factor 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
21 | """ 22 | x = self.matmul(x) 23 | x = self.avg_pool(x.unsqueeze(1)).squeeze(1) 24 | x = torch.nn.functional.gelu(x) 25 | x = x * self.scale_factor 26 | x = torch.max(x, dim=1).values 27 | return x 28 | 29 | batch_size = 128 30 | in_features = 512 31 | out_features = 256 32 | pool_kernel_size = 4 33 | scale_factor = 2.0 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, in_features)] 37 | 38 | def get_init_inputs(): 39 | return [in_features, out_features, pool_kernel_size, scale_factor] -------------------------------------------------------------------------------- /KernelBench/level2/99_Matmul_GELU_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies GELU, and then applies Softmax. 7 | """ 8 | def __init__(self, in_features, out_features): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | 12 | def forward(self, x): 13 | x = self.linear(x) 14 | x = torch.nn.functional.gelu(x) 15 | x = torch.nn.functional.softmax(x, dim=1) 16 | return x 17 | 18 | batch_size = 128 19 | in_features = 100 20 | out_features = 10 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, in_features)] 24 | 25 | def get_init_inputs(): 26 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/9_Matmul_Subtract_Multiply_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication, subtraction, multiplication, and ReLU activation. 
7 | """ 8 | def __init__(self, in_features, out_features, subtract_value, multiply_value): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | self.subtract_value = subtract_value 12 | self.multiply_value = multiply_value 13 | 14 | def forward(self, x): 15 | x = self.linear(x) 16 | x = x - self.subtract_value 17 | x = x * self.multiply_value 18 | x = torch.relu(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_features = 10 23 | out_features = 5 24 | subtract_value = 2.0 25 | multiply_value = 1.5 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_features)] 29 | 30 | def get_init_inputs(): 31 | return [in_features, out_features, subtract_value, multiply_value] -------------------------------------------------------------------------------- /KernelBench/level3/13_DenseNet121TransitionLayer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | def __init__(self, num_input_features: int, num_output_features: int): 7 | """ 8 | :param num_input_features: The number of input feature maps 9 | :param num_output_features: The number of output feature maps 10 | """ 11 | super(Model, self).__init__() 12 | self.transition = nn.Sequential( 13 | nn.BatchNorm2d(num_input_features), 14 | nn.ReLU(inplace=True), 15 | nn.Conv2d(num_input_features, num_output_features, kernel_size=1, bias=False), 16 | nn.AvgPool2d(kernel_size=2, stride=2) 17 | ) 18 | 19 | def forward(self, x): 20 | """ 21 | :param x: Input tensor of shape (batch_size, num_input_features, height, width) 22 | :return: Downsampled tensor with reduced number of feature maps 23 | """ 24 | return self.transition(x) 25 | 26 | batch_size = 10 27 | num_input_features = 32 28 | num_output_features = 64 29 | height, width = 224, 224 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, num_input_features, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [num_input_features, num_output_features] 36 | -------------------------------------------------------------------------------- /KernelBench/level3/1_MLP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | def __init__(self, input_size, layer_sizes, output_size): 7 | """ 8 | :param input_size: The number of input features 9 | :param layer_sizes: A list of ints containing the sizes of each hidden layer 10 | :param output_size: The number of output features 11 | """ 12 | super(Model, self).__init__() 13 | 14 | layers = [] 15 | current_input_size = input_size 16 | 17 | for layer_size in layer_sizes: 18 | layers.append(nn.Linear(current_input_size, layer_size)) 19 | layers.append(nn.ReLU()) 20 | current_input_size = layer_size 21 | 22 | layers.append(nn.Linear(current_input_size, output_size)) 23 | 24 | self.network = nn.Sequential(*layers) 25 | 26 | def forward(self, x): 27 | """ 28 | :param x: The input tensor, shape (batch_size, input_size) 29 | :return: The output tensor, shape (batch_size, output_size) 30 | """ 31 | return self.network(x) 32 | 33 | # Test code 34 | batch_size = 1 35 | input_size = 1000 36 | layer_sizes = [400, 800] 37 | output_size = 500 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, input_size)] 41 | 42 | def get_init_inputs(): 43 | return [input_size, layer_sizes, output_size] 
-------------------------------------------------------------------------------- /KernelBench/level3/31_VisionAttention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | def __init__(self, embed_dim, num_heads): 7 | """ 8 | Attention Block using Multihead Self-Attention. 9 | :param embed_dim: Embedding dimension (the number of channels) 10 | :param num_heads: Number of attention heads 11 | """ 12 | super(Model, self).__init__() 13 | self.attn = nn.MultiheadAttention(embed_dim, num_heads) 14 | self.norm = nn.LayerNorm(embed_dim) 15 | 16 | def forward(self, x): 17 | """ 18 | Forward pass of the AttentionBlock. 19 | :param x: Input tensor of shape (B, C, H, W) 20 | :return: Output tensor of the same shape (B, C, H, W) 21 | """ 22 | B, C, H, W = x.shape 23 | x = x.view(B, C, H * W).permute(2, 0, 1) # (seq_len, batch_size, embed_dim) 24 | attn_output, _ = self.attn(x, x, x) 25 | x = self.norm(attn_output + x) # (seq_len, batch_size, embed_dim) 26 | x = x.permute(1, 2, 0).view(B, C, H, W) 27 | return x 28 | 29 | embed_dim = 128 30 | num_heads = 4 31 | batch_size = 2 32 | num_channels = embed_dim 33 | image_height = 128 34 | image_width = 128 35 | 36 | def get_inputs(): 37 | return [torch.randn(batch_size, num_channels, image_height, image_width)] 38 | 39 | def get_init_inputs(): 40 | return [embed_dim, num_heads] -------------------------------------------------------------------------------- /KernelBench/level4/10_google-bigbird-roberta-base_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/bigbird-roberta-base" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/11_google-electra-small-discriminator_bs1_seq511.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/electra-small-discriminator" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 511 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] 
-------------------------------------------------------------------------------- /KernelBench/level4/12_google-electra-small-discriminator_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/electra-small-discriminator" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/13_google-reformer-enwik8_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/reformer-enwik8" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/14_google-electra-small-discriminator_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/electra-small-discriminator" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/15_google-reformer-enwik8_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = 
AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/reformer-enwik8" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/16_gpt2_bs1_seq1023.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "gpt2" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 1023 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/17_facebook-bart-large_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/bart-large" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/18_EleutherAI-gpt-neo-2p7B_bs512_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "EleutherAI/gpt-neo-2.7B" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 512 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] 
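Note: every problem file in this dump, including the Hugging Face-backed level4 problems above, exposes the same three entry points: a Model class, get_inputs(), and get_init_inputs(). The snippet below is only an illustrative sketch of how a harness might consume such a problem module; the helper name run_reference and the device handling are assumptions made for illustration, not the actual KernelBench evaluation code.

import torch

def run_reference(problem_module):
    # Illustrative only: build the reference model from its init inputs and run one forward pass.
    model = problem_module.Model(*problem_module.get_init_inputs()).cuda().eval()
    inputs = [t.cuda() if isinstance(t, torch.Tensor) else t for t in problem_module.get_inputs()]
    with torch.no_grad():
        return model(*inputs)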
-------------------------------------------------------------------------------- /KernelBench/level4/19_gpt2_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "gpt2" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/1_EleutherAI-gpt-neo-2p7B_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "EleutherAI/gpt-neo-2.7B" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/20_facebook-bart-large_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/bart-large" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/2_facebook-opt-1p3b_bs1_seq2047.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 
| return self.model(x).logits 14 | 15 | model_name = "facebook/opt-1.3b" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 2047 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/3_EleutherAI-gpt-neo-2p7B_bs1_seq2047.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "EleutherAI/gpt-neo-2.7B" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 2047 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/4_facebook-opt-1p3b_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/opt-1.3b" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/5_google-bigbird-roberta-base_bs1_seq4095.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/bigbird-roberta-base" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 4095 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/6_facebook-bart-large_bs1_seq1023.py: 
-------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/bart-large" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 1023 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/7_gpt2_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "gpt2" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/8_facebook-opt-1p3b_bs512_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/opt-1.3b" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 512 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/9_google-bigbird-roberta-base_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/bigbird-roberta-base" 16 | config = AutoConfig.from_pretrained(model_name) 17 | 
vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Anne Ouyang, Simon Guo, Azalia Mirhoseini 4 | Scaling Intelligence Lab, Stanford University 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. -------------------------------------------------------------------------------- /assets/figures/KernelBenchMascot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/assets/figures/KernelBenchMascot.png -------------------------------------------------------------------------------- /assets/figures/KernelBenchWorkFlow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/assets/figures/KernelBenchWorkFlow.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic 2 | modal 3 | numpy 4 | openai 5 | packaging 6 | pydra_config 7 | torch==2.5.0 8 | tqdm 9 | datasets 10 | transformers 11 | google-generativeai 12 | together 13 | pytest 14 | ninja 15 | archon-ai 16 | einops -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == "__main__": 4 | setup( 5 | name="src", 6 | version="0.0.1", 7 | packages=["src"], 8 | ) 9 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/__init__.py -------------------------------------------------------------------------------- /src/prompts/README.md: 
-------------------------------------------------------------------------------- 1 | 2 | This folder includes PyTorch modules paired with CUDA kernels, which are used as in-context examples in KernelBench. 3 | 4 | 5 | 6 | Acknowledgements: 7 | - Fused GeLU and Tiled Matmul: [Christian Mills, GPU MODE Lecture 04](https://christianjmills.com/posts/cuda-mode-notes/lecture-004/) 8 | - Minimal Flash Attention: [Peter Kim, Minimal Flash Attention](https://github.com/tspeterkim/flash-attention-minimal/tree/main) 9 | 10 | The examples currently included cover element-wise addition, fused GeLU (tanh approximation), tiled matrix multiplication, a minimal flash attention, and a small MNIST-style CNN. 11 | [TODO] Table detailing content and speedups of each example -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_add.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, a, b): 11 | return a + b 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | a = torch.randn(1, 128).cuda() 17 | b = torch.randn(1, 128).cuda() 18 | return [a, b] 19 | 20 | 21 | def get_init_inputs(): 22 | # randomly generate tensors required for initialization based on the model architecture 23 | return [] -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_flash_attn.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Model(nn.Module): 8 | """ 9 | Model that performs an attention operation 10 | """ 11 | def __init__(self) -> None: 12 | super().__init__() 13 | 14 | def forward(self, Q, K, V): 15 | att = (Q @ K.transpose(-2, -1) * (1.0 / math.sqrt(K.size(-1)))) 16 | att = F.softmax(att, dim=-1) 17 | y = att @ V 18 | return y 19 | 20 | batch_size = 32 21 | n_head = 12 22 | seq_len = 64 23 | head_embd = 32 24 | 25 | def get_inputs(): 26 | # randomly generate input tensors based on the model architecture 27 | Q = torch.randn(batch_size, n_head, seq_len, head_embd) 28 | K = torch.randn(batch_size, n_head, seq_len, head_embd) 29 | V = torch.randn(batch_size, n_head, seq_len, head_embd) 30 | return [Q, K, V] 31 | 32 | 33 | def get_init_inputs(): 34 | # randomly generate tensors required for initialization based on the model architecture 35 | return [] 36 | -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_fuse_gelu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, x): 11 | return F.gelu(x, approximate='tanh') 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | x = torch.randn(1024, 1024).cuda() 17 | return [x] 18 | 19 | 20 | def get_init_inputs(): 21 | # randomly generate tensors required for initialization based on the model architecture 22 | return [] 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_mnist2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn
as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 10 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 11 | self.fc1 = nn.Linear(320, 50) 12 | self.fc2 = nn.Linear(50, 10) 13 | 14 | def forward(self, x): 15 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 16 | x = F.relu(F.max_pool2d(self.conv2(x), 2)) 17 | x = x.view(-1, 320) 18 | x = F.relu(self.fc1(x)) 19 | x = self.fc2(x) 20 | return F.log_softmax(x, dim=1) 21 | 22 | 23 | def get_inputs(): 24 | # randomly generate input tensors based on the model architecture 25 | x = torch.randn(1, 1, 28, 28).cuda() 26 | return [x] 27 | 28 | 29 | def get_init_inputs(): 30 | # randomly generate tensors required for initialization based on the model architecture 31 | return [] -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_tiled_matmul.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, a, b): 11 | return a@b 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | a = torch.randn(1024, 1024).cuda() 17 | b = torch.randn(1024, 1024).cuda() 18 | return [a, b] 19 | 20 | 21 | def get_init_inputs(): 22 | # randomly generate tensors required for initialization based on the model architecture 23 | return [] 24 | 25 | -------------------------------------------------------------------------------- /src/prompts/model_ex_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/prompts/model_ex_0.py -------------------------------------------------------------------------------- /src/prompts/model_ex_1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, a, b): 11 | return a + b 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | a = torch.randn(1, 128).cuda() 17 | b = torch.randn(1, 128).cuda() 18 | return [a, b] 19 | 20 | 21 | def get_init_inputs(): 22 | # randomly generate tensors required for initialization based on the model architecture 23 | return [] 24 | -------------------------------------------------------------------------------- /src/prompts/model_ex_2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | def __init__(self) -> None: 7 | super().__init__() 8 | self.op1 = ...... 9 | self.op2 = ...... 10 | self.op3 = ...... 11 | self.op4 = ...... 12 | self.op5 = ...... 13 | self.op6 = ...... 14 | 15 | def forward(self, x): 16 | x = self.op1(x, ......) 17 | x = self.op2(x, ......) 18 | x = self.op3(x, ......) 19 | x = self.op4(x, ......) 20 | x = self.op5(x, ......) 21 | x = self.op6(x, ......) 
22 | return x 23 | -------------------------------------------------------------------------------- /src/prompts/model_ex_add.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, a, b): 11 | return a + b 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | a = torch.randn(1, 128).cuda() 17 | b = torch.randn(1, 128).cuda() 18 | return [a, b] 19 | 20 | 21 | def get_init_inputs(): 22 | # randomly generate tensors required for initialization based on the model architecture 23 | return [] 24 | -------------------------------------------------------------------------------- /src/prompts/model_new_ex_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/prompts/model_new_ex_0.py -------------------------------------------------------------------------------- /src/scratch/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/scratch/model.py -------------------------------------------------------------------------------- /src/scratch/model_new.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/scratch/model_new.py -------------------------------------------------------------------------------- /src/unit_tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | from src.dataset import get_code_hash 4 | 5 | """ 6 | Usage 7 | pytest test_dataset.py 8 | """ 9 | 10 | 11 | def test_get_code_hash(): 12 | """ 13 | Test collision and equivalence checking 14 | """ 15 | 16 | code_snippet_batch_1_v1 = """ 17 | import torch 18 | # This is for a single batch 19 | ''' 20 | Some random multi-line comment 21 | ''' 22 | B = 1 23 | """ 24 | 25 | code_snippet_batch_1_v2 = """ 26 | import torch 27 | ''' 28 | More problem descriptions (updated) 29 | ''' 30 | # low batch setting 31 | 32 | B = 1 33 | """ 34 | 35 | code_snippet_batch_64 = """ 36 | import torch 37 | # This is for a single batch 38 | ''' 39 | Some random multi-line comment 40 | ''' 41 | B = 64 42 | """ 43 | 44 | assert get_code_hash(code_snippet_batch_1_v1) == get_code_hash(code_snippet_batch_1_v2), \ 45 | "Hash should be equal for semantically equivalent code with different comments" 46 | 47 | assert get_code_hash(code_snippet_batch_1_v1) != get_code_hash(code_snippet_batch_64), \ 48 | "Hash should differ for code with different batch sizes" --------------------------------------------------------------------------------
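The test above pins down the key invariant of src.dataset.get_code_hash: hashes must ignore comments and docstring-style strings but change when real code (such as the batch size) changes. Below is a hypothetical sketch of one way such a hash could be computed; it is illustrative only, is not the actual src/dataset.py implementation, and the helper name sketch_code_hash is invented here.

import ast
import hashlib

def sketch_code_hash(code: str) -> str:
    # Parsing to an AST already discards '#' comments; the transformer below also drops
    # bare string expressions, i.e. the docstring-style multi-line comments in the test.
    class _StripDocstrings(ast.NodeTransformer):
        def visit_Expr(self, node):
            if isinstance(node.value, ast.Constant) and isinstance(node.value.value, str):
                return None
            return node

    tree = _StripDocstrings().visit(ast.parse(code))
    return hashlib.sha256(ast.unparse(tree).encode()).hexdigest()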