├── .gitignore ├── KernelBench ├── level1 │ ├── 100_HingeLoss.py │ ├── 10_3D_tensor_matrix_multiplication.py │ ├── 11_4D_tensor_matrix_multiplication.py │ ├── 12_Matmul_with_diagonal_matrices_.py │ ├── 13_Matmul_for_symmetric_matrices.py │ ├── 14_Matmul_for_upper_triangular_matrices.py │ ├── 15_Matmul_for_lower_triangular_matrices.py │ ├── 16_Matmul_with_transposed_A.py │ ├── 17_Matmul_with_transposed_B.py │ ├── 18_Matmul_with_transposed_both.py │ ├── 19_ReLU.py │ ├── 1_Square_matrix_multiplication_.py │ ├── 20_LeakyReLU.py │ ├── 21_Sigmoid.py │ ├── 22_Tanh.py │ ├── 23_Softmax.py │ ├── 24_LogSoftmax.py │ ├── 25_Swish.py │ ├── 26_GELU_.py │ ├── 27_SELU_.py │ ├── 28_HardSigmoid.py │ ├── 29_Softplus.py │ ├── 2_Standard_matrix_multiplication_.py │ ├── 30_Softsign.py │ ├── 31_ELU.py │ ├── 32_HardTanh.py │ ├── 33_BatchNorm.py │ ├── 34_InstanceNorm.py │ ├── 35_GroupNorm_.py │ ├── 36_RMSNorm_.py │ ├── 37_FrobeniusNorm_.py │ ├── 38_L1Norm_.py │ ├── 39_L2Norm_.py │ ├── 3_Batched_matrix_multiplication.py │ ├── 40_LayerNorm.py │ ├── 41_Max_Pooling_1D.py │ ├── 42_Max_Pooling_2D.py │ ├── 43_Max_Pooling_3D.py │ ├── 44_Average_Pooling_1D.py │ ├── 45_Average_Pooling_2D.py │ ├── 46_Average_Pooling_3D.py │ ├── 47_Sum_reduction_over_a_dimension.py │ ├── 48_Mean_reduction_over_a_dimension.py │ ├── 49_Max_reduction_over_a_dimension.py │ ├── 4_Matrix_vector_multiplication_.py │ ├── 50_Product_reduction_over_a_dimension.py │ ├── 51_Argmax_over_a_dimension.py │ ├── 52_Argmin_over_a_dimension.py │ ├── 53_Min_reduction_over_a_dimension.py │ ├── 54_conv_standard_3D__square_input__square_kernel.py │ ├── 55_conv_standard_2D__asymmetric_input__square_kernel.py │ ├── 56_conv_standard_2D__asymmetric_input__asymmetric_kernel.py │ ├── 57_conv_transposed_2D__square_input__square_kernel.py │ ├── 58_conv_transposed_3D__asymmetric_input__asymmetric_kernel.py │ ├── 59_conv_standard_3D__asymmetric_input__square_kernel.py │ ├── 5_Matrix_scalar_multiplication.py │ ├── 60_conv_standard_3D__square_input__asymmetric_kernel.py │ ├── 61_conv_transposed_3D__square_input__square_kernel.py │ ├── 62_conv_standard_2D__square_input__asymmetric_kernel.py │ ├── 63_conv_standard_2D__square_input__square_kernel.py │ ├── 64_conv_transposed_1D.py │ ├── 65_conv_transposed_2D__square_input__asymmetric_kernel.py │ ├── 66_conv_standard_3D__asymmetric_input__asymmetric_kernel.py │ ├── 67_conv_standard_1D.py │ ├── 68_conv_transposed_3D__square_input__asymmetric_kernel.py │ ├── 69_conv_transposed_2D__asymmetric_input__asymmetric_kernel.py │ ├── 6_Matmul_with_large_K_dimension_.py │ ├── 70_conv_transposed_3D__asymmetric_input__square_kernel.py │ ├── 71_conv_transposed_2D__asymmetric_input__square_kernel.py │ ├── 72_conv_transposed_3D_asymmetric_input_asymmetric_kernel___strided_padded_grouped_.py │ ├── 73_conv_transposed_3D_asymmetric_input_square_kernel__strided_padded__grouped.py │ ├── 74_conv_transposed_1D_dilated.py │ ├── 75_conv_transposed_2D_asymmetric_input_asymmetric_kernel_strided__grouped____padded____dilated__.py │ ├── 76_conv_standard_1D_dilated_strided__.py │ ├── 77_conv_transposed_3D_square_input_square_kernel___padded____dilated____strided__.py │ ├── 78_conv_transposed_2D_asymmetric_input_asymmetric_kernel___padded__.py │ ├── 79_conv_transposed_1D_asymmetric_input_square_kernel___padded____strided____dilated__.py │ ├── 7_Matmul_with_small_K_dimension_.py │ ├── 80_conv_standard_2D_square_input_asymmetric_kernel___dilated____padded__.py │ ├── 81_conv_transposed_2D_asymmetric_input_square_kernel___dilated____padded____strided__.py │ ├── 
82_conv_depthwise_2D_square_input_square_kernel.py │ ├── 83_conv_depthwise_2D_square_input_asymmetric_kernel.py │ ├── 84_conv_depthwise_2D_asymmetric_input_square_kernel.py │ ├── 85_conv_depthwise_2D_asymmetric_input_asymmetric_kernel.py │ ├── 86_conv_depthwise_separable_2D.py │ ├── 87_conv_pointwise_2D.py │ ├── 88_MinGPTNewGelu.py │ ├── 89_cumsum.py │ ├── 8_Matmul_with_irregular_shapes_.py │ ├── 90_cumprod.py │ ├── 91_cumsum_reverse.py │ ├── 92_cumsum_exclusive.py │ ├── 93_masked_cumsum.py │ ├── 94_MSELoss.py │ ├── 95_CrossEntropyLoss.py │ ├── 96_HuberLoss.py │ ├── 97_CosineSimilarityLoss.py │ ├── 98_KLDivLoss.py │ ├── 99_TripletMarginLoss.py │ └── 9_Tall_skinny_matrix_multiplication_.py ├── level2 │ ├── 100_ConvTranspose3d_Clamp_Min_Divide.py │ ├── 10_ConvTranspose2d_MaxPool_Hardtanh_Mean_Tanh.py │ ├── 11_ConvTranspose2d_BatchNorm_Tanh_MaxPool_GroupNorm.py │ ├── 12_Gemm_Multiply_LeakyReLU.py │ ├── 13_ConvTranspose3d_Mean_Add_Softmax_Tanh_Scaling.py │ ├── 14_Gemm_Divide_Sum_Scaling.py │ ├── 15_ConvTranspose3d_BatchNorm_Subtract.py │ ├── 16_ConvTranspose2d_Mish_Add_Hardtanh_Scaling.py │ ├── 17_Conv2d_InstanceNorm_Divide.py │ ├── 18_Matmul_Sum_Max_AvgPool_LogSumExp_LogSumExp.py │ ├── 19_ConvTranspose2d_GELU_GroupNorm.py │ ├── 1_Conv2D_ReLU_BiasAdd.py │ ├── 20_ConvTranspose3d_Sum_ResidualAdd_Multiply_ResidualAdd.py │ ├── 21_Conv2d_Add_Scale_Sigmoid_GroupNorm.py │ ├── 22_Matmul_Scale_ResidualAdd_Clamp_LogSumExp_Mish.py │ ├── 23_Conv3d_GroupNorm_Mean.py │ ├── 24_Conv3d_Min_Softmax.py │ ├── 25_Conv2d_Min_Tanh_Tanh.py │ ├── 26_ConvTranspose3d_Add_HardSwish.py │ ├── 27_Conv3d_HardSwish_ReLU_Softmax_Mean.py │ ├── 28_BMM_InstanceNorm_Sum_ResidualAdd_Multiply.py │ ├── 29_Matmul_Mish_Mish.py │ ├── 2_ConvTranspose2d_BiasAdd_Clamp_Scaling_Clamp_Divide.py │ ├── 30_Gemm_GroupNorm_Hardtanh.py │ ├── 31_Conv2d_Min_Add_Multiply.py │ ├── 32_Conv2d_Scaling_Min.py │ ├── 33_Gemm_Scale_BatchNorm.py │ ├── 34_ConvTranspose3d_LayerNorm_GELU_Scaling.py │ ├── 35_Conv2d_Subtract_HardSwish_MaxPool_Mish.py │ ├── 36_ConvTranspose2d_Min_Sum_GELU_Add.py │ ├── 37_Matmul_Swish_Sum_GroupNorm.py │ ├── 38_ConvTranspose3d_AvgPool_Clamp_Softmax_Multiply.py │ ├── 39_Gemm_Scale_BatchNorm.py │ ├── 3_ConvTranspose3d_Sum_LayerNorm_AvgPool_GELU.py │ ├── 40_Matmul_Scaling_ResidualAdd.py │ ├── 41_Gemm_BatchNorm_GELU_GroupNorm_Mean_ReLU.py │ ├── 42_ConvTranspose2d_GlobalAvgPool_BiasAdd_LogSumExp_Sum_Multiply.py │ ├── 43_Conv3d_Max_LogSumExp_ReLU.py │ ├── 44_ConvTranspose2d_Multiply_GlobalAvgPool_GlobalAvgPool_Mean.py │ ├── 45_Gemm_Sigmoid_Sum_LogSumExp.py │ ├── 46_Conv2d_Subtract_Tanh_Subtract_AvgPool.py │ ├── 47_Conv3d_Mish_Tanh.py │ ├── 48_Conv3d_Scaling_Tanh_Multiply_Sigmoid.py │ ├── 49_ConvTranspose3d_Softmax_Sigmoid.py │ ├── 4_Conv2d_Mish_Mish.py │ ├── 50_ConvTranspose3d_Scaling_AvgPool_BiasAdd_Scaling.py │ ├── 51_Gemm_Subtract_GlobalAvgPool_LogSumExp_GELU_ResidualAdd.py │ ├── 52_Conv2d_Activation_BatchNorm.py │ ├── 53_Gemm_Scaling_Hardtanh_GELU.py │ ├── 54_Conv2d_Multiply_LeakyReLU_GELU.py │ ├── 55_Matmul_MaxPool_Sum_Scale.py │ ├── 56_Matmul_Sigmoid_Sum.py │ ├── 57_Conv2d_ReLU_HardSwish.py │ ├── 58_ConvTranspose3d_LogSumExp_HardSwish_Subtract_Clamp_Max.py │ ├── 59_Matmul_Swish_Scaling.py │ ├── 5_ConvTranspose2d_Subtract_Tanh.py │ ├── 60_ConvTranspose3d_Swish_GroupNorm_HardSwish.py │ ├── 61_ConvTranspose3d_ReLU_GroupNorm.py │ ├── 62_Matmul_GroupNorm_LeakyReLU_Sum.py │ ├── 63_Gemm_ReLU_Divide.py │ ├── 64_Gemm_LogSumExp_LeakyReLU_LeakyReLU_GELU_GELU.py │ ├── 65_Conv2d_AvgPool_Sigmoid_Sum.py │ ├── 66_Matmul_Dropout_Mean_Softmax.py │ ├── 
67_Conv2d_GELU_GlobalAvgPool.py │ ├── 68_Matmul_Min_Subtract.py │ ├── 69_Conv2d_HardSwish_ReLU.py │ ├── 6_Conv3d_Softmax_MaxPool_MaxPool.py │ ├── 70_Gemm_Sigmoid_Scaling_ResidualAdd.py │ ├── 71_Conv2d_Divide_LeakyReLU.py │ ├── 72_ConvTranspose3d_BatchNorm_AvgPool_AvgPool.py │ ├── 73_Conv2d_BatchNorm_Scaling.py │ ├── 74_ConvTranspose3d_LeakyReLU_Multiply_LeakyReLU_Max.py │ ├── 75_Gemm_GroupNorm_Min_BiasAdd.py │ ├── 76_Gemm_Add_ReLU.py │ ├── 77_ConvTranspose3d_Scale_BatchNorm_GlobalAvgPool.py │ ├── 78_ConvTranspose3d_Max_Max_Sum.py │ ├── 79_Conv3d_Multiply_InstanceNorm_Clamp_Multiply_Max.py │ ├── 7_Conv3d_ReLU_LeakyReLU_GELU_Sigmoid_BiasAdd.py │ ├── 80_Gemm_Max_Subtract_GELU.py │ ├── 81_Gemm_Swish_Divide_Clamp_Tanh_Clamp.py │ ├── 82_Conv2d_Tanh_Scaling_BiasAdd_Max.py │ ├── 83_Conv3d_GroupNorm_Min_Clamp_Dropout.py │ ├── 84_Gemm_BatchNorm_Scaling_Softmax.py │ ├── 85_Conv2d_GroupNorm_Scale_MaxPool_Clamp.py │ ├── 86_Matmul_Divide_GELU.py │ ├── 87_Conv2d_Subtract_Subtract_Mish.py │ ├── 88_Gemm_GroupNorm_Swish_Multiply_Swish.py │ ├── 89_ConvTranspose3d_MaxPool_Softmax_Subtract_Swish_Max.py │ ├── 8_Conv3d_Divide_Max_GlobalAvgPool_BiasAdd_Sum.py │ ├── 90_Conv3d_LeakyReLU_Sum_Clamp_GELU.py │ ├── 91_ConvTranspose2d_Softmax_BiasAdd_Scaling_Sigmoid.py │ ├── 92_Conv2d_GroupNorm_Tanh_HardSwish_ResidualAdd_LogSumExp.py │ ├── 93_ConvTranspose2d_Add_Min_GELU_Multiply.py │ ├── 94_Gemm_BiasAdd_Hardtanh_Mish_GroupNorm.py │ ├── 95_Matmul_Add_Swish_Tanh_GELU_Hardtanh.py │ ├── 96_ConvTranspose3d_Multiply_Max_GlobalAvgPool_Clamp.py │ ├── 97_Matmul_BatchNorm_BiasAdd_Divide_Swish.py │ ├── 98_Matmul_AvgPool_GELU_Scale_Max.py │ ├── 99_Matmul_GELU_Softmax.py │ └── 9_Matmul_Subtract_Multiply_ReLU.py ├── level3 │ ├── 10_ResNet101.py │ ├── 11_VGG16.py │ ├── 12_VGG19.py │ ├── 13_DenseNet121TransitionLayer.py │ ├── 14_DenseNet121DenseBlock.py │ ├── 15_DenseNet121.py │ ├── 16_DenseNet201.py │ ├── 17_SqueezeNetFireModule.py │ ├── 18_SqueezeNet.py │ ├── 19_MobileNetV1.py │ ├── 1_MLP.py │ ├── 20_MobileNetV2.py │ ├── 21_EfficientNetMBConv.py │ ├── 22_EfficientNetB0.py │ ├── 23_EfficientNetB1.py │ ├── 24_EfficientNetB2.py │ ├── 25_ShuffleNetUnit.py │ ├── 26_ShuffleNet.py │ ├── 27_RegNet.py │ ├── 28_VisionTransformer.py │ ├── 29_SwinMLP.py │ ├── 2_ShallowWideMLP.py │ ├── 30_SwinTransformerV2.py │ ├── 31_VisionAttention.py │ ├── 32_ConvolutionalVisionTransformer.py │ ├── 33_VanillaRNN.py │ ├── 34_VanillaRNNHidden.py │ ├── 35_LTSM.py │ ├── 36_LTSMHn.py │ ├── 37_LTSMCn.py │ ├── 38_LTSMBidirectional.py │ ├── 39_GRU.py │ ├── 3_DeepNarrowMLP.py │ ├── 40_GRUHidden.py │ ├── 41_GRUBirectional.py │ ├── 42_GRUBidirectionalHidden.py │ ├── 43_MinGPTCausalAttention.py │ ├── 44_MiniGPTBlock.py │ ├── 45_UNetSoftmax.py │ ├── 46_NetVladWithGhostClusters.py │ ├── 47_NetVladNoGhostClusters.py │ ├── 48_Mamba2ReturnY.py │ ├── 49_Mamba2ReturnFinalState.py │ ├── 4_LeNet5.py │ ├── 50_ReLUSelfAttention.py │ ├── 5_AlexNet.py │ ├── 6_GoogleNetInceptionModule.py │ ├── 7_GoogleNetInceptionV1.py │ ├── 8_ResNetBasicBlock.py │ └── 9_ResNet18.py └── level4 │ ├── 10_google-bigbird-roberta-base_bs1024_seq32.py │ ├── 11_google-electra-small-discriminator_bs1_seq511.py │ ├── 12_google-electra-small-discriminator_bs1024_seq32.py │ ├── 13_google-reformer-enwik8_bs32_seq256.py │ ├── 14_google-electra-small-discriminator_bs32_seq256.py │ ├── 15_google-reformer-enwik8_bs1024_seq32.py │ ├── 16_gpt2_bs1_seq1023.py │ ├── 17_facebook-bart-large_bs1024_seq32.py │ ├── 18_EleutherAI-gpt-neo-2p7B_bs512_seq32.py │ ├── 19_gpt2_bs1024_seq32.py │ ├── 
1_EleutherAI-gpt-neo-2p7B_bs32_seq256.py │ ├── 20_facebook-bart-large_bs32_seq256.py │ ├── 2_facebook-opt-1p3b_bs1_seq2047.py │ ├── 3_EleutherAI-gpt-neo-2p7B_bs1_seq2047.py │ ├── 4_facebook-opt-1p3b_bs32_seq256.py │ ├── 5_google-bigbird-roberta-base_bs1_seq4095.py │ ├── 6_facebook-bart-large_bs1_seq1023.py │ ├── 7_gpt2_bs32_seq256.py │ ├── 8_facebook-opt-1p3b_bs512_seq32.py │ └── 9_google-bigbird-roberta-base_bs32_seq256.py ├── LICENSE ├── README.md ├── assets └── figures │ ├── KernelBenchMascot.png │ └── KernelBenchWorkFlow.png ├── requirements.txt ├── results └── timing │ ├── A100-80GB_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── A100_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── A10G_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── B200_together │ ├── baseline_time_torch.json │ ├── baseline_time_torch_compile_cudagraphs.json │ ├── baseline_time_torch_compile_inductor_default.json │ ├── baseline_time_torch_compile_inductor_max-autotune-no-cudagraphs.json │ ├── baseline_time_torch_compile_inductor_max-autotune.json │ └── baseline_time_torch_compile_inductor_reduce-overhead.json │ ├── H100_together │ ├── baseline_time_torch.json │ ├── baseline_time_torch_compile_cudagraphs.json │ ├── baseline_time_torch_compile_inductor_default.json │ ├── baseline_time_torch_compile_inductor_max-autotune-no-cudagraphs.json │ ├── baseline_time_torch_compile_inductor_max-autotune.json │ └── baseline_time_torch_compile_inductor_reduce-overhead.json │ ├── L40S_matx3 │ ├── baseline_time_torch.json │ ├── baseline_time_torch_compile_cudagraphs.json │ ├── baseline_time_torch_compile_inductor_default.json │ ├── baseline_time_torch_compile_inductor_max-autotune-no-cudagraphs.json │ ├── baseline_time_torch_compile_inductor_max-autotune.json │ └── baseline_time_torch_compile_inductor_reduce-overhead.json │ ├── L40S_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── L4_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ ├── README.md │ ├── T4_modal │ ├── baseline_time_torch.json │ └── baseline_time_torch_compile_inductor_default.json │ └── old │ ├── baseline_time_cleaned.json │ ├── baseline_time_matx2.json │ ├── baseline_time_matx3.json │ ├── baseline_time_torch_compile_cleaned.json │ ├── baseline_time_torch_compile_matx2.json │ └── baseline_time_torch_compile_matx3.json ├── scripts ├── benchmark_eval_analysis.py ├── debug_stddout.py ├── eval_from_generations.py ├── generate_and_eval_single_sample.py ├── generate_and_eval_single_sample_modal.py ├── generate_baseline_time.py ├── generate_baseline_time_modal.py ├── generate_samples.py ├── inspect_baseline.py ├── inspect_kernel_pytorch_profiler.py ├── inspect_triton.py ├── run_and_check.py ├── verify_bench.py └── verify_generation.py ├── setup.py └── src ├── __init__.py ├── analysis.py ├── compile.py ├── dataset.py ├── eval.py ├── frameworks.py ├── make_hf_dataset.py ├── prompt_constructor.py ├── prompts ├── README.md ├── cot │ ├── model_cot_fuse_gelu.py │ ├── model_cot_mnist2.py │ └── model_cot_tiled_matmul.py ├── few_shot │ ├── model_ex_add.py │ ├── model_ex_flash_attn.py │ ├── model_ex_fuse_gelu.py │ ├── model_ex_mnist2.py │ ├── model_ex_tiled_matmul.py │ ├── model_new_ex_add.py │ ├── model_new_ex_flash_attn.py │ ├── model_new_ex_fuse_gelu.py │ ├── model_new_ex_mnist2.py │ └── model_new_ex_tiled_matmul.py ├── hardware │ └── 
gpu_specs.py ├── model_ex_0.py ├── model_ex_1.py ├── model_ex_2.py ├── model_ex_add.py ├── model_new_ex_0.py ├── model_new_ex_1.py ├── model_new_ex_2.py └── model_new_ex_add.py ├── score.py ├── scratch ├── log.txt ├── model.py ├── model_new.py ├── prompt.txt └── test.py ├── unit_tests ├── test_dataset.py ├── test_score.py └── test_utils.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | **__pycache__/ 2 | scratch/ 3 | **/scratch/ 4 | *.egg-info/ 5 | **build/ 6 | **dist/ 7 | # **results/ 8 | results/* 9 | runs/* 10 | cache/* 11 | !results/timing/ 12 | .env 13 | -------------------------------------------------------------------------------- /KernelBench/level1/100_HingeLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Hinge Loss for binary classification tasks. 7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.mean(torch.clamp(1 - predictions * targets, min=0)) 16 | 17 | batch_size = 128 18 | input_shape = (1,) 19 | dim = 1 20 | 21 | def get_inputs(): 22 | return [torch.randn(batch_size, *input_shape), torch.randint(0, 2, (batch_size, 1)).float() * 2 - 1] 23 | 24 | def get_init_inputs(): 25 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/10_3D_tensor_matrix_multiplication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Performs 3D tensor-matrix multiplication. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs 3D tensor-matrix multiplication. 14 | 15 | Args: 16 | A (torch.Tensor): Input 3D tensor of shape (N, M, K). 17 | B (torch.Tensor): Input matrix of shape (K, L). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (N, M, L), resulting from the multiplication of A and B along the last dimension of A. 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | N = 16 25 | M = 1024 26 | K = 2048 27 | L = 768 28 | 29 | def get_inputs(): 30 | A = torch.randn(N, M, K) 31 | B = torch.randn(K, L) 32 | return [A, B] 33 | 34 | def get_init_inputs(): 35 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/11_4D_tensor_matrix_multiplication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Performs 4D tensor-matrix multiplication: 7 | C[b, i, j, k] = sum_l A[b, i, j, l] * B[l, k] 8 | 9 | Args: 10 | A (torch.Tensor): Input 4D tensor of shape (b, i, j, l) 11 | B (torch.Tensor): Input matrix of shape (l, k) 12 | 13 | Returns: 14 | torch.Tensor: Output 4D tensor of shape (b, i, j, k) 15 | """ 16 | def __init__(self): 17 | super(Model, self).__init__() 18 | 19 | def forward(self, A, B): 20 | """ 21 | Performs the 4D tensor-matrix multiplication. 
22 | 23 | Args: 24 | A (torch.Tensor): Input 4D tensor of shape (b, i, j, l) 25 | B (torch.Tensor): Input matrix of shape (l, k) 26 | 27 | Returns: 28 | torch.Tensor: Output 4D tensor of shape (b, i, j, k) 29 | """ 30 | return torch.einsum("bijl,lk->bijk", A, B) 31 | 32 | # Test code 33 | b = 16 34 | i = 256 35 | j = 512 36 | l = 256 37 | k = 768 38 | 39 | def get_inputs(): 40 | A = torch.randn(b, i, j, l) 41 | B = torch.randn(l, k) 42 | return [A, B] 43 | 44 | def get_init_inputs(): 45 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/12_Matmul_with_diagonal_matrices_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication of a diagonal matrix with another matrix. 7 | C = diag(A) * B 8 | """ 9 | def __init__(self): 10 | super(Model, self).__init__() 11 | 12 | def forward(self, A, B): 13 | """ 14 | Performs the matrix multiplication. 15 | 16 | Args: 17 | A (torch.Tensor): A 1D tensor representing the diagonal of the diagonal matrix. Shape: (N,). 18 | B (torch.Tensor): A 2D tensor representing the second matrix. Shape: (N, M). 19 | 20 | Returns: 21 | torch.Tensor: The result of the matrix multiplication. Shape: (N, M). 22 | """ 23 | return torch.diag(A) @ B 24 | 25 | M = 4096 26 | N = 4096 27 | 28 | def get_inputs(): 29 | A = torch.randn(N) 30 | B = torch.randn(N, M) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/13_Matmul_for_symmetric_matrices.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) with A and B being symmetric matrices. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs matrix multiplication of two symmetric matrices. 14 | 15 | Args: 16 | A (torch.Tensor): Input matrix A, shape (N, N), symmetric. 17 | B (torch.Tensor): Input matrix B, shape (N, N), symmetric. 18 | 19 | Returns: 20 | torch.Tensor: Output matrix C, shape (N, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | N = 4096 25 | 26 | def get_inputs(): 27 | """ 28 | Generates a pair of random symmetric matrices for testing. 29 | 30 | Returns: 31 | list: List containing two symmetric tensors A and B. 32 | """ 33 | A = torch.randn(N, N) 34 | A = (A + A.T) / 2 # Ensure symmetry 35 | B = torch.randn(N, N) 36 | B = (B + B.T) / 2 # Ensure symmetry 37 | return [A, B] 38 | 39 | def get_init_inputs(): 40 | """ 41 | No specific initialization inputs needed for this model. 42 | 43 | Returns: 44 | list: Empty list. 45 | """ 46 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/14_Matmul_for_upper_triangular_matrices.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs matrix multiplication (C = A * B) for upper triangular matrices. 
7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs matrix multiplication for upper triangular matrices. 14 | 15 | Args: 16 | A (torch.Tensor): Upper triangular matrix of shape (N, N). 17 | B (torch.Tensor): Upper triangular matrix of shape (N, N). 18 | 19 | Returns: 20 | torch.Tensor: The product of A and B, also an upper triangular matrix of shape (N, N). 21 | """ 22 | return torch.triu(torch.matmul(A, B)) 23 | 24 | N = 4096 25 | 26 | def get_inputs(): 27 | """ 28 | Generates upper triangular matrices for testing. 29 | 30 | Returns: 31 | list: A list containing two upper triangular matrices of shape (N, N). 32 | """ 33 | A = torch.triu(torch.randn(N, N)) 34 | B = torch.triu(torch.randn(N, N)) 35 | return [A, B] 36 | 37 | def get_init_inputs(): 38 | """ 39 | No specific initialization inputs are needed for this model. 40 | 41 | Returns: 42 | list: An empty list. 43 | """ 44 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/15_Matmul_for_lower_triangular_matrices.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication (C = A * B) where A and B are lower triangular matrices. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs matrix multiplication of lower triangular matrices A and B. 14 | 15 | Args: 16 | A (torch.Tensor): Lower triangular matrix of shape (N, N). 17 | B (torch.Tensor): Lower triangular matrix of shape (N, N). 18 | 19 | Returns: 20 | torch.Tensor: The result of matrix multiplication C of shape (N, N). 21 | """ 22 | return torch.tril(torch.matmul(A, B)) 23 | 24 | M = 4096 25 | 26 | def get_inputs(): 27 | A = torch.randn(M, M) 28 | B = torch.randn(M, M) 29 | A = torch.tril(A) 30 | B = torch.tril(B) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/16_Matmul_with_transposed_A.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 
21 | """ 22 | return torch.matmul(A.T, B) 23 | 24 | M = 1024 25 | K = 4096 26 | N = 2048 27 | 28 | def get_inputs(): 29 | A = torch.randn(K, M) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/17_Matmul_with_transposed_B.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 21 | """ 22 | return torch.matmul(A, B.T) 23 | 24 | M = 1024 25 | K = 4096 26 | N = 2048 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(N, K) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/18_Matmul_with_transposed_both.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 21 | """ 22 | return torch.matmul(A.T, B.T) 23 | 24 | M = 1024 25 | K = 4096 26 | N = 2048 27 | 28 | def get_inputs(): 29 | A = torch.randn(K, M) 30 | B = torch.randn(N, K) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/19_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a ReLU activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies ReLU activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with ReLU applied, same shape as input. 
20 | """ 21 | return torch.relu(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/1_Square_matrix_multiplication_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single square matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs the matrix multiplication. 14 | 15 | Args: 16 | A (torch.Tensor): Input matrix A of shape (N, N). 17 | B (torch.Tensor): Input matrix B of shape (N, N). 18 | 19 | Returns: 20 | torch.Tensor: Output matrix C of shape (N, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | N = 2048 25 | 26 | def get_inputs(): 27 | A = torch.randn(N, N) 28 | B = torch.randn(N, N) 29 | return [A, B] 30 | 31 | def get_init_inputs(): 32 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/20_LeakyReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a LeakyReLU activation. 7 | """ 8 | def __init__(self, negative_slope: float = 0.01): 9 | """ 10 | Initializes the LeakyReLU module. 11 | 12 | Args: 13 | negative_slope (float, optional): The negative slope of the activation function. Defaults to 0.01. 14 | """ 15 | super(Model, self).__init__() 16 | self.negative_slope = negative_slope 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies LeakyReLU activation to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of any shape. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with LeakyReLU applied, same shape as input. 27 | """ 28 | return torch.nn.functional.leaky_relu(x, negative_slope=self.negative_slope) 29 | 30 | batch_size = 16 31 | dim = 16384 32 | 33 | def get_inputs(): 34 | x = torch.randn(batch_size, dim) 35 | return [x] 36 | 37 | def get_init_inputs(): 38 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/21_Sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Sigmoid activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Sigmoid activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Sigmoid applied, same shape as input. 
20 | """ 21 | return torch.sigmoid(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/22_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Tanh activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Tanh activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Tanh applied, same shape as input. 20 | """ 21 | return torch.tanh(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/23_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Softmax activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Softmax activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, num_features). 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Softmax applied, same shape as input. 20 | """ 21 | return torch.softmax(x, dim=1) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/24_LogSoftmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a LogSoftmax activation. 7 | """ 8 | def __init__(self, dim: int = 1): 9 | super(Model, self).__init__() 10 | self.dim = dim 11 | 12 | def forward(self, x: torch.Tensor) -> torch.Tensor: 13 | """ 14 | Applies LogSoftmax activation to the input tensor. 15 | 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, dim). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor with LogSoftmax applied, same shape as input. 21 | """ 22 | return torch.log_softmax(x, dim=self.dim) 23 | 24 | batch_size = 16 25 | dim = 16384 26 | 27 | def get_inputs(): 28 | x = torch.randn(batch_size, dim) 29 | return [x] 30 | 31 | def get_init_inputs(): 32 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/25_Swish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Swish activation. 
7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Swish activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Swish applied, same shape as input. 20 | """ 21 | return x * torch.sigmoid(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/26_GELU_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a GELU activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies GELU activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with GELU applied, same shape as input. 20 | """ 21 | return torch.nn.functional.gelu(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/27_SELU_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a SELU activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies SELU activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with SELU applied, same shape as input. 20 | """ 21 | return torch.selu(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/28_HardSigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a HardSigmoid activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies HardSigmoid activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with HardSigmoid applied, same shape as input. 
20 | """ 21 | return torch.nn.functional.hardsigmoid(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/29_Softplus.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Softplus activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Softplus activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Softplus applied, same shape as input. 20 | """ 21 | return torch.nn.functional.softplus(x) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/2_Standard_matrix_multiplication_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 1024 25 | K = 4096 26 | N = 2048 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/30_Softsign.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Softsign activation. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, x: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Applies Softsign activation to the input tensor. 14 | 15 | Args: 16 | x (torch.Tensor): Input tensor of any shape. 17 | 18 | Returns: 19 | torch.Tensor: Output tensor with Softsign applied, same shape as input. 20 | """ 21 | return x / (1 + torch.abs(x)) 22 | 23 | batch_size = 16 24 | dim = 16384 25 | 26 | def get_inputs(): 27 | x = torch.randn(batch_size, dim) 28 | return [x] 29 | 30 | def get_init_inputs(): 31 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/31_ELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | """ 7 | Simple model that performs an ELU activation. 
8 | """ 9 | def __init__(self, alpha: float = 1.0): 10 | """ 11 | Initializes the ELU model. 12 | 13 | Args: 14 | alpha (float, optional): The alpha parameter for the ELU function. Defaults to 1.0. 15 | """ 16 | super(Model, self).__init__() 17 | self.alpha = alpha 18 | 19 | def forward(self, x: torch.Tensor) -> torch.Tensor: 20 | """ 21 | Applies ELU activation to the input tensor. 22 | 23 | Args: 24 | x (torch.Tensor): Input tensor of any shape. 25 | 26 | Returns: 27 | torch.Tensor: Output tensor with ELU applied, same shape as input. 28 | """ 29 | return F.elu(x, alpha=self.alpha) 30 | 31 | batch_size = 16 32 | dim = 16384 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1.0] # Provide alpha value for initialization -------------------------------------------------------------------------------- /KernelBench/level1/32_HardTanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | """ 7 | Simple model that performs a HardTanh activation. 8 | """ 9 | def __init__(self): 10 | super(Model, self).__init__() 11 | 12 | def forward(self, x: torch.Tensor) -> torch.Tensor: 13 | """ 14 | Applies HardTanh activation to the input tensor. 15 | 16 | Args: 17 | x (torch.Tensor): Input tensor of any shape. 18 | 19 | Returns: 20 | torch.Tensor: Output tensor with HardTanh applied, same shape as input. 21 | """ 22 | return F.hardtanh(x, min_val=-1., max_val=1.) 23 | 24 | batch_size = 16 25 | dim = 16384 26 | 27 | def get_inputs(): 28 | x = torch.randn(batch_size, dim) 29 | return [x] 30 | 31 | def get_init_inputs(): 32 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/33_BatchNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Batch Normalization. 7 | """ 8 | def __init__(self, num_features: int): 9 | """ 10 | Initializes the BatchNorm layer. 11 | 12 | Args: 13 | num_features (int): Number of features in the input tensor. 14 | """ 15 | super(Model, self).__init__() 16 | self.bn = nn.BatchNorm2d(num_features=num_features) 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies Batch Normalization to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (batch_size, num_features, *). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with Batch Normalization applied, same shape as input. 27 | """ 28 | return self.bn(x) 29 | 30 | batch_size = 16 31 | features = 64 32 | dim1 = 256 33 | dim2 = 256 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, features, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [features] -------------------------------------------------------------------------------- /KernelBench/level1/34_InstanceNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Instance Normalization. 7 | """ 8 | def __init__(self, num_features: int): 9 | """ 10 | Initializes the InstanceNorm layer. 11 | 12 | Args: 13 | num_features (int): Number of features in the input tensor. 
14 | """ 15 | super(Model, self).__init__() 16 | self.inorm = nn.InstanceNorm2d(num_features=num_features) 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies Instance Normalization to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (batch_size, num_features, height, width). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with Instance Normalization applied, same shape as input. 27 | """ 28 | return self.inorm(x) 29 | 30 | batch_size = 16 31 | features = 64 32 | dim1 = 256 33 | dim2 = 256 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, features, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [features] -------------------------------------------------------------------------------- /KernelBench/level1/35_GroupNorm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Group Normalization. 7 | """ 8 | def __init__(self, num_features: int, num_groups: int): 9 | """ 10 | Initializes the GroupNorm layer. 11 | 12 | Args: 13 | num_features (int): Number of features in the input tensor. 14 | num_groups (int): Number of groups to divide the channels into. 15 | """ 16 | super(Model, self).__init__() 17 | self.gn = nn.GroupNorm(num_groups=num_groups, num_channels=num_features) 18 | 19 | def forward(self, x: torch.Tensor) -> torch.Tensor: 20 | """ 21 | Applies Group Normalization to the input tensor. 22 | 23 | Args: 24 | x (torch.Tensor): Input tensor of shape (batch_size, num_features, *). 25 | 26 | Returns: 27 | torch.Tensor: Output tensor with Group Normalization applied, same shape as input. 28 | """ 29 | return self.gn(x) 30 | 31 | batch_size = 16 32 | features = 64 33 | num_groups = 8 34 | dim1 = 256 35 | dim2 = 256 36 | 37 | def get_inputs(): 38 | x = torch.randn(batch_size, features, dim1, dim2) 39 | return [x] 40 | 41 | def get_init_inputs(): 42 | return [features, num_groups] # num_features -------------------------------------------------------------------------------- /KernelBench/level1/36_RMSNorm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs RMS Normalization. 7 | """ 8 | def __init__(self, num_features: int, eps: float = 1e-5): 9 | """ 10 | Initializes the RMSNorm layer. 11 | 12 | Args: 13 | num_features (int): Number of features in the input tensor. 14 | eps (float, optional): A small value added to the denominator to avoid division by zero. Defaults to 1e-5. 15 | """ 16 | super(Model, self).__init__() 17 | self.num_features = num_features 18 | self.eps = eps 19 | 20 | def forward(self, x: torch.Tensor) -> torch.Tensor: 21 | """ 22 | Applies RMS Normalization to the input tensor. 23 | 24 | Args: 25 | x (torch.Tensor): Input tensor of shape (batch_size, num_features, *). 26 | 27 | Returns: 28 | torch.Tensor: Output tensor with RMS Normalization applied, same shape as input. 
29 | """ 30 | # Calculate the RMS along the feature dimension 31 | rms = torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + self.eps) 32 | 33 | # Normalize the input by dividing by the RMS 34 | return x / rms 35 | 36 | batch_size = 16 37 | features = 64 38 | dim1 = 256 39 | dim2 = 256 40 | 41 | def get_inputs(): 42 | x = torch.randn(batch_size, features, dim1, dim2) 43 | return [x] 44 | 45 | def get_init_inputs(): 46 | return [features] -------------------------------------------------------------------------------- /KernelBench/level1/37_FrobeniusNorm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Frobenius norm normalization. 7 | """ 8 | def __init__(self): 9 | """ 10 | Initializes the Frobenius norm normalization layer. 11 | """ 12 | super(Model, self).__init__() 13 | 14 | def forward(self, x: torch.Tensor) -> torch.Tensor: 15 | """ 16 | Applies Frobenius norm normalization to the input tensor. 17 | 18 | Args: 19 | x (torch.Tensor): Input tensor of arbitrary shape. 20 | 21 | Returns: 22 | torch.Tensor: Output tensor with Frobenius norm normalization applied, same shape as input. 23 | """ 24 | norm = torch.norm(x, p='fro') 25 | return x / norm 26 | 27 | batch_size = 16 28 | features = 64 29 | dim1 = 256 30 | dim2 = 256 31 | 32 | def get_inputs(): 33 | x = torch.randn(batch_size, features, dim1, dim2) 34 | return [x] 35 | 36 | def get_init_inputs(): 37 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/38_L1Norm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs L1 normalization. 7 | """ 8 | def __init__(self): 9 | """ 10 | Initializes the L1 normalization layer. 11 | """ 12 | super(Model, self).__init__() 13 | 14 | def forward(self, x: torch.Tensor) -> torch.Tensor: 15 | """ 16 | Applies L1 normalization to the input tensor. 17 | 18 | Args: 19 | x (torch.Tensor): Input tensor of shape (..., dim, ...). 20 | 21 | Returns: 22 | torch.Tensor: Output tensor with L1 normalization applied, same shape as input. 23 | """ 24 | return x / torch.sum(torch.abs(x), dim=1, keepdim=True) 25 | 26 | batch_size = 16 27 | dim = 16384 28 | 29 | def get_inputs(): 30 | x = torch.randn(batch_size, dim) 31 | return [x] 32 | 33 | def get_init_inputs(): 34 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/39_L2Norm_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs L2 normalization. 7 | """ 8 | def __init__(self): 9 | """ 10 | Initializes the L2Norm layer. 11 | 12 | Args: 13 | dim (int): Dimension along which to normalize. 14 | """ 15 | super(Model, self).__init__() 16 | 17 | def forward(self, x: torch.Tensor) -> torch.Tensor: 18 | """ 19 | Applies L2 normalization to the input tensor. 20 | 21 | Args: 22 | x (torch.Tensor): Input tensor of shape (*, dim, *). 23 | 24 | Returns: 25 | torch.Tensor: Output tensor with L2 normalization applied, same shape as input. 
26 | """ 27 | return x / torch.norm(x, p=2, dim=1, keepdim=True) 28 | 29 | batch_size = 16 30 | dim = 16384 31 | 32 | def get_inputs(): 33 | x = torch.randn(batch_size, dim) 34 | return [x] 35 | 36 | def get_init_inputs(): 37 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/3_Batched_matrix_multiplication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Performs batched matrix multiplication (C = A * B) where A, B, and C have the same batch dimension. 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs batched matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (batch_size, m, k). 17 | B: Input tensor of shape (batch_size, k, n). 18 | 19 | Returns: 20 | C: Output tensor of shape (batch_size, m, n). 21 | """ 22 | return torch.bmm(A, B) 23 | 24 | batch_size = 128 25 | m = 128 26 | k = 256 27 | n = 512 28 | 29 | def get_inputs(): 30 | A = torch.randn(batch_size, m, k) 31 | B = torch.randn(batch_size, k, n) 32 | return [A, B] 33 | 34 | def get_init_inputs(): 35 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/40_LayerNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Layer Normalization. 7 | """ 8 | def __init__(self, normalized_shape: tuple): 9 | """ 10 | Initializes the LayerNorm layer. 11 | 12 | Args: 13 | normalized_shape (tuple): Shape of the input tensor to be normalized. 14 | """ 15 | super(Model, self).__init__() 16 | self.ln = nn.LayerNorm(normalized_shape=normalized_shape) 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies Layer Normalization to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (*, normalized_shape). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with Layer Normalization applied, same shape as input. 27 | """ 28 | return self.ln(x) 29 | 30 | batch_size = 16 31 | features = 64 32 | dim1 = 256 33 | dim2 = 256 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, features, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [(features, dim1, dim2)] -------------------------------------------------------------------------------- /KernelBench/level1/44_Average_Pooling_1D.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs 1D Average Pooling. 7 | """ 8 | def __init__(self, kernel_size: int, stride: int = 1, padding: int = 0): 9 | """ 10 | Initializes the 1D Average Pooling layer. 11 | 12 | Args: 13 | kernel_size (int): Size of the pooling window. 14 | stride (int, optional): Stride of the pooling operation. Defaults to 1. 15 | padding (int, optional): Padding applied to the input tensor. Defaults to 0. 16 | """ 17 | super(Model, self).__init__() 18 | self.avg_pool = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=padding) 19 | 20 | def forward(self, x: torch.Tensor) -> torch.Tensor: 21 | """ 22 | Applies 1D Average Pooling to the input tensor. 
23 | 24 | Args: 25 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, input_length). 26 | 27 | Returns: 28 | torch.Tensor: Output tensor with 1D Average Pooling applied, shape (batch_size, in_channels, output_length). 29 | """ 30 | return self.avg_pool(x) 31 | 32 | batch_size = 16 33 | in_channels = 32 34 | input_length = 128 35 | kernel_size = 4 36 | stride = 2 37 | padding = 1 38 | 39 | def get_inputs(): 40 | x = torch.randn(batch_size, in_channels, input_length) 41 | return [x] 42 | 43 | def get_init_inputs(): 44 | return [kernel_size, stride, padding] -------------------------------------------------------------------------------- /KernelBench/level1/45_Average_Pooling_2D.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs 2D Average Pooling. 7 | """ 8 | def __init__(self, kernel_size: int, stride: int = None, padding: int = 0): 9 | """ 10 | Initializes the Average Pooling layer. 11 | 12 | Args: 13 | kernel_size (int): Size of the pooling window. 14 | stride (int, optional): Stride of the pooling operation. Defaults to None (same as kernel_size). 15 | padding (int, optional): Padding applied to the input tensor. Defaults to 0. 16 | """ 17 | super(Model, self).__init__() 18 | self.avg_pool = nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding) 19 | 20 | def forward(self, x: torch.Tensor) -> torch.Tensor: 21 | """ 22 | Applies 2D Average Pooling to the input tensor. 23 | 24 | Args: 25 | x (torch.Tensor): Input tensor of shape (batch_size, channels, height, width). 26 | 27 | Returns: 28 | torch.Tensor: Output tensor with Average Pooling applied. 29 | """ 30 | return self.avg_pool(x) 31 | 32 | batch_size = 16 33 | channels = 64 34 | height = 256 35 | width = 256 36 | kernel_size = 3 37 | 38 | def get_inputs(): 39 | x = torch.randn(batch_size, channels, height, width) 40 | return [x] 41 | 42 | def get_init_inputs(): 43 | return [kernel_size] -------------------------------------------------------------------------------- /KernelBench/level1/47_Sum_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs sum reduction over a specified dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): Dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies sum reduction over the specified dimension. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (..., dim, ...). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor after sum reduction, shape (..., 1, ...). 
27 | """ 28 | return torch.sum(x, dim=self.dim, keepdim=True) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | reduce_dim = 1 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [reduce_dim] -------------------------------------------------------------------------------- /KernelBench/level1/48_Mean_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs mean reduction over a specific dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): The dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Reduces the input tensor along the specified dimension by taking the mean. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of arbitrary shape. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with reduced dimension. The shape of the output is the same as the input except for the reduced dimension which is removed. 27 | """ 28 | return torch.mean(x, dim=self.dim) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim1, dim2) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1] -------------------------------------------------------------------------------- /KernelBench/level1/49_Max_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Max reduction over a specific dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): The dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies Max reduction over the specified dimension to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor after Max reduction over the specified dimension. 27 | """ 28 | return torch.max(x, dim=self.dim)[0] 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim1, dim2) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1] # Example, change to desired dimension -------------------------------------------------------------------------------- /KernelBench/level1/4_Matrix_vector_multiplication_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs matrix-vector multiplication (C = A * B). 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix-vector multiplication. 14 | 15 | Args: 16 | A: Input matrix of shape (M, K). 17 | B: Input vector of shape (K, 1). 18 | 19 | Returns: 20 | Output vector of shape (M, 1). 
21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 256 25 | K = 131072 26 | 27 | def get_inputs(): 28 | A = torch.randn(M, K) 29 | B = torch.randn(K, 1) 30 | return [A, B] 31 | 32 | def get_init_inputs(): 33 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/50_Product_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs product reduction over a dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): Dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Performs product reduction over the specified dimension. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with product reduction applied. 27 | """ 28 | return torch.prod(x, dim=self.dim) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | reduction_dim = 1 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [reduction_dim] -------------------------------------------------------------------------------- /KernelBench/level1/51_Argmax_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs Argmax over a specified dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to perform argmax. 11 | 12 | Args: 13 | dim (int): The dimension to perform argmax over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies argmax over the specified dimension to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor with argmax applied, with the specified dimension removed. 27 | """ 28 | return torch.argmax(x, dim=self.dim) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim1, dim2) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1] -------------------------------------------------------------------------------- /KernelBench/level1/52_Argmin_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that finds the index of the minimum value along a specified dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to perform argmin on. 11 | 12 | Args: 13 | dim (int): Dimension along which to find the minimum value. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Finds the index of the minimum value along the specified dimension. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Tensor containing the indices of the minimum values along the specified dimension. 
27 | """ 28 | return torch.argmin(x, dim=self.dim) 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | dim = 1 34 | 35 | def get_inputs(): 36 | x = torch.randn(batch_size, dim1, dim2) 37 | return [x] 38 | 39 | def get_init_inputs(): 40 | return [dim] -------------------------------------------------------------------------------- /KernelBench/level1/53_Min_reduction_over_a_dimension.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs min reduction over a specific dimension. 7 | """ 8 | def __init__(self, dim: int): 9 | """ 10 | Initializes the model with the dimension to reduce over. 11 | 12 | Args: 13 | dim (int): The dimension to reduce over. 14 | """ 15 | super(Model, self).__init__() 16 | self.dim = dim 17 | 18 | def forward(self, x: torch.Tensor) -> torch.Tensor: 19 | """ 20 | Applies min reduction over the specified dimension to the input tensor. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor. 24 | 25 | Returns: 26 | torch.Tensor: Output tensor after min reduction over the specified dimension. 27 | """ 28 | return torch.min(x, dim=self.dim)[0] 29 | 30 | batch_size = 16 31 | dim1 = 256 32 | dim2 = 256 33 | 34 | def get_inputs(): 35 | x = torch.randn(batch_size, dim1, dim2) 36 | return [x] 37 | 38 | def get_init_inputs(): 39 | return [1] # Example, change to desired dimension -------------------------------------------------------------------------------- /KernelBench/level1/5_Matrix_scalar_multiplication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix-scalar multiplication (C = A * s) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, s: float) -> torch.Tensor: 12 | """ 13 | Performs matrix-scalar multiplication. 14 | 15 | Args: 16 | A: Input matrix of shape (M, N) 17 | s: Scalar value 18 | 19 | Returns: 20 | C: Resulting matrix of shape (M, N) 21 | """ 22 | return A * s 23 | 24 | M = 16384 25 | N = 4096 26 | 27 | def get_inputs(): 28 | A = torch.randn(M, N) 29 | s = 3.14 30 | return [A, s] 31 | 32 | def get_init_inputs(): 33 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/6_Matmul_with_large_K_dimension_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) with a large K dimension 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication of A and B. 
14 | 15 | Args: 16 | A: Input tensor of shape (M, K) 17 | B: Input tensor of shape (K, N) 18 | 19 | Returns: 20 | Output tensor of shape (M, N) 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 256 25 | N = 256 26 | K = 131072 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/7_Matmul_with_small_K_dimension_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) with a small K dimension 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication. 14 | 15 | Args: 16 | A: Input tensor of shape (M, K). 17 | B: Input tensor of shape (K, N). 18 | 19 | Returns: 20 | Output tensor of shape (M, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 16384 25 | N = 16384 26 | K = 32 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/87_conv_pointwise_2D.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Performs a pointwise 2D convolution operation. 7 | 8 | Args: 9 | in_channels (int): Number of channels in the input tensor. 10 | out_channels (int): Number of channels produced by the convolution. 11 | bias (bool, optional): If `True`, adds a learnable bias to the output. Defaults to `False`. 12 | """ 13 | def __init__(self, in_channels: int, out_channels: int, bias: bool = False): 14 | super(Model, self).__init__() 15 | self.conv1d = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=bias) 16 | 17 | def forward(self, x: torch.Tensor) -> torch.Tensor: 18 | """ 19 | Performs the pointwise 2D convolution. 20 | 21 | Args: 22 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width). 23 | 24 | Returns: 25 | torch.Tensor: Output tensor of shape (batch_size, out_channels, height, width). 26 | """ 27 | return self.conv1d(x) 28 | 29 | # Test code 30 | batch_size = 16 31 | in_channels = 3 32 | out_channels = 64 33 | width = 256 34 | height = 256 35 | 36 | def get_inputs(): 37 | x = torch.randn(batch_size, in_channels, height, width) 38 | return [x] 39 | 40 | def get_init_inputs(): 41 | return [in_channels, out_channels] -------------------------------------------------------------------------------- /KernelBench/level1/88_MinGPTNewGelu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | # From https://github.com/karpathy/minGPT/blob/master/mingpt/model.py 7 | 8 | class Model(nn.Module): 9 | """ 10 | Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). 
11 | Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415 12 | """ 13 | def __init__(self): 14 | super(Model, self).__init__() 15 | 16 | def forward(self, x): 17 | return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) 18 | 19 | batch_size = 2000 20 | dim = 2000 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, dim)] 24 | 25 | def get_init_inputs(): 26 | return [] -------------------------------------------------------------------------------- /KernelBench/level1/8_Matmul_with_irregular_shapes_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) with irregular shapes 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A: torch.Tensor, B: torch.Tensor) -> torch.Tensor: 12 | """ 13 | Performs matrix multiplication of A and B. 14 | 15 | Args: 16 | A: Input tensor with shape (M, K). 17 | B: Input tensor with shape (K, N). 18 | 19 | Returns: 20 | C: Output tensor with shape (M, N). 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 8205 25 | K = 2949 26 | N = 5921 27 | 28 | def get_inputs(): 29 | A = torch.randn(M, K) 30 | B = torch.randn(K, N) 31 | return [A, B] 32 | 33 | def get_init_inputs(): 34 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level1/90_cumprod.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a cumulative product operation along a specified dimension. 7 | 8 | Parameters: 9 | dim (int): The dimension along which to perform the cumulative product operation. 10 | """ 11 | 12 | def __init__(self, dim): 13 | """ 14 | Initialize the CumulativeProductModel. 15 | 16 | Args: 17 | dim (int): The dimension along which to perform the cumulative product. 18 | """ 19 | super(Model, self).__init__() 20 | self.dim = dim 21 | 22 | def forward(self, x): 23 | """ 24 | Forward pass, computing the cumulative product along the specified dimension. 25 | 26 | Args: 27 | x (torch.Tensor): Input tensor of shape (batch_size, *input_shape). 28 | 29 | Returns: 30 | torch.Tensor: Tensor of the same shape as `x` after applying cumulative product along `dim`. 31 | """ 32 | return torch.cumprod(x, dim=self.dim) 33 | 34 | # Define input dimensions and parameters 35 | batch_size = 128 36 | input_shape = (4000,) 37 | dim = 1 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, *input_shape)] 41 | 42 | def get_init_inputs(): 43 | return [dim] 44 | -------------------------------------------------------------------------------- /KernelBench/level1/91_cumsum_reverse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a reverse cumulative sum operation along a specified dimension. 7 | 8 | Parameters: 9 | dim (int): The dimension along which to perform the reverse cumulative sum. 
10 | """ 11 | 12 | def __init__(self, dim): 13 | super(Model, self).__init__() 14 | self.dim = dim 15 | 16 | def forward(self, x): 17 | return torch.cumsum(x.flip(self.dim), dim=self.dim).flip(self.dim) 18 | 19 | batch_size = 128 20 | input_shape = (4000,) 21 | dim = 1 22 | 23 | def get_inputs(): 24 | return [torch.randn(batch_size, *input_shape)] 25 | 26 | def get_init_inputs(): 27 | return [dim] 28 | -------------------------------------------------------------------------------- /KernelBench/level1/92_cumsum_exclusive.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs an exclusive cumulative sum (does not include the current element). 7 | 8 | Parameters: 9 | dim (int): The dimension along which to perform the exclusive cumulative sum. 10 | """ 11 | 12 | def __init__(self, dim): 13 | super(Model, self).__init__() 14 | self.dim = dim 15 | 16 | def forward(self, x): 17 | exclusive_cumsum = torch.cat((torch.zeros_like(x.select(self.dim, 0).unsqueeze(self.dim)), x), dim=self.dim)[:-1] 18 | return torch.cumsum(exclusive_cumsum, dim=self.dim) 19 | 20 | batch_size = 128 21 | input_shape = (4000,) 22 | dim = 1 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, *input_shape)] 26 | 27 | def get_init_inputs(): 28 | return [dim] 29 | -------------------------------------------------------------------------------- /KernelBench/level1/93_masked_cumsum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a masked cumulative sum, only summing elements that satisfy a condition. 7 | 8 | Parameters: 9 | dim (int): The dimension along which to perform the masked cumulative sum. 10 | """ 11 | 12 | def __init__(self, dim): 13 | super(Model, self).__init__() 14 | self.dim = dim 15 | 16 | def forward(self, x, mask): 17 | """ 18 | Args: 19 | x (torch.Tensor): Input tensor of shape (batch_size, *input_shape). 20 | mask (torch.Tensor): Boolean mask of the same shape as x. 21 | 22 | Returns: 23 | torch.Tensor: Cumulative sum of elements where mask is True. 24 | """ 25 | return torch.cumsum(x * mask, dim=self.dim) 26 | 27 | batch_size = 128 28 | input_shape = (4000,) 29 | dim = 1 30 | 31 | def get_inputs(): 32 | x = torch.randn(batch_size, *input_shape) 33 | mask = torch.randint(0, 2, x.shape).bool() # Random boolean mask 34 | return [x, mask] 35 | 36 | def get_init_inputs(): 37 | return [dim] 38 | -------------------------------------------------------------------------------- /KernelBench/level1/94_MSELoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes the Mean Squared Error loss for regression tasks. 
7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.mean((predictions - targets) ** 2) 16 | 17 | batch_size = 128 18 | input_shape = (4096, ) 19 | dim = 1 20 | 21 | def get_inputs(): 22 | return [torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape)] 23 | 24 | def get_init_inputs(): 25 | return [] 26 | -------------------------------------------------------------------------------- /KernelBench/level1/95_CrossEntropyLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Cross Entropy Loss for multi-class classification tasks. 7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.nn.functional.cross_entropy(predictions, targets) 16 | 17 | batch_size = 4096 18 | num_classes = 10 19 | input_shape = (num_classes, ) # Output for each class 20 | dim = 1 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, *input_shape), torch.randint(0, num_classes, (batch_size,))] 24 | 25 | def get_init_inputs(): 26 | return [] 27 | -------------------------------------------------------------------------------- /KernelBench/level1/96_HuberLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Smooth L1 (Huber) Loss for regression tasks. 7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.nn.functional.smooth_l1_loss(predictions, targets) 16 | 17 | batch_size = 128 18 | input_shape = (4096, ) 19 | dim = 1 20 | 21 | def get_inputs(): 22 | return [torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape)] 23 | 24 | def get_init_inputs(): 25 | return [] 26 | -------------------------------------------------------------------------------- /KernelBench/level1/97_CosineSimilarityLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Cosine Similarity Loss for comparing vectors. 7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | cosine_sim = torch.nn.functional.cosine_similarity(predictions, targets, dim=1) 16 | return torch.mean(1 - cosine_sim) 17 | 18 | batch_size = 128 19 | input_shape = (4096, ) 20 | dim = 1 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape)] 24 | 25 | def get_init_inputs(): 26 | return [] 27 | -------------------------------------------------------------------------------- /KernelBench/level1/98_KLDivLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Kullback-Leibler Divergence for comparing two distributions. 
7 | 8 | Parameters: 9 | None 10 | """ 11 | def __init__(self): 12 | super(Model, self).__init__() 13 | 14 | def forward(self, predictions, targets): 15 | return torch.nn.functional.kl_div(torch.log(predictions), targets, reduction='batchmean') 16 | 17 | batch_size = 128 18 | input_shape = (4096, ) 19 | dim = 1 20 | 21 | def get_inputs(): 22 | return [torch.randn(batch_size, *input_shape).softmax(dim=-1), torch.randn(batch_size, *input_shape).softmax(dim=-1)] 23 | 24 | def get_init_inputs(): 25 | return [] 26 | -------------------------------------------------------------------------------- /KernelBench/level1/99_TripletMarginLoss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that computes Triplet Margin Loss for metric learning tasks. 7 | 8 | Parameters: 9 | margin (float): The margin between the positive and negative samples. 10 | """ 11 | def __init__(self, margin=1.0): 12 | super(Model, self).__init__() 13 | self.loss_fn = torch.nn.TripletMarginLoss(margin=margin) 14 | 15 | def forward(self, anchor, positive, negative): 16 | return self.loss_fn(anchor, positive, negative) 17 | 18 | batch_size = 128 19 | input_shape = (4096, ) 20 | dim = 1 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape), torch.randn(batch_size, *input_shape)] 24 | 25 | def get_init_inputs(): 26 | return [1.0] # Default margin 27 | -------------------------------------------------------------------------------- /KernelBench/level1/9_Tall_skinny_matrix_multiplication_.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a single matrix multiplication (C = A * B) where one of the matrices is tall and skinny (M >> N or N >> M) 7 | """ 8 | def __init__(self): 9 | super(Model, self).__init__() 10 | 11 | def forward(self, A, B): 12 | """ 13 | Performs the matrix multiplication. 14 | 15 | Args: 16 | A (torch.Tensor): Input matrix of shape (M, K) or (K, M) where M >> N or N >> M. 17 | B (torch.Tensor): Input matrix of shape (K, N) or (N, K) where M >> N or N >> M. 18 | 19 | Returns: 20 | torch.Tensor: Output matrix of shape (M, N) or (N, M) 21 | """ 22 | return torch.matmul(A, B) 23 | 24 | M = 16384 25 | N = 16 26 | 27 | def get_inputs(): 28 | A = torch.randn(M, N) 29 | B = torch.randn(N, M) 30 | return [A, B] 31 | 32 | def get_init_inputs(): 33 | return [] # No special initialization inputs needed -------------------------------------------------------------------------------- /KernelBench/level2/100_ConvTranspose3d_Clamp_Min_Divide.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a transposed 3D convolution, clamps the output to a minimum value, 7 | and then divides the result by a constant. 
8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, min_value, divisor): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 12 | self.min_value = min_value 13 | self.divisor = divisor 14 | 15 | def forward(self, x): 16 | x = self.conv_transpose(x) 17 | x = torch.clamp(x, min=self.min_value) 18 | x = x / self.divisor 19 | return x 20 | 21 | batch_size = 16 22 | in_channels = 32 23 | out_channels = 16 24 | depth, height, width = 16, 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | min_value = -1.0 29 | divisor = 2.0 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, depth, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, min_value, divisor] -------------------------------------------------------------------------------- /KernelBench/level2/10_ConvTranspose2d_MaxPool_Hardtanh_Mean_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, followed by max pooling, hardtanh activation, mean operation, and tanh activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, maxpool_kernel_size, maxpool_stride, hardtanh_min, hardtanh_max): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.maxpool = nn.MaxPool2d(kernel_size=maxpool_kernel_size, stride=maxpool_stride) 12 | self.hardtanh = nn.Hardtanh(min_val=hardtanh_min, max_val=hardtanh_max) 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = self.maxpool(x) 17 | x = self.hardtanh(x) 18 | x = torch.mean(x, dim=(2, 3), keepdim=True) 19 | x = torch.tanh(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 32 24 | out_channels = 64 25 | height, width = 16, 16 26 | kernel_size = 4 27 | stride = 2 28 | padding = 1 29 | maxpool_kernel_size = 2 30 | maxpool_stride = 2 31 | hardtanh_min = -1 32 | hardtanh_max = 1 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding, maxpool_kernel_size, maxpool_stride, hardtanh_min, hardtanh_max] -------------------------------------------------------------------------------- /KernelBench/level2/11_ConvTranspose2d_BatchNorm_Tanh_MaxPool_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, batch normalization, tanh activation, max pooling, and group normalization. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups, num_groups): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.batch_norm = nn.BatchNorm2d(out_channels) 12 | self.tanh = nn.Tanh() 13 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 14 | self.group_norm = nn.GroupNorm(num_groups=num_groups, num_channels=out_channels) 15 | 16 | def forward(self, x): 17 | x = self.conv_transpose(x) 18 | x = self.batch_norm(x) 19 | x = self.tanh(x) 20 | x = self.max_pool(x) 21 | x = self.group_norm(x) 22 | return x 23 | 24 | batch_size = 128 25 | in_channels = 32 26 | out_channels = 64 27 | kernel_size = 4 28 | stride = 2 29 | padding = 1 30 | groups = 8 31 | num_groups = 4 32 | height, width = 32, 32 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding, groups, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/12_Gemm_Multiply_LeakyReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a Gemm, multiplies the result, and applies LeakyReLU. 7 | """ 8 | def __init__(self, in_features, out_features, multiplier, negative_slope): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.multiplier = multiplier 12 | self.leaky_relu = nn.LeakyReLU(negative_slope) 13 | 14 | def forward(self, x): 15 | x = self.gemm(x) 16 | x = x * self.multiplier 17 | x = self.leaky_relu(x) 18 | return x 19 | 20 | batch_size = 128 21 | in_features = 1024 22 | out_features = 512 23 | multiplier = 2.0 24 | negative_slope = 0.1 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_features)] 28 | 29 | def get_init_inputs(): 30 | return [in_features, out_features, multiplier, negative_slope] -------------------------------------------------------------------------------- /KernelBench/level2/13_ConvTranspose3d_Mean_Add_Softmax_Tanh_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a series of operations: 7 | 1. Transposed 3D convolution 8 | 2. Mean pooling 9 | 3. Addition 10 | 4. Softmax 11 | 5. Tanh activation 12 | 6. 
Scaling 13 | """ 14 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias_shape, scaling_factor): 15 | super(Model, self).__init__() 16 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 17 | self.bias = nn.Parameter(torch.randn(bias_shape)) 18 | self.scaling_factor = scaling_factor 19 | 20 | def forward(self, x): 21 | x = self.conv_transpose(x) 22 | x = torch.mean(x, dim=1, keepdim=True) 23 | x = x + self.bias 24 | x = torch.softmax(x, dim=1) 25 | x = torch.tanh(x) 26 | x = x * self.scaling_factor 27 | return x 28 | 29 | batch_size = 16 30 | in_channels = 8 31 | out_channels = 16 32 | depth, height, width = 16, 32, 32 33 | kernel_size = 3 34 | stride = 2 35 | padding = 1 36 | bias_shape = (1, 1, 1, 1, 1) 37 | scaling_factor = 2.0 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, in_channels, depth, height, width)] 41 | 42 | def get_init_inputs(): 43 | return [in_channels, out_channels, kernel_size, stride, padding, bias_shape, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/14_Gemm_Divide_Sum_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication, division, summation, and scaling. 7 | """ 8 | def __init__(self, input_size, hidden_size, scaling_factor): 9 | super(Model, self).__init__() 10 | self.weight = nn.Parameter(torch.randn(hidden_size, input_size)) 11 | self.scaling_factor = scaling_factor 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, input_size). 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, hidden_size). 19 | """ 20 | x = torch.matmul(x, self.weight.T) # Gemm 21 | x = x / 2 # Divide 22 | x = torch.sum(x, dim=1, keepdim=True) # Sum 23 | x = x * self.scaling_factor # Scaling 24 | return x 25 | 26 | 27 | batch_size = 128 28 | input_size = 10 29 | hidden_size = 20 30 | scaling_factor = 1.5 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, input_size)] 34 | 35 | def get_init_inputs(): 36 | return [input_size, hidden_size, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/15_ConvTranspose3d_BatchNorm_Subtract.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A 3D convolutional transpose layer followed by Batch Normalization and subtraction. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=True): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=bias) 11 | self.batch_norm = nn.BatchNorm3d(out_channels) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = self.batch_norm(x) 16 | x = x - torch.mean(x, dim=(2, 3, 4), keepdim=True) # Subtract mean along spatial dimensions 17 | return x 18 | 19 | batch_size = 16 20 | in_channels = 16 21 | out_channels = 32 22 | depth, height, width = 16, 32, 32 23 | kernel_size = 3 24 | stride = 2 25 | padding = 1 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_channels, depth, height, width)] 29 | 30 | def get_init_inputs(): 31 | return [in_channels, out_channels, kernel_size, stride, padding] -------------------------------------------------------------------------------- /KernelBench/level2/16_ConvTranspose2d_Mish_Add_Hardtanh_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, applies Mish activation, adds a value, 7 | applies Hardtanh activation, and scales the output. 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, add_value, scale): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, output_padding) 12 | self.add_value = add_value 13 | self.scale = scale 14 | 15 | def forward(self, x): 16 | x = self.conv_transpose(x) 17 | x = torch.nn.functional.mish(x) # Mish activation 18 | x = x + self.add_value 19 | x = torch.nn.functional.hardtanh(x, min_val=-1, max_val=1) # Hardtanh activation 20 | x = x * self.scale # Scaling 21 | return x 22 | 23 | batch_size = 128 24 | in_channels = 32 25 | out_channels = 64 26 | height, width = 16, 16 27 | kernel_size = 4 28 | stride = 2 29 | padding = 1 30 | output_padding = 1 31 | add_value = 0.5 32 | scale = 2 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, add_value, scale] -------------------------------------------------------------------------------- /KernelBench/level2/17_Conv2d_InstanceNorm_Divide.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies Instance Normalization, and divides by a constant. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, divide_by): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.instance_norm = nn.InstanceNorm2d(out_channels) 12 | self.divide_by = divide_by 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = self.instance_norm(x) 17 | x = x / self.divide_by 18 | return x 19 | 20 | batch_size = 128 21 | in_channels = 3 22 | out_channels = 16 23 | height, width = 32, 32 24 | kernel_size = 3 25 | divide_by = 2.0 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_channels, height, width)] 29 | 30 | def get_init_inputs(): 31 | return [in_channels, out_channels, kernel_size, divide_by] -------------------------------------------------------------------------------- /KernelBench/level2/18_Matmul_Sum_Max_AvgPool_LogSumExp_LogSumExp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a sequence of operations: 7 | - Matrix multiplication 8 | - Summation 9 | - Max 10 | - Average pooling 11 | - LogSumExp 12 | - LogSumExp 13 | """ 14 | def __init__(self, in_features, out_features): 15 | super(Model, self).__init__() 16 | self.linear = nn.Linear(in_features, out_features) 17 | 18 | def forward(self, x): 19 | """ 20 | Args: 21 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 22 | Returns: 23 | torch.Tensor: Output tensor of shape (batch_size, 1). 24 | """ 25 | x = self.linear(x) # (batch_size, out_features) 26 | x = torch.sum(x, dim=1, keepdim=True) # (batch_size, 1) 27 | x = torch.max(x, dim=1, keepdim=True)[0] # (batch_size, 1) 28 | x = torch.mean(x, dim=1, keepdim=True) # (batch_size, 1) 29 | x = torch.logsumexp(x, dim=1, keepdim=True) # (batch_size, 1) 30 | x = torch.logsumexp(x, dim=1, keepdim=True) # (batch_size, 1) 31 | return x 32 | 33 | batch_size = 128 34 | in_features = 10 35 | out_features = 5 36 | 37 | def get_inputs(): 38 | return [torch.randn(batch_size, in_features)] 39 | 40 | def get_init_inputs(): 41 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/19_ConvTranspose2d_GELU_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, applies GELU, and normalizes with GroupNorm. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, groups, num_groups): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride) 11 | self.group_norm = nn.GroupNorm(num_groups=num_groups, num_channels=out_channels) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = torch.nn.functional.gelu(x) 16 | x = self.group_norm(x) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 32 21 | out_channels = 64 22 | height, width = 32, 32 23 | kernel_size = 4 24 | stride = 2 25 | groups = 8 26 | num_groups = 8 27 | 28 | def get_inputs(): 29 | return [torch.randn(batch_size, in_channels, height, width)] 30 | 31 | def get_init_inputs(): 32 | return [in_channels, out_channels, kernel_size, stride, groups, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/1_Conv2D_ReLU_BiasAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies ReLU, and adds a bias term. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = torch.relu(x) 16 | x = x + self.bias 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 3 21 | out_channels = 16 22 | height, width = 32, 32 23 | kernel_size = 3 24 | bias_shape = (out_channels, 1, 1) 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_channels, height, width)] 28 | 29 | def get_init_inputs(): 30 | return [in_channels, out_channels, kernel_size, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/20_ConvTranspose3d_Sum_ResidualAdd_Multiply_ResidualAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, followed by a sum, 7 | a residual add, a multiplication, and another residual add. 
8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | original_x = x.clone().detach() 17 | x = x + self.bias 18 | x = x + original_x 19 | x = x * original_x 20 | x = x + original_x 21 | return x 22 | 23 | batch_size = 16 24 | in_channels = 32 25 | out_channels = 64 26 | depth, height, width = 16, 32, 32 27 | kernel_size = 3 28 | stride = 2 29 | padding = 1 30 | output_padding = 1 31 | bias_shape = (out_channels, 1, 1, 1) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_channels, depth, height, width)] 35 | 36 | def get_init_inputs(): 37 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/21_Conv2d_Add_Scale_Sigmoid_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, adds a bias term, scales, applies sigmoid, and performs group normalization. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, num_groups, bias_shape, scale_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.scale = nn.Parameter(torch.randn(scale_shape)) 13 | self.group_norm = nn.GroupNorm(num_groups, out_channels) 14 | 15 | def forward(self, x): 16 | x = self.conv(x) 17 | x = x + self.bias 18 | x = x * self.scale 19 | x = torch.sigmoid(x) 20 | x = self.group_norm(x) 21 | return x 22 | 23 | batch_size = 128 24 | in_channels = 3 25 | out_channels = 16 26 | height, width = 32, 32 27 | kernel_size = 3 28 | num_groups = 8 29 | bias_shape = (out_channels, 1, 1) 30 | scale_shape = (out_channels, 1, 1) 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, height, width)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, num_groups, bias_shape, scale_shape] -------------------------------------------------------------------------------- /KernelBench/level2/22_Matmul_Scale_ResidualAdd_Clamp_LogSumExp_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication, scales the result, adds a residual connection, clamps the output, 7 | applies LogSumExp, and finally applies the Mish activation function. 8 | """ 9 | def __init__(self, input_size, hidden_size, scale_factor, clamp_min, clamp_max): 10 | super(Model, self).__init__() 11 | self.matmul = nn.Linear(input_size, hidden_size) 12 | self.scale_factor = scale_factor 13 | self.clamp_min = clamp_min 14 | self.clamp_max = clamp_max 15 | 16 | def forward(self, x): 17 | """ 18 | Args: 19 | x: Input tensor of shape (batch_size, input_size). 20 | 21 | Returns: 22 | Output tensor of shape (batch_size, hidden_size). 
23 | """ 24 | x = self.matmul(x) 25 | x = x * self.scale_factor 26 | x = x + x 27 | x = torch.clamp(x, self.clamp_min, self.clamp_max) 28 | x = torch.logsumexp(x, dim=1, keepdim=True) 29 | x = x * torch.nn.functional.mish(x) # Mish activation 30 | return x 31 | 32 | batch_size = 128 33 | input_size = 512 34 | hidden_size = 1024 35 | scale_factor = 2.0 36 | clamp_min = -10.0 37 | clamp_max = 10.0 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, input_size)] 41 | 42 | def get_init_inputs(): 43 | return [input_size, hidden_size, scale_factor, clamp_min, clamp_max] -------------------------------------------------------------------------------- /KernelBench/level2/23_Conv3d_GroupNorm_Mean.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies Group Normalization, computes the mean 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, num_groups): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.group_norm = nn.GroupNorm(num_groups, out_channels) 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W). 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, 1). 19 | """ 20 | x = self.conv(x) 21 | x = self.group_norm(x) 22 | x = x.mean(dim=[1, 2, 3, 4]) # Compute mean across all dimensions except batch 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | D, H, W = 16, 32, 32 29 | kernel_size = 3 30 | num_groups = 8 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, D, H, W)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/24_Conv3d_Min_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a 3D convolution, applies minimum operation along a specific dimension, 7 | and then applies softmax. 
8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, dim): 10 | super(Model, self).__init__() 11 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 12 | self.dim = dim 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W) 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_channels, H, W) 20 | """ 21 | x = self.conv(x) 22 | x = torch.min(x, dim=self.dim)[0] # Apply minimum along the specified dimension 23 | x = torch.softmax(x, dim=1) # Apply softmax along the channel dimension 24 | return x 25 | 26 | batch_size = 128 27 | in_channels = 3 28 | out_channels = 16 29 | D, H, W = 16, 32, 32 30 | kernel_size = 3 31 | dim = 2 # Dimension along which to apply minimum operation (e.g., depth) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_channels, D, H, W)] 35 | 36 | def get_init_inputs(): 37 | return [in_channels, out_channels, kernel_size, dim] -------------------------------------------------------------------------------- /KernelBench/level2/25_Conv2d_Min_Tanh_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, applies minimum operation, Tanh, and another Tanh. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | x = self.conv(x) 14 | x = torch.min(x, dim=1, keepdim=True)[0] # Apply minimum operation along the channel dimension 15 | x = torch.tanh(x) 16 | x = torch.tanh(x) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 3 21 | out_channels = 16 22 | height, width = 32, 32 23 | kernel_size = 3 24 | 25 | def get_inputs(): 26 | return [torch.randn(batch_size, in_channels, height, width)] 27 | 28 | def get_init_inputs(): 29 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/27_Conv3d_HardSwish_ReLU_Softmax_Mean.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a 3D convolution, applies HardSwish, ReLU, Softmax, and then calculates the mean. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias=True): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, bias=bias) 11 | 12 | def forward(self, x): 13 | x = self.conv(x) 14 | x = torch.nn.functional.hardswish(x) 15 | x = torch.relu(x) 16 | x = torch.softmax(x, dim=1) 17 | x = torch.mean(x, dim=[2, 3, 4]) 18 | return x 19 | 20 | batch_size = 128 21 | in_channels = 3 22 | out_channels = 16 23 | depth, height, width = 16, 32, 32 24 | kernel_size = 3 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_channels, depth, height, width)] 28 | 29 | def get_init_inputs(): 30 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/28_BMM_InstanceNorm_Sum_ResidualAdd_Multiply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a batch matrix multiplication, instance normalization, summation, residual addition, and multiplication. 7 | """ 8 | def __init__(self, in_features, out_features, eps=1e-5, momentum=0.1): 9 | super(Model, self).__init__() 10 | self.bmm = nn.Linear(in_features, out_features) 11 | self.instance_norm = nn.InstanceNorm2d(out_features, eps=eps, momentum=momentum) 12 | 13 | def forward(self, x, y): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 17 | y (torch.Tensor): Input tensor of shape (batch_size, out_features). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_features). 21 | """ 22 | x = self.bmm(x) 23 | x = self.instance_norm(x.unsqueeze(1).unsqueeze(1)).squeeze(1).squeeze(1) 24 | x = x + y 25 | x = x * y 26 | return x 27 | 28 | batch_size = 128 29 | in_features = 64 30 | out_features = 128 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_features), torch.randn(batch_size, out_features)] 34 | 35 | def get_init_inputs(): 36 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/29_Matmul_Mish_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies Mish, and applies Mish again. 7 | """ 8 | def __init__(self, in_features, out_features): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | 12 | def forward(self, x): 13 | x = self.linear(x) 14 | x = torch.nn.functional.mish(x) 15 | x = torch.nn.functional.mish(x) 16 | return x 17 | 18 | batch_size = 128 19 | in_features = 10 20 | out_features = 20 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, in_features)] 24 | 25 | def get_init_inputs(): 26 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/2_ConvTranspose2d_BiasAdd_Clamp_Scaling_Clamp_Divide.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, adds a bias term, clamps, scales, clamps, and divides. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape, scaling_factor): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.scaling_factor = scaling_factor 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = x + self.bias 17 | x = torch.clamp(x, min=0.0, max=1.0) 18 | x = x * self.scaling_factor 19 | x = torch.clamp(x, min=0.0, max=1.0) 20 | x = x / self.scaling_factor 21 | return x 22 | 23 | batch_size = 128 24 | in_channels = 3 25 | out_channels = 16 26 | height, width = 32, 32 27 | kernel_size = 3 28 | stride = 2 29 | padding = 1 30 | output_padding = 1 31 | bias_shape = (out_channels, 1, 1) 32 | scaling_factor = 2.0 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/30_Gemm_GroupNorm_Hardtanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a GEMM, applies Group Normalization, and then HardTanh. 7 | """ 8 | def __init__(self, in_features, out_features, num_groups, hardtanh_min, hardtanh_max): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.group_norm = nn.GroupNorm(num_groups, out_features) 12 | self.hardtanh = nn.Hardtanh(min_val=hardtanh_min, max_val=hardtanh_max) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_features). 20 | """ 21 | x = self.gemm(x) 22 | x = self.group_norm(x) 23 | x = self.hardtanh(x) 24 | return x 25 | 26 | batch_size = 128 27 | in_features = 1024 28 | out_features = 512 29 | num_groups = 8 30 | hardtanh_min = -2.0 31 | hardtanh_max = 2.0 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_features)] 35 | 36 | def get_init_inputs(): 37 | return [in_features, out_features, num_groups, hardtanh_min, hardtanh_max] -------------------------------------------------------------------------------- /KernelBench/level2/31_Conv2d_Min_Add_Multiply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, takes the minimum with a constant, adds a bias term, and multiplies by a scaling factor. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, constant_value, bias_shape, scaling_factor): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.constant_value = constant_value 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | self.scaling_factor = scaling_factor 14 | 15 | def forward(self, x): 16 | x = self.conv(x) 17 | x = torch.min(x, torch.tensor(self.constant_value)) 18 | x = x + self.bias 19 | x = x * self.scaling_factor 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | height, width = 32, 32 26 | kernel_size = 3 27 | constant_value = 0.5 28 | bias_shape = (out_channels, 1, 1) 29 | scaling_factor = 2.0 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, constant_value, bias_shape, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/32_Conv2d_Scaling_Min.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, scales the output, and then applies a minimum operation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, scale_factor): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.scale_factor = scale_factor 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width). 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, out_channels, height, width). 19 | """ 20 | x = self.conv(x) 21 | x = x * self.scale_factor 22 | x = torch.min(x, dim=1, keepdim=True)[0] # Minimum along channel dimension 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | height, width = 32, 32 29 | kernel_size = 3 30 | scale_factor = 2.0 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, height, width)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, scale_factor] -------------------------------------------------------------------------------- /KernelBench/level2/33_Gemm_Scale_BatchNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a GEMM (general matrix multiplication), applies scaling, 7 | and then batch normalization. 
8 | """ 9 | def __init__(self, in_features, out_features, scale_shape, eps=1e-5, momentum=0.1): 10 | super(Model, self).__init__() 11 | self.gemm = nn.Linear(in_features, out_features) 12 | self.scale = nn.Parameter(torch.randn(scale_shape)) 13 | self.bn = nn.BatchNorm1d(out_features, eps=eps, momentum=momentum) 14 | 15 | def forward(self, x): 16 | x = self.gemm(x) 17 | x = x * self.scale 18 | x = self.bn(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_features = 1024 23 | out_features = 512 24 | scale_shape = (out_features,) 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_features)] 28 | 29 | def get_init_inputs(): 30 | return [in_features, out_features, scale_shape] -------------------------------------------------------------------------------- /KernelBench/level2/35_Conv2d_Subtract_HardSwish_MaxPool_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, subtracts a value, applies HardSwish, MaxPool, and Mish activation functions. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, subtract_value, pool_kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.subtract_value = subtract_value 12 | self.pool = nn.MaxPool2d(pool_kernel_size) 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = x - self.subtract_value 17 | x = torch.nn.functional.hardswish(x) 18 | x = self.pool(x) 19 | x = torch.nn.functional.mish(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | height, width = 32, 32 26 | kernel_size = 3 27 | subtract_value = 0.5 28 | pool_kernel_size = 2 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, in_channels, height, width)] 32 | 33 | def get_init_inputs(): 34 | return [in_channels, out_channels, kernel_size, subtract_value, pool_kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/36_ConvTranspose2d_Min_Sum_GELU_Add.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a convolution transpose, minimum operation, sum operation, GELU activation and addition. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, output_padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = torch.min(x, dim=1, keepdim=True)[0] # Minimum operation along channel dimension 16 | x = torch.sum(x, dim=2, keepdim=True) # Sum operation along height dimension 17 | x = torch.nn.functional.gelu(x) # GELU activation 18 | x = x + self.bias 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | height, width = 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | output_padding = 1 29 | bias_shape = (out_channels, 1, 1) 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/37_Matmul_Swish_Sum_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a matrix multiplication, applies Swish activation, sums with a bias term, and normalizes with GroupNorm. 7 | """ 8 | def __init__(self, in_features, out_features, num_groups, bias_shape): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.group_norm = nn.GroupNorm(num_groups, out_features) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_features). 20 | """ 21 | x = self.matmul(x) 22 | x = torch.sigmoid(x) * x # Swish activation 23 | x = x + self.bias 24 | x = self.group_norm(x) 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 512 29 | out_features = 1024 30 | num_groups = 32 31 | bias_shape = (out_features,) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_features)] 35 | 36 | def get_init_inputs(): 37 | return [in_features, out_features, num_groups, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/39_Gemm_Scale_BatchNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, scales the result, and applies batch normalization. 
7 | """ 8 | def __init__(self, in_features, out_features, scale_shape, eps=1e-5, momentum=0.1): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.scale = nn.Parameter(torch.randn(scale_shape)) 12 | self.bn = nn.BatchNorm1d(out_features, eps=eps, momentum=momentum) 13 | 14 | def forward(self, x): 15 | x = self.gemm(x) 16 | x = x * self.scale 17 | x = self.bn(x) 18 | return x 19 | 20 | batch_size = 128 21 | in_features = 1024 22 | out_features = 512 23 | scale_shape = (out_features,) 24 | 25 | def get_inputs(): 26 | return [torch.randn(batch_size, in_features)] 27 | 28 | def get_init_inputs(): 29 | return [in_features, out_features, scale_shape] -------------------------------------------------------------------------------- /KernelBench/level2/40_Matmul_Scaling_ResidualAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a matrix multiplication, scaling, and residual addition. 7 | 8 | Args: 9 | in_features (int): Number of input features. 10 | out_features (int): Number of output features. 11 | scaling_factor (float): Scaling factor to apply after matrix multiplication. 12 | """ 13 | def __init__(self, in_features, out_features, scaling_factor): 14 | super(Model, self).__init__() 15 | self.matmul = nn.Linear(in_features, out_features) 16 | self.scaling_factor = scaling_factor 17 | 18 | def forward(self, x): 19 | """ 20 | Forward pass of the model. 21 | 22 | Args: 23 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 24 | 25 | Returns: 26 | torch.Tensor: Output tensor of shape (batch_size, out_features). 27 | """ 28 | x = self.matmul(x) 29 | original_x = x.clone().detach() 30 | x = x * self.scaling_factor 31 | x = x + original_x 32 | return x 33 | 34 | batch_size = 128 35 | in_features = 64 36 | out_features = 128 37 | scaling_factor = 0.5 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, in_features)] 41 | 42 | def get_init_inputs(): 43 | return [in_features, out_features, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/41_Gemm_BatchNorm_GELU_GroupNorm_Mean_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, BatchNorm, GELU, GroupNorm, Mean, and ReLU operations in sequence. 7 | """ 8 | def __init__(self, in_features, out_features, num_groups): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.batch_norm = nn.BatchNorm1d(out_features) 12 | self.group_norm = nn.GroupNorm(num_groups, out_features) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
20 | """ 21 | x = self.gemm(x) 22 | x = self.batch_norm(x) 23 | x = torch.nn.functional.gelu(x) 24 | x = self.group_norm(x) 25 | x = torch.mean(x, dim=1, keepdim=True) 26 | x = torch.relu(x) 27 | return x 28 | 29 | batch_size = 128 30 | in_features = 512 31 | out_features = 1024 32 | num_groups = 8 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_features)] 36 | 37 | def get_init_inputs(): 38 | return [in_features, out_features, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/42_ConvTranspose2d_GlobalAvgPool_BiasAdd_LogSumExp_Sum_Multiply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, global average pooling, adds a bias, applies log-sum-exp, sum, and multiplication. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = torch.mean(x, dim=(2, 3), keepdim=True) # Global average pooling 16 | x = x + self.bias 17 | x = torch.logsumexp(x, dim=1, keepdim=True) # Log-sum-exp 18 | x = torch.sum(x, dim=(2, 3)) # Sum 19 | x = x * 10.0 # Multiplication 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | height, width = 32, 32 26 | kernel_size = 3 27 | bias_shape = (out_channels, 1, 1) 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_channels, height, width)] 31 | 32 | def get_init_inputs(): 33 | return [in_channels, out_channels, kernel_size, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/43_Conv3d_Max_LogSumExp_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, max pooling, log sum exp, and ReLU activation. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.max_pool = nn.MaxPool3d(kernel_size=2, stride=2) 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x: Input tensor of shape (batch_size, in_channels, depth, height, width) 17 | Returns: 18 | Output tensor of shape (batch_size, out_channels, depth', height', width') 19 | """ 20 | x = self.conv(x) 21 | x = self.max_pool(x) 22 | x = torch.logsumexp(x, dim=1, keepdim=True) 23 | x = torch.relu(x) 24 | return x 25 | 26 | batch_size = 128 27 | in_channels = 3 28 | out_channels = 16 29 | depth, height, width = 16, 32, 32 30 | kernel_size = 3 31 | stride = 1 32 | padding = 1 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, depth, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, stride, padding] -------------------------------------------------------------------------------- /KernelBench/level2/44_ConvTranspose2d_Multiply_GlobalAvgPool_GlobalAvgPool_Mean.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, multiplies by a scalar, applies global average pooling, 7 | another global average pooling 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, multiplier): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 12 | self.multiplier = multiplier 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = x * self.multiplier 17 | x = torch.mean(x, dim=[2, 3], keepdim=True) # First global average pooling 18 | x = torch.mean(x, dim=[2, 3], keepdim=True) # Second global average pooling 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | height, width = 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | output_padding = 1 29 | multiplier = 0.5 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, multiplier] -------------------------------------------------------------------------------- /KernelBench/level2/45_Gemm_Sigmoid_Sum_LogSumExp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication (Gemm), applies Sigmoid, sums the result, and calculates the LogSumExp. 
7 | """ 8 | def __init__(self, input_size, hidden_size, output_size): 9 | super(Model, self).__init__() 10 | self.linear1 = nn.Linear(input_size, hidden_size) 11 | self.linear2 = nn.Linear(hidden_size, output_size) 12 | 13 | def forward(self, x): 14 | x = self.linear1(x) 15 | x = torch.sigmoid(x) 16 | x = torch.sum(x, dim=1) 17 | x = torch.logsumexp(x, dim=0) 18 | return x 19 | 20 | batch_size = 128 21 | input_size = 10 22 | hidden_size = 20 23 | output_size = 5 24 | 25 | def get_inputs(): 26 | return [torch.randn(batch_size, input_size)] 27 | 28 | def get_init_inputs(): 29 | return [input_size, hidden_size, output_size] -------------------------------------------------------------------------------- /KernelBench/level2/46_Conv2d_Subtract_Tanh_Subtract_AvgPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, subtraction, tanh activation, subtraction and average pooling. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, subtract1_value, subtract2_value, kernel_size_pool): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.subtract1_value = subtract1_value 12 | self.subtract2_value = subtract2_value 13 | self.avgpool = nn.AvgPool2d(kernel_size_pool) 14 | 15 | def forward(self, x): 16 | x = self.conv(x) 17 | x = x - self.subtract1_value 18 | x = torch.tanh(x) 19 | x = x - self.subtract2_value 20 | x = self.avgpool(x) 21 | return x 22 | 23 | batch_size = 128 24 | in_channels = 3 25 | out_channels = 16 26 | height, width = 32, 32 27 | kernel_size = 3 28 | subtract1_value = 0.5 29 | subtract2_value = 0.2 30 | kernel_size_pool = 2 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, height, width)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, subtract1_value, subtract2_value, kernel_size_pool] -------------------------------------------------------------------------------- /KernelBench/level2/47_Conv3d_Mish_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies Mish activation, and then applies Tanh activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W). 16 | 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, out_channels, D', H', W'). 
19 | """ 20 | x = self.conv(x) 21 | x = torch.nn.functional.mish(x) 22 | x = torch.tanh(x) 23 | return x 24 | 25 | batch_size = 16 26 | in_channels = 3 27 | out_channels = 16 28 | D, H, W = 16, 32, 32 29 | kernel_size = 3 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, D, H, W)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/48_Conv3d_Scaling_Tanh_Multiply_Sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, scales the output, applies tanh, multiplies by a scaling factor, and applies sigmoid. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, scaling_factor, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.scaling_factor = nn.Parameter(torch.randn(bias_shape)) 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = x * self.scaling_factor 17 | x = torch.tanh(x) 18 | x = x * self.bias 19 | x = torch.sigmoid(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | depth, height, width = 16, 32, 32 26 | kernel_size = 3 27 | scaling_factor = 2 28 | bias_shape = (out_channels, 1, 1, 1) 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, in_channels, depth, height, width)] 32 | 33 | def get_init_inputs(): 34 | return [in_channels, out_channels, kernel_size, scaling_factor, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/49_ConvTranspose3d_Softmax_Sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, applies Softmax and Sigmoid. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias=True): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding, bias=bias) 11 | self.softmax = nn.Softmax(dim=1) 12 | self.sigmoid = nn.Sigmoid() 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_channels, D, H, W). 
21 | """ 22 | x = self.conv_transpose(x) 23 | x = self.softmax(x) 24 | x = self.sigmoid(x) 25 | return x 26 | 27 | batch_size = 16 28 | in_channels = 32 29 | out_channels = 64 30 | D, H, W = 16, 32, 32 31 | kernel_size = 3 32 | stride = 2 33 | padding = 1 34 | output_padding = 1 35 | 36 | def get_inputs(): 37 | return [torch.randn(batch_size, in_channels, D, H, W)] 38 | 39 | def get_init_inputs(): 40 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding] -------------------------------------------------------------------------------- /KernelBench/level2/4_Conv2d_Mish_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies Mish, and another Mish. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | x = self.conv(x) 14 | x = torch.nn.functional.mish(x) 15 | x = torch.nn.functional.mish(x) 16 | return x 17 | 18 | batch_size = 128 19 | in_channels = 3 20 | out_channels = 16 21 | height, width = 32, 32 22 | kernel_size = 3 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_channels, height, width)] 26 | 27 | def get_init_inputs(): 28 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/50_ConvTranspose3d_Scaling_AvgPool_BiasAdd_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, scaling, average pooling, bias addition, and scaling. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, scale1, scale2, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.scale1 = nn.Parameter(torch.tensor(scale1)) 12 | self.avg_pool = nn.AvgPool3d(kernel_size=2) 13 | self.bias = nn.Parameter(torch.randn(bias_shape)) 14 | self.scale2 = nn.Parameter(torch.tensor(scale2)) 15 | 16 | def forward(self, x): 17 | x = self.conv_transpose(x) 18 | x = x * self.scale1 19 | x = self.avg_pool(x) 20 | x = x + self.bias 21 | x = x * self.scale2 22 | return x 23 | 24 | batch_size = 128 25 | in_channels = 3 26 | out_channels = 16 27 | depth, height, width = 16, 32, 32 28 | kernel_size = 3 29 | stride = 2 30 | padding = 1 31 | scale1 = 0.5 32 | scale2 = 1.0 33 | bias_shape = (out_channels, 1, 1, 1) 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, in_channels, depth, height, width)] 37 | 38 | def get_init_inputs(): 39 | return [in_channels, out_channels, kernel_size, stride, padding, scale1, scale2, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/51_Gemm_Subtract_GlobalAvgPool_LogSumExp_GELU_ResidualAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a series of operations: Gemm, Subtract, GlobalAvgPool, LogSumExp, GELU, and ResidualAdd. 
7 | """ 8 | def __init__(self, in_features, out_features, bias=True): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features, bias=bias) 11 | self.subtract = nn.Parameter(torch.randn(out_features)) 12 | 13 | def forward(self, x): 14 | original_x = x.clone().detach() 15 | # Gemm 16 | x = self.gemm(x) 17 | 18 | # Subtract 19 | x = x - self.subtract 20 | 21 | # GlobalAvgPool 22 | x = torch.mean(x, dim=1, keepdim=True) 23 | 24 | # LogSumExp 25 | x = torch.logsumexp(x, dim=1, keepdim=True) 26 | 27 | # GELU 28 | x = torch.nn.functional.gelu(x) 29 | 30 | # ResidualAdd 31 | x = x + original_x 32 | 33 | return x 34 | 35 | batch_size = 128 36 | in_features = 1024 37 | out_features = 512 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, in_features)] 41 | 42 | def get_init_inputs(): 43 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/52_Conv2d_Activation_BatchNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies activation, and then applies Batch Normalization. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, eps=1e-5, momentum=0.1): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.bn = nn.BatchNorm2d(out_channels, eps=eps, momentum=momentum) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = torch.multiply(torch.tanh(torch.nn.functional.softplus(x)), x) 16 | x = self.bn(x) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 3 21 | out_channels = 16 22 | height, width = 32, 32 23 | kernel_size = 3 24 | 25 | def get_inputs(): 26 | return [torch.randn(batch_size, in_channels, height, width)] 27 | 28 | def get_init_inputs(): 29 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/53_Gemm_Scaling_Hardtanh_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, scaling, hardtanh, and GELU activation. 
7 | """ 8 | def __init__(self, in_features, out_features, scaling_factor, hardtanh_min, hardtanh_max): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.scaling_factor = scaling_factor 12 | self.hardtanh = nn.Hardtanh(min_val=hardtanh_min, max_val=hardtanh_max) 13 | self.gelu = nn.GELU() 14 | 15 | def forward(self, x): 16 | x = self.gemm(x) 17 | x = x * self.scaling_factor 18 | x = self.hardtanh(x) 19 | x = self.gelu(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_features = 1024 24 | out_features = 512 25 | scaling_factor = 0.5 26 | hardtanh_min = -2 27 | hardtanh_max = 2 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_features)] 31 | 32 | def get_init_inputs(): 33 | return [in_features, out_features, scaling_factor, hardtanh_min, hardtanh_max] -------------------------------------------------------------------------------- /KernelBench/level2/54_Conv2d_Multiply_LeakyReLU_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, multiplies by a learnable scalar, applies LeakyReLU, and then GELU. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, multiplier_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.multiplier = nn.Parameter(torch.randn(multiplier_shape)) 12 | self.leaky_relu = nn.LeakyReLU() 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = x * self.multiplier 17 | x = self.leaky_relu(x) 18 | x = torch.nn.functional.gelu(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | height, width = 32, 32 25 | kernel_size = 3 26 | multiplier_shape = (out_channels, 1, 1) 27 | 28 | def get_inputs(): 29 | return [torch.randn(batch_size, in_channels, height, width)] 30 | 31 | def get_init_inputs(): 32 | return [in_channels, out_channels, kernel_size, multiplier_shape] -------------------------------------------------------------------------------- /KernelBench/level2/55_Matmul_MaxPool_Sum_Scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs matrix multiplication, max pooling, sum, and scaling. 7 | """ 8 | def __init__(self, in_features, out_features, kernel_size, scale_factor): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.max_pool = nn.MaxPool1d(kernel_size) 12 | self.scale_factor = scale_factor 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
21 | """ 22 | x = self.matmul(x) 23 | x = self.max_pool(x.unsqueeze(1)).squeeze(1) 24 | x = torch.sum(x, dim=1) 25 | x = x * self.scale_factor 26 | return x 27 | 28 | batch_size = 128 29 | in_features = 10 30 | out_features = 5 31 | kernel_size = 2 32 | scale_factor = 0.5 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_features)] 36 | 37 | def get_init_inputs(): 38 | return [in_features, out_features, kernel_size, scale_factor] -------------------------------------------------------------------------------- /KernelBench/level2/56_Matmul_Sigmoid_Sum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies sigmoid, and sums the result. 7 | """ 8 | def __init__(self, input_size, hidden_size): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(input_size, hidden_size) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x: Input tensor of shape (batch_size, input_size). 16 | 17 | Returns: 18 | Output tensor of shape (batch_size, 1). 19 | """ 20 | x = self.linear(x) 21 | x = torch.sigmoid(x) 22 | x = torch.sum(x, dim=1, keepdim=True) 23 | return x 24 | 25 | batch_size = 128 26 | input_size = 10 27 | hidden_size = 20 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, input_size)] 31 | 32 | def get_init_inputs(): 33 | return [input_size, hidden_size] -------------------------------------------------------------------------------- /KernelBench/level2/57_Conv2d_ReLU_HardSwish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies ReLU, and applies HardSwish activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | x = self.conv(x) 14 | x = torch.relu(x) 15 | x = x * torch.clamp((x + 3) / 6, 0, 1) 16 | return x 17 | 18 | batch_size = 128 19 | in_channels = 3 20 | out_channels = 16 21 | height, width = 32, 32 22 | kernel_size = 3 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_channels, height, width)] 26 | 27 | def get_init_inputs(): 28 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/58_ConvTranspose3d_LogSumExp_HardSwish_Subtract_Clamp_Max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, LogSumExp, HardSwish, subtraction, clamp, and maximum operations. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = torch.logsumexp(x, dim=1, keepdim=True) 16 | x = x * torch.sigmoid(x + 3) / 6 17 | x = x - self.bias 18 | x = torch.clamp(x, min=-1, max=1) 19 | x = torch.max(x, dim=1, keepdim=True)[0] 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | depth, height, width = 16, 32, 32 26 | kernel_size = 3 27 | stride = 2 28 | padding = 1 29 | bias_shape = (out_channels, 1, 1, 1) 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, depth, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/59_Matmul_Swish_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies Swish activation, and scales the result. 7 | """ 8 | def __init__(self, in_features, out_features, scaling_factor): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.scaling_factor = scaling_factor 12 | 13 | def forward(self, x): 14 | x = self.matmul(x) 15 | x = x * torch.sigmoid(x) # Swish activation 16 | x = x * self.scaling_factor 17 | return x 18 | 19 | batch_size = 128 20 | in_features = 1024 21 | out_features = 512 22 | scaling_factor = 2.0 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_features)] 26 | 27 | def get_init_inputs(): 28 | return [in_features, out_features, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/5_ConvTranspose2d_Subtract_Tanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, subtracts a bias term, and applies tanh activation. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias_shape, stride=2, padding=1, output_padding=1): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv_transpose(x) 15 | x = x - self.bias 16 | x = torch.tanh(x) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 32 21 | out_channels = 16 22 | height, width = 16, 16 23 | kernel_size = 4 24 | bias_shape = (out_channels, 1, 1) 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_channels, height, width)] 28 | 29 | def get_init_inputs(): 30 | return [in_channels, out_channels, kernel_size, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/60_ConvTranspose3d_Swish_GroupNorm_HardSwish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, applies Swish activation, 7 | group normalization, and then HardSwish activation. 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups, eps, bias=True): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=bias) 12 | self.group_norm = nn.GroupNorm(num_groups=groups, num_channels=out_channels, eps=eps) 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = torch.sigmoid(x) * x # Swish activation 17 | x = self.group_norm(x) 18 | x = torch.nn.functional.hardswish(x) # HardSwish activation 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | depth, height, width = 16, 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | groups = 4 29 | eps = 1e-5 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, depth, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, padding, groups, eps] -------------------------------------------------------------------------------- /KernelBench/level2/61_ConvTranspose3d_ReLU_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed 3D convolution, applies ReLU, and then applies group normalization. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, groups, bias=False): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, bias=bias) 11 | self.relu = nn.ReLU() 12 | self.group_norm = nn.GroupNorm(num_groups=groups, num_channels=out_channels) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, D, H, W). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_channels, D, H, W). 
21 | """ 22 | x = self.conv_transpose(x) 23 | x = self.relu(x) 24 | x = self.group_norm(x) 25 | return x 26 | 27 | batch_size = 16 28 | in_channels = 64 29 | out_channels = 128 30 | D, H, W = 8, 16, 16 31 | kernel_size = 3 32 | groups = 8 33 | bias = False 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, in_channels, D, H, W)] 37 | 38 | def get_init_inputs(): 39 | return [in_channels, out_channels, kernel_size, groups, bias] -------------------------------------------------------------------------------- /KernelBench/level2/62_Matmul_GroupNorm_LeakyReLU_Sum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a matrix multiplication, group normalization, leaky ReLU activation, and element-wise sum. 7 | """ 8 | def __init__(self, input_size, hidden_size, num_groups, eps=1e-5, negative_slope=0.01): 9 | super(Model, self).__init__() 10 | self.fc = nn.Linear(input_size, hidden_size) 11 | self.gn = nn.GroupNorm(num_groups=num_groups, num_channels=hidden_size, eps=eps) 12 | self.leaky_relu = nn.LeakyReLU(negative_slope=negative_slope) 13 | 14 | def forward(self, x): 15 | """ 16 | Performs the forward pass of the model. 17 | 18 | Args: 19 | x: Input tensor of shape (batch_size, input_size). 20 | 21 | Returns: 22 | Output tensor of shape (batch_size, hidden_size). 23 | """ 24 | x = self.fc(x) 25 | x = self.gn(x) 26 | x = self.leaky_relu(x) 27 | x = x + x 28 | return x 29 | 30 | 31 | batch_size = 128 32 | input_size = 512 33 | hidden_size = 256 34 | num_groups = 8 35 | 36 | def get_inputs(): 37 | return [torch.randn(batch_size, input_size)] 38 | 39 | def get_init_inputs(): 40 | return [input_size, hidden_size, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/63_Gemm_ReLU_Divide.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies ReLU, and divides by a constant. 7 | """ 8 | def __init__(self, in_features, out_features, divisor): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | self.divisor = divisor 12 | 13 | def forward(self, x): 14 | x = self.linear(x) 15 | x = torch.relu(x) 16 | x = x / self.divisor 17 | return x 18 | 19 | batch_size = 128 20 | in_features = 1024 21 | out_features = 512 22 | divisor = 2.0 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_features)] 26 | 27 | def get_init_inputs(): 28 | return [in_features, out_features, divisor] -------------------------------------------------------------------------------- /KernelBench/level2/64_Gemm_LogSumExp_LeakyReLU_LeakyReLU_GELU_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication (Gemm), followed by LogSumExp, LeakyReLU, 7 | LeakyReLU, GELU, and GELU activations. 
8 | """ 9 | def __init__(self, in_features, out_features, bias=True): 10 | super(Model, self).__init__() 11 | self.linear = nn.Linear(in_features, out_features, bias=bias) 12 | 13 | def forward(self, x): 14 | # Gemm 15 | x = self.linear(x) 16 | # LogSumExp 17 | x = torch.logsumexp(x, dim=1, keepdim=True) 18 | # LeakyReLU 19 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.01) 20 | # LeakyReLU 21 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.01) 22 | # GELU 23 | x = torch.nn.functional.gelu(x) 24 | # GELU 25 | x = torch.nn.functional.gelu(x) 26 | return x 27 | 28 | batch_size = 128 29 | in_features = 1024 30 | out_features = 512 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_features)] 34 | 35 | def get_init_inputs(): 36 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/65_Conv2d_AvgPool_Sigmoid_Sum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | This model performs a convolution, average pooling, applies sigmoid, and sums the result. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, pool_kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.avg_pool = nn.AvgPool2d(pool_kernel_size) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = self.avg_pool(x) 16 | x = torch.sigmoid(x) 17 | x = torch.sum(x, dim=[1,2,3]) # Sum over all spatial dimensions 18 | return x 19 | 20 | batch_size = 128 21 | in_channels = 3 22 | out_channels = 16 23 | height, width = 32, 32 24 | kernel_size = 3 25 | pool_kernel_size = 2 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_channels, height, width)] 29 | 30 | def get_init_inputs(): 31 | return [in_channels, out_channels, kernel_size, pool_kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/66_Matmul_Dropout_Mean_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs matrix multiplication, applies dropout, calculates the mean, and then applies softmax. 7 | """ 8 | def __init__(self, in_features, out_features, dropout_p): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.dropout = nn.Dropout(dropout_p) 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 17 | 18 | Returns: 19 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
20 | """ 21 | x = self.matmul(x) 22 | x = self.dropout(x) 23 | x = torch.mean(x, dim=1, keepdim=True) 24 | x = torch.softmax(x, dim=1) 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 100 29 | out_features = 50 30 | dropout_p = 0.2 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_features)] 34 | 35 | def get_init_inputs(): 36 | return [in_features, out_features, dropout_p] -------------------------------------------------------------------------------- /KernelBench/level2/67_Conv2d_GELU_GlobalAvgPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies GELU, and then performs global average pooling. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x: Input tensor of shape (batch_size, in_channels, height, width) 16 | Returns: 17 | Output tensor of shape (batch_size, out_channels) 18 | """ 19 | x = self.conv(x) 20 | x = torch.nn.functional.gelu(x) 21 | x = torch.nn.functional.adaptive_avg_pool2d(x, 1) 22 | x = x.squeeze(-1).squeeze(-1) 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | height, width = 32, 32 29 | kernel_size = 3 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/68_Matmul_Min_Subtract.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies minimum, and subtracts a constant. 7 | """ 8 | def __init__(self, in_features, out_features, constant): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | self.constant = nn.Parameter(torch.tensor(constant)) 12 | 13 | def forward(self, x): 14 | x = self.linear(x) 15 | x = torch.min(x, self.constant) 16 | x = x - self.constant 17 | return x 18 | 19 | batch_size = 128 20 | in_features = 10 21 | out_features = 5 22 | constant = 2.0 23 | 24 | def get_inputs(): 25 | return [torch.randn(batch_size, in_features)] 26 | 27 | def get_init_inputs(): 28 | return [in_features, out_features, constant] -------------------------------------------------------------------------------- /KernelBench/level2/69_Conv2d_HardSwish_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, applies HardSwish, and then ReLU. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width). 16 | 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, out_channels, height, width). 
19 | """ 20 | x = self.conv(x) 21 | x = torch.nn.functional.hardswish(x) 22 | x = torch.relu(x) 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | height, width = 32, 32 29 | kernel_size = 3 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/6_Conv3d_Softmax_MaxPool_MaxPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies Softmax, and performs two max pooling operations. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, pool_kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.pool1 = nn.MaxPool3d(pool_kernel_size) 12 | self.pool2 = nn.MaxPool3d(pool_kernel_size) 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x: Input tensor of shape (batch_size, in_channels, depth, height, width) 18 | Returns: 19 | Output tensor of shape (batch_size, out_channels, depth', height', width') where depth', height', width' are the dimensions after pooling. 20 | """ 21 | x = self.conv(x) 22 | x = torch.softmax(x, dim=1) 23 | x = self.pool1(x) 24 | x = self.pool2(x) 25 | return x 26 | 27 | batch_size = 128 28 | in_channels = 3 29 | out_channels = 16 30 | depth, height, width = 16, 32, 32 31 | kernel_size = 3 32 | pool_kernel_size = 2 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, depth, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, pool_kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/70_Gemm_Sigmoid_Scaling_ResidualAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model implementing the pattern "Gemm_Sigmoid_Scaling_ResidualAdd". 7 | """ 8 | def __init__(self, input_size, hidden_size, scaling_factor): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(input_size, hidden_size) 11 | self.scaling_factor = scaling_factor 12 | 13 | def forward(self, x): 14 | """ 15 | Forward pass of the model. 16 | 17 | Args: 18 | x (torch.Tensor): Input tensor of shape (batch_size, input_size). 19 | 20 | Returns: 21 | torch.Tensor: Output tensor of shape (batch_size, hidden_size). 22 | """ 23 | x = self.gemm(x) 24 | original_x = x 25 | x = torch.sigmoid(x) 26 | x = x * self.scaling_factor 27 | x = x + original_x 28 | return x 29 | 30 | batch_size = 128 31 | input_size = 1024 32 | hidden_size = 512 33 | scaling_factor = 2.0 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, input_size)] 37 | 38 | def get_init_inputs(): 39 | return [input_size, hidden_size, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/71_Conv2d_Divide_LeakyReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, divides by a constant, and applies LeakyReLU. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, divisor): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.divisor = divisor 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = x / self.divisor 16 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.01) 17 | return x 18 | 19 | batch_size = 128 20 | in_channels = 3 21 | out_channels = 16 22 | height, width = 32, 32 23 | kernel_size = 3 24 | divisor = 2 25 | 26 | def get_inputs(): 27 | return [torch.randn(batch_size, in_channels, height, width)] 28 | 29 | def get_init_inputs(): 30 | return [in_channels, out_channels, kernel_size, divisor] -------------------------------------------------------------------------------- /KernelBench/level2/72_ConvTranspose3d_BatchNorm_AvgPool_AvgPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a 3D transposed convolution, followed by batch normalization, 7 | two average pooling layers. 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias_shape): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 12 | self.batch_norm = nn.BatchNorm3d(out_channels) 13 | self.avg_pool1 = nn.AvgPool3d(kernel_size=2) 14 | self.avg_pool2 = nn.AvgPool3d(kernel_size=2) 15 | 16 | def forward(self, x): 17 | x = self.conv_transpose(x) 18 | x = self.batch_norm(x) 19 | x = self.avg_pool1(x) 20 | x = self.avg_pool2(x) 21 | return x 22 | 23 | 24 | batch_size = 128 25 | in_channels = 3 26 | out_channels = 16 27 | depth, height, width = 32, 32, 32 28 | kernel_size = 3 29 | stride = 2 30 | padding = 1 31 | bias_shape = (out_channels, 1, 1, 1) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_channels, depth, height, width)] 35 | 36 | def get_init_inputs(): 37 | return [in_channels, out_channels, kernel_size, stride, padding, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/73_Conv2d_BatchNorm_Scaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a convolution, applies Batch Normalization, and scales the output. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, scaling_factor): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.bn = nn.BatchNorm2d(out_channels) 12 | self.scaling_factor = scaling_factor 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = self.bn(x) 17 | x = x * self.scaling_factor 18 | return x 19 | 20 | batch_size = 128 21 | in_channels = 3 22 | out_channels = 16 23 | height, width = 32, 32 24 | kernel_size = 3 25 | scaling_factor = 2.0 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_channels, height, width)] 29 | 30 | def get_init_inputs(): 31 | return [in_channels, out_channels, kernel_size, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/75_Gemm_GroupNorm_Min_BiasAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, Group Normalization, Minimum operation, and Bias addition. 7 | """ 8 | def __init__(self, in_features, out_features, num_groups, bias_shape): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.group_norm = nn.GroupNorm(num_groups, out_features) 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | 14 | def forward(self, x): 15 | x = self.gemm(x) 16 | x = self.group_norm(x) 17 | x = torch.min(x, dim=1, keepdim=True)[0] 18 | x = x + self.bias 19 | return x 20 | 21 | batch_size = 128 22 | in_features = 512 23 | out_features = 256 24 | num_groups = 8 25 | bias_shape = (1, out_features, 1, 1) 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_features)] 29 | 30 | def get_init_inputs(): 31 | return [in_features, out_features, num_groups, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/76_Gemm_Add_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, adds a bias term, and applies ReLU. 7 | """ 8 | def __init__(self, in_features, out_features, bias_shape): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features, bias=False) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor with shape (batch_size, in_features). 17 | Returns: 18 | torch.Tensor: Output tensor with shape (batch_size, out_features). 19 | """ 20 | x = self.gemm(x) 21 | x = x + self.bias 22 | x = torch.relu(x) 23 | return x 24 | 25 | batch_size = 128 26 | in_features = 1024 27 | out_features = 512 28 | bias_shape = (out_features,) 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, in_features)] 32 | 33 | def get_init_inputs(): 34 | return [in_features, out_features, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/77_ConvTranspose3d_Scale_BatchNorm_GlobalAvgPool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, scales the output, applies batch normalization, 7 | and then performs global average pooling. 
8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, scale_factor, eps=1e-5, momentum=0.1): 10 | super(Model, self).__init__() 11 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size) 12 | self.scale_factor = scale_factor 13 | self.batch_norm = nn.BatchNorm3d(out_channels, eps=eps, momentum=momentum) 14 | self.global_avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) 15 | 16 | def forward(self, x): 17 | x = self.conv_transpose(x) 18 | x = x * self.scale_factor 19 | x = self.batch_norm(x) 20 | x = self.global_avg_pool(x) 21 | return x 22 | 23 | batch_size = 16 24 | in_channels = 64 25 | out_channels = 32 26 | depth, height, width = 16, 32, 32 27 | kernel_size = 3 28 | scale_factor = 2.0 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, in_channels, depth, height, width)] 32 | 33 | def get_init_inputs(): 34 | return [in_channels, out_channels, kernel_size, scale_factor] -------------------------------------------------------------------------------- /KernelBench/level2/78_ConvTranspose3d_Max_Max_Sum.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D transposed convolution, followed by two max pooling layers and a sum operation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding) 11 | self.max_pool1 = nn.MaxPool3d(kernel_size=2) 12 | self.max_pool2 = nn.MaxPool3d(kernel_size=3) 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = self.max_pool1(x) 17 | x = self.max_pool2(x) 18 | x = torch.sum(x, dim=1, keepdim=True) 19 | return x 20 | 21 | batch_size = 16 22 | in_channels = 8 23 | out_channels = 16 24 | depth, height, width = 16, 32, 32 25 | kernel_size = 3 26 | stride = 2 27 | padding = 1 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_channels, depth, height, width)] 31 | 32 | def get_init_inputs(): 33 | return [in_channels, out_channels, kernel_size, stride, padding] -------------------------------------------------------------------------------- /KernelBench/level2/79_Conv3d_Multiply_InstanceNorm_Clamp_Multiply_Max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A 3D convolutional layer followed by multiplication, instance normalization, clamping, multiplication, and a max operation. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, multiplier_shape, clamp_min, clamp_max): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.multiplier = nn.Parameter(torch.randn(multiplier_shape)) 12 | self.instance_norm = nn.InstanceNorm3d(out_channels) 13 | self.clamp_min = clamp_min 14 | self.clamp_max = clamp_max 15 | 16 | def forward(self, x): 17 | x = self.conv(x) 18 | x = x * self.multiplier 19 | x = self.instance_norm(x) 20 | x = torch.clamp(x, self.clamp_min, self.clamp_max) 21 | x = x * self.multiplier 22 | x = torch.max(x, dim=1)[0] 23 | return x 24 | 25 | batch_size = 128 26 | in_channels = 3 27 | out_channels = 16 28 | depth, height, width = 16, 32, 32 29 | kernel_size = 3 30 | multiplier_shape = (out_channels, 1, 1, 1) 31 | clamp_min = -1.0 32 | clamp_max = 1.0 33 | 34 | def get_inputs(): 35 | return [torch.randn(batch_size, in_channels, depth, height, width)] 36 | 37 | def get_init_inputs(): 38 | return [in_channels, out_channels, kernel_size, multiplier_shape, clamp_min, clamp_max] -------------------------------------------------------------------------------- /KernelBench/level2/7_Conv3d_ReLU_LeakyReLU_GELU_Sigmoid_BiasAdd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies ReLU, LeakyReLU, GELU, Sigmoid activations, and bias in sequence. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, bias_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = torch.relu(x) 16 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.01) 17 | x = torch.nn.functional.gelu(x) 18 | x = torch.sigmoid(x) 19 | x = x + self.bias 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | depth, height, width = 16, 32, 32 26 | kernel_size = 3 27 | bias_shape = (out_channels, 1, 1, 1) 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_channels, depth, height, width)] 31 | 32 | def get_init_inputs(): 33 | return [in_channels, out_channels, kernel_size, bias_shape] -------------------------------------------------------------------------------- /KernelBench/level2/80_Gemm_Max_Subtract_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, followed by a max operation, subtraction, and GELU activation. 
7 | """ 8 | def __init__(self, in_features, out_features, max_dim): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.max_dim = max_dim 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x: Input tensor of shape (batch_size, in_features) 17 | 18 | Returns: 19 | Output tensor of shape (batch_size, out_features) 20 | """ 21 | x = self.gemm(x) 22 | x = torch.max(x, dim=self.max_dim, keepdim=True).values 23 | x = x - x.mean(dim=1, keepdim=True) 24 | x = torch.nn.functional.gelu(x) 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 512 29 | out_features = 1024 30 | max_dim = 1 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_features)] 34 | 35 | def get_init_inputs(): 36 | return [in_features, out_features, max_dim] -------------------------------------------------------------------------------- /KernelBench/level2/81_Gemm_Swish_Divide_Clamp_Tanh_Clamp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a gemm, swish, divide, clamp, tanh, and clamp operations. 7 | """ 8 | def __init__(self, in_features, out_features, bias=True): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features, bias=bias) 11 | 12 | def forward(self, x): 13 | """ 14 | Args: 15 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 16 | Returns: 17 | torch.Tensor: Output tensor of shape (batch_size, out_features). 18 | """ 19 | x = self.gemm(x) 20 | x = x * torch.sigmoid(x) # Swish activation 21 | x = x / 2.0 22 | x = torch.clamp(x, min=-1.0, max=1.0) # Clamp between -1 and 1 23 | x = torch.tanh(x) # Tanh activation 24 | x = torch.clamp(x, min=-1.0, max=1.0) # Clamp between -1 and 1 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 1024 29 | out_features = 512 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_features)] 33 | 34 | def get_init_inputs(): 35 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/82_Conv2d_Tanh_Scaling_BiasAdd_Max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a convolution, applies tanh, scaling, adds a bias term, and then max-pools. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, scaling_factor, bias_shape, pool_kernel_size): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.scaling_factor = scaling_factor 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | self.max_pool = nn.MaxPool2d(pool_kernel_size) 14 | 15 | def forward(self, x): 16 | # Convolution 17 | x = self.conv(x) 18 | # Tanh activation 19 | x = torch.tanh(x) 20 | # Scaling 21 | x = x * self.scaling_factor 22 | # Bias addition 23 | x = x + self.bias 24 | # Max-pooling 25 | x = self.max_pool(x) 26 | return x 27 | 28 | batch_size = 128 29 | in_channels = 3 30 | out_channels = 16 31 | height, width = 32, 32 32 | kernel_size = 3 33 | scaling_factor = 2.0 34 | bias_shape = (out_channels, 1, 1) 35 | pool_kernel_size = 2 36 | 37 | def get_inputs(): 38 | return [torch.randn(batch_size, in_channels, height, width)] 39 | 40 | def get_init_inputs(): 41 | return [in_channels, out_channels, kernel_size, scaling_factor, bias_shape, pool_kernel_size] -------------------------------------------------------------------------------- /KernelBench/level2/83_Conv3d_GroupNorm_Min_Clamp_Dropout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies Group Normalization, minimum, clamp, and dropout. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, groups, min_value, max_value, dropout_p): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.norm = nn.GroupNorm(groups, out_channels) 12 | self.dropout = nn.Dropout(dropout_p) 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = self.norm(x) 17 | x = torch.min(x, torch.tensor(min_value)) 18 | x = torch.clamp(x, min=min_value, max=max_value) 19 | x = self.dropout(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 3 24 | out_channels = 16 25 | depth, height, width = 16, 32, 32 26 | kernel_size = 3 27 | groups = 8 28 | min_value = 0.0 29 | max_value = 1.0 30 | dropout_p = 0.2 31 | 32 | def get_inputs(): 33 | return [torch.randn(batch_size, in_channels, depth, height, width)] 34 | 35 | def get_init_inputs(): 36 | return [in_channels, out_channels, kernel_size, groups, min_value, max_value, dropout_p] -------------------------------------------------------------------------------- /KernelBench/level2/84_Gemm_BatchNorm_Scaling_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication (Gemm), Batch Normalization, scaling, and Softmax. 7 | """ 8 | def __init__(self, in_features, out_features, bn_eps=1e-5, bn_momentum=0.1, scale_shape=(1,)): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.bn = nn.BatchNorm1d(out_features, eps=bn_eps, momentum=bn_momentum) 12 | self.scale = nn.Parameter(torch.ones(scale_shape)) 13 | self.softmax = nn.Softmax(dim=1) 14 | 15 | def forward(self, x): 16 | """ 17 | Args: 18 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
21 | """ 22 | x = self.gemm(x) 23 | x = self.bn(x) 24 | x = self.scale * x 25 | x = self.softmax(x) 26 | return x 27 | 28 | batch_size = 128 29 | in_features = 1024 30 | out_features = 512 31 | bn_eps = 1e-5 32 | bn_momentum = 0.1 33 | scale_shape = (1,) 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, in_features)] 37 | 38 | def get_init_inputs(): 39 | return [in_features, out_features, bn_eps, bn_momentum, scale_shape] -------------------------------------------------------------------------------- /KernelBench/level2/86_Matmul_Divide_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a matrix multiplication, divides by a scalar, and applies GELU activation. 7 | """ 8 | def __init__(self, input_size, output_size, divisor): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(input_size, output_size) 11 | self.divisor = divisor 12 | 13 | def forward(self, x): 14 | """ 15 | Args: 16 | x (torch.Tensor): Input tensor of shape (batch_size, input_size). 17 | Returns: 18 | torch.Tensor: Output tensor of shape (batch_size, output_size). 19 | """ 20 | x = self.linear(x) 21 | x = x / self.divisor 22 | x = torch.nn.functional.gelu(x) 23 | return x 24 | 25 | batch_size = 128 26 | input_size = 512 27 | output_size = 1024 28 | divisor = 10.0 29 | 30 | def get_inputs(): 31 | return [torch.randn(batch_size, input_size)] 32 | 33 | def get_init_inputs(): 34 | return [input_size, output_size, divisor] -------------------------------------------------------------------------------- /KernelBench/level2/87_Conv2d_Subtract_Subtract_Mish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, subtracts two values, applies Mish activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, subtract_value_1, subtract_value_2): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 11 | self.subtract_value_1 = subtract_value_1 12 | self.subtract_value_2 = subtract_value_2 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | x = x - self.subtract_value_1 17 | x = x - self.subtract_value_2 18 | x = torch.nn.functional.mish(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | height, width = 32, 32 25 | kernel_size = 3 26 | subtract_value_1 = 0.5 27 | subtract_value_2 = 0.2 28 | 29 | def get_inputs(): 30 | return [torch.randn(batch_size, in_channels, height, width)] 31 | 32 | def get_init_inputs(): 33 | return [in_channels, out_channels, kernel_size, subtract_value_1, subtract_value_2] -------------------------------------------------------------------------------- /KernelBench/level2/88_Gemm_GroupNorm_Swish_Multiply_Swish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a GEMM, GroupNorm, Swish, Multiply, and Swish operations. 
7 | """ 8 | def __init__(self, in_features, out_features, num_groups, multiply_weight_shape): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.group_norm = nn.GroupNorm(num_groups, out_features) 12 | self.multiply_weight = nn.Parameter(torch.randn(multiply_weight_shape)) 13 | 14 | def forward(self, x): 15 | # (batch_size, in_features) -> (batch_size, out_features) 16 | x = self.gemm(x) 17 | # (batch_size, out_features) -> (batch_size, out_features) 18 | x = self.group_norm(x) 19 | # (batch_size, out_features) -> (batch_size, out_features) 20 | x = x * torch.sigmoid(x) 21 | # (batch_size, out_features) -> (batch_size, out_features) 22 | x = x * self.multiply_weight 23 | # (batch_size, out_features) -> (batch_size, out_features) 24 | x = x * torch.sigmoid(x) 25 | return x 26 | 27 | batch_size = 128 28 | in_features = 512 29 | out_features = 1024 30 | num_groups = 16 31 | multiply_weight_shape = (out_features,) 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_features)] 35 | 36 | def get_init_inputs(): 37 | return [in_features, out_features, num_groups, multiply_weight_shape] -------------------------------------------------------------------------------- /KernelBench/level2/90_Conv3d_LeakyReLU_Sum_Clamp_GELU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a 3D convolution, applies LeakyReLU, sums with a tensor, clamps, and applies GELU activation. 7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, sum_tensor_shape): 9 | super(Model, self).__init__() 10 | self.conv = nn.Conv3d(in_channels, out_channels, kernel_size) 11 | self.sum_tensor = nn.Parameter(torch.randn(sum_tensor_shape)) 12 | 13 | def forward(self, x): 14 | x = self.conv(x) 15 | x = torch.nn.functional.leaky_relu(x, negative_slope=0.2) 16 | x = x + self.sum_tensor 17 | x = torch.clamp(x, min=-1.0, max=1.0) 18 | x = torch.nn.functional.gelu(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_channels = 3 23 | out_channels = 16 24 | depth, height, width = 16, 32, 32 25 | kernel_size = 3 26 | sum_tensor_shape = (out_channels, 1, 1, 1) 27 | 28 | def get_inputs(): 29 | return [torch.randn(batch_size, in_channels, depth, height, width)] 30 | 31 | def get_init_inputs(): 32 | return [in_channels, out_channels, kernel_size, sum_tensor_shape] -------------------------------------------------------------------------------- /KernelBench/level2/91_ConvTranspose2d_Softmax_BiasAdd_Scaling_Sigmoid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, applies softmax, adds a bias term, scales the result, and applies sigmoid. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape, scaling_factor): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, output_padding=output_padding) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.scaling_factor = scaling_factor 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = torch.softmax(x, dim=1) 17 | x = x + self.bias 18 | x = x * self.scaling_factor 19 | x = torch.sigmoid(x) 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 32 24 | out_channels = 64 25 | height, width = 16, 16 26 | kernel_size = 4 27 | stride = 2 28 | padding = 1 29 | output_padding = 1 30 | bias_shape = (out_channels, 1, 1) 31 | scaling_factor = 2.0 32 | 33 | def get_inputs(): 34 | return [torch.randn(batch_size, in_channels, height, width)] 35 | 36 | def get_init_inputs(): 37 | return [in_channels, out_channels, kernel_size, stride, padding, output_padding, bias_shape, scaling_factor] -------------------------------------------------------------------------------- /KernelBench/level2/92_Conv2d_GroupNorm_Tanh_HardSwish_ResidualAdd_LogSumExp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a convolution, applies Group Normalization, Tanh, HardSwish, 7 | Residual Addition, and LogSumExp. 8 | """ 9 | def __init__(self, in_channels, out_channels, kernel_size, groups, eps=1e-5): 10 | super(Model, self).__init__() 11 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size) 12 | self.group_norm = nn.GroupNorm(groups, out_channels, eps=eps) 13 | self.tanh = nn.Tanh() 14 | self.hard_swish = nn.Hardswish() 15 | 16 | def forward(self, x): 17 | # Convolution 18 | x_conv = self.conv(x) 19 | # Group Normalization 20 | x_norm = self.group_norm(x_conv) 21 | # Tanh 22 | x_tanh = self.tanh(x_norm) 23 | # HardSwish 24 | x_hard_swish = self.hard_swish(x_tanh) 25 | # Residual Addition 26 | x_res = x_conv + x_hard_swish 27 | # LogSumExp 28 | x_logsumexp = torch.logsumexp(x_res, dim=1, keepdim=True) 29 | return x_logsumexp 30 | 31 | batch_size = 128 32 | in_channels = 3 33 | out_channels = 16 34 | height, width = 32, 32 35 | kernel_size = 3 36 | groups = 8 37 | 38 | def get_inputs(): 39 | return [torch.randn(batch_size, in_channels, height, width)] 40 | 41 | def get_init_inputs(): 42 | return [in_channels, out_channels, kernel_size, groups] -------------------------------------------------------------------------------- /KernelBench/level2/93_ConvTranspose2d_Add_Min_GELU_Multiply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a transposed convolution, adds a value, takes the minimum, applies GELU, and multiplies by a value. 
7 | """ 8 | def __init__(self, in_channels, out_channels, kernel_size, stride, add_value, multiply_value): 9 | super(Model, self).__init__() 10 | self.conv_transpose = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride) 11 | self.add_value = add_value 12 | self.multiply_value = multiply_value 13 | 14 | def forward(self, x): 15 | x = self.conv_transpose(x) 16 | x = x + self.add_value 17 | x = torch.min(x, torch.tensor(0.0)) 18 | x = torch.nn.functional.gelu(x) 19 | x = x * self.multiply_value 20 | return x 21 | 22 | batch_size = 128 23 | in_channels = 32 24 | out_channels = 16 25 | height, width = 32, 32 26 | kernel_size = 4 27 | stride = 2 28 | add_value = 0.5 29 | multiply_value = 2.0 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_channels, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [in_channels, out_channels, kernel_size, stride, add_value, multiply_value] -------------------------------------------------------------------------------- /KernelBench/level2/94_Gemm_BiasAdd_Hardtanh_Mish_GroupNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model that performs a GEMM, BiasAdd, Hardtanh, Mish, and GroupNorm operations in sequence. 7 | """ 8 | def __init__(self, in_features, out_features, bias_shape, num_groups): 9 | super(Model, self).__init__() 10 | self.gemm = nn.Linear(in_features, out_features) 11 | self.bias = nn.Parameter(torch.randn(bias_shape)) 12 | self.hardtanh = nn.Hardtanh() 13 | self.mish = nn.Mish() 14 | self.groupnorm = nn.GroupNorm(num_groups=num_groups, num_channels=out_features) 15 | 16 | def forward(self, x): 17 | """ 18 | Args: 19 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 20 | Returns: 21 | torch.Tensor: Output tensor of shape (batch_size, out_features). 22 | """ 23 | x = self.gemm(x) 24 | x = x + self.bias 25 | x = self.hardtanh(x) 26 | x = self.mish(x) 27 | x = self.groupnorm(x) 28 | return x 29 | 30 | 31 | batch_size = 128 32 | in_features = 512 33 | out_features = 1024 34 | bias_shape = (out_features,) 35 | num_groups = 32 36 | 37 | def get_inputs(): 38 | return [torch.randn(batch_size, in_features)] 39 | 40 | def get_init_inputs(): 41 | return [in_features, out_features, bias_shape, num_groups] -------------------------------------------------------------------------------- /KernelBench/level2/95_Matmul_Add_Swish_Tanh_GELU_Hardtanh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, adds a value, applies Swish, Tanh, GELU, and Hardtanh activation functions. 
7 | """ 8 | def __init__(self, in_features, out_features, add_value_shape): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.add_value = nn.Parameter(torch.randn(add_value_shape)) 12 | 13 | def forward(self, x): 14 | x = self.matmul(x) 15 | x = x + self.add_value 16 | x = torch.sigmoid(x) * x # Swish 17 | x = torch.tanh(x) 18 | x = torch.nn.functional.gelu(x) # GELU 19 | x = torch.nn.functional.hardtanh(x, min_val=-1, max_val=1) # Hardtanh 20 | return x 21 | 22 | batch_size = 128 23 | in_features = 1024 24 | out_features = 512 25 | add_value_shape = (out_features,) 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_features)] 29 | 30 | def get_init_inputs(): 31 | return [in_features, out_features, add_value_shape] -------------------------------------------------------------------------------- /KernelBench/level2/97_Matmul_BatchNorm_BiasAdd_Divide_Swish.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication, batch normalization, bias addition, division, and Swish activation. 7 | """ 8 | def __init__(self, in_features, out_features, bn_eps=1e-5, bn_momentum=0.1, bias_shape=(1,), divide_value=1.0): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.bn = nn.BatchNorm1d(out_features, eps=bn_eps, momentum=bn_momentum) 12 | self.bias = nn.Parameter(torch.randn(bias_shape)) 13 | self.divide_value = divide_value 14 | 15 | def forward(self, x): 16 | x = self.matmul(x) 17 | x = self.bn(x) 18 | x = x + self.bias 19 | x = x / self.divide_value 20 | x = x * torch.sigmoid(x) 21 | return x 22 | 23 | batch_size = 128 24 | in_features = 1024 25 | out_features = 512 26 | bn_eps = 1e-5 27 | bn_momentum = 0.1 28 | bias_shape = (1,) 29 | divide_value = 1.0 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, in_features)] 33 | 34 | def get_init_inputs(): 35 | return [in_features, out_features, bn_eps, bn_momentum, bias_shape, divide_value] -------------------------------------------------------------------------------- /KernelBench/level2/98_Matmul_AvgPool_GELU_Scale_Max.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | A model implementing the pattern "Matmul_AvgPool_GELU_Scale_Max". 7 | """ 8 | def __init__(self, in_features, out_features, pool_kernel_size, scale_factor): 9 | super(Model, self).__init__() 10 | self.matmul = nn.Linear(in_features, out_features) 11 | self.avg_pool = nn.AvgPool1d(kernel_size=pool_kernel_size) 12 | self.scale_factor = scale_factor 13 | 14 | def forward(self, x): 15 | """ 16 | Args: 17 | x (torch.Tensor): Input tensor of shape (batch_size, in_features). 18 | 19 | Returns: 20 | torch.Tensor: Output tensor of shape (batch_size, out_features). 
21 | """ 22 | x = self.matmul(x) 23 | x = self.avg_pool(x.unsqueeze(1)).squeeze(1) 24 | x = torch.nn.functional.gelu(x) 25 | x = x * self.scale_factor 26 | x = torch.max(x, dim=1).values 27 | return x 28 | 29 | batch_size = 128 30 | in_features = 512 31 | out_features = 256 32 | pool_kernel_size = 4 33 | scale_factor = 2.0 34 | 35 | def get_inputs(): 36 | return [torch.randn(batch_size, in_features)] 37 | 38 | def get_init_inputs(): 39 | return [in_features, out_features, pool_kernel_size, scale_factor] -------------------------------------------------------------------------------- /KernelBench/level2/99_Matmul_GELU_Softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Simple model that performs a matrix multiplication, applies GELU, and then applies Softmax. 7 | """ 8 | def __init__(self, in_features, out_features): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | 12 | def forward(self, x): 13 | x = self.linear(x) 14 | x = torch.nn.functional.gelu(x) 15 | x = torch.nn.functional.softmax(x, dim=1) 16 | return x 17 | 18 | batch_size = 128 19 | in_features = 100 20 | out_features = 10 21 | 22 | def get_inputs(): 23 | return [torch.randn(batch_size, in_features)] 24 | 25 | def get_init_inputs(): 26 | return [in_features, out_features] -------------------------------------------------------------------------------- /KernelBench/level2/9_Matmul_Subtract_Multiply_ReLU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Model(nn.Module): 5 | """ 6 | Model that performs a matrix multiplication, subtraction, multiplication, and ReLU activation. 
7 | """ 8 | def __init__(self, in_features, out_features, subtract_value, multiply_value): 9 | super(Model, self).__init__() 10 | self.linear = nn.Linear(in_features, out_features) 11 | self.subtract_value = subtract_value 12 | self.multiply_value = multiply_value 13 | 14 | def forward(self, x): 15 | x = self.linear(x) 16 | x = x - self.subtract_value 17 | x = x * self.multiply_value 18 | x = torch.relu(x) 19 | return x 20 | 21 | batch_size = 128 22 | in_features = 10 23 | out_features = 5 24 | subtract_value = 2.0 25 | multiply_value = 1.5 26 | 27 | def get_inputs(): 28 | return [torch.randn(batch_size, in_features)] 29 | 30 | def get_init_inputs(): 31 | return [in_features, out_features, subtract_value, multiply_value] -------------------------------------------------------------------------------- /KernelBench/level3/13_DenseNet121TransitionLayer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | def __init__(self, num_input_features: int, num_output_features: int): 7 | """ 8 | :param num_input_features: The number of input feature maps 9 | :param num_output_features: The number of output feature maps 10 | """ 11 | super(Model, self).__init__() 12 | self.transition = nn.Sequential( 13 | nn.BatchNorm2d(num_input_features), 14 | nn.ReLU(inplace=True), 15 | nn.Conv2d(num_input_features, num_output_features, kernel_size=1, bias=False), 16 | nn.AvgPool2d(kernel_size=2, stride=2) 17 | ) 18 | 19 | def forward(self, x): 20 | """ 21 | :param x: Input tensor of shape (batch_size, num_input_features, height, width) 22 | :return: Downsampled tensor with reduced number of feature maps 23 | """ 24 | return self.transition(x) 25 | 26 | batch_size = 10 27 | num_input_features = 32 28 | num_output_features = 64 29 | height, width = 224, 224 30 | 31 | def get_inputs(): 32 | return [torch.randn(batch_size, num_input_features, height, width)] 33 | 34 | def get_init_inputs(): 35 | return [num_input_features, num_output_features] 36 | -------------------------------------------------------------------------------- /KernelBench/level3/1_MLP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | def __init__(self, input_size, layer_sizes, output_size): 7 | """ 8 | :param input_size: The number of input features 9 | :param layer_sizes: A list of ints containing the sizes of each hidden layer 10 | :param output_size: The number of output features 11 | """ 12 | super(Model, self).__init__() 13 | 14 | layers = [] 15 | current_input_size = input_size 16 | 17 | for layer_size in layer_sizes: 18 | layers.append(nn.Linear(current_input_size, layer_size)) 19 | layers.append(nn.ReLU()) 20 | current_input_size = layer_size 21 | 22 | layers.append(nn.Linear(current_input_size, output_size)) 23 | 24 | self.network = nn.Sequential(*layers) 25 | 26 | def forward(self, x): 27 | """ 28 | :param x: The input tensor, shape (batch_size, input_size) 29 | :return: The output tensor, shape (batch_size, output_size) 30 | """ 31 | return self.network(x) 32 | 33 | # Test code 34 | batch_size = 1 35 | input_size = 1000 36 | layer_sizes = [400, 800] 37 | output_size = 500 38 | 39 | def get_inputs(): 40 | return [torch.randn(batch_size, input_size)] 41 | 42 | def get_init_inputs(): 43 | return [input_size, layer_sizes, output_size] 
-------------------------------------------------------------------------------- /KernelBench/level3/31_VisionAttention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | def __init__(self, embed_dim, num_heads): 7 | """ 8 | Attention Block using Multihead Self-Attention. 9 | :param embed_dim: Embedding dimension (the number of channels) 10 | :param num_heads: Number of attention heads 11 | """ 12 | super(Model, self).__init__() 13 | self.attn = nn.MultiheadAttention(embed_dim, num_heads) 14 | self.norm = nn.LayerNorm(embed_dim) 15 | 16 | def forward(self, x): 17 | """ 18 | Forward pass of the AttentionBlock. 19 | :param x: Input tensor of shape (B, C, H, W) 20 | :return: Output tensor of the same shape (B, C, H, W) 21 | """ 22 | B, C, H, W = x.shape 23 | x = x.view(B, C, H * W).permute(2, 0, 1) # (seq_len, batch_size, embed_dim) 24 | attn_output, _ = self.attn(x, x, x) 25 | x = self.norm(attn_output + x) # (seq_len, batch_size, embed_dim) 26 | x = x.permute(1, 2, 0).view(B, C, H, W) 27 | return x 28 | 29 | embed_dim = 128 30 | num_heads = 4 31 | batch_size = 2 32 | num_channels = embed_dim 33 | image_height = 128 34 | image_width = 128 35 | 36 | def get_inputs(): 37 | return [torch.randn(batch_size, num_channels, image_height, image_width)] 38 | 39 | def get_init_inputs(): 40 | return [embed_dim, num_heads] -------------------------------------------------------------------------------- /KernelBench/level4/10_google-bigbird-roberta-base_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/bigbird-roberta-base" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/11_google-electra-small-discriminator_bs1_seq511.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/electra-small-discriminator" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 511 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] 
-------------------------------------------------------------------------------- /KernelBench/level4/12_google-electra-small-discriminator_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/electra-small-discriminator" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/13_google-reformer-enwik8_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/reformer-enwik8" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/14_google-electra-small-discriminator_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/electra-small-discriminator" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/15_google-reformer-enwik8_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = 
AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/reformer-enwik8" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/16_gpt2_bs1_seq1023.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "gpt2" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 1023 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/17_facebook-bart-large_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/bart-large" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/18_EleutherAI-gpt-neo-2p7B_bs512_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "EleutherAI/gpt-neo-2.7B" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 512 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] 
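Note: every problem file in this dump, including the Hugging Face-backed level4 problems above, exposes the same three entry points: a Model class, get_inputs(), and get_init_inputs(). The snippet below is only an illustrative sketch of how a harness might consume such a problem module; the helper name run_reference and the device handling are assumptions made for illustration, not the actual KernelBench evaluation code.

import torch

def run_reference(problem_module):
    # Illustrative only: build the reference model from its init inputs and run one forward pass.
    model = problem_module.Model(*problem_module.get_init_inputs()).cuda().eval()
    inputs = [t.cuda() if isinstance(t, torch.Tensor) else t for t in problem_module.get_inputs()]
    with torch.no_grad():
        return model(*inputs)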
-------------------------------------------------------------------------------- /KernelBench/level4/19_gpt2_bs1024_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "gpt2" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 1024 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/1_EleutherAI-gpt-neo-2p7B_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "EleutherAI/gpt-neo-2.7B" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/20_facebook-bart-large_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/bart-large" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/2_facebook-opt-1p3b_bs1_seq2047.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 
| return self.model(x).logits 14 | 15 | model_name = "facebook/opt-1.3b" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 2047 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/3_EleutherAI-gpt-neo-2p7B_bs1_seq2047.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "EleutherAI/gpt-neo-2.7B" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 2047 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/4_facebook-opt-1p3b_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/opt-1.3b" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/5_google-bigbird-roberta-base_bs1_seq4095.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/bigbird-roberta-base" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 4095 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/6_facebook-bart-large_bs1_seq1023.py: 
-------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/bart-large" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 1023 19 | batch_size = 1 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/7_gpt2_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "gpt2" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/8_facebook-opt-1p3b_bs512_seq32.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "facebook/opt-1.3b" 16 | config = AutoConfig.from_pretrained(model_name) 17 | vocab_size = config.vocab_size 18 | sequence_length = 32 19 | batch_size = 512 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /KernelBench/level4/9_google-bigbird-roberta-base_bs32_seq256.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from transformers import AutoModelForCausalLM, AutoConfig 4 | 5 | class Model(torch.nn.Module): 6 | def __init__(self, model_name, config): 7 | super().__init__() 8 | self.model_name = model_name 9 | self.config = config 10 | self.model = AutoModelForCausalLM.from_pretrained(self.model_name, config=self.config) 11 | 12 | def forward(self, x): 13 | return self.model(x).logits 14 | 15 | model_name = "google/bigbird-roberta-base" 16 | config = AutoConfig.from_pretrained(model_name) 17 | 
vocab_size = config.vocab_size 18 | sequence_length = 256 19 | batch_size = 32 20 | 21 | def get_inputs(): 22 | inputs = torch.randint(0, vocab_size, (batch_size, sequence_length)) 23 | return [inputs] 24 | 25 | def get_init_inputs(): 26 | return [model_name, config] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Anne Ouyang, Simon Guo, Azalia Mirhoseini 4 | Scaling Intelligence Lab, Stanford University 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. -------------------------------------------------------------------------------- /assets/figures/KernelBenchMascot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/assets/figures/KernelBenchMascot.png -------------------------------------------------------------------------------- /assets/figures/KernelBenchWorkFlow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/assets/figures/KernelBenchWorkFlow.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic 2 | modal 3 | numpy 4 | openai 5 | packaging 6 | pydra_config 7 | torch==2.5.0 8 | tqdm 9 | datasets 10 | transformers 11 | google-generativeai 12 | together 13 | pytest 14 | ninja 15 | archon-ai 16 | einops -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == "__main__": 4 | setup( 5 | name="src", 6 | version="0.0.1", 7 | packages=["src"], 8 | ) 9 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/__init__.py -------------------------------------------------------------------------------- /src/prompts/README.md: 
-------------------------------------------------------------------------------- 1 | 2 | This folder includes PyTorch modules paired with CUDA kernels, which are used as in-context examples in KernelBench. 3 | 4 | 5 | 6 | Acknowledgements: 7 | - Fused GeLU and Tiled Matmul: [Christian Mills, GPU MODE Lecture 04](https://christianjmills.com/posts/cuda-mode-notes/lecture-004/) 8 | - Minimal Flash Attention: [Peter Kim, Minimal Flash Attention](https://github.com/tspeterkim/flash-attention-minimal/tree/main) 9 | 10 | The examples currently included cover element-wise addition, fused GeLU (tanh approximation), tiled matrix multiplication, a minimal flash attention, and a small MNIST-style CNN. 11 | [TODO] Table detailing content and speedups of each example -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_add.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, a, b): 11 | return a + b 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | a = torch.randn(1, 128).cuda() 17 | b = torch.randn(1, 128).cuda() 18 | return [a, b] 19 | 20 | 21 | def get_init_inputs(): 22 | # randomly generate tensors required for initialization based on the model architecture 23 | return [] -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_flash_attn.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Model(nn.Module): 8 | """ 9 | Model that performs an attention operation 10 | """ 11 | def __init__(self) -> None: 12 | super().__init__() 13 | 14 | def forward(self, Q, K, V): 15 | att = (Q @ K.transpose(-2, -1) * (1.0 / math.sqrt(K.size(-1)))) 16 | att = F.softmax(att, dim=-1) 17 | y = att @ V 18 | return y 19 | 20 | batch_size = 32 21 | n_head = 12 22 | seq_len = 64 23 | head_embd = 32 24 | 25 | def get_inputs(): 26 | # randomly generate input tensors based on the model architecture 27 | Q = torch.randn(batch_size, n_head, seq_len, head_embd) 28 | K = torch.randn(batch_size, n_head, seq_len, head_embd) 29 | V = torch.randn(batch_size, n_head, seq_len, head_embd) 30 | return [Q, K, V] 31 | 32 | 33 | def get_init_inputs(): 34 | # randomly generate tensors required for initialization based on the model architecture 35 | return [] 36 | -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_fuse_gelu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, x): 11 | return F.gelu(x, approximate='tanh') 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | x = torch.randn(1024, 1024).cuda() 17 | return [x] 18 | 19 | 20 | def get_init_inputs(): 21 | # randomly generate tensors required for initialization based on the model architecture 22 | return [] 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_mnist2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn
as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 10 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 11 | self.fc1 = nn.Linear(320, 50) 12 | self.fc2 = nn.Linear(50, 10) 13 | 14 | def forward(self, x): 15 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 16 | x = F.relu(F.max_pool2d(self.conv2(x), 2)) 17 | x = x.view(-1, 320) 18 | x = F.relu(self.fc1(x)) 19 | x = self.fc2(x) 20 | return F.log_softmax(x, dim=1) 21 | 22 | 23 | def get_inputs(): 24 | # randomly generate input tensors based on the model architecture 25 | x = torch.randn(1, 1, 28, 28).cuda() 26 | return [x] 27 | 28 | 29 | def get_init_inputs(): 30 | # randomly generate tensors required for initialization based on the model architecture 31 | return [] -------------------------------------------------------------------------------- /src/prompts/few_shot/model_ex_tiled_matmul.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, a, b): 11 | return a@b 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | a = torch.randn(1024, 1024).cuda() 17 | b = torch.randn(1024, 1024).cuda() 18 | return [a, b] 19 | 20 | 21 | def get_init_inputs(): 22 | # randomly generate tensors required for initialization based on the model architecture 23 | return [] 24 | 25 | -------------------------------------------------------------------------------- /src/prompts/model_ex_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/prompts/model_ex_0.py -------------------------------------------------------------------------------- /src/prompts/model_ex_1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, a, b): 11 | return a + b 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | a = torch.randn(1, 128).cuda() 17 | b = torch.randn(1, 128).cuda() 18 | return [a, b] 19 | 20 | 21 | def get_init_inputs(): 22 | # randomly generate tensors required for initialization based on the model architecture 23 | return [] 24 | -------------------------------------------------------------------------------- /src/prompts/model_ex_2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class Model(nn.Module): 6 | def __init__(self) -> None: 7 | super().__init__() 8 | self.op1 = ...... 9 | self.op2 = ...... 10 | self.op3 = ...... 11 | self.op4 = ...... 12 | self.op5 = ...... 13 | self.op6 = ...... 14 | 15 | def forward(self, x): 16 | x = self.op1(x, ......) 17 | x = self.op2(x, ......) 18 | x = self.op3(x, ......) 19 | x = self.op4(x, ......) 20 | x = self.op5(x, ......) 21 | x = self.op6(x, ......) 
22 | return x 23 | -------------------------------------------------------------------------------- /src/prompts/model_ex_add.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self) -> None: 8 | super().__init__() 9 | 10 | def forward(self, a, b): 11 | return a + b 12 | 13 | 14 | def get_inputs(): 15 | # randomly generate input tensors based on the model architecture 16 | a = torch.randn(1, 128).cuda() 17 | b = torch.randn(1, 128).cuda() 18 | return [a, b] 19 | 20 | 21 | def get_init_inputs(): 22 | # randomly generate tensors required for initialization based on the model architecture 23 | return [] 24 | -------------------------------------------------------------------------------- /src/prompts/model_new_ex_0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/prompts/model_new_ex_0.py -------------------------------------------------------------------------------- /src/scratch/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/scratch/model.py -------------------------------------------------------------------------------- /src/scratch/model_new.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScalingIntelligence/KernelBench/21fbe5a642898cd60b8f60c7aefb43d475e11f33/src/scratch/model_new.py -------------------------------------------------------------------------------- /src/unit_tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | from src.dataset import get_code_hash 4 | 5 | """ 6 | Usage 7 | pytest test_dataset.py 8 | """ 9 | 10 | 11 | def test_get_code_hash(): 12 | """ 13 | Test collision and equivalence checking 14 | """ 15 | 16 | code_snippet_batch_1_v1 = """ 17 | import torch 18 | # This is for a single batch 19 | ''' 20 | Some random multi-line comment 21 | ''' 22 | B = 1 23 | """ 24 | 25 | code_snippet_batch_1_v2 = """ 26 | import torch 27 | ''' 28 | More problem descriptions (updated) 29 | ''' 30 | # low batch setting 31 | 32 | B = 1 33 | """ 34 | 35 | code_snippet_batch_64 = """ 36 | import torch 37 | # This is for a single batch 38 | ''' 39 | Some random multi-line comment 40 | ''' 41 | B = 64 42 | """ 43 | 44 | assert get_code_hash(code_snippet_batch_1_v1) == get_code_hash(code_snippet_batch_1_v2), \ 45 | "Hash should be equal for semantically equivalent code with different comments" 46 | 47 | assert get_code_hash(code_snippet_batch_1_v1) != get_code_hash(code_snippet_batch_64), \ 48 | "Hash should differ for code with different batch sizes" --------------------------------------------------------------------------------
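The test above pins down the key invariant of src.dataset.get_code_hash: hashes must ignore comments and docstring-style strings but change when real code (such as the batch size) changes. Below is a hypothetical sketch of one way such a hash could be computed; it is illustrative only, is not the actual src/dataset.py implementation, and the helper name sketch_code_hash is invented here.

import ast
import hashlib

def sketch_code_hash(code: str) -> str:
    # Parsing to an AST already discards '#' comments; the transformer below also drops
    # bare string expressions, i.e. the docstring-style multi-line comments in the test.
    class _StripDocstrings(ast.NodeTransformer):
        def visit_Expr(self, node):
            if isinstance(node.value, ast.Constant) and isinstance(node.value.value, str):
                return None
            return node

    tree = _StripDocstrings().visit(ast.parse(code))
    return hashlib.sha256(ast.unparse(tree).encode()).hexdigest()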