├── .gitignore ├── .gitmodules ├── README.md ├── iree ├── generate.sh ├── mlir │ ├── linear.py.mlir │ └── minilm.py.mlir ├── models │ ├── driver │ │ └── __init__.py │ ├── linear.py │ ├── minilm.py │ └── resnet.py ├── prepare.sh └── scripts │ └── dump_linalg.sh ├── pytorch ├── generate.sh ├── lib │ └── torch_mlir_compile.py ├── prepare.sh ├── torch-dynamo │ ├── mlir │ │ ├── bert.mlir │ │ ├── conv.mlir │ │ ├── linear.mlir │ │ ├── mnist.mlir │ │ └── resnet18.mlir │ └── models │ │ ├── bert.py │ │ ├── conv.py │ │ ├── linear.py │ │ ├── mnist.py │ │ └── resnet18.py └── torch-script │ ├── mlir │ ├── conv.mlir │ ├── linear.mlir │ └── resnet18.mlir │ └── models │ ├── conv.py │ ├── linear.py │ └── resnet18.py └── tensorflow ├── mlir ├── conv.mlir └── linear.mlir ├── models ├── conv.py └── linear.py └── prepare.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # TF Saved Models 2 | .saved 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | mlir_venv 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plaidml/mlir-generator/4457cbf8207f595fdc049b09954c5c515acb28bf/.gitmodules -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLIR Generator 2 | 3 | ## Rationale 4 | 5 | This repository is meant to help MLIR pass writers generate MLIR files 6 | that often come from ML/HPC sources, to allow testing and developing their 7 | passes (as well as creating unit tests) with the current state of the known 8 | front-ends. 9 | 10 | ## How To Use 11 | 12 | _Warning: This is very much work-in-progress, don't expect half of it to work._ 13 | 14 | Each front-end has a prepare script, for example: 15 | ``` 16 | cd pytorch 17 | ./prepare.sh 18 | ``` 19 | 20 | It also has models ready to use (once prepared): 21 | ``` 22 | source venv/bin/activate 23 | python models/linear.py 24 | ``` 25 | 26 | Results are in `/mlir`. 27 | 28 | ## Development 29 | 30 | ### Adding a new front-end 31 | 32 | If you want to add a new front-end, you need to: 33 | 1. Add a new directory for the front-end 34 | 2.
Add a `prepare.sh` script that installs it and prepares the virtualenv 35 | 3. Add a `generate.sh` script that generates all the MLIR files 36 | 4. Add models as Python files in `models`, run the export, and save them as MLIR files in `mlir` 37 | -------------------------------------------------------------------------------- /iree/generate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Generate MLIR files from IREE 4 | # Run from inside the repo already, no need to check 5 | 6 | ROOT=$(git rev-parse --show-toplevel) 7 | if [ ! -d "$ROOT" ]; then 8 | echo "Cannot find repository root" 9 | exit 1 10 | fi 11 | 12 | # For each top-level Python script (not the driver package), generate an MLIR file 13 | # with the same name and different extension 14 | pushd "$ROOT/iree/models" || exit 1 15 | for MODEL in $(find . -maxdepth 1 -type f -name \*.py); do 16 | OUT="${MODEL%.py}.mlir" 17 | python "$MODEL" > "../mlir/$OUT" 18 | done 19 | popd 20 | -------------------------------------------------------------------------------- /iree/mlir/linear.py.mlir: -------------------------------------------------------------------------------- 1 | func.func @predict(%arg0: !iree_input.list) -> !iree_input.buffer_view attributes {iree.abi = "{\22a\22:[[\22slist\22,[\22ndarray\22,\22f32\22,2,1,128]]],\22r\22:[[\22ndarray\22,\22f32\22,3,1,1,10]],\22v\22:1}"} { 2 | %c0 = arith.constant 0 : index 3 | %0 = iree_input.list.get %arg0[%c0] : !iree_input.list -> !iree_input.buffer_view 4 | %1 = iree_input.cast.buffer_view_to_tensor %0 : !iree_input.buffer_view -> tensor<1x128xf32> 5 | %2 = call @__inference_predict_670(%1) : (tensor<1x128xf32>) -> tensor<1x1x10xf32> 6 | %3 = iree_input.cast.tensor_to_buffer_view %2 : tensor<1x1x10xf32> -> !iree_input.buffer_view 7 | return %3 : !iree_input.buffer_view 8 | } 9 | 10 | func.func private @__inference_predict_670(%arg0: tensor<1x128xf32> {tf._user_specified_name = "x"}) -> tensor<1x1x10xf32> attributes
{tf._construction_context = "kEagerRuntime", tf._input_shapes = [#tf_type.shape<1x128>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>, #tf_type.shape<>], tf.signature.is_stateful} { 11 | %cst = arith.constant dense<-0.000000e+00> : tensor 12 | %cst_0 = arith.constant dense<0xFF800000> : tensor 13 | %cst_1 = arith.constant dense<0.000000e+00> : tensor 14 | %0 = ml_program.global_load @__sm_node14__layers.0.b : tensor<256xf32> 15 | %1 = ml_program.global_load @__sm_node10__output_layer.b : tensor<10xf32> 16 | %2 = ml_program.global_load @__sm_node7__input_layer.b : tensor<256xf32> 17 | %3 = ml_program.global_load @__sm_node13__layers.0.w : tensor<256x256xf32> 18 | %4 = ml_program.global_load @__sm_node9__output_layer.w : tensor<256x10xf32> 19 | %5 = ml_program.global_load @__sm_node6__input_layer.w : tensor<128x256xf32> 20 | %6 = tensor.expand_shape %arg0 [[0], [1, 2]] : tensor<1x128xf32> into tensor<1x1x128xf32> 21 | %7 = tensor.expand_shape %5 [[0, 1], [2]] : tensor<128x256xf32> into tensor<1x128x256xf32> 22 | %8 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 23 | %cst_2 = arith.constant 0.000000e+00 : f32 24 | %9 = linalg.fill ins(%cst_2 : f32) outs(%8 : tensor<1x1x256xf32>) -> tensor<1x1x256xf32> 25 | %10 = linalg.batch_matmul ins(%6, %7 : tensor<1x1x128xf32>, tensor<1x128x256xf32>) outs(%9 : tensor<1x1x256xf32>) -> tensor<1x1x256xf32> 26 | %11 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 27 | %12 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%2 : tensor<256xf32>) outs(%11 : tensor<1x1x256xf32>) { 28 | ^bb0(%arg1: f32): 29 | linalg.yield %arg1 : f32 30 | } -> tensor<1x1x256xf32> 31 | %13 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 32 | %14 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, 
affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%10, %12 : tensor<1x1x256xf32>, tensor<1x1x256xf32>) outs(%13 : tensor<1x1x256xf32>) { 33 | ^bb0(%arg1: f32): 34 | %61 = arith.addf %arg1, %arg2 : f32 35 | linalg.yield %61 : f32 36 | } -> tensor<1x1x256xf32> 37 | %15 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 38 | %16 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> ()>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst_1 : tensor) outs(%15 : tensor<1x1x256xf32>) { 39 | ^bb0(%arg1: f32): 40 | linalg.yield %arg1 : f32 41 | } -> tensor<1x1x256xf32> 42 | %17 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 43 | %18 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%14, %16 : tensor<1x1x256xf32>, tensor<1x1x256xf32>) outs(%17 : tensor<1x1x256xf32>) { 44 | ^bb0(%arg1: f32): 45 | %61 = arith.maxf %arg1, %arg2 : f32 46 | linalg.yield %61 : f32 47 | } -> tensor<1x1x256xf32> 48 | %19 = tensor.expand_shape %3 [[0, 1], [2]] : tensor<256x256xf32> into tensor<1x256x256xf32> 49 | %20 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 50 | %cst_3 = arith.constant 0.000000e+00 : f32 51 | %21 = linalg.fill ins(%cst_3 : f32) outs(%20 : tensor<1x1x256xf32>) -> tensor<1x1x256xf32> 52 | %22 = linalg.batch_matmul ins(%18, %19 : tensor<1x1x256xf32>, tensor<1x256x256xf32>) outs(%21 : tensor<1x1x256xf32>) -> tensor<1x1x256xf32> 53 | %23 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 54 | %24 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<256xf32>) outs(%23 : tensor<1x1x256xf32>) { 55 | ^bb0(%arg1: f32): 56 | linalg.yield %arg1 : f32 57 | } 
-> tensor<1x1x256xf32> 58 | %25 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 59 | %26 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%22, %24 : tensor<1x1x256xf32>, tensor<1x1x256xf32>) outs(%25 : tensor<1x1x256xf32>) { 60 | ^bb0(%arg1: f32): 61 | %61 = arith.addf %arg1, %arg2 : f32 62 | linalg.yield %61 : f32 63 | } -> tensor<1x1x256xf32> 64 | %27 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 65 | %28 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> ()>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst_1 : tensor) outs(%27 : tensor<1x1x256xf32>) { 66 | ^bb0(%arg1: f32): 67 | linalg.yield %arg1 : f32 68 | } -> tensor<1x1x256xf32> 69 | %29 = linalg.init_tensor [1, 1, 256] : tensor<1x1x256xf32> 70 | %30 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%26, %28 : tensor<1x1x256xf32>, tensor<1x1x256xf32>) outs(%29 : tensor<1x1x256xf32>) { 71 | ^bb0(%arg1: f32): 72 | %61 = arith.maxf %arg1, %arg2 : f32 73 | linalg.yield %61 : f32 74 | } -> tensor<1x1x256xf32> 75 | %31 = tensor.expand_shape %4 [[0, 1], [2]] : tensor<256x10xf32> into tensor<1x256x10xf32> 76 | %32 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 77 | %cst_4 = arith.constant 0.000000e+00 : f32 78 | %33 = linalg.fill ins(%cst_4 : f32) outs(%32 : tensor<1x1x10xf32>) -> tensor<1x1x10xf32> 79 | %34 = linalg.batch_matmul ins(%30, %31 : tensor<1x1x256xf32>, tensor<1x256x10xf32>) outs(%33 : tensor<1x1x10xf32>) -> tensor<1x1x10xf32> 80 | %35 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 81 | %36 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d2)>, 
affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%1 : tensor<10xf32>) outs(%35 : tensor<1x1x10xf32>) { 82 | ^bb0(%arg1: f32): 83 | linalg.yield %arg1 : f32 84 | } -> tensor<1x1x10xf32> 85 | %37 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 86 | %38 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%34, %36 : tensor<1x1x10xf32>, tensor<1x1x10xf32>) outs(%37 : tensor<1x1x10xf32>) { 87 | ^bb0(%arg1: f32): 88 | %61 = arith.addf %arg1, %arg2 : f32 89 | linalg.yield %61 : f32 90 | } -> tensor<1x1x10xf32> 91 | %39 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 92 | %40 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> ()>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst_1 : tensor) outs(%39 : tensor<1x1x10xf32>) { 93 | ^bb0(%arg1: f32): 94 | linalg.yield %arg1 : f32 95 | } -> tensor<1x1x10xf32> 96 | %41 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 97 | %42 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%38, %40 : tensor<1x1x10xf32>, tensor<1x1x10xf32>) outs(%41 : tensor<1x1x10xf32>) { 98 | ^bb0(%arg1: f32): 99 | %61 = arith.maxf %arg1, %arg2 : f32 100 | linalg.yield %61 : f32 101 | } -> tensor<1x1x10xf32> 102 | %cst_5 = arith.constant 0xFF800000 : f32 103 | %43 = linalg.init_tensor [1, 1] : tensor<1x1xf32> 104 | %44 = linalg.fill ins(%cst_5 : f32) outs(%43 : tensor<1x1xf32>) -> tensor<1x1xf32> 105 | %45 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%42 : 
tensor<1x1x10xf32>) outs(%44 : tensor<1x1xf32>) { 106 | ^bb0(%arg1: f32, %arg2: f32 loc(unknown)): 107 | %61 = tensor.from_elements %arg2 : tensor 108 | %62 = tensor.from_elements %arg1 : tensor 109 | %63 = tensor.extract %61[] : tensor 110 | %64 = tensor.extract %62[] : tensor 111 | %65 = arith.maxf %63, %64 : f32 112 | %66 = tensor.from_elements %65 : tensor 113 | %67 = tensor.extract %66[] : tensor 114 | linalg.yield %67 : f32 115 | } -> tensor<1x1xf32> 116 | %46 = tensor.expand_shape %45 [[0], [1, 2]] : tensor<1x1xf32> into tensor<1x1x1xf32> 117 | %47 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 118 | %48 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%46 : tensor<1x1x1xf32>) outs(%47 : tensor<1x1x10xf32>) { 119 | ^bb0(%arg1: f32): 120 | linalg.yield %arg1 : f32 121 | } -> tensor<1x1x10xf32> 122 | %49 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 123 | %50 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%42, %48 : tensor<1x1x10xf32>, tensor<1x1x10xf32>) outs(%49 : tensor<1x1x10xf32>) { 124 | ^bb0(%arg1: f32): 125 | %61 = arith.subf %arg1, %arg2 : f32 126 | linalg.yield %61 : f32 127 | } -> tensor<1x1x10xf32> 128 | %51 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 129 | %52 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%50 : tensor<1x1x10xf32>) outs(%51 : tensor<1x1x10xf32>) { 130 | ^bb0(%arg1: f32): 131 | %61 = math.exp %arg1 : f32 132 | linalg.yield %61 : f32 133 | } -> tensor<1x1x10xf32> 134 | %cst_6 = arith.constant -0.000000e+00 : f32 135 | %53 = linalg.init_tensor [1, 1] : tensor<1x1xf32> 136 | %54 = 
linalg.fill ins(%cst_6 : f32) outs(%53 : tensor<1x1xf32>) -> tensor<1x1xf32> 137 | %55 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%52 : tensor<1x1x10xf32>) outs(%54 : tensor<1x1xf32>) { 138 | ^bb0(%arg1: f32, %arg2: f32 loc(unknown)): 139 | %61 = tensor.from_elements %arg2 : tensor 140 | %62 = tensor.from_elements %arg1 : tensor 141 | %63 = tensor.extract %61[] : tensor 142 | %64 = tensor.extract %62[] : tensor 143 | %65 = arith.addf %63, %64 : f32 144 | %66 = tensor.from_elements %65 : tensor 145 | %67 = tensor.extract %66[] : tensor 146 | linalg.yield %67 : f32 147 | } -> tensor<1x1xf32> 148 | %56 = tensor.expand_shape %55 [[0], [1, 2]] : tensor<1x1xf32> into tensor<1x1x1xf32> 149 | %57 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 150 | %58 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%56 : tensor<1x1x1xf32>) outs(%57 : tensor<1x1x10xf32>) { 151 | ^bb0(%arg1: f32): 152 | linalg.yield %arg1 : f32 153 | } -> tensor<1x1x10xf32> 154 | %59 = linalg.init_tensor [1, 1, 10] : tensor<1x1x10xf32> 155 | %60 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%52, %58 : tensor<1x1x10xf32>, tensor<1x1x10xf32>) outs(%59 : tensor<1x1x10xf32>) { 156 | ^bb0(%arg1: f32): 157 | %61 = arith.divf %arg1, %arg2 : f32 158 | linalg.yield %61 : f32 159 | } -> tensor<1x1x10xf32> 160 | return %60 : tensor<1x1x10xf32> 161 | } 162 | 163 | -------------------------------------------------------------------------------- /iree/models/driver/__init__.py: -------------------------------------------------------------------------------- 1 | # Stolen from 
https://github.com/iree-org/iree-samples/blob/main/ModelCompiler/nlp_models/bert_small_run.py 2 | from iree.compiler import tf as tfc 3 | from iree.compiler import compile_str 4 | 5 | def build_module(model, exports): 6 | # Compile the model using IREE 7 | compiler_module = tfc.compile_module(model, exported_names = exports, import_only=True) 8 | backend = "llvm-cpu" 9 | args = ["--iree-llvm-target-cpu-features=host", 10 | "--iree-mhlo-demote-i64-to-i32=false", 11 | "--iree-flow-demote-i64-to-i32", 12 | "--mlir-print-ir-after=iree-mhlo-to-linalg-on-tensors"] 13 | backend_config = "local-task" 14 | flatbuffer_blob = compile_str(compiler_module, target_backends=[backend], extra_args=args, input_type="mhlo") 15 | -------------------------------------------------------------------------------- /iree/models/linear.py: -------------------------------------------------------------------------------- 1 | # Ref: https://www.tensorflow.org/guide/intro_to_modules 2 | import driver 3 | 4 | import tensorflow as tf 5 | 6 | # TODO: Make these into command-line arguments 7 | BATCH_SIZE = 1 8 | INPUT_LEN = 128 9 | HIDDEN_LEN = 256 10 | OUTPUT_LEN = 10 11 | NUM_LAYERS = 1 12 | 13 | class Dense(tf.Module): 14 | def __init__(self, in_features, out_features, name=None): 15 | super().__init__(name=name) 16 | self.w = tf.Variable( 17 | tf.random.normal([in_features, out_features]), name='w') 18 | self.b = tf.Variable(tf.zeros([out_features]), name='b') 19 | def __call__(self, x): 20 | y = tf.matmul(x, self.w) + self.b 21 | return tf.nn.relu(y) 22 | 23 | class SequentialModule(tf.Module): 24 | def __init__(self, name=None): 25 | super().__init__(name=name) 26 | self.input_layer = Dense(in_features=INPUT_LEN, out_features=HIDDEN_LEN) 27 | self.layers = [] 28 | for layer in range(NUM_LAYERS): 29 | self.layers.append(Dense(in_features=HIDDEN_LEN, out_features=HIDDEN_LEN)) 30 | self.output_layer = Dense(in_features=HIDDEN_LEN, out_features=OUTPUT_LEN) 31 | 32 | 
@tf.function(input_signature=[[tf.TensorSpec(shape=[BATCH_SIZE,INPUT_LEN],dtype=tf.float32)]]) 33 | def predict(self, x): 34 | x = self.input_layer(x) 35 | for layer in range(NUM_LAYERS): 36 | x = self.layers[layer](x) 37 | x = self.output_layer(x) 38 | return tf.nn.softmax(x) 39 | 40 | if __name__ == "__main__": 41 | driver.build_module(SequentialModule(), ["predict"]) 42 | -------------------------------------------------------------------------------- /iree/models/minilm.py: -------------------------------------------------------------------------------- 1 | # Ref: https://github.com/iree-org/iree-samples/blob/main/ModelCompiler/nlp_models/huggingface_MiniLM_run.py 2 | import driver 3 | 4 | import tensorflow as tf 5 | 6 | from transformers import TFBertModel 7 | 8 | # TODO: Make these into command-line arguments 9 | MAX_SEQUENCE_LENGTH = 512 10 | BATCH_SIZE = 1 11 | 12 | # Create a set of 2-dimensional inputs 13 | bert_input = [tf.TensorSpec(shape=[BATCH_SIZE,MAX_SEQUENCE_LENGTH],dtype=tf.int32), 14 | tf.TensorSpec(shape=[BATCH_SIZE,MAX_SEQUENCE_LENGTH], dtype=tf.int32), 15 | tf.TensorSpec(shape=[BATCH_SIZE,MAX_SEQUENCE_LENGTH], dtype=tf.int32)] 16 | 17 | class BertModule(tf.Module): 18 | def __init__(self): 19 | super(BertModule, self).__init__() 20 | # Create a BERT trainer with the created network. 21 | self.m = TFBertModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased", from_pt=True) 22 | 23 | # Invoke the trainer model on the inputs. This causes the layer to be built. 
24 | self.m.predict = lambda x,y,z: self.m.call(input_ids=x, attention_mask=y, token_type_ids=z, training=False) 25 | 26 | @tf.function(input_signature=bert_input) 27 | def predict(self, input_ids, attention_mask, token_type_ids): 28 | return self.m.predict(input_ids, attention_mask, token_type_ids) 29 | 30 | if __name__ == "__main__": 31 | driver.build_module(BertModule(), ["predict"]) 32 | -------------------------------------------------------------------------------- /iree/models/resnet.py: -------------------------------------------------------------------------------- 1 | # Ref: https://github.com/iree-org/iree-samples/blob/main/ModelCompiler/nlp_models/huggingface_MiniLM_run.py 2 | import driver 3 | 4 | import tensorflow as tf 5 | from transformers import ResNetForImageClassification 6 | 7 | # TODO: Make these into command-line arguments 8 | BATCH_SIZE = 1 9 | H = 224 10 | W = 224 11 | C = 3 12 | 13 | # Single 4-dimensional input spec of shape [BATCH_SIZE, H, W, C]. NOTE(review): dtype is int32 -- confirm this is intended for image data. 14 | resnet_input = [tf.TensorSpec(shape=[BATCH_SIZE,H,W,C],dtype=tf.int32)] 15 | 16 | class ResnetModule(tf.Module): 17 | def __init__(self): 18 | super(ResnetModule, self).__init__() 19 | # Load the pretrained ResNet-18 checkpoint. NOTE(review): this looks like the PyTorch class; the TF variant is presumably TFResNetForImageClassification -- confirm. 20 | self.m = ResNetForImageClassification.from_pretrained("microsoft/resnet-18") 21 | 22 | # Attach a `predict` helper that calls the model with training disabled.
23 | self.m.predict = lambda x: self.m(input=x, training=False) 24 | 25 | # FIXME: This isn't working 26 | @tf.function(input_signature=resnet_input) 27 | def predict(self, input): 28 | return self.m.predict(*input) 29 | 30 | if __name__ == "__main__": 31 | driver.build_module(ResnetModule(), ["predict"]) 32 | -------------------------------------------------------------------------------- /iree/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Builds IREE following the following documentation: 4 | # https://iree-org.github.io/iree/building-from-source/getting-started/ 5 | # https://iree-org.github.io/iree/building-from-source/python-bindings-and-importers/ 6 | # TPP build as in https://github.com/iree-org/iree/commit/63e12cafdf0bba3263bf2b5f74d7b2381f43af65 7 | 8 | SYNTAX="build_iree.sh [-tpp]" 9 | 10 | set -eu 11 | 12 | BUILD_WITH_TPP=False 13 | if [ $# -ge 1 ] && [ "$1" == "-tpp" ]; then 14 | BUILD_WITH_TPP=True 15 | shift 16 | fi 17 | 18 | BUILD_TYPE=Release 19 | if [ $# -ge 1 ] && [ "$1" == "-d" ]; then 20 | echo "Building debug version" 21 | BUILD_TYPE=Debug 22 | elif [ $# -ge 1 ] && [ "$1" == "-rd" ]; then 23 | echo "Building rel+debug version" 24 | BUILD_TYPE=RelWithDebInfo 25 | fi 26 | 27 | # Run on container/remote directly, need to check 28 | PROJECT="mlir-generator" 29 | if [ -d "$PROJECT" ]; then 30 | cd "$PROJECT" 31 | fi 32 | # Make sure the repo is in a good shape 33 | echo " + Updating submodules" 34 | git submodule update --init --recursive 35 | 36 | # Go into iree subrepo 37 | ROOT="$(git rev-parse --show-toplevel)/external/iree" 38 | if [ ! 
-d "$ROOT" ]; then 39 | echo "Cannot find repository root" 40 | exit 1 41 | fi 42 | pushd "$ROOT" 43 | 44 | BLD_DIR="$ROOT/build" 45 | git fetch 46 | if [ "$BUILD_WITH_TPP" == "True" ]; then 47 | BLD_DIR="$BLD_DIR/tpp" 48 | git reset --hard origin/tpp 49 | else 50 | BLD_DIR="$BLD_DIR/main" 51 | git reset --hard origin/main 52 | fi 53 | mkdir -p "$BLD_DIR" 54 | VENV_DIR="$BLD_DIR/venv" 55 | 56 | # Always grab a fresh env environment 57 | echo " + Creating a fresh venv" 58 | rm -rf $VENV_DIR 59 | python -m venv $VENV_DIR 60 | echo "export PATH=\$PATH:$BLD_DIR/tools" >> $VENV_DIR/bin/activate 61 | echo "export PYTHONPATH=$BLD_DIR/compiler/bindings/python:$BLD_DIR/runtime/bindings/python" >> $VENV_DIR/bin/activate 62 | source $VENV_DIR/bin/activate 63 | 64 | # Install Python dependencies 65 | echo " + Install Python dependencies" 66 | python -m pip install --upgrade pip 67 | python -m pip install -r $ROOT/runtime/bindings/python/iree/runtime/build_requirements.txt 68 | python -m pip install tensorflow iree-tools-tf keras transformers torch datasets 69 | 70 | # Checkout iree repos too 71 | echo " + Updating submodules" 72 | git submodule update --init --recursive 73 | 74 | # Build iree with LLVM in-tree 75 | echo " + Build iree in-tree" 76 | EXTRA_CMAKE_FLAGS="" 77 | if [ "$BUILD_WITH_TPP" == "True" ]; then 78 | echo " + Adding TPP options" 79 | EXTRA_CMAKE_FLAGS="-DIREE_USE_TPP=ON \ 80 | -DCMAKE_C_FLAGS=-DIREE_HAL_EXECUTABLE_IMPORT_PROVIDER_DEFAULT_FN=iree_samples_tpp_import_provider \ 81 | -DIREE_HAL_EXECUTABLE_LOADER_EXTRA_DEPS=iree_samples_tpp_import" 82 | fi 83 | cmake -GNinja -B$BLD_DIR -S $ROOT \ 84 | -DCMAKE_BUILD_TYPE=Release \ 85 | -DCMAKE_C_COMPILER_LAUNCHER=ccache \ 86 | -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ 87 | -DCMAKE_C_COMPILER=clang \ 88 | -DCMAKE_CXX_COMPILER=clang++ \ 89 | -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ 90 | -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ 91 | -DLLVM_ENABLE_ASSERTIONS=ON \ 92 | -DIREE_ENABLE_ASSERTIONS=ON \ 93 | 
-DIREE_BUILD_PYTHON_BINDINGS=ON \ 94 | -DIREE_ENABLE_LLD=ON \ 95 | $EXTRA_CMAKE_FLAGS \ 96 | -DPython3_EXECUTABLE=$(which python) 97 | 98 | # Not everything works with TPP 99 | if [ "$BUILD_WITH_TPP" == "True" ]; then 100 | ninja -C "$BLD_DIR" iree-opt iree-compile iree-run-module iree-benchmark-module iree-lld 101 | ninja -C "$BLD_DIR" compiler/bindings/python/iree/compiler/tflite.py 102 | ninja -C "$BLD_DIR" compiler/bindings/python/iree/compiler/tools/tflite.py 103 | ninja -C "$BLD_DIR" runtime/package 104 | else 105 | ninja -C "$BLD_DIR" 106 | fi 107 | 108 | # Python bindings test 109 | echo " + Checking IREE Python bindings" 110 | python -c "import iree.compiler" 111 | python -c "import iree.runtime" 112 | 113 | deactivate 114 | popd 115 | -------------------------------------------------------------------------------- /iree/scripts/dump_linalg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Dumps Linalg-on-tensors MLIR from Python module 4 | SYNTAX="dump_linalg.sh [-tpp] model.py [args]" 5 | 6 | set -eu 7 | 8 | # Use main build or TPP build 9 | BUILD=main 10 | if [ $# -ge 1 ] && [ "$1" == "-tpp" ]; then 11 | BUILD=tpp 12 | shift 13 | fi 14 | 15 | # Python Module 16 | if [ $# -ge 1 ] && [ -f "$1" ]; then 17 | SCRIPT="$1" 18 | shift 19 | else 20 | echo "Syntax: $SYNTAX" 21 | exit 1 22 | fi 23 | ARGS="$@" 24 | 25 | VENV="$(git rev-parse --show-toplevel)/external/iree/build/$BUILD/venv" 26 | if [ ! 
-d "$VENV" ]; then 27 | echo "Cannot find repository root" 28 | exit 1 29 | fi 30 | source "$VENV"/bin/activate 31 | 32 | # This works on Linux and Darwin 33 | TEMP_DIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'iree') 34 | 35 | # Run iree-compiler with --mlir-print-ir-after=iree-mhlo-to-linalg-on-tensors 36 | echo "Running [$SCRIPT $ARGS], output on $TEMP_DIR" 37 | python "$SCRIPT" $ARGS > "$TEMP_DIR"/out 2> "$TEMP_DIR"/err || true 38 | 39 | # Clean-up output to grab the IR 40 | MLIR_FILE="$TEMP_DIR"/"$(basename $SCRIPT)".mlir 41 | echo "Cleaning up output, creating MLIR file $MLIR_FILE" 42 | cat "$TEMP_DIR"/err \ 43 | | grep -v ": \w tensorflow" \ 44 | | grep -v "Invoked with" \ 45 | | grep -v "^//" \ 46 | | grep -v "Traceback" \ 47 | | grep -v "File \"" \ 48 | | grep -v VmModule \ 49 | | grep -v TypeError \ 50 | | grep -v "GPU devices" \ 51 | | grep -v Downloading \ 52 | | grep -v TensorFlow > $MLIR_FILE 53 | 54 | # Clean-up debug symbols from MLIR file 55 | sed -i 's/ loc(unknown)//' "$MLIR_FILE" # unknown 56 | sed -i '/=/ s/ loc(fused.*)//' "$MLIR_FILE" # assignments 57 | sed -i '/yield/ s/ loc(fused.*)//' "$MLIR_FILE" # yield 58 | sed -i '/\^bb/ s/ loc(fused.*)/)/' "$MLIR_FILE" # basic block 59 | sed -i '/} -> tensor/ s/ loc(fused.*)//' "$MLIR_FILE" # block end 60 | 61 | # Move the MLIR file into the repository (we know the repo exists) 62 | MLIR_DIR="$(git rev-parse --show-toplevel)/front-ends/iree/mlir" 63 | echo "Moving MLIR file to $MLIR_DIR" 64 | mv "$MLIR_FILE" "$MLIR_DIR" 65 | 66 | rm -r $TEMP_DIR 67 | deactivate 68 | -------------------------------------------------------------------------------- /pytorch/generate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Generate MLIR files from Torch-MLIR 4 | # Run from inside the repo already, no need to check 5 | 6 | ROOT=$(git rev-parse --show-toplevel) 7 | if [ ! 
def torch_mlir_compile(model, input,
                       output_formats=('torch', 'linalg-on-tensors', 'tosa', 'stablehlo')):
    """Compile ``model`` with torch_mlir once per output format and print the IR.

    For every entry of ``output_formats`` the model is passed to
    ``torch_mlir.compile`` with that entry as ``output_type``; the format name
    and the module's assembly (large element lists truncated to 10 entries)
    are printed to stdout.

    Args:
        model: Object handed to ``torch_mlir.compile`` as the model.
        input: Example input(s) handed to ``torch_mlir.compile``. The name
            shadows the ``input`` builtin but is kept so that existing
            keyword callers keep working.
        output_formats: Iterable of ``output_type`` values to generate, in
            order. The default is an immutable tuple so it cannot be
            modified accidentally between calls.

    Returns:
        The module produced for the last entry of ``output_formats``, or
        ``None`` when ``output_formats`` is empty.
    """
    module = None
    for output_type in output_formats:
        module = torch_mlir.compile(model, input, output_type=output_type)
        print(output_type, "\n", module.operation.get_asm(large_elements_limit=10))
    # Returning one of the MLIR modules so that TorchDynamo can call the object.
    # Otherwise, TorchDynamo does not get invoked.
    # TorchScript models do not care about the return value.
    return module


@make_simple_dynamo_backend
def refbackend_torchdynamo_backend(fx_graph: torch.fx.GraphModule,
                                   example_inputs: List[torch.Tensor]):
    """TorchDynamo backend that runs the graph through the linalg-on-tensors RefBackend.

    The FX graph is lowered with ``torch_mlir_compile`` (only the
    'linalg-on-tensors' format, which is also printed), then compiled and
    loaded with ``RefBackendLinalgOnTensorsBackend``.

    Args:
        fx_graph: FX graph module to compile.
        example_inputs: Example tensors used for the compilation.

    Returns:
        A callable taking torch tensors and returning the result of the
        loaded module's ``forward`` converted back to torch tensor(s).
    """
    mlir_module = torch_mlir_compile(
        fx_graph, example_inputs, output_formats=['linalg-on-tensors'])
    backend = refbackend.RefBackendLinalgOnTensorsBackend()
    loaded = backend.load(backend.compile(mlir_module))

    def compiled_callable(*inputs):
        # The loaded module is called with numpy arrays, so convert on the
        # way in and wrap the numpy result(s) back into tensors on the way out.
        numpy_inputs = [tensor.numpy() for tensor in inputs]
        result = loaded.forward(*numpy_inputs)
        if isinstance(result, tuple):
            return tuple(torch.from_numpy(item) for item in result)
        return torch.from_numpy(result)

    return compiled_callable
-d "$ROOT" ]; then 9 | echo "Cannot find repository root" 10 | exit 1 11 | fi 12 | cd "$ROOT" 13 | 14 | # Always grab a fresh env environment 15 | echo " + Creating a fresh conda env " 16 | ENV_PATH="${ROOT}/env" 17 | CONDA_DIR="torch-mlir-conda" 18 | CONDA_DIR_PATH="${ENV_PATH}/${CONDA_DIR}/miniconda3/" 19 | ARCH_NAME=$(uname -m) 20 | 21 | mkdir -p ${ENV_PATH} 22 | rm -rf ${CONDA_DIR_PATH} 23 | 24 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${ARCH_NAME}.sh 25 | bash Miniconda3-latest-Linux-${ARCH_NAME}.sh -b -p ${CONDA_DIR_PATH} 26 | eval "$(${CONDA_DIR_PATH}/bin/conda shell.bash hook)" 27 | rm Miniconda3-latest-Linux-${ARCH_NAME}.sh 28 | 29 | conda activate ${CONDA_DIR_PATH} 30 | conda install -y python=3.11.3 31 | 32 | # Install Python dependencies 33 | echo " + Install Python dependencies" 34 | python -m pip install --upgrade pip 35 | pip install --pre torch-mlir torchvision \ 36 | -f https://llvm.github.io/torch-mlir/package-index/ \ 37 | --extra-index-url https://download.pytorch.org/whl/nightly/cpu 38 | 39 | # Install any additional dependencies 40 | pip install transformers 41 | 42 | # Done 43 | echo " + Done." 
44 | conda deactivate 45 | echo " + Run conda activate ${CONDA_DIR_PATH} before using torch-mlir" 46 | -------------------------------------------------------------------------------- /pytorch/torch-dynamo/mlir/conv.mlir: -------------------------------------------------------------------------------- 1 | linalg-on-tensors 2 | #map = affine_map<(d0, d1, d2, d3) -> (d1)> 3 | #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> 4 | #map2 = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)> 5 | #map3 = affine_map<(d0, d1, d2, d3) -> (d0, 0, d2, d3)> 6 | #map4 = affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)> 7 | module attributes {torch.debug_module_name = "_lambda"} { 8 | ml_program.global private mutable @global_seed(dense<0> : tensor) : tensor 9 | func.func @forward(%arg0: tensor<32x3x3x3xf32>, %arg1: tensor<32xf32>, %arg2: tensor<2x32x3x3xf32>, %arg3: tensor<2xf32>, %arg4: tensor<1x3x28x28xf32>) -> tensor<1x2x3x3xf32> { 10 | %cst = arith.constant 0.000000e+00 : f32 11 | %cst_0 = arith.constant 0xFF800000 : f32 12 | %c0_i64 = arith.constant 0 : i64 13 | %0 = tensor.empty() : tensor<1x32x9x9xf32> 14 | %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<32xf32>) outs(%0 : tensor<1x32x9x9xf32>) { 15 | ^bb0(%in: f32, %out: f32): 16 | linalg.yield %in : f32 17 | } -> tensor<1x32x9x9xf32> 18 | %2 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<3> : vector<2xi64>} ins(%arg4, %arg0 : tensor<1x3x28x28xf32>, tensor<32x3x3x3xf32>) outs(%1 : tensor<1x32x9x9xf32>) -> tensor<1x32x9x9xf32> 19 | %3 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<1x32x9x9xf32>) outs(%0 : tensor<1x32x9x9xf32>) { 20 | ^bb0(%in: f32, %out: f32): 21 | %19 = arith.cmpf ugt, %in, %cst : f32 22 | %20 = arith.select %19, %in, %cst : f32 23 | linalg.yield %20 : f32 24 | } -> tensor<1x32x9x9xf32> 25 | %4 = 
tensor.empty() : tensor<1x2x3x3xf32> 26 | %5 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg3 : tensor<2xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 27 | ^bb0(%in: f32, %out: f32): 28 | linalg.yield %in : f32 29 | } -> tensor<1x2x3x3xf32> 30 | %6 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<3> : vector<2xi64>} ins(%3, %arg2 : tensor<1x32x9x9xf32>, tensor<2x32x3x3xf32>) outs(%5 : tensor<1x2x3x3xf32>) -> tensor<1x2x3x3xf32> 31 | %7 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6 : tensor<1x2x3x3xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 32 | ^bb0(%in: f32, %out: f32): 33 | %19 = arith.cmpf ugt, %in, %cst : f32 34 | %20 = arith.select %19, %in, %cst : f32 35 | linalg.yield %20 : f32 36 | } -> tensor<1x2x3x3xf32> 37 | %8 = tensor.empty() : tensor<1x1x3x3xi64> 38 | %9 = linalg.fill ins(%c0_i64 : i64) outs(%8 : tensor<1x1x3x3xi64>) -> tensor<1x1x3x3xi64> 39 | %10 = tensor.empty() : tensor<1x1x3x3xf32> 40 | %11 = linalg.fill ins(%cst_0 : f32) outs(%10 : tensor<1x1x3x3xf32>) -> tensor<1x1x3x3xf32> 41 | %12:2 = linalg.generic {indexing_maps = [#map1, #map3, #map3], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} ins(%7 : tensor<1x2x3x3xf32>) outs(%11, %9 : tensor<1x1x3x3xf32>, tensor<1x1x3x3xi64>) { 42 | ^bb0(%in: f32, %out: f32, %out_1: i64): 43 | %19 = linalg.index 1 : index 44 | %20 = arith.index_cast %19 : index to i64 45 | %21 = arith.maximumf %in, %out : f32 46 | %22 = arith.cmpf ogt, %in, %out : f32 47 | %23 = arith.select %22, %20, %out_1 : i64 48 | linalg.yield %21, %23 : f32, i64 49 | } -> (tensor<1x1x3x3xf32>, tensor<1x1x3x3xi64>) 50 | %13 = linalg.generic {indexing_maps = [#map2, #map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%7, %12#0 : tensor<1x2x3x3xf32>, tensor<1x1x3x3xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 51 | ^bb0(%in: 
f32, %in_1: f32, %out: f32): 52 | %19 = arith.subf %in, %in_1 : f32 53 | linalg.yield %19 : f32 54 | } -> tensor<1x2x3x3xf32> 55 | %14 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13 : tensor<1x2x3x3xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 56 | ^bb0(%in: f32, %out: f32): 57 | %19 = math.exp %in : f32 58 | linalg.yield %19 : f32 59 | } -> tensor<1x2x3x3xf32> 60 | %15 = linalg.fill ins(%cst : f32) outs(%10 : tensor<1x1x3x3xf32>) -> tensor<1x1x3x3xf32> 61 | %16 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} ins(%14 : tensor<1x2x3x3xf32>) outs(%15 : tensor<1x1x3x3xf32>) { 62 | ^bb0(%in: f32, %out: f32): 63 | %19 = arith.addf %in, %out : f32 64 | linalg.yield %19 : f32 65 | } -> tensor<1x1x3x3xf32> 66 | %17 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%16 : tensor<1x1x3x3xf32>) outs(%10 : tensor<1x1x3x3xf32>) { 67 | ^bb0(%in: f32, %out: f32): 68 | %19 = math.log %in : f32 69 | linalg.yield %19 : f32 70 | } -> tensor<1x1x3x3xf32> 71 | %18 = linalg.generic {indexing_maps = [#map2, #map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13, %17 : tensor<1x2x3x3xf32>, tensor<1x1x3x3xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 72 | ^bb0(%in: f32, %in_1: f32, %out: f32): 73 | %19 = arith.subf %in, %in_1 : f32 74 | linalg.yield %19 : f32 75 | } -> tensor<1x2x3x3xf32> 76 | return %18 : tensor<1x2x3x3xf32> 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /pytorch/torch-dynamo/mlir/linear.mlir: -------------------------------------------------------------------------------- 1 | linalg-on-tensors 2 | #map = affine_map<(d0, d1) -> (d0, d1)> 3 | #map1 = affine_map<(d0, d1) -> (d1, d0)> 4 | #map2 = affine_map<(d0, d1) -> (d1)> 5 | #map3 = affine_map<(d0, d1) -> (d0, 0)> 6 | module 
attributes {torch.debug_module_name = "_lambda"} { 7 | ml_program.global private mutable @global_seed(dense<0> : tensor) : tensor 8 | func.func @forward(%arg0: tensor<256x128xf32>, %arg1: tensor<256xf32>, %arg2: tensor<10x256xf32>, %arg3: tensor<10xf32>, %arg4: tensor<2x128xf32>) -> tensor<2x10xf32> { 9 | %c0_i64 = arith.constant 0 : i64 10 | %cst = arith.constant 0.000000e+00 : f32 11 | %cst_0 = arith.constant 0xFF800000 : f32 12 | %0 = tensor.empty() : tensor<128x256xf32> 13 | %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<256x128xf32>) outs(%0 : tensor<128x256xf32>) { 14 | ^bb0(%in: f32, %out: f32): 15 | linalg.yield %in : f32 16 | } -> tensor<128x256xf32> 17 | %2 = tensor.empty() : tensor<2x256xf32> 18 | %3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<2x256xf32>) -> tensor<2x256xf32> 19 | %4 = linalg.matmul ins(%arg4, %1 : tensor<2x128xf32>, tensor<128x256xf32>) outs(%3 : tensor<2x256xf32>) -> tensor<2x256xf32> 20 | %5 = linalg.generic {indexing_maps = [#map2, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg1, %4 : tensor<256xf32>, tensor<2x256xf32>) outs(%2 : tensor<2x256xf32>) { 21 | ^bb0(%in: f32, %in_1: f32, %out: f32): 22 | %25 = arith.addf %in, %in_1 : f32 23 | linalg.yield %25 : f32 24 | } -> tensor<2x256xf32> 25 | %6 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%5 : tensor<2x256xf32>) outs(%2 : tensor<2x256xf32>) { 26 | ^bb0(%in: f32, %out: f32): 27 | %25 = arith.cmpf ugt, %in, %cst : f32 28 | %26 = arith.select %25, %in, %cst : f32 29 | linalg.yield %26 : f32 30 | } -> tensor<2x256xf32> 31 | %7 = tensor.empty() : tensor<256x10xf32> 32 | %8 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg2 : tensor<10x256xf32>) outs(%7 : tensor<256x10xf32>) { 33 | ^bb0(%in: f32, %out: f32): 34 | linalg.yield %in : f32 35 | } -> tensor<256x10xf32> 36 | %9 = tensor.empty() : 
tensor<2x10xf32> 37 | %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x10xf32>) -> tensor<2x10xf32> 38 | %11 = linalg.matmul ins(%6, %8 : tensor<2x256xf32>, tensor<256x10xf32>) outs(%10 : tensor<2x10xf32>) -> tensor<2x10xf32> 39 | %12 = linalg.generic {indexing_maps = [#map2, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg3, %11 : tensor<10xf32>, tensor<2x10xf32>) outs(%9 : tensor<2x10xf32>) { 40 | ^bb0(%in: f32, %in_1: f32, %out: f32): 41 | %25 = arith.addf %in, %in_1 : f32 42 | linalg.yield %25 : f32 43 | } -> tensor<2x10xf32> 44 | %13 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%12 : tensor<2x10xf32>) outs(%9 : tensor<2x10xf32>) { 45 | ^bb0(%in: f32, %out: f32): 46 | %25 = arith.cmpf ugt, %in, %cst : f32 47 | %26 = arith.select %25, %in, %cst : f32 48 | linalg.yield %26 : f32 49 | } -> tensor<2x10xf32> 50 | %14 = tensor.empty() : tensor<2x1xi64> 51 | %15 = linalg.fill ins(%c0_i64 : i64) outs(%14 : tensor<2x1xi64>) -> tensor<2x1xi64> 52 | %16 = tensor.empty() : tensor<2x1xf32> 53 | %17 = linalg.fill ins(%cst_0 : f32) outs(%16 : tensor<2x1xf32>) -> tensor<2x1xf32> 54 | %18:2 = linalg.generic {indexing_maps = [#map, #map3, #map3], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<2x10xf32>) outs(%17, %15 : tensor<2x1xf32>, tensor<2x1xi64>) { 55 | ^bb0(%in: f32, %out: f32, %out_1: i64): 56 | %25 = linalg.index 1 : index 57 | %26 = arith.index_cast %25 : index to i64 58 | %27 = arith.maximumf %in, %out : f32 59 | %28 = arith.cmpf ogt, %in, %out : f32 60 | %29 = arith.select %28, %26, %out_1 : i64 61 | linalg.yield %27, %29 : f32, i64 62 | } -> (tensor<2x1xf32>, tensor<2x1xi64>) 63 | %19 = linalg.generic {indexing_maps = [#map, #map3, #map], iterator_types = ["parallel", "parallel"]} ins(%13, %18#0 : tensor<2x10xf32>, tensor<2x1xf32>) outs(%9 : tensor<2x10xf32>) { 64 | ^bb0(%in: f32, %in_1: f32, %out: f32): 65 | %25 = arith.subf %in, %in_1 : f32 66 | linalg.yield %25 : f32 67 | } -> 
tensor<2x10xf32> 68 | %20 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<2x10xf32>) outs(%9 : tensor<2x10xf32>) { 69 | ^bb0(%in: f32, %out: f32): 70 | %25 = math.exp %in : f32 71 | linalg.yield %25 : f32 72 | } -> tensor<2x10xf32> 73 | %21 = linalg.fill ins(%cst : f32) outs(%16 : tensor<2x1xf32>) -> tensor<2x1xf32> 74 | %22 = linalg.generic {indexing_maps = [#map, #map3], iterator_types = ["parallel", "reduction"]} ins(%20 : tensor<2x10xf32>) outs(%21 : tensor<2x1xf32>) { 75 | ^bb0(%in: f32, %out: f32): 76 | %25 = arith.addf %in, %out : f32 77 | linalg.yield %25 : f32 78 | } -> tensor<2x1xf32> 79 | %23 = linalg.generic {indexing_maps = [#map3, #map], iterator_types = ["parallel", "parallel"]} ins(%22 : tensor<2x1xf32>) outs(%16 : tensor<2x1xf32>) { 80 | ^bb0(%in: f32, %out: f32): 81 | %25 = math.log %in : f32 82 | linalg.yield %25 : f32 83 | } -> tensor<2x1xf32> 84 | %24 = linalg.generic {indexing_maps = [#map, #map3, #map], iterator_types = ["parallel", "parallel"]} ins(%19, %23 : tensor<2x10xf32>, tensor<2x1xf32>) outs(%9 : tensor<2x10xf32>) { 85 | ^bb0(%in: f32, %in_1: f32, %out: f32): 86 | %25 = arith.subf %in, %in_1 : f32 87 | linalg.yield %25 : f32 88 | } -> tensor<2x10xf32> 89 | return %24 : tensor<2x10xf32> 90 | } 91 | } 92 | 93 | -------------------------------------------------------------------------------- /pytorch/torch-dynamo/mlir/mnist.mlir: -------------------------------------------------------------------------------- 1 | linalg-on-tensors 2 | #map = affine_map<(d0, d1, d2, d3) -> (d1)> 3 | #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> 4 | #map2 = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)> 5 | #map3 = affine_map<(d0, d1, d2, d3) -> ()> 6 | #map4 = affine_map<(d0, d1) -> (d0, d1)> 7 | #map5 = affine_map<(d0, d1) -> (d1, d0)> 8 | #map6 = affine_map<(d0, d1) -> (d1)> 9 | #map7 = affine_map<(d0, d1) -> (0, d1)> 10 | #map8 = affine_map<(d0, d1) -> ()> 11 | #map9 = 
affine_map<(d0, d1) -> (d0, 0)> 12 | #map10 = affine_map<(d0, d1) -> (0, 0)> 13 | module attributes {torch.debug_module_name = "_lambda"} { 14 | ml_program.global private mutable @global_seed(dense<0> : tensor) : tensor 15 | func.func @forward(%arg0: tensor<32x1x3x3xf32>, %arg1: tensor<32xf32>, %arg2: tensor<64x32x3x3xf32>, %arg3: tensor<64xf32>, %arg4: tensor<128x9216xf32>, %arg5: tensor<128xf32>, %arg6: tensor<10x128xf32>, %arg7: tensor<10xf32>, %arg8: tensor<1x1x28x28xf32>) -> tensor<1x10xf32> { 16 | %cst = arith.constant 0.000000e+00 : f32 17 | %cst_0 = arith.constant 0xFF800000 : f32 18 | %c6364136223846793005_i64 = arith.constant 6364136223846793005 : i64 19 | %c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64 20 | %c32_i64 = arith.constant 32 : i64 21 | %cst_1 = arith.constant 5.4210107999999998E-20 : f64 22 | %cst_2 = arith.constant 5.000000e-01 : f64 23 | %cst_3 = arith.constant 0.000000e+00 : f64 24 | %cst_4 = arith.constant 7.500000e-01 : f64 25 | %c0_i64 = arith.constant 0 : i64 26 | %c64_i64 = arith.constant 64 : i64 27 | %c12_i64 = arith.constant 12 : i64 28 | %cst_5 = arith.constant 7.500000e-01 : f32 29 | %c128_i64 = arith.constant 128 : i64 30 | %cst_6 = arith.constant 5.000000e-01 : f32 31 | %0 = tensor.empty() : tensor<1x32x26x26xf32> 32 | %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<32xf32>) outs(%0 : tensor<1x32x26x26xf32>) { 33 | ^bb0(%in: f32, %out: f32): 34 | linalg.yield %in : f32 35 | } -> tensor<1x32x26x26xf32> 36 | %2 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg8, %arg0 : tensor<1x1x28x28xf32>, tensor<32x1x3x3xf32>) outs(%1 : tensor<1x32x26x26xf32>) -> tensor<1x32x26x26xf32> 37 | %3 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<1x32x26x26xf32>) outs(%0 : tensor<1x32x26x26xf32>) 
{ 38 | ^bb0(%in: f32, %out: f32): 39 | %59 = arith.cmpf ugt, %in, %cst : f32 40 | %60 = arith.select %59, %in, %cst : f32 41 | linalg.yield %60 : f32 42 | } -> tensor<1x32x26x26xf32> 43 | %4 = tensor.empty() : tensor<1x64x24x24xf32> 44 | %5 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg3 : tensor<64xf32>) outs(%4 : tensor<1x64x24x24xf32>) { 45 | ^bb0(%in: f32, %out: f32): 46 | linalg.yield %in : f32 47 | } -> tensor<1x64x24x24xf32> 48 | %6 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%3, %arg2 : tensor<1x32x26x26xf32>, tensor<64x32x3x3xf32>) outs(%5 : tensor<1x64x24x24xf32>) -> tensor<1x64x24x24xf32> 49 | %7 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6 : tensor<1x64x24x24xf32>) outs(%4 : tensor<1x64x24x24xf32>) { 50 | ^bb0(%in: f32, %out: f32): 51 | %59 = arith.cmpf ugt, %in, %cst : f32 52 | %60 = arith.select %59, %in, %cst : f32 53 | linalg.yield %60 : f32 54 | } -> tensor<1x64x24x24xf32> 55 | %8 = tensor.empty() : tensor<1x64x12x12xf32> 56 | %9 = linalg.fill ins(%cst_0 : f32) outs(%8 : tensor<1x64x12x12xf32>) -> tensor<1x64x12x12xf32> 57 | %10 = tensor.empty() : tensor<2x2xf32> 58 | %11 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%7, %10 : tensor<1x64x24x24xf32>, tensor<2x2xf32>) outs(%9 : tensor<1x64x12x12xf32>) -> tensor<1x64x12x12xf32> 59 | %12 = tensor.empty() : tensor 60 | %13 = linalg.fill ins(%cst_4 : f64) outs(%12 : tensor) -> tensor 61 | %14 = ml_program.global_load @global_seed : tensor 62 | %extracted = tensor.extract %14[] : tensor 63 | %15 = arith.muli %extracted, %c6364136223846793005_i64 : i64 64 | %16 = arith.addi %15, %c1442695040888963407_i64 : i64 65 | %inserted = tensor.insert %16 into %14[] : tensor 66 | ml_program.global_store @global_seed = %inserted : tensor 67 | 
%17 = tensor.empty() : tensor<1x64x12x12xf64> 68 | %18 = linalg.generic {indexing_maps = [#map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%17 : tensor<1x64x12x12xf64>) { 69 | ^bb0(%out: f64): 70 | %59 = linalg.index 0 : index 71 | %60 = arith.index_cast %59 : index to i64 72 | %61 = linalg.index 1 : index 73 | %62 = arith.index_cast %61 : index to i64 74 | %63 = linalg.index 2 : index 75 | %64 = arith.index_cast %63 : index to i64 76 | %65 = linalg.index 3 : index 77 | %66 = arith.index_cast %65 : index to i64 78 | %67 = arith.muli %60, %c64_i64 : i64 79 | %68 = arith.addi %67, %62 : i64 80 | %69 = arith.muli %68, %c12_i64 : i64 81 | %70 = arith.addi %69, %64 : i64 82 | %71 = arith.muli %70, %c12_i64 : i64 83 | %72 = arith.addi %71, %66 : i64 84 | %73 = arith.muli %72, %16 : i64 85 | %74 = arith.addi %73, %16 : i64 86 | %75 = arith.muli %73, %73 : i64 87 | %76 = arith.addi %75, %73 : i64 88 | %77 = arith.shli %76, %c32_i64 : i64 89 | %78 = arith.shrui %76, %c32_i64 : i64 90 | %79 = arith.ori %77, %78 : i64 91 | %80 = arith.muli %79, %79 : i64 92 | %81 = arith.addi %80, %74 : i64 93 | %82 = arith.shli %81, %c32_i64 : i64 94 | %83 = arith.shrui %81, %c32_i64 : i64 95 | %84 = arith.ori %82, %83 : i64 96 | %85 = arith.muli %84, %84 : i64 97 | %86 = arith.addi %85, %73 : i64 98 | %87 = arith.shli %86, %c32_i64 : i64 99 | %88 = arith.shrui %86, %c32_i64 : i64 100 | %89 = arith.ori %87, %88 : i64 101 | %90 = arith.muli %89, %89 : i64 102 | %91 = arith.addi %90, %74 : i64 103 | %92 = arith.shli %91, %c32_i64 : i64 104 | %93 = arith.shrui %91, %c32_i64 : i64 105 | %94 = arith.ori %92, %93 : i64 106 | %95 = arith.muli %94, %94 : i64 107 | %96 = arith.addi %95, %73 : i64 108 | %97 = arith.shrui %96, %c32_i64 : i64 109 | %98 = arith.xori %91, %97 : i64 110 | %99 = arith.uitofp %98 : i64 to f64 111 | %100 = arith.mulf %99, %cst_1 : f64 112 | %101 = arith.addf %100, %cst_3 : f64 113 | linalg.yield %101 : f64 114 | } -> tensor<1x64x12x12xf64> 115 | 
%19 = tensor.empty() : tensor<1x64x12x12xi1> 116 | %20 = linalg.generic {indexing_maps = [#map2, #map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%18, %13 : tensor<1x64x12x12xf64>, tensor) outs(%19 : tensor<1x64x12x12xi1>) { 117 | ^bb0(%in: f64, %in_9: f64, %out: i1): 118 | %59 = arith.cmpf ult, %in, %in_9 : f64 119 | linalg.yield %59 : i1 120 | } -> tensor<1x64x12x12xi1> 121 | %21 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%20 : tensor<1x64x12x12xi1>) outs(%8 : tensor<1x64x12x12xf32>) { 122 | ^bb0(%in: i1, %out: f32): 123 | %59 = arith.uitofp %in : i1 to f32 124 | linalg.yield %59 : f32 125 | } -> tensor<1x64x12x12xf32> 126 | %22 = linalg.generic {indexing_maps = [#map2, #map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%21, %11 : tensor<1x64x12x12xf32>, tensor<1x64x12x12xf32>) outs(%8 : tensor<1x64x12x12xf32>) { 127 | ^bb0(%in: f32, %in_9: f32, %out: f32): 128 | %59 = arith.mulf %in, %in_9 : f32 129 | linalg.yield %59 : f32 130 | } -> tensor<1x64x12x12xf32> 131 | %23 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%22 : tensor<1x64x12x12xf32>) outs(%8 : tensor<1x64x12x12xf32>) { 132 | ^bb0(%in: f32, %out: f32): 133 | %59 = arith.divf %in, %cst_5 : f32 134 | linalg.yield %59 : f32 135 | } -> tensor<1x64x12x12xf32> 136 | %collapsed = tensor.collapse_shape %23 [[0], [1, 2, 3]] : tensor<1x64x12x12xf32> into tensor<1x9216xf32> 137 | %24 = tensor.empty() : tensor<9216x128xf32> 138 | %25 = linalg.generic {indexing_maps = [#map4, #map5], iterator_types = ["parallel", "parallel"]} ins(%arg4 : tensor<128x9216xf32>) outs(%24 : tensor<9216x128xf32>) { 139 | ^bb0(%in: f32, %out: f32): 140 | linalg.yield %in : f32 141 | } -> tensor<9216x128xf32> 142 | %26 = tensor.empty() : tensor<1x128xf32> 143 | %27 = linalg.fill ins(%cst : f32) outs(%26 : 
tensor<1x128xf32>) -> tensor<1x128xf32> 144 | %28 = linalg.matmul ins(%collapsed, %25 : tensor<1x9216xf32>, tensor<9216x128xf32>) outs(%27 : tensor<1x128xf32>) -> tensor<1x128xf32> 145 | %29 = linalg.generic {indexing_maps = [#map6, #map7, #map4], iterator_types = ["parallel", "parallel"]} ins(%arg5, %28 : tensor<128xf32>, tensor<1x128xf32>) outs(%26 : tensor<1x128xf32>) { 146 | ^bb0(%in: f32, %in_9: f32, %out: f32): 147 | %59 = arith.addf %in, %in_9 : f32 148 | linalg.yield %59 : f32 149 | } -> tensor<1x128xf32> 150 | %30 = linalg.generic {indexing_maps = [#map7, #map4], iterator_types = ["parallel", "parallel"]} ins(%29 : tensor<1x128xf32>) outs(%26 : tensor<1x128xf32>) { 151 | ^bb0(%in: f32, %out: f32): 152 | %59 = arith.cmpf ugt, %in, %cst : f32 153 | %60 = arith.select %59, %in, %cst : f32 154 | linalg.yield %60 : f32 155 | } -> tensor<1x128xf32> 156 | %31 = linalg.fill ins(%cst_2 : f64) outs(%12 : tensor) -> tensor 157 | %32 = ml_program.global_load @global_seed : tensor 158 | %extracted_7 = tensor.extract %32[] : tensor 159 | %33 = arith.muli %extracted_7, %c6364136223846793005_i64 : i64 160 | %34 = arith.addi %33, %c1442695040888963407_i64 : i64 161 | %inserted_8 = tensor.insert %34 into %32[] : tensor 162 | ml_program.global_store @global_seed = %inserted_8 : tensor 163 | %35 = tensor.empty() : tensor<1x128xf64> 164 | %36 = linalg.generic {indexing_maps = [#map4], iterator_types = ["parallel", "parallel"]} outs(%35 : tensor<1x128xf64>) { 165 | ^bb0(%out: f64): 166 | %59 = linalg.index 0 : index 167 | %60 = arith.index_cast %59 : index to i64 168 | %61 = linalg.index 1 : index 169 | %62 = arith.index_cast %61 : index to i64 170 | %63 = arith.muli %60, %c128_i64 : i64 171 | %64 = arith.addi %63, %62 : i64 172 | %65 = arith.muli %64, %34 : i64 173 | %66 = arith.addi %65, %34 : i64 174 | %67 = arith.muli %65, %65 : i64 175 | %68 = arith.addi %67, %65 : i64 176 | %69 = arith.shli %68, %c32_i64 : i64 177 | %70 = arith.shrui %68, %c32_i64 : i64 178 | %71 = 
arith.ori %69, %70 : i64 179 | %72 = arith.muli %71, %71 : i64 180 | %73 = arith.addi %72, %66 : i64 181 | %74 = arith.shli %73, %c32_i64 : i64 182 | %75 = arith.shrui %73, %c32_i64 : i64 183 | %76 = arith.ori %74, %75 : i64 184 | %77 = arith.muli %76, %76 : i64 185 | %78 = arith.addi %77, %65 : i64 186 | %79 = arith.shli %78, %c32_i64 : i64 187 | %80 = arith.shrui %78, %c32_i64 : i64 188 | %81 = arith.ori %79, %80 : i64 189 | %82 = arith.muli %81, %81 : i64 190 | %83 = arith.addi %82, %66 : i64 191 | %84 = arith.shli %83, %c32_i64 : i64 192 | %85 = arith.shrui %83, %c32_i64 : i64 193 | %86 = arith.ori %84, %85 : i64 194 | %87 = arith.muli %86, %86 : i64 195 | %88 = arith.addi %87, %65 : i64 196 | %89 = arith.shrui %88, %c32_i64 : i64 197 | %90 = arith.xori %83, %89 : i64 198 | %91 = arith.uitofp %90 : i64 to f64 199 | %92 = arith.mulf %91, %cst_1 : f64 200 | %93 = arith.addf %92, %cst_3 : f64 201 | linalg.yield %93 : f64 202 | } -> tensor<1x128xf64> 203 | %37 = tensor.empty() : tensor<1x128xi1> 204 | %38 = linalg.generic {indexing_maps = [#map7, #map8, #map4], iterator_types = ["parallel", "parallel"]} ins(%36, %31 : tensor<1x128xf64>, tensor) outs(%37 : tensor<1x128xi1>) { 205 | ^bb0(%in: f64, %in_9: f64, %out: i1): 206 | %59 = arith.cmpf ult, %in, %in_9 : f64 207 | linalg.yield %59 : i1 208 | } -> tensor<1x128xi1> 209 | %39 = linalg.generic {indexing_maps = [#map7, #map4], iterator_types = ["parallel", "parallel"]} ins(%38 : tensor<1x128xi1>) outs(%26 : tensor<1x128xf32>) { 210 | ^bb0(%in: i1, %out: f32): 211 | %59 = arith.uitofp %in : i1 to f32 212 | linalg.yield %59 : f32 213 | } -> tensor<1x128xf32> 214 | %40 = linalg.generic {indexing_maps = [#map7, #map7, #map4], iterator_types = ["parallel", "parallel"]} ins(%39, %30 : tensor<1x128xf32>, tensor<1x128xf32>) outs(%26 : tensor<1x128xf32>) { 215 | ^bb0(%in: f32, %in_9: f32, %out: f32): 216 | %59 = arith.mulf %in, %in_9 : f32 217 | linalg.yield %59 : f32 218 | } -> tensor<1x128xf32> 219 | %41 = linalg.generic 
{indexing_maps = [#map7, #map4], iterator_types = ["parallel", "parallel"]} ins(%40 : tensor<1x128xf32>) outs(%26 : tensor<1x128xf32>) { 220 | ^bb0(%in: f32, %out: f32): 221 | %59 = arith.divf %in, %cst_6 : f32 222 | linalg.yield %59 : f32 223 | } -> tensor<1x128xf32> 224 | %42 = tensor.empty() : tensor<128x10xf32> 225 | %43 = linalg.generic {indexing_maps = [#map4, #map5], iterator_types = ["parallel", "parallel"]} ins(%arg6 : tensor<10x128xf32>) outs(%42 : tensor<128x10xf32>) { 226 | ^bb0(%in: f32, %out: f32): 227 | linalg.yield %in : f32 228 | } -> tensor<128x10xf32> 229 | %44 = tensor.empty() : tensor<1x10xf32> 230 | %45 = linalg.fill ins(%cst : f32) outs(%44 : tensor<1x10xf32>) -> tensor<1x10xf32> 231 | %46 = linalg.matmul ins(%41, %43 : tensor<1x128xf32>, tensor<128x10xf32>) outs(%45 : tensor<1x10xf32>) -> tensor<1x10xf32> 232 | %47 = linalg.generic {indexing_maps = [#map6, #map7, #map4], iterator_types = ["parallel", "parallel"]} ins(%arg7, %46 : tensor<10xf32>, tensor<1x10xf32>) outs(%44 : tensor<1x10xf32>) { 233 | ^bb0(%in: f32, %in_9: f32, %out: f32): 234 | %59 = arith.addf %in, %in_9 : f32 235 | linalg.yield %59 : f32 236 | } -> tensor<1x10xf32> 237 | %48 = tensor.empty() : tensor<1x1xi64> 238 | %49 = linalg.fill ins(%c0_i64 : i64) outs(%48 : tensor<1x1xi64>) -> tensor<1x1xi64> 239 | %50 = tensor.empty() : tensor<1x1xf32> 240 | %51 = linalg.fill ins(%cst_0 : f32) outs(%50 : tensor<1x1xf32>) -> tensor<1x1xf32> 241 | %52:2 = linalg.generic {indexing_maps = [#map4, #map9, #map9], iterator_types = ["parallel", "reduction"]} ins(%47 : tensor<1x10xf32>) outs(%51, %49 : tensor<1x1xf32>, tensor<1x1xi64>) { 242 | ^bb0(%in: f32, %out: f32, %out_9: i64): 243 | %59 = linalg.index 1 : index 244 | %60 = arith.index_cast %59 : index to i64 245 | %61 = arith.maximumf %in, %out : f32 246 | %62 = arith.cmpf ogt, %in, %out : f32 247 | %63 = arith.select %62, %60, %out_9 : i64 248 | linalg.yield %61, %63 : f32, i64 249 | } -> (tensor<1x1xf32>, tensor<1x1xi64>) 250 | %53 = 
linalg.generic {indexing_maps = [#map7, #map10, #map4], iterator_types = ["parallel", "parallel"]} ins(%47, %52#0 : tensor<1x10xf32>, tensor<1x1xf32>) outs(%44 : tensor<1x10xf32>) { 251 | ^bb0(%in: f32, %in_9: f32, %out: f32): 252 | %59 = arith.subf %in, %in_9 : f32 253 | linalg.yield %59 : f32 254 | } -> tensor<1x10xf32> 255 | %54 = linalg.generic {indexing_maps = [#map7, #map4], iterator_types = ["parallel", "parallel"]} ins(%53 : tensor<1x10xf32>) outs(%44 : tensor<1x10xf32>) { 256 | ^bb0(%in: f32, %out: f32): 257 | %59 = math.exp %in : f32 258 | linalg.yield %59 : f32 259 | } -> tensor<1x10xf32> 260 | %55 = linalg.fill ins(%cst : f32) outs(%50 : tensor<1x1xf32>) -> tensor<1x1xf32> 261 | %56 = linalg.generic {indexing_maps = [#map4, #map9], iterator_types = ["parallel", "reduction"]} ins(%54 : tensor<1x10xf32>) outs(%55 : tensor<1x1xf32>) { 262 | ^bb0(%in: f32, %out: f32): 263 | %59 = arith.addf %in, %out : f32 264 | linalg.yield %59 : f32 265 | } -> tensor<1x1xf32> 266 | %57 = linalg.generic {indexing_maps = [#map10, #map4], iterator_types = ["parallel", "parallel"]} ins(%56 : tensor<1x1xf32>) outs(%50 : tensor<1x1xf32>) { 267 | ^bb0(%in: f32, %out: f32): 268 | %59 = math.log %in : f32 269 | linalg.yield %59 : f32 270 | } -> tensor<1x1xf32> 271 | %58 = linalg.generic {indexing_maps = [#map7, #map10, #map4], iterator_types = ["parallel", "parallel"]} ins(%53, %57 : tensor<1x10xf32>, tensor<1x1xf32>) outs(%44 : tensor<1x10xf32>) { 272 | ^bb0(%in: f32, %in_9: f32, %out: f32): 273 | %59 = arith.subf %in, %in_9 : f32 274 | linalg.yield %59 : f32 275 | } -> tensor<1x10xf32> 276 | return %58 : tensor<1x10xf32> 277 | } 278 | } 279 | 280 | -------------------------------------------------------------------------------- /pytorch/torch-dynamo/mlir/resnet18.mlir: -------------------------------------------------------------------------------- 1 | linalg-on-tensors 2 | #map = affine_map<(d0) -> (d0)> 3 | #map1 = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)> 4 | #map2 = 
affine_map<(d0, d1, d2, d3) -> (d1, 0, 0)> 5 | #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> 6 | #map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> 7 | #map5 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)> 8 | #map6 = affine_map<(d0, d1, d2, d3) -> (d0, d1, 0, 0)> 9 | #map7 = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)> 10 | #map8 = affine_map<(d0, d1) -> (d0, d1)> 11 | #map9 = affine_map<(d0, d1) -> (d1, d0)> 12 | #map10 = affine_map<(d0, d1) -> (d1)> 13 | #map11 = affine_map<(d0, d1) -> (0, d1)> 14 | module attributes {torch.debug_module_name = "_lambda"} { 15 | ml_program.global private mutable @global_seed(dense<0> : tensor) : tensor 16 | func.func @forward(%arg0: tensor<64x3x7x7xf32>, %arg1: tensor<64xf32>, %arg2: tensor<64xf32>, %arg3: tensor<64x64x3x3xf32>, %arg4: tensor<64xf32>, %arg5: tensor<64xf32>, %arg6: tensor<64x64x3x3xf32>, %arg7: tensor<64xf32>, %arg8: tensor<64xf32>, %arg9: tensor<64x64x3x3xf32>, %arg10: tensor<64xf32>, %arg11: tensor<64xf32>, %arg12: tensor<64x64x3x3xf32>, %arg13: tensor<64xf32>, %arg14: tensor<64xf32>, %arg15: tensor<128x64x3x3xf32>, %arg16: tensor<128xf32>, %arg17: tensor<128xf32>, %arg18: tensor<128x128x3x3xf32>, %arg19: tensor<128xf32>, %arg20: tensor<128xf32>, %arg21: tensor<128x64x1x1xf32>, %arg22: tensor<128xf32>, %arg23: tensor<128xf32>, %arg24: tensor<128x128x3x3xf32>, %arg25: tensor<128xf32>, %arg26: tensor<128xf32>, %arg27: tensor<128x128x3x3xf32>, %arg28: tensor<128xf32>, %arg29: tensor<128xf32>, %arg30: tensor<256x128x3x3xf32>, %arg31: tensor<256xf32>, %arg32: tensor<256xf32>, %arg33: tensor<256x256x3x3xf32>, %arg34: tensor<256xf32>, %arg35: tensor<256xf32>, %arg36: tensor<256x128x1x1xf32>, %arg37: tensor<256xf32>, %arg38: tensor<256xf32>, %arg39: tensor<256x256x3x3xf32>, %arg40: tensor<256xf32>, %arg41: tensor<256xf32>, %arg42: tensor<256x256x3x3xf32>, %arg43: tensor<256xf32>, %arg44: tensor<256xf32>, %arg45: tensor<512x256x3x3xf32>, %arg46: tensor<512xf32>, %arg47: tensor<512xf32>, 
%arg48: tensor<512x512x3x3xf32>, %arg49: tensor<512xf32>, %arg50: tensor<512xf32>, %arg51: tensor<512x256x1x1xf32>, %arg52: tensor<512xf32>, %arg53: tensor<512xf32>, %arg54: tensor<512x512x3x3xf32>, %arg55: tensor<512xf32>, %arg56: tensor<512xf32>, %arg57: tensor<512x512x3x3xf32>, %arg58: tensor<512xf32>, %arg59: tensor<512xf32>, %arg60: tensor<1000x512xf32>, %arg61: tensor<1000xf32>, %arg62: tensor<64xf32>, %arg63: tensor<64xf32>, %arg64: tensor, %arg65: tensor<64xf32>, %arg66: tensor<64xf32>, %arg67: tensor, %arg68: tensor<64xf32>, %arg69: tensor<64xf32>, %arg70: tensor, %arg71: tensor<64xf32>, %arg72: tensor<64xf32>, %arg73: tensor, %arg74: tensor<64xf32>, %arg75: tensor<64xf32>, %arg76: tensor, %arg77: tensor<128xf32>, %arg78: tensor<128xf32>, %arg79: tensor, %arg80: tensor<128xf32>, %arg81: tensor<128xf32>, %arg82: tensor, %arg83: tensor<128xf32>, %arg84: tensor<128xf32>, %arg85: tensor, %arg86: tensor<128xf32>, %arg87: tensor<128xf32>, %arg88: tensor, %arg89: tensor<128xf32>, %arg90: tensor<128xf32>, %arg91: tensor, %arg92: tensor<256xf32>, %arg93: tensor<256xf32>, %arg94: tensor, %arg95: tensor<256xf32>, %arg96: tensor<256xf32>, %arg97: tensor, %arg98: tensor<256xf32>, %arg99: tensor<256xf32>, %arg100: tensor, %arg101: tensor<256xf32>, %arg102: tensor<256xf32>, %arg103: tensor, %arg104: tensor<256xf32>, %arg105: tensor<256xf32>, %arg106: tensor, %arg107: tensor<512xf32>, %arg108: tensor<512xf32>, %arg109: tensor, %arg110: tensor<512xf32>, %arg111: tensor<512xf32>, %arg112: tensor, %arg113: tensor<512xf32>, %arg114: tensor<512xf32>, %arg115: tensor, %arg116: tensor<512xf32>, %arg117: tensor<512xf32>, %arg118: tensor, %arg119: tensor<512xf32>, %arg120: tensor<512xf32>, %arg121: tensor, %arg122: tensor<1x3x224x224xf32>) -> (tensor<1x1000xf32>, tensor<64x3x7x7xf32>, tensor<64xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>, tensor<128x64x3x3xf32>, 
tensor<128xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>, tensor<128x64x1x1xf32>, tensor<128xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>, tensor<256x128x3x3xf32>, tensor<256xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>, tensor<256x128x1x1xf32>, tensor<256xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>, tensor<512x256x3x3xf32>, tensor<512xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>, tensor<512x256x1x1xf32>, tensor<512xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<1x3x224x224xf32>, tensor<1x64x112x112xf32>, tensor<1x64x112x112xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xi64>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, 
tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512xf32>, tensor<512x1000xf32>) { 17 | %c-1_i64 = arith.constant -1 : i64 18 | %c2 = arith.constant 2 : index 19 | %cst = arith.constant 0.000000e+00 : f32 20 | %cst_0 = arith.constant 1.000000e+00 : f32 21 | %cst_1 = arith.constant 0xFF800000 : f32 22 | %cst_2 = arith.constant 1.000000e-05 : f64 23 | %c112 = arith.constant 112 : index 24 | %c3 = arith.constant 3 : index 25 | %c0 = arith.constant 0 : index 26 | %c1 = arith.constant 1 : index 27 | %cst_3 = arith.constant 4.900000e+01 : f32 28 | %padded = tensor.pad %arg122 low[0, 0, 3, 3] high[0, 0, 3, 3] { 29 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 30 | tensor.yield %cst : f32 31 | } : tensor<1x3x224x224xf32> to tensor<1x3x230x230xf32> 32 | %0 = tensor.empty() : tensor<1x64x112x112xf32> 33 | %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> 34 | %2 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded, %arg0 : tensor<1x3x230x230xf32>, tensor<64x3x7x7xf32>) outs(%1 : tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xf32> 35 | %3 = tensor.empty() : tensor<64xf32> 36 | %4 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg63 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 37 | ^bb0(%in: f32, %out: f32): 38 | %216 = arith.truncf %cst_2 : f64 to f32 39 | %217 = arith.addf %in, %216 : f32 40 | linalg.yield %217 : f32 41 | } -> tensor<64xf32> 42 | %5 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%4 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 43 | ^bb0(%in: f32, %out: f32): 44 | %216 = math.sqrt %in : f32 45 | linalg.yield %216 : f32 46 | } -> 
tensor<64xf32> 47 | %6 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%5 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 48 | ^bb0(%in: f32, %out: f32): 49 | %216 = arith.cmpf one, %in, %cst : f32 50 | cf.assert %216, "unimplemented: tensor with zero element" 51 | %217 = arith.divf %cst_0, %in : f32 52 | linalg.yield %217 : f32 53 | } -> tensor<64xf32> 54 | %expanded = tensor.expand_shape %arg62 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 55 | %expanded_4 = tensor.expand_shape %6 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 56 | %7 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2, %expanded : tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) outs(%0 : tensor<1x64x112x112xf32>) { 57 | ^bb0(%in: f32, %in_100: f32, %out: f32): 58 | %216 = arith.subf %in, %in_100 : f32 59 | linalg.yield %216 : f32 60 | } -> tensor<1x64x112x112xf32> 61 | %8 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%7, %expanded_4 : tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) outs(%0 : tensor<1x64x112x112xf32>) { 62 | ^bb0(%in: f32, %in_100: f32, %out: f32): 63 | %216 = arith.mulf %in, %in_100 : f32 64 | linalg.yield %216 : f32 65 | } -> tensor<1x64x112x112xf32> 66 | %expanded_5 = tensor.expand_shape %arg1 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 67 | %9 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%8, %expanded_5 : tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) outs(%0 : tensor<1x64x112x112xf32>) { 68 | ^bb0(%in: f32, %in_100: f32, %out: f32): 69 | %216 = arith.mulf %in, %in_100 : f32 70 | linalg.yield %216 : f32 71 | } -> tensor<1x64x112x112xf32> 72 | %expanded_6 = tensor.expand_shape %arg2 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 73 | %10 = linalg.generic {indexing_maps = [#map1, 
#map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%9, %expanded_6 : tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) outs(%0 : tensor<1x64x112x112xf32>) { 74 | ^bb0(%in: f32, %in_100: f32, %out: f32): 75 | %216 = arith.addf %in, %in_100 : f32 76 | linalg.yield %216 : f32 77 | } -> tensor<1x64x112x112xf32> 78 | %11 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%10 : tensor<1x64x112x112xf32>) outs(%0 : tensor<1x64x112x112xf32>) { 79 | ^bb0(%in: f32, %out: f32): 80 | %216 = arith.cmpf ugt, %in, %cst : f32 81 | %217 = arith.select %216, %in, %cst : f32 82 | linalg.yield %217 : f32 83 | } -> tensor<1x64x112x112xf32> 84 | %padded_7 = tensor.pad %11 low[0, 0, 1, 1] high[0, 0, 1, 1] { 85 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 86 | tensor.yield %cst_1 : f32 87 | } : tensor<1x64x112x112xf32> to tensor<1x64x114x114xf32> 88 | %12 = tensor.empty() : tensor<1x64x56x56xf32> 89 | %13 = linalg.fill ins(%cst_1 : f32) outs(%12 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> 90 | %14 = tensor.empty() : tensor<3x3xf32> 91 | %15 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_7, %14 : tensor<1x64x114x114xf32>, tensor<3x3xf32>) outs(%13 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> 92 | %16 = tensor.empty() : tensor<1x64x56x56xi64> 93 | %17 = linalg.fill ins(%c-1_i64 : i64) outs(%16 : tensor<1x64x56x56xi64>) -> tensor<1x64x56x56xi64> 94 | %18 = tensor.empty() : tensor<3x3xi64> 95 | %19 = linalg.generic {indexing_maps = [#map4, #map5, #map4], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%15, %18 : tensor<1x64x56x56xf32>, tensor<3x3xi64>) outs(%17 : tensor<1x64x56x56xi64>) { 96 | ^bb0(%in: f32, %in_100: i64, %out: i64): 97 | %216 = linalg.index 0 : index 98 | %217 = linalg.index 1 : index 99 | %218 = linalg.index 2 : index 100 
| %219 = linalg.index 3 : index 101 | %220 = linalg.index 4 : index 102 | %221 = linalg.index 5 : index 103 | %222 = arith.muli %218, %c2 : index 104 | %223 = arith.addi %222, %220 : index 105 | %224 = arith.muli %219, %c2 : index 106 | %225 = arith.addi %224, %221 : index 107 | %extracted = tensor.extract %padded_7[%216, %217, %223, %225] : tensor<1x64x114x114xf32> 108 | %226 = arith.cmpf oeq, %extracted, %in : f32 109 | %227 = arith.subi %223, %c1 : index 110 | %228 = arith.subi %225, %c1 : index 111 | %229 = arith.muli %227, %c112 : index 112 | %230 = arith.addi %229, %228 : index 113 | %231 = arith.index_cast %230 : index to i64 114 | %232 = arith.select %226, %231, %out : i64 115 | %233 = arith.cmpi eq, %out, %c-1_i64 : i64 116 | %234 = arith.select %233, %232, %out : i64 117 | linalg.yield %234 : i64 118 | } -> tensor<1x64x56x56xi64> 119 | %padded_8 = tensor.pad %15 low[0, 0, 1, 1] high[0, 0, 1, 1] { 120 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 121 | tensor.yield %cst : f32 122 | } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> 123 | %20 = linalg.fill ins(%cst : f32) outs(%12 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> 124 | %21 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_8, %arg3 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%20 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> 125 | %22 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg66 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 126 | ^bb0(%in: f32, %out: f32): 127 | %216 = arith.truncf %cst_2 : f64 to f32 128 | %217 = arith.addf %in, %216 : f32 129 | linalg.yield %217 : f32 130 | } -> tensor<64xf32> 131 | %23 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%22 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 132 | ^bb0(%in: f32, %out: f32): 133 | %216 = math.sqrt %in : f32 134 | linalg.yield %216 : f32 135 | } -> 
tensor<64xf32> 136 | %24 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%23 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 137 | ^bb0(%in: f32, %out: f32): 138 | %216 = arith.cmpf one, %in, %cst : f32 139 | cf.assert %216, "unimplemented: tensor with zero element" 140 | %217 = arith.divf %cst_0, %in : f32 141 | linalg.yield %217 : f32 142 | } -> tensor<64xf32> 143 | %expanded_9 = tensor.expand_shape %arg65 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 144 | %expanded_10 = tensor.expand_shape %24 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 145 | %25 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%21, %expanded_9 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 146 | ^bb0(%in: f32, %in_100: f32, %out: f32): 147 | %216 = arith.subf %in, %in_100 : f32 148 | linalg.yield %216 : f32 149 | } -> tensor<1x64x56x56xf32> 150 | %26 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%25, %expanded_10 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 151 | ^bb0(%in: f32, %in_100: f32, %out: f32): 152 | %216 = arith.mulf %in, %in_100 : f32 153 | linalg.yield %216 : f32 154 | } -> tensor<1x64x56x56xf32> 155 | %expanded_11 = tensor.expand_shape %arg4 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 156 | %27 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%26, %expanded_11 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 157 | ^bb0(%in: f32, %in_100: f32, %out: f32): 158 | %216 = arith.mulf %in, %in_100 : f32 159 | linalg.yield %216 : f32 160 | } -> tensor<1x64x56x56xf32> 161 | %expanded_12 = tensor.expand_shape %arg5 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 162 | %28 = 
linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%27, %expanded_12 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 163 | ^bb0(%in: f32, %in_100: f32, %out: f32): 164 | %216 = arith.addf %in, %in_100 : f32 165 | linalg.yield %216 : f32 166 | } -> tensor<1x64x56x56xf32> 167 | %29 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%28 : tensor<1x64x56x56xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 168 | ^bb0(%in: f32, %out: f32): 169 | %216 = arith.cmpf ugt, %in, %cst : f32 170 | %217 = arith.select %216, %in, %cst : f32 171 | linalg.yield %217 : f32 172 | } -> tensor<1x64x56x56xf32> 173 | %padded_13 = tensor.pad %29 low[0, 0, 1, 1] high[0, 0, 1, 1] { 174 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 175 | tensor.yield %cst : f32 176 | } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> 177 | %30 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_13, %arg6 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%20 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> 178 | %31 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg69 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 179 | ^bb0(%in: f32, %out: f32): 180 | %216 = arith.truncf %cst_2 : f64 to f32 181 | %217 = arith.addf %in, %216 : f32 182 | linalg.yield %217 : f32 183 | } -> tensor<64xf32> 184 | %32 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%31 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 185 | ^bb0(%in: f32, %out: f32): 186 | %216 = math.sqrt %in : f32 187 | linalg.yield %216 : f32 188 | } -> tensor<64xf32> 189 | %33 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%32 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 190 | ^bb0(%in: f32, 
%out: f32): 191 | %216 = arith.cmpf one, %in, %cst : f32 192 | cf.assert %216, "unimplemented: tensor with zero element" 193 | %217 = arith.divf %cst_0, %in : f32 194 | linalg.yield %217 : f32 195 | } -> tensor<64xf32> 196 | %expanded_14 = tensor.expand_shape %arg68 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 197 | %expanded_15 = tensor.expand_shape %33 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 198 | %34 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%30, %expanded_14 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 199 | ^bb0(%in: f32, %in_100: f32, %out: f32): 200 | %216 = arith.subf %in, %in_100 : f32 201 | linalg.yield %216 : f32 202 | } -> tensor<1x64x56x56xf32> 203 | %35 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%34, %expanded_15 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 204 | ^bb0(%in: f32, %in_100: f32, %out: f32): 205 | %216 = arith.mulf %in, %in_100 : f32 206 | linalg.yield %216 : f32 207 | } -> tensor<1x64x56x56xf32> 208 | %expanded_16 = tensor.expand_shape %arg7 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 209 | %36 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%35, %expanded_16 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 210 | ^bb0(%in: f32, %in_100: f32, %out: f32): 211 | %216 = arith.mulf %in, %in_100 : f32 212 | linalg.yield %216 : f32 213 | } -> tensor<1x64x56x56xf32> 214 | %expanded_17 = tensor.expand_shape %arg8 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 215 | %37 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%36, %expanded_17 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) 
outs(%12 : tensor<1x64x56x56xf32>) { 216 | ^bb0(%in: f32, %in_100: f32, %out: f32): 217 | %216 = arith.addf %in, %in_100 : f32 218 | linalg.yield %216 : f32 219 | } -> tensor<1x64x56x56xf32> 220 | %38 = linalg.generic {indexing_maps = [#map1, #map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%37, %15 : tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 221 | ^bb0(%in: f32, %in_100: f32, %out: f32): 222 | %216 = arith.addf %in, %in_100 : f32 223 | linalg.yield %216 : f32 224 | } -> tensor<1x64x56x56xf32> 225 | %39 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%38 : tensor<1x64x56x56xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 226 | ^bb0(%in: f32, %out: f32): 227 | %216 = arith.cmpf ugt, %in, %cst : f32 228 | %217 = arith.select %216, %in, %cst : f32 229 | linalg.yield %217 : f32 230 | } -> tensor<1x64x56x56xf32> 231 | %padded_18 = tensor.pad %39 low[0, 0, 1, 1] high[0, 0, 1, 1] { 232 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 233 | tensor.yield %cst : f32 234 | } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> 235 | %40 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_18, %arg9 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%20 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> 236 | %41 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg72 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 237 | ^bb0(%in: f32, %out: f32): 238 | %216 = arith.truncf %cst_2 : f64 to f32 239 | %217 = arith.addf %in, %216 : f32 240 | linalg.yield %217 : f32 241 | } -> tensor<64xf32> 242 | %42 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%41 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 243 | ^bb0(%in: f32, %out: f32): 244 | %216 = math.sqrt %in : f32 245 | linalg.yield %216 
: f32 246 | } -> tensor<64xf32> 247 | %43 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%42 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 248 | ^bb0(%in: f32, %out: f32): 249 | %216 = arith.cmpf one, %in, %cst : f32 250 | cf.assert %216, "unimplemented: tensor with zero element" 251 | %217 = arith.divf %cst_0, %in : f32 252 | linalg.yield %217 : f32 253 | } -> tensor<64xf32> 254 | %expanded_19 = tensor.expand_shape %arg71 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 255 | %expanded_20 = tensor.expand_shape %43 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 256 | %44 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%40, %expanded_19 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 257 | ^bb0(%in: f32, %in_100: f32, %out: f32): 258 | %216 = arith.subf %in, %in_100 : f32 259 | linalg.yield %216 : f32 260 | } -> tensor<1x64x56x56xf32> 261 | %45 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%44, %expanded_20 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 262 | ^bb0(%in: f32, %in_100: f32, %out: f32): 263 | %216 = arith.mulf %in, %in_100 : f32 264 | linalg.yield %216 : f32 265 | } -> tensor<1x64x56x56xf32> 266 | %expanded_21 = tensor.expand_shape %arg10 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 267 | %46 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%45, %expanded_21 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 268 | ^bb0(%in: f32, %in_100: f32, %out: f32): 269 | %216 = arith.mulf %in, %in_100 : f32 270 | linalg.yield %216 : f32 271 | } -> tensor<1x64x56x56xf32> 272 | %expanded_22 = tensor.expand_shape %arg11 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 273 
| %47 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%46, %expanded_22 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 274 | ^bb0(%in: f32, %in_100: f32, %out: f32): 275 | %216 = arith.addf %in, %in_100 : f32 276 | linalg.yield %216 : f32 277 | } -> tensor<1x64x56x56xf32> 278 | %48 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%47 : tensor<1x64x56x56xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 279 | ^bb0(%in: f32, %out: f32): 280 | %216 = arith.cmpf ugt, %in, %cst : f32 281 | %217 = arith.select %216, %in, %cst : f32 282 | linalg.yield %217 : f32 283 | } -> tensor<1x64x56x56xf32> 284 | %padded_23 = tensor.pad %48 low[0, 0, 1, 1] high[0, 0, 1, 1] { 285 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 286 | tensor.yield %cst : f32 287 | } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> 288 | %49 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_23, %arg12 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%20 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> 289 | %50 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg75 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 290 | ^bb0(%in: f32, %out: f32): 291 | %216 = arith.truncf %cst_2 : f64 to f32 292 | %217 = arith.addf %in, %216 : f32 293 | linalg.yield %217 : f32 294 | } -> tensor<64xf32> 295 | %51 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%50 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 296 | ^bb0(%in: f32, %out: f32): 297 | %216 = math.sqrt %in : f32 298 | linalg.yield %216 : f32 299 | } -> tensor<64xf32> 300 | %52 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%51 : tensor<64xf32>) outs(%3 : tensor<64xf32>) { 301 | 
^bb0(%in: f32, %out: f32): 302 | %216 = arith.cmpf one, %in, %cst : f32 303 | cf.assert %216, "unimplemented: tensor with zero element" 304 | %217 = arith.divf %cst_0, %in : f32 305 | linalg.yield %217 : f32 306 | } -> tensor<64xf32> 307 | %expanded_24 = tensor.expand_shape %arg74 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 308 | %expanded_25 = tensor.expand_shape %52 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 309 | %53 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%49, %expanded_24 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 310 | ^bb0(%in: f32, %in_100: f32, %out: f32): 311 | %216 = arith.subf %in, %in_100 : f32 312 | linalg.yield %216 : f32 313 | } -> tensor<1x64x56x56xf32> 314 | %54 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%53, %expanded_25 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 315 | ^bb0(%in: f32, %in_100: f32, %out: f32): 316 | %216 = arith.mulf %in, %in_100 : f32 317 | linalg.yield %216 : f32 318 | } -> tensor<1x64x56x56xf32> 319 | %expanded_26 = tensor.expand_shape %arg13 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 320 | %55 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%54, %expanded_26 : tensor<1x64x56x56xf32>, tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 321 | ^bb0(%in: f32, %in_100: f32, %out: f32): 322 | %216 = arith.mulf %in, %in_100 : f32 323 | linalg.yield %216 : f32 324 | } -> tensor<1x64x56x56xf32> 325 | %expanded_27 = tensor.expand_shape %arg14 [[0, 1, 2]] : tensor<64xf32> into tensor<64x1x1xf32> 326 | %56 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%55, %expanded_27 : tensor<1x64x56x56xf32>, 
tensor<64x1x1xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 327 | ^bb0(%in: f32, %in_100: f32, %out: f32): 328 | %216 = arith.addf %in, %in_100 : f32 329 | linalg.yield %216 : f32 330 | } -> tensor<1x64x56x56xf32> 331 | %57 = linalg.generic {indexing_maps = [#map1, #map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%56, %39 : tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 332 | ^bb0(%in: f32, %in_100: f32, %out: f32): 333 | %216 = arith.addf %in, %in_100 : f32 334 | linalg.yield %216 : f32 335 | } -> tensor<1x64x56x56xf32> 336 | %58 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%57 : tensor<1x64x56x56xf32>) outs(%12 : tensor<1x64x56x56xf32>) { 337 | ^bb0(%in: f32, %out: f32): 338 | %216 = arith.cmpf ugt, %in, %cst : f32 339 | %217 = arith.select %216, %in, %cst : f32 340 | linalg.yield %217 : f32 341 | } -> tensor<1x64x56x56xf32> 342 | %padded_28 = tensor.pad %58 low[0, 0, 1, 1] high[0, 0, 1, 1] { 343 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 344 | tensor.yield %cst : f32 345 | } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> 346 | %59 = tensor.empty() : tensor<1x128x28x28xf32> 347 | %60 = linalg.fill ins(%cst : f32) outs(%59 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> 348 | %61 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_28, %arg15 : tensor<1x64x58x58xf32>, tensor<128x64x3x3xf32>) outs(%60 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> 349 | %62 = tensor.empty() : tensor<128xf32> 350 | %63 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg78 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 351 | ^bb0(%in: f32, %out: f32): 352 | %216 = arith.truncf %cst_2 : f64 to f32 353 | %217 = arith.addf %in, %216 : f32 354 | linalg.yield %217 : f32 355 | } -> tensor<128xf32> 356 
| %64 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%63 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 357 | ^bb0(%in: f32, %out: f32): 358 | %216 = math.sqrt %in : f32 359 | linalg.yield %216 : f32 360 | } -> tensor<128xf32> 361 | %65 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%64 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 362 | ^bb0(%in: f32, %out: f32): 363 | %216 = arith.cmpf one, %in, %cst : f32 364 | cf.assert %216, "unimplemented: tensor with zero element" 365 | %217 = arith.divf %cst_0, %in : f32 366 | linalg.yield %217 : f32 367 | } -> tensor<128xf32> 368 | %expanded_29 = tensor.expand_shape %arg77 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 369 | %expanded_30 = tensor.expand_shape %65 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 370 | %66 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%61, %expanded_29 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 371 | ^bb0(%in: f32, %in_100: f32, %out: f32): 372 | %216 = arith.subf %in, %in_100 : f32 373 | linalg.yield %216 : f32 374 | } -> tensor<1x128x28x28xf32> 375 | %67 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%66, %expanded_30 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 376 | ^bb0(%in: f32, %in_100: f32, %out: f32): 377 | %216 = arith.mulf %in, %in_100 : f32 378 | linalg.yield %216 : f32 379 | } -> tensor<1x128x28x28xf32> 380 | %expanded_31 = tensor.expand_shape %arg16 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 381 | %68 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%67, %expanded_31 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 382 | 
^bb0(%in: f32, %in_100: f32, %out: f32): 383 | %216 = arith.mulf %in, %in_100 : f32 384 | linalg.yield %216 : f32 385 | } -> tensor<1x128x28x28xf32> 386 | %expanded_32 = tensor.expand_shape %arg17 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 387 | %69 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%68, %expanded_32 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 388 | ^bb0(%in: f32, %in_100: f32, %out: f32): 389 | %216 = arith.addf %in, %in_100 : f32 390 | linalg.yield %216 : f32 391 | } -> tensor<1x128x28x28xf32> 392 | %70 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%69 : tensor<1x128x28x28xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 393 | ^bb0(%in: f32, %out: f32): 394 | %216 = arith.cmpf ugt, %in, %cst : f32 395 | %217 = arith.select %216, %in, %cst : f32 396 | linalg.yield %217 : f32 397 | } -> tensor<1x128x28x28xf32> 398 | %padded_33 = tensor.pad %70 low[0, 0, 1, 1] high[0, 0, 1, 1] { 399 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 400 | tensor.yield %cst : f32 401 | } : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> 402 | %71 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_33, %arg18 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%60 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> 403 | %72 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg81 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 404 | ^bb0(%in: f32, %out: f32): 405 | %216 = arith.truncf %cst_2 : f64 to f32 406 | %217 = arith.addf %in, %216 : f32 407 | linalg.yield %217 : f32 408 | } -> tensor<128xf32> 409 | %73 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%72 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 
410 | ^bb0(%in: f32, %out: f32): 411 | %216 = math.sqrt %in : f32 412 | linalg.yield %216 : f32 413 | } -> tensor<128xf32> 414 | %74 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%73 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 415 | ^bb0(%in: f32, %out: f32): 416 | %216 = arith.cmpf one, %in, %cst : f32 417 | cf.assert %216, "unimplemented: tensor with zero element" 418 | %217 = arith.divf %cst_0, %in : f32 419 | linalg.yield %217 : f32 420 | } -> tensor<128xf32> 421 | %expanded_34 = tensor.expand_shape %arg80 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 422 | %expanded_35 = tensor.expand_shape %74 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 423 | %75 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%71, %expanded_34 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 424 | ^bb0(%in: f32, %in_100: f32, %out: f32): 425 | %216 = arith.subf %in, %in_100 : f32 426 | linalg.yield %216 : f32 427 | } -> tensor<1x128x28x28xf32> 428 | %76 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%75, %expanded_35 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 429 | ^bb0(%in: f32, %in_100: f32, %out: f32): 430 | %216 = arith.mulf %in, %in_100 : f32 431 | linalg.yield %216 : f32 432 | } -> tensor<1x128x28x28xf32> 433 | %expanded_36 = tensor.expand_shape %arg19 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 434 | %77 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%76, %expanded_36 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 435 | ^bb0(%in: f32, %in_100: f32, %out: f32): 436 | %216 = arith.mulf %in, %in_100 : f32 437 | linalg.yield %216 : f32 438 | } -> 
tensor<1x128x28x28xf32> 439 | %expanded_37 = tensor.expand_shape %arg20 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 440 | %78 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%77, %expanded_37 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 441 | ^bb0(%in: f32, %in_100: f32, %out: f32): 442 | %216 = arith.addf %in, %in_100 : f32 443 | linalg.yield %216 : f32 444 | } -> tensor<1x128x28x28xf32> 445 | %79 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%58, %arg21 : tensor<1x64x56x56xf32>, tensor<128x64x1x1xf32>) outs(%60 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> 446 | %80 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg84 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 447 | ^bb0(%in: f32, %out: f32): 448 | %216 = arith.truncf %cst_2 : f64 to f32 449 | %217 = arith.addf %in, %216 : f32 450 | linalg.yield %217 : f32 451 | } -> tensor<128xf32> 452 | %81 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%80 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 453 | ^bb0(%in: f32, %out: f32): 454 | %216 = math.sqrt %in : f32 455 | linalg.yield %216 : f32 456 | } -> tensor<128xf32> 457 | %82 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%81 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 458 | ^bb0(%in: f32, %out: f32): 459 | %216 = arith.cmpf one, %in, %cst : f32 460 | cf.assert %216, "unimplemented: tensor with zero element" 461 | %217 = arith.divf %cst_0, %in : f32 462 | linalg.yield %217 : f32 463 | } -> tensor<128xf32> 464 | %expanded_38 = tensor.expand_shape %arg83 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 465 | %expanded_39 = tensor.expand_shape %82 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 466 | %83 = linalg.generic {indexing_maps = 
[#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%79, %expanded_38 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 467 | ^bb0(%in: f32, %in_100: f32, %out: f32): 468 | %216 = arith.subf %in, %in_100 : f32 469 | linalg.yield %216 : f32 470 | } -> tensor<1x128x28x28xf32> 471 | %84 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%83, %expanded_39 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 472 | ^bb0(%in: f32, %in_100: f32, %out: f32): 473 | %216 = arith.mulf %in, %in_100 : f32 474 | linalg.yield %216 : f32 475 | } -> tensor<1x128x28x28xf32> 476 | %expanded_40 = tensor.expand_shape %arg22 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 477 | %85 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%84, %expanded_40 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 478 | ^bb0(%in: f32, %in_100: f32, %out: f32): 479 | %216 = arith.mulf %in, %in_100 : f32 480 | linalg.yield %216 : f32 481 | } -> tensor<1x128x28x28xf32> 482 | %expanded_41 = tensor.expand_shape %arg23 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 483 | %86 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%85, %expanded_41 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 484 | ^bb0(%in: f32, %in_100: f32, %out: f32): 485 | %216 = arith.addf %in, %in_100 : f32 486 | linalg.yield %216 : f32 487 | } -> tensor<1x128x28x28xf32> 488 | %87 = linalg.generic {indexing_maps = [#map1, #map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%78, %86 : tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 489 | 
^bb0(%in: f32, %in_100: f32, %out: f32): 490 | %216 = arith.addf %in, %in_100 : f32 491 | linalg.yield %216 : f32 492 | } -> tensor<1x128x28x28xf32> 493 | %88 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%87 : tensor<1x128x28x28xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 494 | ^bb0(%in: f32, %out: f32): 495 | %216 = arith.cmpf ugt, %in, %cst : f32 496 | %217 = arith.select %216, %in, %cst : f32 497 | linalg.yield %217 : f32 498 | } -> tensor<1x128x28x28xf32> 499 | %padded_42 = tensor.pad %88 low[0, 0, 1, 1] high[0, 0, 1, 1] { 500 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 501 | tensor.yield %cst : f32 502 | } : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> 503 | %89 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_42, %arg24 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%60 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> 504 | %90 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg87 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 505 | ^bb0(%in: f32, %out: f32): 506 | %216 = arith.truncf %cst_2 : f64 to f32 507 | %217 = arith.addf %in, %216 : f32 508 | linalg.yield %217 : f32 509 | } -> tensor<128xf32> 510 | %91 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%90 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 511 | ^bb0(%in: f32, %out: f32): 512 | %216 = math.sqrt %in : f32 513 | linalg.yield %216 : f32 514 | } -> tensor<128xf32> 515 | %92 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%91 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 516 | ^bb0(%in: f32, %out: f32): 517 | %216 = arith.cmpf one, %in, %cst : f32 518 | cf.assert %216, "unimplemented: tensor with zero element" 519 | %217 = arith.divf %cst_0, %in : f32 520 | linalg.yield %217 : f32 521 | } -> 
tensor<128xf32> 522 | %expanded_43 = tensor.expand_shape %arg86 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 523 | %expanded_44 = tensor.expand_shape %92 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 524 | %93 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%89, %expanded_43 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 525 | ^bb0(%in: f32, %in_100: f32, %out: f32): 526 | %216 = arith.subf %in, %in_100 : f32 527 | linalg.yield %216 : f32 528 | } -> tensor<1x128x28x28xf32> 529 | %94 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%93, %expanded_44 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 530 | ^bb0(%in: f32, %in_100: f32, %out: f32): 531 | %216 = arith.mulf %in, %in_100 : f32 532 | linalg.yield %216 : f32 533 | } -> tensor<1x128x28x28xf32> 534 | %expanded_45 = tensor.expand_shape %arg25 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 535 | %95 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%94, %expanded_45 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 536 | ^bb0(%in: f32, %in_100: f32, %out: f32): 537 | %216 = arith.mulf %in, %in_100 : f32 538 | linalg.yield %216 : f32 539 | } -> tensor<1x128x28x28xf32> 540 | %expanded_46 = tensor.expand_shape %arg26 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 541 | %96 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%95, %expanded_46 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 542 | ^bb0(%in: f32, %in_100: f32, %out: f32): 543 | %216 = arith.addf %in, %in_100 : f32 544 | linalg.yield %216 : f32 545 | } -> 
tensor<1x128x28x28xf32> 546 | %97 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%96 : tensor<1x128x28x28xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 547 | ^bb0(%in: f32, %out: f32): 548 | %216 = arith.cmpf ugt, %in, %cst : f32 549 | %217 = arith.select %216, %in, %cst : f32 550 | linalg.yield %217 : f32 551 | } -> tensor<1x128x28x28xf32> 552 | %padded_47 = tensor.pad %97 low[0, 0, 1, 1] high[0, 0, 1, 1] { 553 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 554 | tensor.yield %cst : f32 555 | } : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> 556 | %98 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_47, %arg27 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%60 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> 557 | %99 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg90 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 558 | ^bb0(%in: f32, %out: f32): 559 | %216 = arith.truncf %cst_2 : f64 to f32 560 | %217 = arith.addf %in, %216 : f32 561 | linalg.yield %217 : f32 562 | } -> tensor<128xf32> 563 | %100 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%99 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 564 | ^bb0(%in: f32, %out: f32): 565 | %216 = math.sqrt %in : f32 566 | linalg.yield %216 : f32 567 | } -> tensor<128xf32> 568 | %101 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%100 : tensor<128xf32>) outs(%62 : tensor<128xf32>) { 569 | ^bb0(%in: f32, %out: f32): 570 | %216 = arith.cmpf one, %in, %cst : f32 571 | cf.assert %216, "unimplemented: tensor with zero element" 572 | %217 = arith.divf %cst_0, %in : f32 573 | linalg.yield %217 : f32 574 | } -> tensor<128xf32> 575 | %expanded_48 = tensor.expand_shape %arg89 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 576 | 
%expanded_49 = tensor.expand_shape %101 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 577 | %102 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%98, %expanded_48 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 578 | ^bb0(%in: f32, %in_100: f32, %out: f32): 579 | %216 = arith.subf %in, %in_100 : f32 580 | linalg.yield %216 : f32 581 | } -> tensor<1x128x28x28xf32> 582 | %103 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%102, %expanded_49 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 583 | ^bb0(%in: f32, %in_100: f32, %out: f32): 584 | %216 = arith.mulf %in, %in_100 : f32 585 | linalg.yield %216 : f32 586 | } -> tensor<1x128x28x28xf32> 587 | %expanded_50 = tensor.expand_shape %arg28 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 588 | %104 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%103, %expanded_50 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 589 | ^bb0(%in: f32, %in_100: f32, %out: f32): 590 | %216 = arith.mulf %in, %in_100 : f32 591 | linalg.yield %216 : f32 592 | } -> tensor<1x128x28x28xf32> 593 | %expanded_51 = tensor.expand_shape %arg29 [[0, 1, 2]] : tensor<128xf32> into tensor<128x1x1xf32> 594 | %105 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%104, %expanded_51 : tensor<1x128x28x28xf32>, tensor<128x1x1xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 595 | ^bb0(%in: f32, %in_100: f32, %out: f32): 596 | %216 = arith.addf %in, %in_100 : f32 597 | linalg.yield %216 : f32 598 | } -> tensor<1x128x28x28xf32> 599 | %106 = linalg.generic {indexing_maps = [#map1, #map1, #map3], iterator_types = ["parallel", 
"parallel", "parallel", "parallel"]} ins(%105, %88 : tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 600 | ^bb0(%in: f32, %in_100: f32, %out: f32): 601 | %216 = arith.addf %in, %in_100 : f32 602 | linalg.yield %216 : f32 603 | } -> tensor<1x128x28x28xf32> 604 | %107 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%106 : tensor<1x128x28x28xf32>) outs(%59 : tensor<1x128x28x28xf32>) { 605 | ^bb0(%in: f32, %out: f32): 606 | %216 = arith.cmpf ugt, %in, %cst : f32 607 | %217 = arith.select %216, %in, %cst : f32 608 | linalg.yield %217 : f32 609 | } -> tensor<1x128x28x28xf32> 610 | %padded_52 = tensor.pad %107 low[0, 0, 1, 1] high[0, 0, 1, 1] { 611 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 612 | tensor.yield %cst : f32 613 | } : tensor<1x128x28x28xf32> to tensor<1x128x30x30xf32> 614 | %108 = tensor.empty() : tensor<1x256x14x14xf32> 615 | %109 = linalg.fill ins(%cst : f32) outs(%108 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> 616 | %110 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_52, %arg30 : tensor<1x128x30x30xf32>, tensor<256x128x3x3xf32>) outs(%109 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> 617 | %111 = tensor.empty() : tensor<256xf32> 618 | %112 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg93 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 619 | ^bb0(%in: f32, %out: f32): 620 | %216 = arith.truncf %cst_2 : f64 to f32 621 | %217 = arith.addf %in, %216 : f32 622 | linalg.yield %217 : f32 623 | } -> tensor<256xf32> 624 | %113 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%112 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 625 | ^bb0(%in: f32, %out: f32): 626 | %216 = math.sqrt %in : f32 627 | linalg.yield %216 : f32 628 | } -> tensor<256xf32> 629 | %114 = 
linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%113 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 630 | ^bb0(%in: f32, %out: f32): 631 | %216 = arith.cmpf one, %in, %cst : f32 632 | cf.assert %216, "unimplemented: tensor with zero element" 633 | %217 = arith.divf %cst_0, %in : f32 634 | linalg.yield %217 : f32 635 | } -> tensor<256xf32> 636 | %expanded_53 = tensor.expand_shape %arg92 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 637 | %expanded_54 = tensor.expand_shape %114 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 638 | %115 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%110, %expanded_53 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 639 | ^bb0(%in: f32, %in_100: f32, %out: f32): 640 | %216 = arith.subf %in, %in_100 : f32 641 | linalg.yield %216 : f32 642 | } -> tensor<1x256x14x14xf32> 643 | %116 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%115, %expanded_54 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 644 | ^bb0(%in: f32, %in_100: f32, %out: f32): 645 | %216 = arith.mulf %in, %in_100 : f32 646 | linalg.yield %216 : f32 647 | } -> tensor<1x256x14x14xf32> 648 | %expanded_55 = tensor.expand_shape %arg31 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 649 | %117 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%116, %expanded_55 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 650 | ^bb0(%in: f32, %in_100: f32, %out: f32): 651 | %216 = arith.mulf %in, %in_100 : f32 652 | linalg.yield %216 : f32 653 | } -> tensor<1x256x14x14xf32> 654 | %expanded_56 = tensor.expand_shape %arg32 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 655 | %118 = 
linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%117, %expanded_56 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 656 | ^bb0(%in: f32, %in_100: f32, %out: f32): 657 | %216 = arith.addf %in, %in_100 : f32 658 | linalg.yield %216 : f32 659 | } -> tensor<1x256x14x14xf32> 660 | %119 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%118 : tensor<1x256x14x14xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 661 | ^bb0(%in: f32, %out: f32): 662 | %216 = arith.cmpf ugt, %in, %cst : f32 663 | %217 = arith.select %216, %in, %cst : f32 664 | linalg.yield %217 : f32 665 | } -> tensor<1x256x14x14xf32> 666 | %padded_57 = tensor.pad %119 low[0, 0, 1, 1] high[0, 0, 1, 1] { 667 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 668 | tensor.yield %cst : f32 669 | } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> 670 | %120 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_57, %arg33 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%109 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> 671 | %121 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg96 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 672 | ^bb0(%in: f32, %out: f32): 673 | %216 = arith.truncf %cst_2 : f64 to f32 674 | %217 = arith.addf %in, %216 : f32 675 | linalg.yield %217 : f32 676 | } -> tensor<256xf32> 677 | %122 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%121 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 678 | ^bb0(%in: f32, %out: f32): 679 | %216 = math.sqrt %in : f32 680 | linalg.yield %216 : f32 681 | } -> tensor<256xf32> 682 | %123 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%122 : tensor<256xf32>) outs(%111 : 
tensor<256xf32>) { 683 | ^bb0(%in: f32, %out: f32): 684 | %216 = arith.cmpf one, %in, %cst : f32 685 | cf.assert %216, "unimplemented: tensor with zero element" 686 | %217 = arith.divf %cst_0, %in : f32 687 | linalg.yield %217 : f32 688 | } -> tensor<256xf32> 689 | %expanded_58 = tensor.expand_shape %arg95 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 690 | %expanded_59 = tensor.expand_shape %123 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 691 | %124 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%120, %expanded_58 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 692 | ^bb0(%in: f32, %in_100: f32, %out: f32): 693 | %216 = arith.subf %in, %in_100 : f32 694 | linalg.yield %216 : f32 695 | } -> tensor<1x256x14x14xf32> 696 | %125 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%124, %expanded_59 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 697 | ^bb0(%in: f32, %in_100: f32, %out: f32): 698 | %216 = arith.mulf %in, %in_100 : f32 699 | linalg.yield %216 : f32 700 | } -> tensor<1x256x14x14xf32> 701 | %expanded_60 = tensor.expand_shape %arg34 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 702 | %126 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%125, %expanded_60 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 703 | ^bb0(%in: f32, %in_100: f32, %out: f32): 704 | %216 = arith.mulf %in, %in_100 : f32 705 | linalg.yield %216 : f32 706 | } -> tensor<1x256x14x14xf32> 707 | %expanded_61 = tensor.expand_shape %arg35 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 708 | %127 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", 
"parallel"]} ins(%126, %expanded_61 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 709 | ^bb0(%in: f32, %in_100: f32, %out: f32): 710 | %216 = arith.addf %in, %in_100 : f32 711 | linalg.yield %216 : f32 712 | } -> tensor<1x256x14x14xf32> 713 | %128 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%107, %arg36 : tensor<1x128x28x28xf32>, tensor<256x128x1x1xf32>) outs(%109 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> 714 | %129 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg99 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 715 | ^bb0(%in: f32, %out: f32): 716 | %216 = arith.truncf %cst_2 : f64 to f32 717 | %217 = arith.addf %in, %216 : f32 718 | linalg.yield %217 : f32 719 | } -> tensor<256xf32> 720 | %130 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%129 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 721 | ^bb0(%in: f32, %out: f32): 722 | %216 = math.sqrt %in : f32 723 | linalg.yield %216 : f32 724 | } -> tensor<256xf32> 725 | %131 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%130 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 726 | ^bb0(%in: f32, %out: f32): 727 | %216 = arith.cmpf one, %in, %cst : f32 728 | cf.assert %216, "unimplemented: tensor with zero element" 729 | %217 = arith.divf %cst_0, %in : f32 730 | linalg.yield %217 : f32 731 | } -> tensor<256xf32> 732 | %expanded_62 = tensor.expand_shape %arg98 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 733 | %expanded_63 = tensor.expand_shape %131 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 734 | %132 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%128, %expanded_62 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 735 | ^bb0(%in: f32, %in_100: f32, 
%out: f32): 736 | %216 = arith.subf %in, %in_100 : f32 737 | linalg.yield %216 : f32 738 | } -> tensor<1x256x14x14xf32> 739 | %133 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%132, %expanded_63 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 740 | ^bb0(%in: f32, %in_100: f32, %out: f32): 741 | %216 = arith.mulf %in, %in_100 : f32 742 | linalg.yield %216 : f32 743 | } -> tensor<1x256x14x14xf32> 744 | %expanded_64 = tensor.expand_shape %arg37 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 745 | %134 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%133, %expanded_64 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 746 | ^bb0(%in: f32, %in_100: f32, %out: f32): 747 | %216 = arith.mulf %in, %in_100 : f32 748 | linalg.yield %216 : f32 749 | } -> tensor<1x256x14x14xf32> 750 | %expanded_65 = tensor.expand_shape %arg38 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 751 | %135 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%134, %expanded_65 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 752 | ^bb0(%in: f32, %in_100: f32, %out: f32): 753 | %216 = arith.addf %in, %in_100 : f32 754 | linalg.yield %216 : f32 755 | } -> tensor<1x256x14x14xf32> 756 | %136 = linalg.generic {indexing_maps = [#map1, #map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%127, %135 : tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 757 | ^bb0(%in: f32, %in_100: f32, %out: f32): 758 | %216 = arith.addf %in, %in_100 : f32 759 | linalg.yield %216 : f32 760 | } -> tensor<1x256x14x14xf32> 761 | %137 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = 
["parallel", "parallel", "parallel", "parallel"]} ins(%136 : tensor<1x256x14x14xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 762 | ^bb0(%in: f32, %out: f32): 763 | %216 = arith.cmpf ugt, %in, %cst : f32 764 | %217 = arith.select %216, %in, %cst : f32 765 | linalg.yield %217 : f32 766 | } -> tensor<1x256x14x14xf32> 767 | %padded_66 = tensor.pad %137 low[0, 0, 1, 1] high[0, 0, 1, 1] { 768 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 769 | tensor.yield %cst : f32 770 | } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> 771 | %138 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_66, %arg39 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%109 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> 772 | %139 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg102 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 773 | ^bb0(%in: f32, %out: f32): 774 | %216 = arith.truncf %cst_2 : f64 to f32 775 | %217 = arith.addf %in, %216 : f32 776 | linalg.yield %217 : f32 777 | } -> tensor<256xf32> 778 | %140 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%139 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 779 | ^bb0(%in: f32, %out: f32): 780 | %216 = math.sqrt %in : f32 781 | linalg.yield %216 : f32 782 | } -> tensor<256xf32> 783 | %141 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%140 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 784 | ^bb0(%in: f32, %out: f32): 785 | %216 = arith.cmpf one, %in, %cst : f32 786 | cf.assert %216, "unimplemented: tensor with zero element" 787 | %217 = arith.divf %cst_0, %in : f32 788 | linalg.yield %217 : f32 789 | } -> tensor<256xf32> 790 | %expanded_67 = tensor.expand_shape %arg101 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 791 | %expanded_68 = tensor.expand_shape %141 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 
792 | %142 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%138, %expanded_67 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 793 | ^bb0(%in: f32, %in_100: f32, %out: f32): 794 | %216 = arith.subf %in, %in_100 : f32 795 | linalg.yield %216 : f32 796 | } -> tensor<1x256x14x14xf32> 797 | %143 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%142, %expanded_68 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 798 | ^bb0(%in: f32, %in_100: f32, %out: f32): 799 | %216 = arith.mulf %in, %in_100 : f32 800 | linalg.yield %216 : f32 801 | } -> tensor<1x256x14x14xf32> 802 | %expanded_69 = tensor.expand_shape %arg40 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 803 | %144 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%143, %expanded_69 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 804 | ^bb0(%in: f32, %in_100: f32, %out: f32): 805 | %216 = arith.mulf %in, %in_100 : f32 806 | linalg.yield %216 : f32 807 | } -> tensor<1x256x14x14xf32> 808 | %expanded_70 = tensor.expand_shape %arg41 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 809 | %145 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%144, %expanded_70 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 810 | ^bb0(%in: f32, %in_100: f32, %out: f32): 811 | %216 = arith.addf %in, %in_100 : f32 812 | linalg.yield %216 : f32 813 | } -> tensor<1x256x14x14xf32> 814 | %146 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%145 : tensor<1x256x14x14xf32>) outs(%108 : 
tensor<1x256x14x14xf32>) { 815 | ^bb0(%in: f32, %out: f32): 816 | %216 = arith.cmpf ugt, %in, %cst : f32 817 | %217 = arith.select %216, %in, %cst : f32 818 | linalg.yield %217 : f32 819 | } -> tensor<1x256x14x14xf32> 820 | %padded_71 = tensor.pad %146 low[0, 0, 1, 1] high[0, 0, 1, 1] { 821 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 822 | tensor.yield %cst : f32 823 | } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> 824 | %147 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_71, %arg42 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%109 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> 825 | %148 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg105 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 826 | ^bb0(%in: f32, %out: f32): 827 | %216 = arith.truncf %cst_2 : f64 to f32 828 | %217 = arith.addf %in, %216 : f32 829 | linalg.yield %217 : f32 830 | } -> tensor<256xf32> 831 | %149 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%148 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 832 | ^bb0(%in: f32, %out: f32): 833 | %216 = math.sqrt %in : f32 834 | linalg.yield %216 : f32 835 | } -> tensor<256xf32> 836 | %150 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%149 : tensor<256xf32>) outs(%111 : tensor<256xf32>) { 837 | ^bb0(%in: f32, %out: f32): 838 | %216 = arith.cmpf one, %in, %cst : f32 839 | cf.assert %216, "unimplemented: tensor with zero element" 840 | %217 = arith.divf %cst_0, %in : f32 841 | linalg.yield %217 : f32 842 | } -> tensor<256xf32> 843 | %expanded_72 = tensor.expand_shape %arg104 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 844 | %expanded_73 = tensor.expand_shape %150 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 845 | %151 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", 
"parallel", "parallel", "parallel"]} ins(%147, %expanded_72 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 846 | ^bb0(%in: f32, %in_100: f32, %out: f32): 847 | %216 = arith.subf %in, %in_100 : f32 848 | linalg.yield %216 : f32 849 | } -> tensor<1x256x14x14xf32> 850 | %152 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%151, %expanded_73 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 851 | ^bb0(%in: f32, %in_100: f32, %out: f32): 852 | %216 = arith.mulf %in, %in_100 : f32 853 | linalg.yield %216 : f32 854 | } -> tensor<1x256x14x14xf32> 855 | %expanded_74 = tensor.expand_shape %arg43 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 856 | %153 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%152, %expanded_74 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 857 | ^bb0(%in: f32, %in_100: f32, %out: f32): 858 | %216 = arith.mulf %in, %in_100 : f32 859 | linalg.yield %216 : f32 860 | } -> tensor<1x256x14x14xf32> 861 | %expanded_75 = tensor.expand_shape %arg44 [[0, 1, 2]] : tensor<256xf32> into tensor<256x1x1xf32> 862 | %154 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%153, %expanded_75 : tensor<1x256x14x14xf32>, tensor<256x1x1xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 863 | ^bb0(%in: f32, %in_100: f32, %out: f32): 864 | %216 = arith.addf %in, %in_100 : f32 865 | linalg.yield %216 : f32 866 | } -> tensor<1x256x14x14xf32> 867 | %155 = linalg.generic {indexing_maps = [#map1, #map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%154, %137 : tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 868 | ^bb0(%in: f32, %in_100: f32, %out: f32): 869 
| %216 = arith.addf %in, %in_100 : f32 870 | linalg.yield %216 : f32 871 | } -> tensor<1x256x14x14xf32> 872 | %156 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%155 : tensor<1x256x14x14xf32>) outs(%108 : tensor<1x256x14x14xf32>) { 873 | ^bb0(%in: f32, %out: f32): 874 | %216 = arith.cmpf ugt, %in, %cst : f32 875 | %217 = arith.select %216, %in, %cst : f32 876 | linalg.yield %217 : f32 877 | } -> tensor<1x256x14x14xf32> 878 | %padded_76 = tensor.pad %156 low[0, 0, 1, 1] high[0, 0, 1, 1] { 879 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 880 | tensor.yield %cst : f32 881 | } : tensor<1x256x14x14xf32> to tensor<1x256x16x16xf32> 882 | %157 = tensor.empty() : tensor<1x512x7x7xf32> 883 | %158 = linalg.fill ins(%cst : f32) outs(%157 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> 884 | %159 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_76, %arg45 : tensor<1x256x16x16xf32>, tensor<512x256x3x3xf32>) outs(%158 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> 885 | %160 = tensor.empty() : tensor<512xf32> 886 | %161 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg108 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 887 | ^bb0(%in: f32, %out: f32): 888 | %216 = arith.truncf %cst_2 : f64 to f32 889 | %217 = arith.addf %in, %216 : f32 890 | linalg.yield %217 : f32 891 | } -> tensor<512xf32> 892 | %162 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%161 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 893 | ^bb0(%in: f32, %out: f32): 894 | %216 = math.sqrt %in : f32 895 | linalg.yield %216 : f32 896 | } -> tensor<512xf32> 897 | %163 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%162 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 898 | ^bb0(%in: f32, %out: f32): 899 | %216 = arith.cmpf one, %in, 
%cst : f32 900 | cf.assert %216, "unimplemented: tensor with zero element" 901 | %217 = arith.divf %cst_0, %in : f32 902 | linalg.yield %217 : f32 903 | } -> tensor<512xf32> 904 | %expanded_77 = tensor.expand_shape %arg107 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 905 | %expanded_78 = tensor.expand_shape %163 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 906 | %164 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%159, %expanded_77 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 907 | ^bb0(%in: f32, %in_100: f32, %out: f32): 908 | %216 = arith.subf %in, %in_100 : f32 909 | linalg.yield %216 : f32 910 | } -> tensor<1x512x7x7xf32> 911 | %165 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%164, %expanded_78 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 912 | ^bb0(%in: f32, %in_100: f32, %out: f32): 913 | %216 = arith.mulf %in, %in_100 : f32 914 | linalg.yield %216 : f32 915 | } -> tensor<1x512x7x7xf32> 916 | %expanded_79 = tensor.expand_shape %arg46 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 917 | %166 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%165, %expanded_79 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 918 | ^bb0(%in: f32, %in_100: f32, %out: f32): 919 | %216 = arith.mulf %in, %in_100 : f32 920 | linalg.yield %216 : f32 921 | } -> tensor<1x512x7x7xf32> 922 | %expanded_80 = tensor.expand_shape %arg47 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 923 | %167 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%166, %expanded_80 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : 
tensor<1x512x7x7xf32>) { 924 | ^bb0(%in: f32, %in_100: f32, %out: f32): 925 | %216 = arith.addf %in, %in_100 : f32 926 | linalg.yield %216 : f32 927 | } -> tensor<1x512x7x7xf32> 928 | %168 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%167 : tensor<1x512x7x7xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 929 | ^bb0(%in: f32, %out: f32): 930 | %216 = arith.cmpf ugt, %in, %cst : f32 931 | %217 = arith.select %216, %in, %cst : f32 932 | linalg.yield %217 : f32 933 | } -> tensor<1x512x7x7xf32> 934 | %padded_81 = tensor.pad %168 low[0, 0, 1, 1] high[0, 0, 1, 1] { 935 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 936 | tensor.yield %cst : f32 937 | } : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> 938 | %169 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_81, %arg48 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%158 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> 939 | %170 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg111 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 940 | ^bb0(%in: f32, %out: f32): 941 | %216 = arith.truncf %cst_2 : f64 to f32 942 | %217 = arith.addf %in, %216 : f32 943 | linalg.yield %217 : f32 944 | } -> tensor<512xf32> 945 | %171 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%170 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 946 | ^bb0(%in: f32, %out: f32): 947 | %216 = math.sqrt %in : f32 948 | linalg.yield %216 : f32 949 | } -> tensor<512xf32> 950 | %172 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%171 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 951 | ^bb0(%in: f32, %out: f32): 952 | %216 = arith.cmpf one, %in, %cst : f32 953 | cf.assert %216, "unimplemented: tensor with zero element" 954 | %217 = arith.divf %cst_0, %in : f32 955 | linalg.yield 
%217 : f32 956 | } -> tensor<512xf32> 957 | %expanded_82 = tensor.expand_shape %arg110 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 958 | %expanded_83 = tensor.expand_shape %172 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 959 | %173 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%169, %expanded_82 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 960 | ^bb0(%in: f32, %in_100: f32, %out: f32): 961 | %216 = arith.subf %in, %in_100 : f32 962 | linalg.yield %216 : f32 963 | } -> tensor<1x512x7x7xf32> 964 | %174 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%173, %expanded_83 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 965 | ^bb0(%in: f32, %in_100: f32, %out: f32): 966 | %216 = arith.mulf %in, %in_100 : f32 967 | linalg.yield %216 : f32 968 | } -> tensor<1x512x7x7xf32> 969 | %expanded_84 = tensor.expand_shape %arg49 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 970 | %175 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%174, %expanded_84 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 971 | ^bb0(%in: f32, %in_100: f32, %out: f32): 972 | %216 = arith.mulf %in, %in_100 : f32 973 | linalg.yield %216 : f32 974 | } -> tensor<1x512x7x7xf32> 975 | %expanded_85 = tensor.expand_shape %arg50 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 976 | %176 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%175, %expanded_85 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 977 | ^bb0(%in: f32, %in_100: f32, %out: f32): 978 | %216 = arith.addf %in, %in_100 : f32 979 | linalg.yield %216 : f32 980 | } 
-> tensor<1x512x7x7xf32> 981 | %177 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%156, %arg51 : tensor<1x256x14x14xf32>, tensor<512x256x1x1xf32>) outs(%158 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> 982 | %178 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg114 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 983 | ^bb0(%in: f32, %out: f32): 984 | %216 = arith.truncf %cst_2 : f64 to f32 985 | %217 = arith.addf %in, %216 : f32 986 | linalg.yield %217 : f32 987 | } -> tensor<512xf32> 988 | %179 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%178 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 989 | ^bb0(%in: f32, %out: f32): 990 | %216 = math.sqrt %in : f32 991 | linalg.yield %216 : f32 992 | } -> tensor<512xf32> 993 | %180 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%179 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 994 | ^bb0(%in: f32, %out: f32): 995 | %216 = arith.cmpf one, %in, %cst : f32 996 | cf.assert %216, "unimplemented: tensor with zero element" 997 | %217 = arith.divf %cst_0, %in : f32 998 | linalg.yield %217 : f32 999 | } -> tensor<512xf32> 1000 | %expanded_86 = tensor.expand_shape %arg113 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1001 | %expanded_87 = tensor.expand_shape %180 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1002 | %181 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%177, %expanded_86 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1003 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1004 | %216 = arith.subf %in, %in_100 : f32 1005 | linalg.yield %216 : f32 1006 | } -> tensor<1x512x7x7xf32> 1007 | %182 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} 
ins(%181, %expanded_87 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1008 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1009 | %216 = arith.mulf %in, %in_100 : f32 1010 | linalg.yield %216 : f32 1011 | } -> tensor<1x512x7x7xf32> 1012 | %expanded_88 = tensor.expand_shape %arg52 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1013 | %183 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%182, %expanded_88 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1014 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1015 | %216 = arith.mulf %in, %in_100 : f32 1016 | linalg.yield %216 : f32 1017 | } -> tensor<1x512x7x7xf32> 1018 | %expanded_89 = tensor.expand_shape %arg53 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1019 | %184 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%183, %expanded_89 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1020 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1021 | %216 = arith.addf %in, %in_100 : f32 1022 | linalg.yield %216 : f32 1023 | } -> tensor<1x512x7x7xf32> 1024 | %185 = linalg.generic {indexing_maps = [#map1, #map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%176, %184 : tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1025 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1026 | %216 = arith.addf %in, %in_100 : f32 1027 | linalg.yield %216 : f32 1028 | } -> tensor<1x512x7x7xf32> 1029 | %186 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%185 : tensor<1x512x7x7xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1030 | ^bb0(%in: f32, %out: f32): 1031 | %216 = arith.cmpf ugt, %in, %cst : f32 1032 | %217 = arith.select %216, %in, %cst : f32 1033 | 
linalg.yield %217 : f32 1034 | } -> tensor<1x512x7x7xf32> 1035 | %padded_90 = tensor.pad %186 low[0, 0, 1, 1] high[0, 0, 1, 1] { 1036 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 1037 | tensor.yield %cst : f32 1038 | } : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> 1039 | %187 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_90, %arg54 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%158 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> 1040 | %188 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg117 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 1041 | ^bb0(%in: f32, %out: f32): 1042 | %216 = arith.truncf %cst_2 : f64 to f32 1043 | %217 = arith.addf %in, %216 : f32 1044 | linalg.yield %217 : f32 1045 | } -> tensor<512xf32> 1046 | %189 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%188 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 1047 | ^bb0(%in: f32, %out: f32): 1048 | %216 = math.sqrt %in : f32 1049 | linalg.yield %216 : f32 1050 | } -> tensor<512xf32> 1051 | %190 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%189 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 1052 | ^bb0(%in: f32, %out: f32): 1053 | %216 = arith.cmpf one, %in, %cst : f32 1054 | cf.assert %216, "unimplemented: tensor with zero element" 1055 | %217 = arith.divf %cst_0, %in : f32 1056 | linalg.yield %217 : f32 1057 | } -> tensor<512xf32> 1058 | %expanded_91 = tensor.expand_shape %arg116 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1059 | %expanded_92 = tensor.expand_shape %190 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1060 | %191 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%187, %expanded_91 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 
1061 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1062 | %216 = arith.subf %in, %in_100 : f32 1063 | linalg.yield %216 : f32 1064 | } -> tensor<1x512x7x7xf32> 1065 | %192 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%191, %expanded_92 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1066 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1067 | %216 = arith.mulf %in, %in_100 : f32 1068 | linalg.yield %216 : f32 1069 | } -> tensor<1x512x7x7xf32> 1070 | %expanded_93 = tensor.expand_shape %arg55 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1071 | %193 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%192, %expanded_93 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1072 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1073 | %216 = arith.mulf %in, %in_100 : f32 1074 | linalg.yield %216 : f32 1075 | } -> tensor<1x512x7x7xf32> 1076 | %expanded_94 = tensor.expand_shape %arg56 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1077 | %194 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%193, %expanded_94 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1078 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1079 | %216 = arith.addf %in, %in_100 : f32 1080 | linalg.yield %216 : f32 1081 | } -> tensor<1x512x7x7xf32> 1082 | %195 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%194 : tensor<1x512x7x7xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1083 | ^bb0(%in: f32, %out: f32): 1084 | %216 = arith.cmpf ugt, %in, %cst : f32 1085 | %217 = arith.select %216, %in, %cst : f32 1086 | linalg.yield %217 : f32 1087 | } -> tensor<1x512x7x7xf32> 1088 | %padded_95 = tensor.pad %195 low[0, 
0, 1, 1] high[0, 0, 1, 1] { 1089 | ^bb0(%arg123: index, %arg124: index, %arg125: index, %arg126: index): 1090 | tensor.yield %cst : f32 1091 | } : tensor<1x512x7x7xf32> to tensor<1x512x9x9xf32> 1092 | %196 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_95, %arg57 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%158 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> 1093 | %197 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg120 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 1094 | ^bb0(%in: f32, %out: f32): 1095 | %216 = arith.truncf %cst_2 : f64 to f32 1096 | %217 = arith.addf %in, %216 : f32 1097 | linalg.yield %217 : f32 1098 | } -> tensor<512xf32> 1099 | %198 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%197 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 1100 | ^bb0(%in: f32, %out: f32): 1101 | %216 = math.sqrt %in : f32 1102 | linalg.yield %216 : f32 1103 | } -> tensor<512xf32> 1104 | %199 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%198 : tensor<512xf32>) outs(%160 : tensor<512xf32>) { 1105 | ^bb0(%in: f32, %out: f32): 1106 | %216 = arith.cmpf one, %in, %cst : f32 1107 | cf.assert %216, "unimplemented: tensor with zero element" 1108 | %217 = arith.divf %cst_0, %in : f32 1109 | linalg.yield %217 : f32 1110 | } -> tensor<512xf32> 1111 | %expanded_96 = tensor.expand_shape %arg119 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1112 | %expanded_97 = tensor.expand_shape %199 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1113 | %200 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%196, %expanded_96 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1114 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1115 | %216 = arith.subf %in, %in_100 : f32 1116 | 
linalg.yield %216 : f32 1117 | } -> tensor<1x512x7x7xf32> 1118 | %201 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%200, %expanded_97 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1119 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1120 | %216 = arith.mulf %in, %in_100 : f32 1121 | linalg.yield %216 : f32 1122 | } -> tensor<1x512x7x7xf32> 1123 | %expanded_98 = tensor.expand_shape %arg58 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1124 | %202 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%201, %expanded_98 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1125 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1126 | %216 = arith.mulf %in, %in_100 : f32 1127 | linalg.yield %216 : f32 1128 | } -> tensor<1x512x7x7xf32> 1129 | %expanded_99 = tensor.expand_shape %arg59 [[0, 1, 2]] : tensor<512xf32> into tensor<512x1x1xf32> 1130 | %203 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%202, %expanded_99 : tensor<1x512x7x7xf32>, tensor<512x1x1xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1131 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1132 | %216 = arith.addf %in, %in_100 : f32 1133 | linalg.yield %216 : f32 1134 | } -> tensor<1x512x7x7xf32> 1135 | %204 = linalg.generic {indexing_maps = [#map1, #map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%203, %186 : tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1136 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1137 | %216 = arith.addf %in, %in_100 : f32 1138 | linalg.yield %216 : f32 1139 | } -> tensor<1x512x7x7xf32> 1140 | %205 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%204 : 
tensor<1x512x7x7xf32>) outs(%157 : tensor<1x512x7x7xf32>) { 1141 | ^bb0(%in: f32, %out: f32): 1142 | %216 = arith.cmpf ugt, %in, %cst : f32 1143 | %217 = arith.select %216, %in, %cst : f32 1144 | linalg.yield %217 : f32 1145 | } -> tensor<1x512x7x7xf32> 1146 | %206 = tensor.empty() : tensor<1x512x1x1xf32> 1147 | %207 = linalg.fill ins(%cst : f32) outs(%206 : tensor<1x512x1x1xf32>) -> tensor<1x512x1x1xf32> 1148 | %208 = linalg.generic {indexing_maps = [#map3, #map6], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%205 : tensor<1x512x7x7xf32>) outs(%207 : tensor<1x512x1x1xf32>) { 1149 | ^bb0(%in: f32, %out: f32): 1150 | %216 = arith.addf %in, %out : f32 1151 | linalg.yield %216 : f32 1152 | } -> tensor<1x512x1x1xf32> 1153 | %209 = linalg.generic {indexing_maps = [#map7, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%208 : tensor<1x512x1x1xf32>) outs(%206 : tensor<1x512x1x1xf32>) { 1154 | ^bb0(%in: f32, %out: f32): 1155 | %216 = arith.divf %in, %cst_3 : f32 1156 | linalg.yield %216 : f32 1157 | } -> tensor<1x512x1x1xf32> 1158 | %collapsed = tensor.collapse_shape %209 [[0], [1, 2, 3]] : tensor<1x512x1x1xf32> into tensor<1x512xf32> 1159 | %210 = tensor.empty() : tensor<512x1000xf32> 1160 | %211 = linalg.generic {indexing_maps = [#map8, #map9], iterator_types = ["parallel", "parallel"]} ins(%arg60 : tensor<1000x512xf32>) outs(%210 : tensor<512x1000xf32>) { 1161 | ^bb0(%in: f32, %out: f32): 1162 | linalg.yield %in : f32 1163 | } -> tensor<512x1000xf32> 1164 | %212 = tensor.empty() : tensor<1x1000xf32> 1165 | %213 = linalg.fill ins(%cst : f32) outs(%212 : tensor<1x1000xf32>) -> tensor<1x1000xf32> 1166 | %214 = linalg.matmul ins(%collapsed, %211 : tensor<1x512xf32>, tensor<512x1000xf32>) outs(%213 : tensor<1x1000xf32>) -> tensor<1x1000xf32> 1167 | %215 = linalg.generic {indexing_maps = [#map10, #map11, #map8], iterator_types = ["parallel", "parallel"]} ins(%arg61, %214 : tensor<1000xf32>, tensor<1x1000xf32>) 
outs(%212 : tensor<1x1000xf32>) { 1168 | ^bb0(%in: f32, %in_100: f32, %out: f32): 1169 | %216 = arith.addf %in, %in_100 : f32 1170 | linalg.yield %216 : f32 1171 | } -> tensor<1x1000xf32> 1172 | return %215, %arg0, %arg1, %arg3, %arg4, %arg6, %arg7, %arg9, %arg10, %arg12, %arg13, %arg15, %arg16, %arg18, %arg19, %arg21, %arg22, %arg24, %arg25, %arg27, %arg28, %arg30, %arg31, %arg33, %arg34, %arg36, %arg37, %arg39, %arg40, %arg42, %arg43, %arg45, %arg46, %arg48, %arg49, %arg51, %arg52, %arg54, %arg55, %arg57, %arg58, %arg62, %arg63, %arg65, %arg66, %arg68, %arg69, %arg71, %arg72, %arg74, %arg75, %arg77, %arg78, %arg80, %arg81, %arg83, %arg84, %arg86, %arg87, %arg89, %arg90, %arg92, %arg93, %arg95, %arg96, %arg98, %arg99, %arg101, %arg102, %arg104, %arg105, %arg107, %arg108, %arg110, %arg111, %arg113, %arg114, %arg116, %arg117, %arg119, %arg120, %arg122, %2, %11, %15, %19, %21, %29, %30, %39, %40, %48, %49, %58, %61, %70, %71, %79, %88, %89, %97, %98, %107, %110, %119, %120, %128, %137, %138, %146, %147, %156, %159, %168, %169, %177, %186, %187, %195, %196, %205, %collapsed, %211 : tensor<1x1000xf32>, tensor<64x3x7x7xf32>, tensor<64xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>, tensor<64x64x3x3xf32>, tensor<64xf32>, tensor<128x64x3x3xf32>, tensor<128xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>, tensor<128x64x1x1xf32>, tensor<128xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>, tensor<128x128x3x3xf32>, tensor<128xf32>, tensor<256x128x3x3xf32>, tensor<256xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>, tensor<256x128x1x1xf32>, tensor<256xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>, tensor<256x256x3x3xf32>, tensor<256xf32>, tensor<512x256x3x3xf32>, tensor<512xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>, tensor<512x256x1x1xf32>, tensor<512xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>, tensor<512x512x3x3xf32>, tensor<512xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, 
tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<256xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<512xf32>, tensor<1x3x224x224xf32>, tensor<1x64x112x112xf32>, tensor<1x64x112x112xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xi64>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x64x56x56xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x128x28x28xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x256x14x14xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512x7x7xf32>, tensor<1x512xf32>, tensor<512x1000xf32> 1173 | } 1174 | } 1175 | 1176 | -------------------------------------------------------------------------------- /pytorch/torch-dynamo/models/bert.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 
import torch._dynamo as dynamo 7 | 8 | from transformers import AutoTokenizer, BertModel 9 | 10 | import sys 11 | sys.path.append('../../lib') 12 | from torch_mlir_compile import refbackend_torchdynamo_backend 13 | 14 | 15 | def main(): 16 | device = torch.device("cpu") 17 | # The bare Bert Model transformer outputting raw hidden-states 18 | # without any specific head on top. 19 | # https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertModel 20 | bert_model_name = "bert-base-uncased" 21 | model = BertModel.from_pretrained(bert_model_name).to(device) 22 | dynamo_callable = dynamo.optimize(refbackend_torchdynamo_backend)(model) 23 | 24 | tokenizer = AutoTokenizer.from_pretrained(bert_model_name) 25 | inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") 26 | dynamo_callable(**inputs) 27 | 28 | if __name__ == '__main__': 29 | with torch.no_grad(): 30 | main() 31 | -------------------------------------------------------------------------------- /pytorch/torch-dynamo/models/conv.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from typing import List 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch._dynamo as dynamo 9 | 10 | import sys 11 | sys.path.append('../../lib') 12 | from torch_mlir_compile import refbackend_torchdynamo_backend 13 | 14 | class Net(nn.Module): 15 | def __init__(self): 16 | super(Net, self).__init__() 17 | self.conv1 = nn.Conv2d(3, 32, 3, 3) 18 | self.conv2 = nn.Conv2d(32, 2, 3, 3) 19 | 20 | def forward(self, x): 21 | x = self.conv1(x) 22 | x = F.relu(x) 23 | x = self.conv2(x) 24 | x = F.relu(x) 25 | output = F.log_softmax(x, dim=1) 26 | return output 27 | 28 | 29 | def main(): 30 | device = torch.device("cpu") 31 | simple = Net().to(device) 32 | dynamo_callable = dynamo.optimize(refbackend_torchdynamo_backend)(simple) 33 | dynamo_callable(torch.ones(1, 3, 28, 28)) 34 | 35 | if 
__name__ == '__main__': 36 | with torch.no_grad(): 37 | main() 38 | -------------------------------------------------------------------------------- /pytorch/torch-dynamo/models/linear.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch._dynamo as dynamo 7 | 8 | import sys 9 | sys.path.append('../../lib') 10 | from torch_mlir_compile import refbackend_torchdynamo_backend 11 | 12 | 13 | class Net(nn.Module): 14 | def __init__(self): 15 | super(Net, self).__init__() 16 | self.fc1 = nn.Linear(128, 256) 17 | self.fc2 = nn.Linear(256, 10) 18 | 19 | def forward(self, x): 20 | x = self.fc1(x) 21 | x = F.relu(x) 22 | x = self.fc2(x) 23 | x = F.relu(x) 24 | output = F.log_softmax(x, dim=1) 25 | return output 26 | 27 | 28 | def main(): 29 | device = torch.device("cpu") 30 | simple = Net().to(device) 31 | dynamo_callable = dynamo.optimize(refbackend_torchdynamo_backend)(simple) 32 | dynamo_callable(torch.ones(2, 128)) 33 | 34 | if __name__ == '__main__': 35 | with torch.no_grad(): 36 | main() 37 | -------------------------------------------------------------------------------- /pytorch/torch-dynamo/models/mnist.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch._dynamo as dynamo 7 | 8 | import sys 9 | sys.path.append('../../lib') 10 | from torch_mlir_compile import refbackend_torchdynamo_backend 11 | 12 | 13 | # Model taken from PyTorch examples - https://github.com/pytorch/examples/blob/main/mnist/main.py 14 | class MNIST(nn.Module): 15 | def __init__(self): 16 | super(MNIST, self).__init__() 17 | self.conv1 = nn.Conv2d(1, 32, 3, 1) 18 | self.conv2 = nn.Conv2d(32, 64, 3, 1) 19 | self.dropout1 = nn.Dropout(0.25) 20 | self.dropout2 
= nn.Dropout(0.5) 21 | self.fc1 = nn.Linear(9216, 128) 22 | self.fc2 = nn.Linear(128, 10) 23 | 24 | def forward(self, x): 25 | x = self.conv1(x) 26 | x = F.relu(x) 27 | x = self.conv2(x) 28 | x = F.relu(x) 29 | x = F.max_pool2d(x, 2) 30 | x = self.dropout1(x) 31 | x = torch.flatten(x, 1) 32 | x = self.fc1(x) 33 | x = F.relu(x) 34 | x = self.dropout2(x) 35 | x = self.fc2(x) 36 | output = F.log_softmax(x, dim=1) 37 | return output 38 | 39 | def main(): 40 | device = torch.device("cpu") 41 | mnist = MNIST().to(device) 42 | dynamo_callable = dynamo.optimize(refbackend_torchdynamo_backend)(mnist) 43 | dynamo_callable(torch.ones(1, 1, 28, 28)) 44 | 45 | if __name__ == '__main__': 46 | with torch.no_grad(): 47 | main() -------------------------------------------------------------------------------- /pytorch/torch-dynamo/models/resnet18.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch._dynamo as dynamo 4 | 5 | import sys 6 | sys.path.append('../../lib') 7 | from torch_mlir_compile import refbackend_torchdynamo_backend 8 | 9 | resnet18 = torchvision.models.resnet18(weights=torchvision.models.resnet.ResNet18_Weights.IMAGENET1K_V1) 10 | resnet18.eval() 11 | 12 | dynamo_callable = dynamo.optimize(refbackend_torchdynamo_backend)(resnet18) 13 | dynamo_callable(torch.ones(1, 3, 224, 224)) 14 | -------------------------------------------------------------------------------- /pytorch/torch-script/mlir/conv.mlir: -------------------------------------------------------------------------------- 1 | torch 2 | module attributes {torch.debug_module_name = "Net"} { 3 | func.func @forward(%arg0: !torch.vtensor<[1,3,28,28],f32>) -> !torch.vtensor<[1,2,3,3],f32> { 4 | %true = torch.constant.bool true 5 | %float1.000000e00 = torch.constant.float 1.000000e+00 6 | %none = torch.constant.none 7 | %false = torch.constant.bool false 8 | %0 = torch.vtensor.literal(dense<[-0.0228016917, 0.0496884249]> : 
tensor<2xf32>) : !torch.vtensor<[2],f32> 9 | %1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2x32x3x3xf32>) : !torch.vtensor<[2,32,3,3],f32> 10 | %2 = torch.vtensor.literal(dense_resource<__elided__> : tensor<32xf32>) : !torch.vtensor<[32],f32> 11 | %3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<32x3x3x3xf32>) : !torch.vtensor<[32,3,3,3],f32> 12 | %int0 = torch.constant.int 0 13 | %int1 = torch.constant.int 1 14 | %int3 = torch.constant.int 3 15 | %4 = torch.prim.ListConstruct %int3, %int3 : (!torch.int, !torch.int) -> !torch.list 16 | %5 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list 17 | %6 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list 18 | %7 = torch.prim.ListConstruct : () -> !torch.list 19 | %8 = torch.aten.convolution %arg0, %3, %2, %4, %5, %6, %false, %7, %int1 : !torch.vtensor<[1,3,28,28],f32>, !torch.vtensor<[32,3,3,3],f32>, !torch.vtensor<[32],f32>, !torch.list, !torch.list, !torch.list, !torch.bool, !torch.list, !torch.int -> !torch.vtensor<[1,32,9,9],f32> 20 | %9 = torch.aten.relu %8 : !torch.vtensor<[1,32,9,9],f32> -> !torch.vtensor<[1,32,9,9],f32> 21 | %10 = torch.prim.ListConstruct : () -> !torch.list 22 | %11 = torch.aten.convolution %9, %1, %0, %4, %5, %6, %false, %10, %int1 : !torch.vtensor<[1,32,9,9],f32>, !torch.vtensor<[2,32,3,3],f32>, !torch.vtensor<[2],f32>, !torch.list, !torch.list, !torch.list, !torch.bool, !torch.list, !torch.int -> !torch.vtensor<[1,2,3,3],f32> 23 | %12 = torch.aten.relu %11 : !torch.vtensor<[1,2,3,3],f32> -> !torch.vtensor<[1,2,3,3],f32> 24 | %values, %indices = torch.aten.max.dim %12, %int1, %true : !torch.vtensor<[1,2,3,3],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,1,3,3],f32>, !torch.vtensor<[1,1,3,3],si64> 25 | %13 = torch.aten.sub.Tensor %12, %values, %float1.000000e00 : !torch.vtensor<[1,2,3,3],f32>, !torch.vtensor<[1,1,3,3],f32>, !torch.float -> !torch.vtensor<[1,2,3,3],f32> 26 | %14 = torch.aten.exp %13 : 
!torch.vtensor<[1,2,3,3],f32> -> !torch.vtensor<[1,2,3,3],f32> 27 | %15 = torch.prim.ListConstruct %int1 : (!torch.int) -> !torch.list 28 | %16 = torch.aten.sum.dim_IntList %14, %15, %true, %none : !torch.vtensor<[1,2,3,3],f32>, !torch.list, !torch.bool, !torch.none -> !torch.vtensor<[1,1,3,3],f32> 29 | %17 = torch.aten.log %16 : !torch.vtensor<[1,1,3,3],f32> -> !torch.vtensor<[1,1,3,3],f32> 30 | %18 = torch.aten.sub.Tensor %13, %17, %float1.000000e00 : !torch.vtensor<[1,2,3,3],f32>, !torch.vtensor<[1,1,3,3],f32>, !torch.float -> !torch.vtensor<[1,2,3,3],f32> 31 | return %18 : !torch.vtensor<[1,2,3,3],f32> 32 | } 33 | } 34 | 35 | linalg-on-tensors 36 | #map = affine_map<(d0, d1, d2, d3) -> (d1)> 37 | #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> 38 | #map2 = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)> 39 | #map3 = affine_map<(d0, d1, d2, d3) -> (d0, 0, d2, d3)> 40 | #map4 = affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)> 41 | module attributes {torch.debug_module_name = "Net"} { 42 | ml_program.global private mutable @global_seed(dense<0> : tensor) : tensor 43 | func.func @forward(%arg0: tensor<1x3x28x28xf32>) -> tensor<1x2x3x3xf32> { 44 | %cst = arith.constant dense<[-0.0228016917, 0.0496884249]> : tensor<2xf32> 45 | %cst_0 = arith.constant dense_resource<__elided__> : tensor<2x32x3x3xf32> 46 | %cst_1 = arith.constant dense_resource<__elided__> : tensor<32xf32> 47 | %cst_2 = arith.constant dense_resource<__elided__> : tensor<32x3x3x3xf32> 48 | %cst_3 = arith.constant 0.000000e+00 : f32 49 | %cst_4 = arith.constant 0xFF800000 : f32 50 | %c0_i64 = arith.constant 0 : i64 51 | %0 = tensor.empty() : tensor<1x32x9x9xf32> 52 | %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_1 : tensor<32xf32>) outs(%0 : tensor<1x32x9x9xf32>) { 53 | ^bb0(%in: f32, %out: f32): 54 | linalg.yield %in : f32 55 | } -> tensor<1x32x9x9xf32> 56 | %2 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : 
vector<2xi64>, strides = dense<3> : vector<2xi64>} ins(%arg0, %cst_2 : tensor<1x3x28x28xf32>, tensor<32x3x3x3xf32>) outs(%1 : tensor<1x32x9x9xf32>) -> tensor<1x32x9x9xf32> 57 | %3 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<1x32x9x9xf32>) outs(%0 : tensor<1x32x9x9xf32>) { 58 | ^bb0(%in: f32, %out: f32): 59 | %19 = arith.cmpf ugt, %in, %cst_3 : f32 60 | %20 = arith.select %19, %in, %cst_3 : f32 61 | linalg.yield %20 : f32 62 | } -> tensor<1x32x9x9xf32> 63 | %4 = tensor.empty() : tensor<1x2x3x3xf32> 64 | %5 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst : tensor<2xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 65 | ^bb0(%in: f32, %out: f32): 66 | linalg.yield %in : f32 67 | } -> tensor<1x2x3x3xf32> 68 | %6 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<3> : vector<2xi64>} ins(%3, %cst_0 : tensor<1x32x9x9xf32>, tensor<2x32x3x3xf32>) outs(%5 : tensor<1x2x3x3xf32>) -> tensor<1x2x3x3xf32> 69 | %7 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6 : tensor<1x2x3x3xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 70 | ^bb0(%in: f32, %out: f32): 71 | %19 = arith.cmpf ugt, %in, %cst_3 : f32 72 | %20 = arith.select %19, %in, %cst_3 : f32 73 | linalg.yield %20 : f32 74 | } -> tensor<1x2x3x3xf32> 75 | %8 = tensor.empty() : tensor<1x1x3x3xi64> 76 | %9 = linalg.fill ins(%c0_i64 : i64) outs(%8 : tensor<1x1x3x3xi64>) -> tensor<1x1x3x3xi64> 77 | %10 = tensor.empty() : tensor<1x1x3x3xf32> 78 | %11 = linalg.fill ins(%cst_4 : f32) outs(%10 : tensor<1x1x3x3xf32>) -> tensor<1x1x3x3xf32> 79 | %12:2 = linalg.generic {indexing_maps = [#map1, #map3, #map3], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} ins(%7 : tensor<1x2x3x3xf32>) outs(%11, %9 : tensor<1x1x3x3xf32>, tensor<1x1x3x3xi64>) { 80 | ^bb0(%in: f32, %out: 
f32, %out_5: i64): 81 | %19 = linalg.index 1 : index 82 | %20 = arith.index_cast %19 : index to i64 83 | %21 = arith.maximumf %in, %out : f32 84 | %22 = arith.cmpf ogt, %in, %out : f32 85 | %23 = arith.select %22, %20, %out_5 : i64 86 | linalg.yield %21, %23 : f32, i64 87 | } -> (tensor<1x1x3x3xf32>, tensor<1x1x3x3xi64>) 88 | %13 = linalg.generic {indexing_maps = [#map2, #map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%7, %12#0 : tensor<1x2x3x3xf32>, tensor<1x1x3x3xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 89 | ^bb0(%in: f32, %in_5: f32, %out: f32): 90 | %19 = arith.subf %in, %in_5 : f32 91 | linalg.yield %19 : f32 92 | } -> tensor<1x2x3x3xf32> 93 | %14 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13 : tensor<1x2x3x3xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 94 | ^bb0(%in: f32, %out: f32): 95 | %19 = math.exp %in : f32 96 | linalg.yield %19 : f32 97 | } -> tensor<1x2x3x3xf32> 98 | %15 = linalg.fill ins(%cst_3 : f32) outs(%10 : tensor<1x1x3x3xf32>) -> tensor<1x1x3x3xf32> 99 | %16 = linalg.generic {indexing_maps = [#map1, #map3], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} ins(%14 : tensor<1x2x3x3xf32>) outs(%15 : tensor<1x1x3x3xf32>) { 100 | ^bb0(%in: f32, %out: f32): 101 | %19 = arith.addf %in, %out : f32 102 | linalg.yield %19 : f32 103 | } -> tensor<1x1x3x3xf32> 104 | %17 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%16 : tensor<1x1x3x3xf32>) outs(%10 : tensor<1x1x3x3xf32>) { 105 | ^bb0(%in: f32, %out: f32): 106 | %19 = math.log %in : f32 107 | linalg.yield %19 : f32 108 | } -> tensor<1x1x3x3xf32> 109 | %18 = linalg.generic {indexing_maps = [#map2, #map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13, %17 : tensor<1x2x3x3xf32>, tensor<1x1x3x3xf32>) outs(%4 : tensor<1x2x3x3xf32>) { 110 | ^bb0(%in: f32, %in_5: f32, %out: 
f32): 111 | %19 = arith.subf %in, %in_5 : f32 112 | linalg.yield %19 : f32 113 | } -> tensor<1x2x3x3xf32> 114 | return %18 : tensor<1x2x3x3xf32> 115 | } 116 | } 117 | 118 | tosa 119 | module attributes {torch.debug_module_name = "Net"} { 120 | func.func @forward(%arg0: tensor<1x3x28x28xf32>) -> tensor<1x2x3x3xf32> { 121 | %0 = "tosa.const"() <{value = dense<[-0.0228016917, 0.0496884249]> : tensor<2xf32>}> : () -> tensor<2xf32> 122 | %1 = "tosa.const"() <{value = dense_resource<__elided__> : tensor<2x32x3x3xf32>}> : () -> tensor<2x32x3x3xf32> 123 | %2 = "tosa.const"() <{value = dense_resource<__elided__> : tensor<32xf32>}> : () -> tensor<32xf32> 124 | %3 = "tosa.const"() <{value = dense_resource<__elided__> : tensor<32x3x3x3xf32>}> : () -> tensor<32x3x3x3xf32> 125 | %4 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> 126 | %5 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> 127 | %6 = tosa.transpose %arg0, %4 : (tensor<1x3x28x28xf32>, tensor<4xi32>) -> tensor<1x28x28x3xf32> 128 | %7 = tosa.transpose %3, %4 : (tensor<32x3x3x3xf32>, tensor<4xi32>) -> tensor<32x3x3x3xf32> 129 | %8 = tosa.conv2d %6, %7, %2 {dilation = array, pad = array, stride = array} : (tensor<1x28x28x3xf32>, tensor<32x3x3x3xf32>, tensor<32xf32>) -> tensor<1x9x9x32xf32> 130 | %9 = tosa.transpose %8, %5 : (tensor<1x9x9x32xf32>, tensor<4xi32>) -> tensor<1x32x9x9xf32> 131 | %10 = tosa.clamp %9 {max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x32x9x9xf32>) -> tensor<1x32x9x9xf32> 132 | %11 = tosa.transpose %10, %4 : (tensor<1x32x9x9xf32>, tensor<4xi32>) -> tensor<1x9x9x32xf32> 133 | %12 = tosa.transpose %1, %4 : (tensor<2x32x3x3xf32>, tensor<4xi32>) -> tensor<2x3x3x32xf32> 134 | %13 = tosa.conv2d %11, %12, %0 {dilation = array, pad = array, stride = array} : (tensor<1x9x9x32xf32>, tensor<2x3x3x32xf32>, tensor<2xf32>) -> tensor<1x3x3x2xf32> 135 | %14 = 
tosa.transpose %13, %5 : (tensor<1x3x3x2xf32>, tensor<4xi32>) -> tensor<1x2x3x3xf32> 136 | %15 = tosa.clamp %14 {max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x2x3x3xf32>) -> tensor<1x2x3x3xf32> 137 | %16 = tosa.reduce_max %15 {axis = 1 : i32} : (tensor<1x2x3x3xf32>) -> tensor<1x1x3x3xf32> 138 | %17 = tosa.sub %15, %16 : (tensor<1x2x3x3xf32>, tensor<1x1x3x3xf32>) -> tensor<1x2x3x3xf32> 139 | %18 = tosa.exp %17 : (tensor<1x2x3x3xf32>) -> tensor<1x2x3x3xf32> 140 | %19 = tosa.reduce_sum %18 {axis = 1 : i32} : (tensor<1x2x3x3xf32>) -> tensor<1x1x3x3xf32> 141 | %20 = tosa.log %19 : (tensor<1x1x3x3xf32>) -> tensor<1x1x3x3xf32> 142 | %21 = tosa.sub %17, %20 : (tensor<1x2x3x3xf32>, tensor<1x1x3x3xf32>) -> tensor<1x2x3x3xf32> 143 | return %21 : tensor<1x2x3x3xf32> 144 | } 145 | } 146 | 147 | stablehlo 148 | module attributes {torch.debug_module_name = "Net"} { 149 | func.func @forward(%arg0: tensor<1x3x28x28xf32>) -> tensor<1x2x3x3xf32> { 150 | %0 = stablehlo.constant dense<[-0.0228016917, 0.0496884249]> : tensor<2xf32> 151 | %1 = stablehlo.constant dense_resource<__elided__> : tensor<2x32x3x3xf32> 152 | %2 = stablehlo.constant dense_resource<__elided__> : tensor<32xf32> 153 | %3 = stablehlo.constant dense_resource<__elided__> : tensor<32x3x3x3xf32> 154 | %4 = stablehlo.constant dense<0xFF800000> : tensor 155 | %5 = stablehlo.constant dense<0> : tensor 156 | %6 = stablehlo.constant dense<0.000000e+00> : tensor 157 | %cst = arith.constant dense<[32, 1, 1]> : tensor<3xi64> 158 | %7 = chlo.constant dense<0.000000e+00> : tensor<1x32x9x9xf32> 159 | %cst_0 = arith.constant dense<[2, 1, 1]> : tensor<3xi64> 160 | %8 = chlo.constant dense<0.000000e+00> : tensor<1x2x3x3xf32> 161 | %cst_1 = arith.constant dense<[1, 2, 3, 3]> : tensor<4xi64> 162 | %cst_2 = arith.constant dense<[1, 1, 3, 3]> : tensor<4xi64> 163 | %9 = stablehlo.convolution(%arg0, %3) dim_numbers = [b, f, 0, 1]x[o, i, 0, 1]->[b, f, 0, 1], window = 
{stride = [3, 3], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x3x28x28xf32>, tensor<32x3x3x3xf32>) -> tensor<1x32x9x9xf32> 164 | %10 = stablehlo.dynamic_reshape %2, %cst : (tensor<32xf32>, tensor<3xi64>) -> tensor<32x1x1xf32> 165 | %11 = chlo.broadcast_add %9, %10 : (tensor<1x32x9x9xf32>, tensor<32x1x1xf32>) -> tensor<1x32x9x9xf32> 166 | %12 = stablehlo.maximum %11, %7 : tensor<1x32x9x9xf32> 167 | %13 = stablehlo.convolution(%12, %1) dim_numbers = [b, f, 0, 1]x[o, i, 0, 1]->[b, f, 0, 1], window = {stride = [3, 3], pad = [[0, 0], [0, 0]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x32x9x9xf32>, tensor<2x32x3x3xf32>) -> tensor<1x2x3x3xf32> 168 | %14 = stablehlo.dynamic_reshape %0, %cst_0 : (tensor<2xf32>, tensor<3xi64>) -> tensor<2x1x1xf32> 169 | %15 = chlo.broadcast_add %13, %14 : (tensor<1x2x3x3xf32>, tensor<2x1x1xf32>) -> tensor<1x2x3x3xf32> 170 | %16 = stablehlo.maximum %15, %8 : tensor<1x2x3x3xf32> 171 | %17 = stablehlo.dynamic_iota %cst_1, dim = 1 : (tensor<4xi64>) -> tensor<1x2x3x3xi64> 172 | %18:2 = stablehlo.reduce(%16 init: %4), (%17 init: %5) across dimensions = [1] : (tensor<1x2x3x3xf32>, tensor<1x2x3x3xi64>, tensor, tensor) -> (tensor<1x3x3xf32>, tensor<1x3x3xi64>) 173 | reducer(%arg1: tensor, %arg3: tensor) (%arg2: tensor, %arg4: tensor) { 174 | %26 = stablehlo.compare GE, %arg1, %arg3, FLOAT : (tensor, tensor) -> tensor 175 | %27 = stablehlo.select %26, %arg1, %arg3 : tensor, tensor 176 | %28 = stablehlo.compare EQ, %arg1, %arg3, FLOAT : (tensor, tensor) -> tensor 177 | %29 = stablehlo.minimum %arg2, %arg4 : tensor 178 | %30 = stablehlo.select %26, %arg2, %arg4 : tensor, tensor 179 | %31 = stablehlo.select %28, %29, %30 : tensor, tensor 180 | stablehlo.return %27, %31 : tensor, tensor 181 | } 182 | %19 = stablehlo.dynamic_reshape %18#0, %cst_2 : (tensor<1x3x3xf32>, tensor<4xi64>) -> tensor<1x1x3x3xf32> 183 | %20 = 
chlo.broadcast_subtract %16, %19 : (tensor<1x2x3x3xf32>, tensor<1x1x3x3xf32>) -> tensor<1x2x3x3xf32> 184 | %21 = stablehlo.exponential %20 : tensor<1x2x3x3xf32> 185 | %22 = stablehlo.reduce(%21 init: %6) applies stablehlo.add across dimensions = [1] : (tensor<1x2x3x3xf32>, tensor) -> tensor<1x3x3xf32> 186 | %23 = stablehlo.dynamic_reshape %22, %cst_2 : (tensor<1x3x3xf32>, tensor<4xi64>) -> tensor<1x1x3x3xf32> 187 | %24 = stablehlo.log %23 : tensor<1x1x3x3xf32> 188 | %25 = chlo.broadcast_subtract %20, %24 : (tensor<1x2x3x3xf32>, tensor<1x1x3x3xf32>) -> tensor<1x2x3x3xf32> 189 | return %25 : tensor<1x2x3x3xf32> 190 | } 191 | } 192 | 193 | -------------------------------------------------------------------------------- /pytorch/torch-script/mlir/linear.mlir: -------------------------------------------------------------------------------- 1 | torch 2 | module attributes {torch.debug_module_name = "Net"} { 3 | func.func @forward(%arg0: !torch.vtensor<[2,128],f32>) -> !torch.vtensor<[2,10],f32> { 4 | %true = torch.constant.bool true 5 | %float1.000000e00 = torch.constant.float 1.000000e+00 6 | %none = torch.constant.none 7 | %int0 = torch.constant.int 0 8 | %int1 = torch.constant.int 1 9 | %0 = torch.vtensor.literal(dense<[-0.00630987436, -0.0443928167, 0.0618280694, -0.0368138924, -0.0515485033, 0.00771782547, -0.0303224251, -0.0296016484, 0.0289968103, 0.0607223138]> : tensor<10xf32>) : !torch.vtensor<[10],f32> 10 | %1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10x256xf32>) : !torch.vtensor<[10,256],f32> 11 | %2 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32> 12 | %3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x128xf32>) : !torch.vtensor<[256,128],f32> 13 | %4 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[256,128],f32>, !torch.int, !torch.int -> !torch.vtensor<[128,256],f32> 14 | %5 = torch.aten.mm %arg0, %4 : !torch.vtensor<[2,128],f32>, !torch.vtensor<[128,256],f32> 
-> !torch.vtensor<[2,256],f32> 15 | %6 = torch.aten.add.Tensor %5, %2, %float1.000000e00 : !torch.vtensor<[2,256],f32>, !torch.vtensor<[256],f32>, !torch.float -> !torch.vtensor<[2,256],f32> 16 | %7 = torch.aten.relu %6 : !torch.vtensor<[2,256],f32> -> !torch.vtensor<[2,256],f32> 17 | %8 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[10,256],f32>, !torch.int, !torch.int -> !torch.vtensor<[256,10],f32> 18 | %9 = torch.aten.mm %7, %8 : !torch.vtensor<[2,256],f32>, !torch.vtensor<[256,10],f32> -> !torch.vtensor<[2,10],f32> 19 | %10 = torch.aten.add.Tensor %9, %0, %float1.000000e00 : !torch.vtensor<[2,10],f32>, !torch.vtensor<[10],f32>, !torch.float -> !torch.vtensor<[2,10],f32> 20 | %11 = torch.aten.relu %10 : !torch.vtensor<[2,10],f32> -> !torch.vtensor<[2,10],f32> 21 | %values, %indices = torch.aten.max.dim %11, %int1, %true : !torch.vtensor<[2,10],f32>, !torch.int, !torch.bool -> !torch.vtensor<[2,1],f32>, !torch.vtensor<[2,1],si64> 22 | %12 = torch.aten.sub.Tensor %11, %values, %float1.000000e00 : !torch.vtensor<[2,10],f32>, !torch.vtensor<[2,1],f32>, !torch.float -> !torch.vtensor<[2,10],f32> 23 | %13 = torch.aten.exp %12 : !torch.vtensor<[2,10],f32> -> !torch.vtensor<[2,10],f32> 24 | %14 = torch.prim.ListConstruct %int1 : (!torch.int) -> !torch.list 25 | %15 = torch.aten.sum.dim_IntList %13, %14, %true, %none : !torch.vtensor<[2,10],f32>, !torch.list, !torch.bool, !torch.none -> !torch.vtensor<[2,1],f32> 26 | %16 = torch.aten.log %15 : !torch.vtensor<[2,1],f32> -> !torch.vtensor<[2,1],f32> 27 | %17 = torch.aten.sub.Tensor %12, %16, %float1.000000e00 : !torch.vtensor<[2,10],f32>, !torch.vtensor<[2,1],f32>, !torch.float -> !torch.vtensor<[2,10],f32> 28 | return %17 : !torch.vtensor<[2,10],f32> 29 | } 30 | } 31 | 32 | linalg-on-tensors 33 | #map = affine_map<(d0, d1) -> (d0, d1)> 34 | #map1 = affine_map<(d0, d1) -> (d1, d0)> 35 | #map2 = affine_map<(d0, d1) -> (d1)> 36 | #map3 = affine_map<(d0, d1) -> (d0, 0)> 37 | module attributes 
{torch.debug_module_name = "Net"} { 38 | ml_program.global private mutable @global_seed(dense<0> : tensor) : tensor 39 | func.func @forward(%arg0: tensor<2x128xf32>) -> tensor<2x10xf32> { 40 | %c0_i64 = arith.constant 0 : i64 41 | %cst = arith.constant dense<[-0.00630987436, -0.0443928167, 0.0618280694, -0.0368138924, -0.0515485033, 0.00771782547, -0.0303224251, -0.0296016484, 0.0289968103, 0.0607223138]> : tensor<10xf32> 42 | %cst_0 = arith.constant dense_resource<__elided__> : tensor<10x256xf32> 43 | %cst_1 = arith.constant dense_resource<__elided__> : tensor<256xf32> 44 | %cst_2 = arith.constant dense_resource<__elided__> : tensor<256x128xf32> 45 | %cst_3 = arith.constant 0.000000e+00 : f32 46 | %cst_4 = arith.constant 0xFF800000 : f32 47 | %0 = tensor.empty() : tensor<128x256xf32> 48 | %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%cst_2 : tensor<256x128xf32>) outs(%0 : tensor<128x256xf32>) { 49 | ^bb0(%in: f32, %out: f32): 50 | linalg.yield %in : f32 51 | } -> tensor<128x256xf32> 52 | %2 = tensor.empty() : tensor<2x256xf32> 53 | %3 = linalg.fill ins(%cst_3 : f32) outs(%2 : tensor<2x256xf32>) -> tensor<2x256xf32> 54 | %4 = linalg.matmul ins(%arg0, %1 : tensor<2x128xf32>, tensor<128x256xf32>) outs(%3 : tensor<2x256xf32>) -> tensor<2x256xf32> 55 | %5 = linalg.generic {indexing_maps = [#map, #map2, #map], iterator_types = ["parallel", "parallel"]} ins(%4, %cst_1 : tensor<2x256xf32>, tensor<256xf32>) outs(%2 : tensor<2x256xf32>) { 56 | ^bb0(%in: f32, %in_5: f32, %out: f32): 57 | %25 = arith.addf %in, %in_5 : f32 58 | linalg.yield %25 : f32 59 | } -> tensor<2x256xf32> 60 | %6 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%5 : tensor<2x256xf32>) outs(%2 : tensor<2x256xf32>) { 61 | ^bb0(%in: f32, %out: f32): 62 | %25 = arith.cmpf ugt, %in, %cst_3 : f32 63 | %26 = arith.select %25, %in, %cst_3 : f32 64 | linalg.yield %26 : f32 65 | } -> tensor<2x256xf32> 66 | %7 = 
tensor.empty() : tensor<256x10xf32> 67 | %8 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%cst_0 : tensor<10x256xf32>) outs(%7 : tensor<256x10xf32>) { 68 | ^bb0(%in: f32, %out: f32): 69 | linalg.yield %in : f32 70 | } -> tensor<256x10xf32> 71 | %9 = tensor.empty() : tensor<2x10xf32> 72 | %10 = linalg.fill ins(%cst_3 : f32) outs(%9 : tensor<2x10xf32>) -> tensor<2x10xf32> 73 | %11 = linalg.matmul ins(%6, %8 : tensor<2x256xf32>, tensor<256x10xf32>) outs(%10 : tensor<2x10xf32>) -> tensor<2x10xf32> 74 | %12 = linalg.generic {indexing_maps = [#map, #map2, #map], iterator_types = ["parallel", "parallel"]} ins(%11, %cst : tensor<2x10xf32>, tensor<10xf32>) outs(%9 : tensor<2x10xf32>) { 75 | ^bb0(%in: f32, %in_5: f32, %out: f32): 76 | %25 = arith.addf %in, %in_5 : f32 77 | linalg.yield %25 : f32 78 | } -> tensor<2x10xf32> 79 | %13 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%12 : tensor<2x10xf32>) outs(%9 : tensor<2x10xf32>) { 80 | ^bb0(%in: f32, %out: f32): 81 | %25 = arith.cmpf ugt, %in, %cst_3 : f32 82 | %26 = arith.select %25, %in, %cst_3 : f32 83 | linalg.yield %26 : f32 84 | } -> tensor<2x10xf32> 85 | %14 = tensor.empty() : tensor<2x1xi64> 86 | %15 = linalg.fill ins(%c0_i64 : i64) outs(%14 : tensor<2x1xi64>) -> tensor<2x1xi64> 87 | %16 = tensor.empty() : tensor<2x1xf32> 88 | %17 = linalg.fill ins(%cst_4 : f32) outs(%16 : tensor<2x1xf32>) -> tensor<2x1xf32> 89 | %18:2 = linalg.generic {indexing_maps = [#map, #map3, #map3], iterator_types = ["parallel", "reduction"]} ins(%13 : tensor<2x10xf32>) outs(%17, %15 : tensor<2x1xf32>, tensor<2x1xi64>) { 90 | ^bb0(%in: f32, %out: f32, %out_5: i64): 91 | %25 = linalg.index 1 : index 92 | %26 = arith.index_cast %25 : index to i64 93 | %27 = arith.maximumf %in, %out : f32 94 | %28 = arith.cmpf ogt, %in, %out : f32 95 | %29 = arith.select %28, %26, %out_5 : i64 96 | linalg.yield %27, %29 : f32, i64 97 | } -> (tensor<2x1xf32>, 
tensor<2x1xi64>) 98 | %19 = linalg.generic {indexing_maps = [#map, #map3, #map], iterator_types = ["parallel", "parallel"]} ins(%13, %18#0 : tensor<2x10xf32>, tensor<2x1xf32>) outs(%9 : tensor<2x10xf32>) { 99 | ^bb0(%in: f32, %in_5: f32, %out: f32): 100 | %25 = arith.subf %in, %in_5 : f32 101 | linalg.yield %25 : f32 102 | } -> tensor<2x10xf32> 103 | %20 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%19 : tensor<2x10xf32>) outs(%9 : tensor<2x10xf32>) { 104 | ^bb0(%in: f32, %out: f32): 105 | %25 = math.exp %in : f32 106 | linalg.yield %25 : f32 107 | } -> tensor<2x10xf32> 108 | %21 = linalg.fill ins(%cst_3 : f32) outs(%16 : tensor<2x1xf32>) -> tensor<2x1xf32> 109 | %22 = linalg.generic {indexing_maps = [#map, #map3], iterator_types = ["parallel", "reduction"]} ins(%20 : tensor<2x10xf32>) outs(%21 : tensor<2x1xf32>) { 110 | ^bb0(%in: f32, %out: f32): 111 | %25 = arith.addf %in, %out : f32 112 | linalg.yield %25 : f32 113 | } -> tensor<2x1xf32> 114 | %23 = linalg.generic {indexing_maps = [#map3, #map], iterator_types = ["parallel", "parallel"]} ins(%22 : tensor<2x1xf32>) outs(%16 : tensor<2x1xf32>) { 115 | ^bb0(%in: f32, %out: f32): 116 | %25 = math.log %in : f32 117 | linalg.yield %25 : f32 118 | } -> tensor<2x1xf32> 119 | %24 = linalg.generic {indexing_maps = [#map, #map3, #map], iterator_types = ["parallel", "parallel"]} ins(%19, %23 : tensor<2x10xf32>, tensor<2x1xf32>) outs(%9 : tensor<2x10xf32>) { 120 | ^bb0(%in: f32, %in_5: f32, %out: f32): 121 | %25 = arith.subf %in, %in_5 : f32 122 | linalg.yield %25 : f32 123 | } -> tensor<2x10xf32> 124 | return %24 : tensor<2x10xf32> 125 | } 126 | } 127 | 128 | tosa 129 | module attributes {torch.debug_module_name = "Net"} { 130 | func.func @forward(%arg0: tensor<2x128xf32>) -> tensor<2x10xf32> { 131 | %0 = "tosa.const"() <{value = dense_resource<__elided__> : tensor<10x256xf32>}> : () -> tensor<10x256xf32> 132 | %1 = "tosa.const"() <{value = dense_resource<__elided__> : 
tensor<256x128xf32>}> : () -> tensor<256x128xf32> 133 | %2 = "tosa.const"() <{value = dense<[1, 0]> : tensor<2xi32>}> : () -> tensor<2xi32> 134 | %3 = "tosa.const"() <{value = dense<[[-0.00630987436, -0.0443928167, 0.0618280694, -0.0368138924, -0.0515485033, 0.00771782547, -0.0303224251, -0.0296016484, 0.0289968103, 0.0607223138]]> : tensor<1x10xf32>}> : () -> tensor<1x10xf32> 135 | %4 = "tosa.const"() <{value = dense_resource<__elided__> : tensor<1x256xf32>}> : () -> tensor<1x256xf32> 136 | %5 = tosa.transpose %1, %2 : (tensor<256x128xf32>, tensor<2xi32>) -> tensor<128x256xf32> 137 | %6 = tosa.reshape %arg0 {new_shape = array} : (tensor<2x128xf32>) -> tensor<1x2x128xf32> 138 | %7 = tosa.reshape %5 {new_shape = array} : (tensor<128x256xf32>) -> tensor<1x128x256xf32> 139 | %8 = tosa.matmul %6, %7 : (tensor<1x2x128xf32>, tensor<1x128x256xf32>) -> tensor<1x2x256xf32> 140 | %9 = tosa.reshape %8 {new_shape = array} : (tensor<1x2x256xf32>) -> tensor<2x256xf32> 141 | %10 = tosa.add %9, %4 : (tensor<2x256xf32>, tensor<1x256xf32>) -> tensor<2x256xf32> 142 | %11 = tosa.clamp %10 {max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<2x256xf32>) -> tensor<2x256xf32> 143 | %12 = tosa.transpose %0, %2 : (tensor<10x256xf32>, tensor<2xi32>) -> tensor<256x10xf32> 144 | %13 = tosa.reshape %11 {new_shape = array} : (tensor<2x256xf32>) -> tensor<1x2x256xf32> 145 | %14 = tosa.reshape %12 {new_shape = array} : (tensor<256x10xf32>) -> tensor<1x256x10xf32> 146 | %15 = tosa.matmul %13, %14 : (tensor<1x2x256xf32>, tensor<1x256x10xf32>) -> tensor<1x2x10xf32> 147 | %16 = tosa.reshape %15 {new_shape = array} : (tensor<1x2x10xf32>) -> tensor<2x10xf32> 148 | %17 = tosa.add %16, %3 : (tensor<2x10xf32>, tensor<1x10xf32>) -> tensor<2x10xf32> 149 | %18 = tosa.clamp %17 {max_fp = 3.40282347E+38 : f32, max_int = 2147483647 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<2x10xf32>) -> tensor<2x10xf32> 150 | %19 = 
tosa.reduce_max %18 {axis = 1 : i32} : (tensor<2x10xf32>) -> tensor<2x1xf32> 151 | %20 = tosa.sub %18, %19 : (tensor<2x10xf32>, tensor<2x1xf32>) -> tensor<2x10xf32> 152 | %21 = tosa.exp %20 : (tensor<2x10xf32>) -> tensor<2x10xf32> 153 | %22 = tosa.reduce_sum %21 {axis = 1 : i32} : (tensor<2x10xf32>) -> tensor<2x1xf32> 154 | %23 = tosa.log %22 : (tensor<2x1xf32>) -> tensor<2x1xf32> 155 | %24 = tosa.sub %20, %23 : (tensor<2x10xf32>, tensor<2x1xf32>) -> tensor<2x10xf32> 156 | return %24 : tensor<2x10xf32> 157 | } 158 | } 159 | 160 | stablehlo 161 | module attributes {torch.debug_module_name = "Net"} { 162 | func.func @forward(%arg0: tensor<2x128xf32>) -> tensor<2x10xf32> { 163 | %0 = stablehlo.constant dense<[-0.00630987436, -0.0443928167, 0.0618280694, -0.0368138924, -0.0515485033, 0.00771782547, -0.0303224251, -0.0296016484, 0.0289968103, 0.0607223138]> : tensor<10xf32> 164 | %1 = stablehlo.constant dense_resource<__elided__> : tensor<10x256xf32> 165 | %2 = stablehlo.constant dense_resource<__elided__> : tensor<256xf32> 166 | %3 = stablehlo.constant dense_resource<__elided__> : tensor<256x128xf32> 167 | %4 = stablehlo.constant dense<0xFF800000> : tensor 168 | %5 = stablehlo.constant dense<0> : tensor 169 | %6 = stablehlo.constant dense<0.000000e+00> : tensor 170 | %7 = chlo.constant dense<0.000000e+00> : tensor<2x256xf32> 171 | %8 = chlo.constant dense<0.000000e+00> : tensor<2x10xf32> 172 | %cst = arith.constant dense<[2, 10]> : tensor<2xi64> 173 | %cst_0 = arith.constant dense<[2, 1]> : tensor<2xi64> 174 | %9 = stablehlo.transpose %3, dims = [1, 0] : (tensor<256x128xf32>) -> tensor<128x256xf32> 175 | %10 = stablehlo.dot %arg0, %9 : (tensor<2x128xf32>, tensor<128x256xf32>) -> tensor<2x256xf32> 176 | %11 = chlo.broadcast_add %10, %2 : (tensor<2x256xf32>, tensor<256xf32>) -> tensor<2x256xf32> 177 | %12 = stablehlo.maximum %11, %7 : tensor<2x256xf32> 178 | %13 = stablehlo.transpose %1, dims = [1, 0] : (tensor<10x256xf32>) -> tensor<256x10xf32> 179 | %14 = stablehlo.dot 
%12, %13 : (tensor<2x256xf32>, tensor<256x10xf32>) -> tensor<2x10xf32> 180 | %15 = chlo.broadcast_add %14, %0 : (tensor<2x10xf32>, tensor<10xf32>) -> tensor<2x10xf32> 181 | %16 = stablehlo.maximum %15, %8 : tensor<2x10xf32> 182 | %17 = stablehlo.dynamic_iota %cst, dim = 1 : (tensor<2xi64>) -> tensor<2x10xi64> 183 | %18:2 = stablehlo.reduce(%16 init: %4), (%17 init: %5) across dimensions = [1] : (tensor<2x10xf32>, tensor<2x10xi64>, tensor, tensor) -> (tensor<2xf32>, tensor<2xi64>) 184 | reducer(%arg1: tensor, %arg3: tensor) (%arg2: tensor, %arg4: tensor) { 185 | %26 = stablehlo.compare GE, %arg1, %arg3, FLOAT : (tensor, tensor) -> tensor 186 | %27 = stablehlo.select %26, %arg1, %arg3 : tensor, tensor 187 | %28 = stablehlo.compare EQ, %arg1, %arg3, FLOAT : (tensor, tensor) -> tensor 188 | %29 = stablehlo.minimum %arg2, %arg4 : tensor 189 | %30 = stablehlo.select %26, %arg2, %arg4 : tensor, tensor 190 | %31 = stablehlo.select %28, %29, %30 : tensor, tensor 191 | stablehlo.return %27, %31 : tensor, tensor 192 | } 193 | %19 = stablehlo.dynamic_reshape %18#0, %cst_0 : (tensor<2xf32>, tensor<2xi64>) -> tensor<2x1xf32> 194 | %20 = chlo.broadcast_subtract %16, %19 : (tensor<2x10xf32>, tensor<2x1xf32>) -> tensor<2x10xf32> 195 | %21 = stablehlo.exponential %20 : tensor<2x10xf32> 196 | %22 = stablehlo.reduce(%21 init: %6) applies stablehlo.add across dimensions = [1] : (tensor<2x10xf32>, tensor) -> tensor<2xf32> 197 | %23 = stablehlo.dynamic_reshape %22, %cst_0 : (tensor<2xf32>, tensor<2xi64>) -> tensor<2x1xf32> 198 | %24 = stablehlo.log %23 : tensor<2x1xf32> 199 | %25 = chlo.broadcast_subtract %20, %24 : (tensor<2x10xf32>, tensor<2x1xf32>) -> tensor<2x10xf32> 200 | return %25 : tensor<2x10xf32> 201 | } 202 | } 203 | 204 | -------------------------------------------------------------------------------- /pytorch/torch-script/models/conv.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import 
torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | import sys 8 | sys.path.append('../../lib') 9 | from torch_mlir_compile import torch_mlir_compile 10 | 11 | class Net(nn.Module): 12 | def __init__(self): 13 | super(Net, self).__init__() 14 | self.conv1 = nn.Conv2d(3, 32, 3, 3) 15 | self.conv2 = nn.Conv2d(32, 2, 3, 3) 16 | 17 | def forward(self, x): 18 | x = self.conv1(x) 19 | x = F.relu(x) 20 | x = self.conv2(x) 21 | x = F.relu(x) 22 | output = F.log_softmax(x, dim=1) 23 | return output 24 | 25 | def main(): 26 | device = torch.device("cpu") 27 | simple = Net().to(device) 28 | # Any other dialect segfaults 29 | torch_mlir_compile(simple, torch.ones(1, 3, 28, 28)) 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /pytorch/torch-script/models/linear.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | import sys 8 | sys.path.append('../../lib') 9 | from torch_mlir_compile import torch_mlir_compile 10 | 11 | class Net(nn.Module): 12 | def __init__(self): 13 | super(Net, self).__init__() 14 | self.fc1 = nn.Linear(128, 256) 15 | self.fc2 = nn.Linear(256, 10) 16 | 17 | def forward(self, x): 18 | x = self.fc1(x) 19 | x = F.relu(x) 20 | x = self.fc2(x) 21 | x = F.relu(x) 22 | output = F.log_softmax(x, dim=1) 23 | return output 24 | 25 | def main(): 26 | device = torch.device("cpu") 27 | simple = Net().to(device) 28 | torch_mlir_compile(simple, torch.ones(2, 128)) 29 | 30 | if __name__ == '__main__': 31 | main() 32 | -------------------------------------------------------------------------------- /pytorch/torch-script/models/resnet18.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | import sys 5 | 
sys.path.append('../../lib') 6 | from torch_mlir_compile import torch_mlir_compile 7 | 8 | resnet18 = torchvision.models.resnet18(weights=torchvision.models.resnet.ResNet18_Weights.IMAGENET1K_V1) 9 | resnet18.eval() 10 | 11 | torch_mlir_compile(resnet18, torch.ones(1, 3, 224, 224),['torch', 'linalg-on-tensors', 'tosa']) -------------------------------------------------------------------------------- /tensorflow/mlir/conv.mlir: -------------------------------------------------------------------------------- 1 | module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 1087 : i32}} { 2 | func @main(%arg0: tensor<2x28x28x3xf32>) -> tensor<2x?x?x?xf32> attributes {tf.entry_function = {control_outputs = "", inputs = "args_0:0", outputs = "Identity:0"}} { 3 | %0 = "tf.Placeholder"() {device = "", shape = #tf_type.shape<>} : () -> tensor 4 | %1 = "tf.ReadVariableOp"(%0) {device = ""} : (tensor) -> tensor<*xf32> 5 | %2 = "tf.ReadVariableOp"(%0) {device = ""} : (tensor) -> tensor<*xf32> 6 | %3 = "tf.Conv2D"(%arg0, %2) {data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 1, 1], use_cudnn_on_gpu = true} : (tensor<2x28x28x3xf32>, tensor<*xf32>) -> tensor<2x?x?x?xf32> 7 | %4 = "tf.BiasAdd"(%3, %1) {data_format = "NHWC", device = ""} : (tensor<2x?x?x?xf32>, tensor<*xf32>) -> tensor<2x?x?x?xf32> 8 | %5 = "tf.Relu"(%4) {device = ""} : (tensor<2x?x?x?xf32>) -> tensor<2x?x?x?xf32> 9 | %6 = "tf.ReadVariableOp"(%0) {device = ""} : (tensor) -> tensor<*xf32> 10 | %7 = "tf.ReadVariableOp"(%0) {device = ""} : (tensor) -> tensor<*xf32> 11 | %8 = "tf.Conv2D"(%5, %7) {data_format = "NHWC", device = "", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "VALID", strides = [1, 1, 1, 1], use_cudnn_on_gpu = true} : (tensor<2x?x?x?xf32>, tensor<*xf32>) -> tensor<2x?x?x?xf32> 12 | %9 = "tf.BiasAdd"(%8, %6) {data_format = "NHWC", device = ""} : (tensor<2x?x?x?xf32>, tensor<*xf32>) -> 
tensor<2x?x?x?xf32> 13 | %10 = "tf.Relu"(%9) {device = ""} : (tensor<2x?x?x?xf32>) -> tensor<2x?x?x?xf32> 14 | %11 = "tf.MaxPool"(%10) {data_format = "NHWC", device = "", explicit_paddings = [], ksize = [1, 23, 23, 1], padding = "VALID", strides = [1, 1, 1, 1]} : (tensor<2x?x?x?xf32>) -> tensor<2x?x?x?xf32> 15 | %12 = "tf.Identity"(%11) {device = ""} : (tensor<2x?x?x?xf32>) -> tensor<2x?x?x?xf32> 16 | return %12 : tensor<2x?x?x?xf32> 17 | } 18 | } 19 | 20 | -------------------------------------------------------------------------------- /tensorflow/mlir/linear.mlir: -------------------------------------------------------------------------------- 1 | module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 1087 : i32}} { 2 | func @main(%arg0: tensor<2x16xf32>) -> tensor<2x?xf32> attributes {tf.entry_function = {control_outputs = "", inputs = "args_0:0", outputs = "Identity:0"}} { 3 | %0 = "tf.Placeholder"() {device = "", shape = #tf_type.shape<>} : () -> tensor 4 | %1 = "tf.ReadVariableOp"(%0) {device = ""} : (tensor) -> tensor<*xf32> 5 | %2 = "tf.MatMul"(%arg0, %1) {device = "", transpose_a = false, transpose_b = false} : (tensor<2x16xf32>, tensor<*xf32>) -> tensor<2x?xf32> 6 | %3 = "tf.Relu"(%2) {device = ""} : (tensor<2x?xf32>) -> tensor<2x?xf32> 7 | %4 = "tf.ReadVariableOp"(%0) {device = ""} : (tensor) -> tensor<*xf32> 8 | %5 = "tf.ReadVariableOp"(%0) {device = ""} : (tensor) -> tensor<*xf32> 9 | %6 = "tf.MatMul"(%3, %5) {device = "", transpose_a = false, transpose_b = false} : (tensor<2x?xf32>, tensor<*xf32>) -> tensor<2x?xf32> 10 | %7 = "tf.BiasAdd"(%6, %4) {data_format = "NHWC", device = ""} : (tensor<2x?xf32>, tensor<*xf32>) -> tensor<2x?xf32> 11 | %8 = "tf.Relu"(%7) {device = ""} : (tensor<2x?xf32>) -> tensor<2x?xf32> 12 | %9 = "tf.ReadVariableOp"(%0) {device = ""} : (tensor) -> tensor<*xf32> 13 | %10 = "tf.MatMul"(%8, %9) {device = "", transpose_a = false, transpose_b = false} : (tensor<2x?xf32>, tensor<*xf32>) -> 
tensor<2x?xf32> 14 | %11 = "tf.Identity"(%10) {device = ""} : (tensor<2x?xf32>) -> tensor<2x?xf32> 15 | return %11 : tensor<2x?xf32> 16 | } 17 | } 18 | 19 | -------------------------------------------------------------------------------- /tensorflow/models/conv.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 3 | import tensorflow as tf 4 | from tensorflow.python.pywrap_mlir import import_graphdef 5 | 6 | class MyModel(tf.keras.Model): 7 | def build(self, input_shape): 8 | # https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D 9 | self.x = tf.keras.layers.InputLayer(input_shape=input_shape, batch_size=2) 10 | self.c1 = tf.keras.layers.Conv2D(2, 3, activation='relu') 11 | self.c2 = tf.keras.layers.Conv2D(2, 3, activation='relu', use_bias=True) 12 | # https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPool2D 13 | self.out = tf.keras.layers.MaxPool2D(pool_size=(23, 23), strides=(1, 1)) 14 | 15 | def call(self, x): 16 | x = self.x(x) 17 | x = self.c1(x) 18 | x = self.c2(x) 19 | x = self.out(x) 20 | return x 21 | 22 | 23 | if __name__ == "__main__": 24 | # NHWC shape 25 | input_shape=(2, 28, 28, 3) 26 | 27 | model = MyModel(tf.TensorSpec(shape=input_shape, dtype=tf.float32)) 28 | 29 | # model.compile(optimizer='sgd', loss='mse') 30 | # model.fit( 31 | # tf.constant(tf.ones(shape=input_shape), dtype=tf.float32), 32 | # tf.constant(tf.ones(shape=(2, 2), dtype=tf.float32))) 33 | 34 | func = tf.function(model, input_signature=[tf.TensorSpec(shape=input_shape, dtype=tf.float32)]) 35 | concrete_func = func.get_concrete_function( 36 | tf.constant(tf.ones(shape=input_shape, dtype=tf.float32)) 37 | ) 38 | 39 | graph = concrete_func.graph.as_graph_def() 40 | mlir_tf = import_graphdef( 41 | graph, 42 | "tf-standard-pipeline", 43 | False, 44 | input_names=["args_0:0"], 45 | input_data_types=["DT_FLOAT"], 46 | input_data_shapes=["2,28,28,3"], 47 | 
class MyModel(tf.keras.Model):
    """Small dense model: two ReLU hidden layers of 32 units and a 10-unit output."""

    def build(self, input_shape):
        """Instantiate the layers that `call` applies, in application order."""
        layers = tf.keras.layers
        self.x = layers.InputLayer(input_shape=input_shape, batch_size=2)
        # Only the second hidden layer carries a bias term.
        self.d1 = layers.Dense(units=32, activation='relu', use_bias=False)
        self.d2 = layers.Dense(units=32, activation='relu', use_bias=True)
        self.out = layers.Dense(units=10, use_bias=False)

    def call(self, x):
        """Run `x` through the input layer, both hidden layers and the output layer."""
        for layer in (self.x, self.d1, self.d2, self.out):
            x = layer(x)
        return x
#!/usr/bin/env bash

# Installs TensorFlow (and Keras) into a fresh virtual environment located at
# <git toplevel>/external/tensorflow/mlir_venv.
#
# WARNING: <git toplevel>/external/tensorflow is deleted and recreated on
# every run.

# When started from the parent directory (e.g. directly on a container or
# remote host), step into the project checkout first.
PROJECT="mlir-generator"
if [ -d "$PROJECT" ]; then
  cd "$PROJECT" || exit 1
fi

# Resolve the repository root on its own so that a failure is detected.
# Without this check a failed `git rev-parse` yields an empty prefix and the
# `rm -rf` below would target "/external/tensorflow".
if ! TOPLEVEL="$(git rev-parse --show-toplevel)" || [ -z "$TOPLEVEL" ]; then
  echo " ! Not inside a git repository, aborting" >&2
  exit 1
fi

# Go into tensorflow subrepo
# This is created on the fly, not a submodule
ROOT="$TOPLEVEL/external/tensorflow"
rm -rf "$ROOT"
mkdir -p "$ROOT" || exit 1
# Abort if the directory cannot be entered; otherwise the venv removal and
# creation below would run in whatever directory we happen to be in.
pushd "$ROOT" || exit 1

# Always start from a fresh virtual environment
echo " + Creating a fresh venv"
rm -rf mlir_venv
python -m venv mlir_venv || exit 1
# Stop if activation fails so pip does not install into another environment.
source mlir_venv/bin/activate || exit 1

# Install Python dependencies
echo " + Install Python dependencies"
python -m pip install --upgrade pip
python -m pip install tensorflow keras || exit 1

popd