├── .github └── workflows │ ├── npm-publish.yml │ └── deploy-docs.yml ├── package.json ├── LICENSE ├── CHANGELOG.md ├── .gitignore ├── demo ├── styles.css └── index.html ├── README.md ├── tsconfig.json ├── dist ├── index.d.ts └── index.js └── src └── index.ts /.github/workflows/npm-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run tests using node and then publish a package to GitHub Packages when a release is created 2 | # For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages 3 | 4 | name: Publish Package to npmjs 5 | on: 6 | release: 7 | types: [published] 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | # Setup .npmrc file to publish to npm 14 | - uses: actions/setup-node@v4 15 | with: 16 | node-version: '20.x' 17 | registry-url: 'https://registry.npmjs.org' 18 | - run: npm ci 19 | - run: npm publish 20 | env: 21 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webgpu-spd", 3 | "version": "3.0.0", 4 | "description": "A port of AMD's Single Pass Downsampler for WebGPU", 5 | "type": "module", 6 | "main": "dist/index.js", 7 | "types": "dist/index.d.ts", 8 | "files": [ 9 | "/dist/**" 10 | ], 11 | "scripts": { 12 | "docs": "typedoc src/index.ts", 13 | "build": "tsc" 14 | }, 15 | "repository": { 16 | "type": "git", 17 | "url": "git+https://github.com/JolifantoBambla/webgpu-spd.git" 18 | }, 19 | "keywords": [ 20 | "WebGPU", 21 | "Mipmap", 22 | "GPU", 23 | "Graphics" 24 | ], 25 | "author": "Lukas Herzberger", 26 | "license": "MIT", 27 | "bugs": { 28 | "url": "https://github.com/JolifantoBambla/webgpu-spd/issues" 29 | }, 30 | "homepage": "https://github.com/JolifantoBambla/webgpu-spd#readme", 31 | 
"devDependencies": { 32 | "@webgpu/types": "^0.1.40", 33 | "typedoc": "^0.25.13", 34 | "typescript": "^5.4.5" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Lukas Herzberger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] 4 | 5 | ## [v3.0.0] - 2025-08-03 6 | 7 | ### Added 8 | 9 | - Add support for texture formats enabled by the device feature [texture-formats-tier1](https://www.w3.org/TR/webgpu/#texture-formats-tier1). 
10 | 11 | ### Changed 12 | 13 | - Use subgroup built-ins for downsampling by default if the device feature [subgroups](https://www.w3.org/TR/webgpu/#subgroups) is enabled. 14 | - Move texture format `bgra8unorm` out of `WebGPUSinglePassDownsampler::supportedFormats`. 15 | - If the texture format supports it, bind mip 6 as `'read-write'` storage texture instead of duplicating texture data in an extra buffer in case more than 6 mips are generated per pass. 16 | 17 | ### Fixed 18 | 19 | - Fix handling of barriers for active workgroup counter. 20 | - Cast downsampling weight to concrete scalar type for average filter. 21 | - Fix minor typing issues. 22 | 23 | ## [v2.0.1] - 2024-06-20 24 | 25 | ### Fixed 26 | 27 | - Fix handling of cases where a texture's number of array layers exceeds the maximum number of array layers per pass. 28 | 29 | ## [v2.0.0] - 2024-04-25 30 | 31 | ### Added 32 | 33 | - Add support for specifying the maximum number of array layers that can be downsampled per pass when configuring the device using `SPDPrepareDeviceDescriptor.maxArrayLayersPerPass`. 34 | - Add support for specifying the maximum number of mip levels that can be downsampled per pass when configuring the device using `SPDPrepareDeviceDescriptor.maxMipsPerPass`. 35 | - Add support for using `f16` instead of `f32` during downsampling. 36 | 37 | ### Changed 38 | 39 | - Depending on the limit supported by a device, up to 12 mip levels can be generated within a single pass now. 40 | - `WebGPUSinglePassDownsampler.setPreferredLimits` now accepts an optional `GPUAdapter` as input to clamp this limit to what the adapter allows. 41 | 42 | ### Fixed 43 | 44 | - Fix handling of integer formats (`i32` and `u32`). 
45 | 46 | -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yml: -------------------------------------------------------------------------------- 1 | # Simple workflow for deploying static content to GitHub Pages 2 | name: Deploy docs 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | tags: 8 | - '*' 9 | 10 | # Allows you to run this workflow manually from the Actions tab 11 | workflow_dispatch: 12 | 13 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 14 | permissions: 15 | contents: read 16 | pages: write 17 | id-token: write 18 | 19 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 20 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 21 | concurrency: 22 | group: "pages" 23 | cancel-in-progress: false 24 | 25 | jobs: 26 | # Single deploy job since we're just deploying 27 | deploy: 28 | environment: 29 | name: github-pages 30 | url: ${{ steps.deployment.outputs.page_url }} 31 | runs-on: ubuntu-latest 32 | steps: 33 | - name: Checkout 34 | uses: actions/checkout@v4 35 | - name: Setup Node.js environment 36 | uses: actions/setup-node@v4.0.2 37 | - name: Install Dependencies 38 | run: npm install 39 | - name: Build Docs 40 | run: npm run docs 41 | - name: Publish dist folder with docs 42 | run: | 43 | mkdir -p docs/1.0.0 44 | npm install webgpu-spd@1.0.0 45 | cp -r node_modules/webgpu-spd/dist docs/1.0.0/dist 46 | mkdir -p docs/2.x 47 | npm install webgpu-spd@2.0.1 48 | cp -r node_modules/webgpu-spd/dist docs/2.x/dist 49 | mkdir -p docs/3.x 50 | cp -r dist docs/3.x/dist 51 | cp -r demo docs/demo 52 | - name: Setup Pages 53 | uses: actions/configure-pages@v5 54 | - name: Upload artifact 55 | uses: actions/upload-pages-artifact@v3 56 | with: 57 | path: 'docs/' 58 | - name: Deploy to GitHub Pages 59 | id: deployment 60 | uses: 
actions/deploy-pages@v4 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | 
out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | 93 | # Gatsby files 94 | .cache/ 95 | # Comment in the public line in if your project uses Gatsby and not Next.js 96 | # https://nextjs.org/blog/next-9-1#public-directory-support 97 | # public 98 | 99 | # vuepress build output 100 | .vuepress/dist 101 | 102 | # vuepress v2.x temp and cache directory 103 | .temp 104 | .cache 105 | 106 | # Docusaurus cache and generated files 107 | .docusaurus 108 | 109 | # Serverless directories 110 | .serverless/ 111 | 112 | # FuseBox cache 113 | .fusebox/ 114 | 115 | # DynamoDB Local files 116 | .dynamodb/ 117 | 118 | # TernJS port file 119 | .tern-port 120 | 121 | # Stores VSCode versions used for testing VSCode extensions 122 | .vscode-test 123 | 124 | # yarn v2 125 | .yarn/cache 126 | .yarn/unplugged 127 | .yarn/build-state.yml 128 | .yarn/install-state.gz 129 | .pnp.* 130 | 131 | docs -------------------------------------------------------------------------------- /demo/styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: Arial, sans-serif; 4 | } 5 | 6 | .container { 7 | display: flex; 8 | } 9 | 10 | .sidebar { 11 | position: fixed; 12 | left: 0; 13 | top: 0; 14 | width: 250px; 15 | height: 100%; 16 | background-color: #2c3e50; /* Dark blue background color */ 17 | color: #fff; 18 | padding: 20px; 19 | } 20 | 21 | .sidebar h2, 22 | .sidebar h3 { 23 | color: #fff; 24 | } 25 | 26 | .sidebar h2 { 27 | margin-bottom: 20px; 28 | } 29 | 30 | .sidebar h3 { 31 | margin-bottom: 10px; 32 | } 33 | 34 | .form-section { 35 | margin-bottom: 20px; /* Increased margin for better separation */ 36 | } 37 | 38 | .sidebar label { 39 | margin-bottom: 5px; 40 | } 41 | 42 | .sidebar select, 43 | .sidebar textarea, 44 | .sidebar input[type="number"], 45 | .sidebar input[type="file"], 46 | .sidebar input[type="range"], 47 | .sidebar button[type="button"] { 48 | padding: 8px; 49 | border: none; 50 | 
border-radius: 5px; 51 | width: 100%; 52 | } 53 | 54 | .sidebar textarea { 55 | resize: vertical; /* Allow vertical resizing */ 56 | } 57 | 58 | .sidebar button[type="button"] { 59 | padding: 10px 20px; /* Increased padding for better clickability */ 60 | background-color: #3498db; /* Blue button color */ 61 | color: white; 62 | border: none; 63 | border-radius: 5px; 64 | cursor: pointer; 65 | transition: background-color 0.3s; /* Smooth transition on hover */ 66 | } 67 | 68 | .sidebar button[type="button"]:hover { 69 | background-color: #2980b9; /* Darker blue on hover */ 70 | } 71 | 72 | .main-content { 73 | margin-left: 250px; /* Adjust according to sidebar width */ 74 | padding: 20px; 75 | } 76 | 77 | .additional-coordinates { 78 | display: none; /* Initially hidden */ 79 | } 80 | 81 | .sidebar input[type="checkbox"]:checked ~ .additional-coordinates { 82 | display: block; /* Display when checkbox is checked */ 83 | } 84 | 85 | .input-group { 86 | display: flex; 87 | } 88 | 89 | .input-group input { 90 | flex: 1; 91 | } 92 | 93 | .form-section { 94 | margin-bottom: 20px; /* Increased margin for better separation */ 95 | position: relative; /* Relative positioning for the form section */ 96 | } 97 | 98 | .form-section input[type="checkbox"] { 99 | position: absolute; /* Absolute positioning for the checkbox */ 100 | right: 0; /* Position the checkbox to the right */ 101 | top: 0; /* Align checkbox vertically with the label */ 102 | margin-top: 0; /* Reset margin */ 103 | } 104 | 105 | .output-value { 106 | position: absolute; 107 | top: 0; 108 | right: 0; 109 | font-size: 0.8em; 110 | color: #aaa; 111 | } 112 | 113 | .form-section input[type="text"] { 114 | width: 100%; /* Set width to 100% of the parent container */ 115 | height: 5em; /* Set height to 5 lines */ 116 | padding: 8px; /* Add padding */ 117 | box-sizing: border-box; /* Include padding and border in width/height calculation */ 118 | } 119 | 120 | canvas { 121 | position: fixed; 122 | margin-left: 
290px; 123 | } 124 | 125 | output { 126 | position: absolute; 127 | top: 0; 128 | left: calc(100% - 10px); /* Adjust as needed */ 129 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WebGPU SPD 2 | 3 | A utility library for generating up to 12 mip levels for 2d textures & texture arrays in a single WebGPU compute pass. 4 | 5 | ## Docs 6 | 7 | Find the docs [here](https://jolifantobambla.github.io/webgpu-spd). 8 | 9 | Try it out [here](https://jolifantobambla.github.io/webgpu-spd/demo). 10 | 11 | ## Installation 12 | 13 | ### NPM 14 | ```bash 15 | npm install webgpu-spd 16 | ``` 17 | 18 | ### From GitHub 19 | ```js 20 | import { WebGPUSinglePassDownsampler } from 'https://jolifantobambla.github.io/webgpu-spd/3.x/dist/index.js'; 21 | ``` 22 | 23 | ### From UNPKG 24 | ```js 25 | import { WebGPUSinglePassDownsampler } from 'https://unpkg.com/webgpu-spd@3.0.0/dist/index.js'; 26 | ``` 27 | 28 | ## Usage 29 | 30 | WebGPU SPD downsamples 2d textures and 2d texture arrays using compute pipelines generating up to 12 mip levels in a single pass (all array layers are processed in the same pass). The maximum number of mip levels that can be generated within a single pass depends on the `maxStorageTexturesPerShaderStage` limit supported by the device used. 31 | Should the number of mip levels requested for a texture exceed this limit, multiple passes, generating up to `min(maxStorageTexturesPerShaderStage, 12)` mip levels each, will be used instead. 32 | The mip levels generated for a given input texture are stored either in the input texture or in a separate target texture if specified. 33 | This output texture must support `GPUTextureUsage.STORAGE_BINDING` with access mode `"write-only"`. 
34 | 35 | #### Generate mipmaps 36 | ```js 37 | import { WebGPUSinglePassDownsampler, maxMipLevelCount } from 'webgpu-spd'; 38 | 39 | const downsampler = new WebGPUSinglePassDownsampler(); 40 | 41 | const size = [/* size + array layers */]; 42 | const texture = device.createTexture({ 43 | size, 44 | mipLevelCount: maxMipLevelCount(size[0], size[1]), 45 | format: 'rgba8unorm', 46 | usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING, 47 | }); 48 | 49 | // write mip level 0 50 | 51 | downsampler.generateMipmaps(device, texture); 52 | ``` 53 | 54 | #### Downsample a texture each frame 55 | ```js 56 | import { WebGPUSinglePassDownsampler, SPDFilters } from 'webgpu-spd'; 57 | 58 | // during setup 59 | const downsampler = new WebGPUSinglePassDownsampler(); 60 | const downsampleDepthPass = downsampler.preparePass(device, linearDepthTexture, { filter: SPDFilters.Min }); 61 | 62 | // in render loop 63 | const commandEncoder = device.createCommandEncoder(); 64 | 65 | const computePassEncoder = commandEncoder.beginComputePass(); 66 | downsampleDepthPass.encode(computePassEncoder); 67 | computePassEncoder.end(); 68 | 69 | device.queue.submit([commandEncoder.finish()]); 70 | ``` 71 | 72 | #### Downsample into target 73 | ```js 74 | import { WebGPUSinglePassDownsampler, maxMipLevelCount } from 'webgpu-spd'; 75 | 76 | const downsampler = new WebGPUSinglePassDownsampler(); 77 | 78 | const size = [/* width, height, array layers */]; 79 | const texture = device.createTexture({ 80 | size, 81 | mipLevelCount: 1, 82 | format: 'rgba8unorm', 83 | usage: GPUTextureUsage.TEXTURE_BINDING, 84 | }); 85 | const target = device.createTexture({ 86 | size: [size[0] / 2, size[1] / 2, size[2]], 87 | mipLevelCount: maxMipLevelCount(size[0], size[1]) - 1, 88 | format: 'rgba8unorm', 89 | usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING, 90 | }); 91 | 92 | // write mip level 0 93 | 94 | downsampler.generateMipmaps(device, texture, { target }); 95 | ``` 96 | 
97 | #### Use min-max filter to generate a min-max pyramid for single-channel textures 98 | 99 | The `SPDFilters.MinMax` filter provided by WebGPU SPD is a special filter that is meant to be used with input textures using single-channel formats like `"r32float"`, and a target texture using a two-channel format like `"rg32float"`. 100 | After the downsampling pass, the target texture will contain the minimum values in the red channel and the maximum values in the green channel. 101 | 102 | ```js 103 | import { WebGPUSinglePassDownsampler, SPDFilters, maxMipLevelCount } from 'webgpu-spd'; 104 | 105 | // during setup 106 | const downsampler = new WebGPUSinglePassDownsampler(); 107 | const linearDepth = device.createTexture({ 108 | size: [/* gBuffer size */], 109 | mipLevelCount: 1, 110 | format: 'r32float', 111 | usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING, 112 | }); 113 | const minMaxDepthPyramid = device.createTexture({ 114 | size: [linearDepth.width / 2, linearDepth.height / 2], 115 | mipLevelCount: maxMipLevelCount(linearDepth.width, linearDepth.height) - 1, 116 | format: 'rg32float', 117 | usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING, 118 | }); 119 | const minMaxDepthPass = downsampler.preparePass(device, linearDepth, { 120 | target: minMaxDepthPyramid, 121 | filter: SPDFilters.MinMax, 122 | }); 123 | 124 | // in render loop 125 | 126 | // ... write mip level 0 of linearDepth 127 | 128 | minMaxDepthPass.encode(computePassEncoder); 129 | ``` 130 | 131 | #### Prepare pipelines for expected formats 132 | 133 | In the above examples, GPU resources, like compute pipelines and bind group layouts etc., are created on the fly the first time a new configuration of `GPUDevice`, `GPUTextureFormat`, filter, and precision is needed. 
134 | 135 | WebGPU SPD also supports allocating resources during setup, like this: 136 | 137 | ```js 138 | import { WebGPUSinglePassDownsampler, SPDFilters, SPDPrecision } from 'webgpu-spd'; 139 | 140 | const downsampler = new WebGPUSinglePassDownsampler({ device, formats: [ 141 | { format: 'rgba8unorm', halfPrecision: true }, 142 | { format: 'r32float', filters: [ SPDFilters.Min ] }, 143 | ]}); 144 | 145 | // alternatively call 146 | downsampler.prepareDeviceResources({ device, formats: [ 147 | { format: 'rgba8unorm', halfPrecision: true }, 148 | { format: 'r32float', filters: [ SPDFilters.Min ] }, 149 | ]}); 150 | ``` 151 | 152 | #### Limit the number of mip levels and array layers per pass 153 | 154 | Generating more than 6 mip levels per pass might not be supported on each platform due to buffers not being coherent by default yet. 155 | WebGPU SPD uses `min(device.limits.maxStorageTexturesPerShaderStage, 12)` by default and can thus be implicitly configured using the device's limit. 156 | However, this might not be desirable in all cases, so WebGPU SPD can be configured to use a different limit by setting the corresponding option when preparing device resources. 157 | 158 | If more than 6 mip levels are downsampled per pass, WebGPU SPD allocates additional internal resources to store intermediate texture data (`16 * 64 * 64 * maxArrayLayersPerPass` bytes) and for control flow purposes (`4 * maxArrayLayersPerPass` bytes). 159 | The size of these resources depends on the number of array layers that can be downsampled each pass. 160 | If a texture's number of array layers exceeds the number of array layers per pass, multiple passes will be used instead. 161 | By default, WebGPU SPD uses the device's `maxTextureArrayLayers` limit. 
162 | 163 | WebGPU SPD can be configured to use different limits like this: 164 | 165 | ```js 166 | import { WebGPUSinglePassDownsampler, SPDFilters } from 'webgpu-spd'; 167 | 168 | const downsampler = new WebGPUSinglePassDownsampler({ device, maxMipsPerPass: 6, maxArrayLayersPerPass: 1 }); 169 | 170 | // alternatively call 171 | downsampler.prepareDeviceResources({ device, maxMipsPerPass: 6, maxArrayLayersPerPass: 1 }); 172 | ``` 173 | 174 | #### Handling device loss 175 | ```js 176 | import { WebGPUSinglePassDownsampler, SPDFilters } from 'webgpu-spd'; 177 | 178 | const formatConfigs = [ 179 | { format: 'rgba8unorm' }, 180 | { format: 'r32float', filters: [ SPDFilters.Min ] }, 181 | ]; 182 | 183 | // on new device 184 | downsampler.deregisterDevice(oldDevice); 185 | downsampler.prepareDeviceResources({ device: newDevice, formats: formatConfigs }); 186 | downsampleTexturePass = downsampler.preparePass(newDevice, texture); 187 | ``` 188 | 189 | #### Use custom filters 190 | 191 | Custom filters for downsampling a quad to a single pixel can be registered with WebGPU SPD using `registerFilter`. 192 | The given WGSL code must at least define a reduction function with the following name and signature: 193 | 194 | ```wgsl 195 | fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> 196 | ``` 197 | 198 | If a filter is known to be only used with a single scalar type (e.g., `u32`), uses of `SPDScalar` can also be replaced by that scalar type. 199 | 200 | For example, a custom filter that only takes a single pixel value out of the four given ones could be implemented and used like this: 201 | 202 | ```js 203 | import { WebGPUSinglePassDownsampler } from 'webgpu-spd'; 204 | 205 | const downsampler = new WebGPUSinglePassDownsampler(); 206 | downsampler.registerFilter('upperLeft', ` 207 | fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> { 208 | return v0; 209 | } 210 | `); 211 | 212 | // ... 
213 | 214 | downsampler.generateMipmaps(device, texture, { filter: 'upperLeft' }); 215 | ``` 216 | 217 | #### Downsample image region 218 | 219 | ```js 220 | import { WebGPUSinglePassDownsampler } from 'webgpu-spd'; 221 | 222 | const downsampler = new WebGPUSinglePassDownsampler(); 223 | 224 | const sizeHalf = [texture.width / 2, texture.height / 2]; 225 | downsampler.generateMipmaps(device, texture, { offset: sizeHalf, size: sizeHalf}); 226 | ``` 227 | 228 | ## Contributions 229 | 230 | Contributions are very welcome. If you find a bug or think some important functionality is missing, please file an issue [here](https://github.com/JolifantoBambla/webgpu-spd/issues). If you want to help out yourself, feel free to submit a pull request [here](https://github.com/JolifantoBambla/webgpu-spd/pulls). 231 | 232 | ## Acknowledgements 233 | 234 | This library is a WebGPU port of the FidelityFX Single Pass Downsampler (SPD) included in AMD's [FidelityFX-SDK](https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK). 235 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig to read more about this file */ 4 | 5 | /* Projects */ 6 | // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */ 7 | // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ 8 | // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */ 9 | // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */ 10 | // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. 
*/ 11 | // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ 12 | 13 | /* Language and Environment */ 14 | "target": "ESNext", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ 15 | //"lib": ["ESNext", "DOM"], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ 16 | // "jsx": "preserve", /* Specify what JSX code is generated. */ 17 | // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ 18 | // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ 19 | // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ 20 | // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ 21 | // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ 22 | // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ 23 | // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ 24 | // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ 25 | // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ 26 | 27 | /* Modules */ 28 | "module": "ESNext", 29 | //"rootDir": "", /* Specify the root folder within your source files. */ 30 | //"moduleResolution": "Node", /* Specify how TypeScript looks up a file from a given module specifier. */ 31 | // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. 
*/ 32 | // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ 33 | // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ 34 | "typeRoots": [ 35 | "./node_modules/@webgpu/types", 36 | "./node_modules/@types", 37 | ], /* Specify multiple folders that act like './node_modules/@types'. */ 38 | // "types": [], /* Specify type package names to be included without being referenced in a source file. */ 39 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ 40 | // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */ 41 | // "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */ 42 | // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */ 43 | // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */ 44 | // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */ 45 | // "resolveJsonModule": true, /* Enable importing .json files. */ 46 | // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */ 47 | // "noResolve": true, /* Disallow 'import's, 'require's or ''s from expanding the number of files TypeScript should add to a project. */ 48 | 49 | /* JavaScript Support */ 50 | // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */ 51 | // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ 52 | // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. 
*/ 53 | 54 | /* Emit */ 55 | "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ 56 | // "declarationMap": true, /* Create sourcemaps for d.ts files. */ 57 | // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ 58 | //"sourceMap": true, /* Create source map files for emitted JavaScript files. */ 59 | // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ 60 | // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */ 61 | "outDir": "dist/", /* Specify an output folder for all emitted files. */ 62 | // "removeComments": true, /* Disable emitting comments. */ 63 | // "noEmit": true, /* Disable emitting files from a compilation. */ 64 | // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ 65 | // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */ 66 | // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ 67 | // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ 68 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ 69 | // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ 70 | // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ 71 | // "newLine": "crlf", /* Set the newline character for emitting files. */ 72 | // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ 73 | // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. 
*/ 74 | // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ 75 | // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ 76 | // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ 77 | // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ 78 | 79 | /* Interop Constraints */ 80 | // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ 81 | // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */ 82 | // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ 83 | "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */ 84 | // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ 85 | "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ 86 | 87 | /* Type Checking */ 88 | "strict": true, /* Enable all strict type-checking options. */ 89 | // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ 90 | // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ 91 | // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ 92 | // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. 
*/ 93 | // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ 94 | // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ 95 | // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ 96 | // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ 97 | // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */ 98 | // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ 99 | // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ 100 | // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ 101 | // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ 102 | // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ 103 | // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ 104 | // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ 105 | // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ 106 | // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ 107 | 108 | /* Completeness */ 109 | // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ 110 | "skipLibCheck": true /* Skip type checking all .d.ts files. 
*/ 111 | }, 112 | "include": [ 113 | "src/*" 114 | ] 115 | } 116 | -------------------------------------------------------------------------------- /dist/index.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /** 3 | * The names of all predefined filters of {@link WebGPUSinglePassDownsampler}. 4 | * Custom ones can be registered with an instance of {@link WebGPUSinglePassDownsampler} using {@link WebGPUSinglePassDownsampler.registerFilter}. 5 | */ 6 | export declare enum SPDFilters { 7 | /** 8 | * Takes the channel-wise average of 4 pixels. 9 | */ 10 | Average = "average", 11 | /** 12 | * Takes the channel-wise minimum of 4 pixels. 13 | */ 14 | Min = "min", 15 | /** 16 | * Takes the channel-wise maximum of 4 pixels. 17 | */ 18 | Max = "max", 19 | /** 20 | * Takes the minimum of the red channel and the maximum of the red and green channel and stores the result in the red and green channel respectively. 21 | * This really only makes sense for single-channel input textures (where only the red channel holds any data), e.g., for generating a min-max pyramid of a depth buffer. 22 | */ 23 | MinMax = "minmax" 24 | } 25 | declare class SPDPassInner { 26 | private pipeline; 27 | private bindGroups; 28 | private dispatchDimensions; 29 | constructor(pipeline: GPUComputePipeline, bindGroups: Array, dispatchDimensions: [GPUSize32, GPUSize32, GPUSize32]); 30 | encode(computePass: GPUComputePassEncoder): void; 31 | } 32 | /** 33 | * A compute pass for downsampling a texture. 34 | */ 35 | export declare class SPDPass { 36 | private passes; 37 | /** 38 | * The texture the mipmaps will be written to by this {@link SPDPass}, once {@link SPDPass.encode} is called. 39 | */ 40 | readonly target: GPUTexture; 41 | /** @ignore */ 42 | constructor(passes: Array, target: GPUTexture); 43 | /** 44 | * Encodes the configured mipmap generation pass(es) with the given {@link GPUComputePassEncoder}. 
45 | * All bind group indices used by {@link SPDPass} are reset to `null` to prevent unintentional bindings of internal bind groups for subsequent pipelines encoded in the same {@link GPUComputePassEncoder}. 46 | * @param computePassEncoder The {@link GPUComputePassEncoder} to encode this mipmap generation pass with. 47 | * @returns The {@link computePassEncoder} 48 | */ 49 | encode(computePassEncoder: GPUComputePassEncoder): GPUComputePassEncoder; 50 | /** 51 | * Returns the number of passes that will be encoded by calling this instance's {@link SPDPass.encode} method. 52 | */ 53 | get numPasses(): number; 54 | } 55 | /** 56 | * Configuration for {@link WebGPUSinglePassDownsampler.preparePass}. 57 | */ 58 | export interface SPDPassConfig { 59 | /** 60 | * The name of the filter to use for downsampling the given texture. 61 | * Should be one of the filters registered with {@link WebGPUSinglePassDownsampler}. 62 | * Defaults to {@link SPDFilters.Average}. 63 | */ 64 | filter?: string; 65 | /** 66 | * The target texture the generated mipmaps are written to. 67 | * Its usage must include {@link GPUTextureUsage.STORAGE_BINDING}. 68 | * Its format must support {@link GPUStorageTextureAccess:"write-only"}. 69 | * Its size must be big enough to store the first mip level generated for the input texture. 70 | * It must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 71 | * Defaults to the given input texture. 72 | */ 73 | target?: GPUTexture; 74 | /** 75 | * The upper left corner of the image region mipmaps should be generated for. 76 | * Defaults to [0,0]. 77 | */ 78 | offset?: [number, number]; 79 | /** 80 | * The size of the image region mipmaps should be generated for. 81 | * Defaults to [texture.width - 1 - offset[0], texture.height - 1 - offset[1]]. 82 | */ 83 | size?: [number, number]; 84 | /** 85 | * The number of mipmaps to generate. 86 | * Defaults to target.mipLevelCount. 
87 | */ 88 | numMips?: number; 89 | /** 90 | * If set to true, will try to use half-precision floats (`f16`) for this combination of texture format and filters. 91 | * Falls back to full precision, if half precision is requested but not supported by the device (feature 'shader-f16' not enabled). 92 | * Falls back to full precision, if the texture format is not a float format. 93 | * Defaults to false. 94 | */ 95 | halfPrecision?: boolean; 96 | } 97 | export interface SPDPrepareFormatDescriptor { 98 | /** 99 | * The texture format to prepare downsampling pipelines for. 100 | */ 101 | format: GPUTextureFormat; 102 | /** 103 | * The names of the downsampling filters to prepare downsampling pipelines for the given {@link format}. 104 | * Defaults to {@link SPDFilters.Average}. 105 | */ 106 | filters?: Set; 107 | /** 108 | * If set to true, will try to use half-precision floats (`f16`) for this combination of texture format and filters. 109 | * Falls back to full precision, if half precision is requested but not supported by the device (feature 'shader-f16' not enabled). 110 | * Falls back to full precision, if the texture format is not a float format. 111 | * Defaults to false. 112 | */ 113 | halfPrecision?: boolean; 114 | } 115 | export interface SPDPrepareDeviceDescriptor { 116 | /** 117 | * The device to prepare downsampling pipelines for. 118 | */ 119 | device: GPUDevice; 120 | /** 121 | * The formats to prepare downsampling pipelines for. 122 | */ 123 | formats?: Array; 124 | /** 125 | * The maximum number of array layers that will be downsampled on the {@link device} within a single pass. 126 | * If a texture has more, downsampling will be split up into multiple passes handling up to this limit of array layers each. 127 | * Defaults to device.limits.maxTextureArrayLayers. 128 | */ 129 | maxArrayLayersPerPass?: number; 130 | /** 131 | * The maximum number of mip levels that can be generated on the {@link device} within a single pass. 
132 | * Note that generating more than 6 mip levels per pass is currently not supported on all platforms. 133 | * Defaults to `Math.min(device.limits.maxStorageTexturesPerShaderStage, 12)`. 134 | */ 135 | maxMipsPerPass?: number; 136 | /** 137 | * If true, disables all uses of subgroup built-ins by the downsampler even if the `'subgroups'` feature is enabled on the {@link device}. 138 | */ 139 | disableSubgroups?: boolean; 140 | } 141 | /** 142 | * Returns the maximum number of mip levels for a given n-dimensional size. 143 | * @param size The size to compute the maximum number of mip levels for 144 | * @returns The maximum number of mip levels for the given size 145 | */ 146 | export declare function maxMipLevelCount(...size: number[]): number; 147 | /** 148 | * A helper class for downsampling 2D {@link GPUTexture} (& arrays) using as few passes as possible on a {@link GPUDevice} depending on its {@link GPUSupportedLimits}. 149 | * Up to 12 mip levels can be generated within a single pass, if {@link GPUSupportedLimits.maxStorageTexturesPerShaderStage} supports it. 150 | */ 151 | export declare class WebGPUSinglePassDownsampler { 152 | private filters; 153 | private devicePipelines; 154 | /** 155 | * The set of formats supported by WebGPU SPD. 156 | */ 157 | static readonly supportedFormats: Set; 158 | /** 159 | * The set of additionally supported formats supported if the feature 'bgra8unorm-storage' is enabled. 160 | */ 161 | static readonly supportedFormatsBgra8UnormStorage: Set; 162 | /** 163 | * The set of additionally supported formats if the feature 'texture-formats-tier1' is enabled. 164 | */ 165 | static readonly supportedFormatsTier1: Set; 166 | /** 167 | * The set of formats that support read-write access. 168 | */ 169 | static readonly supportedReadWriteFormats: Set; 170 | /** 171 | * The set of formats that support read-write access if the feature 'texture-formats-tier2' is enabled. 
172 | */ 173 | static readonly supportedReadWriteFormatsTier2: Set; 174 | /** 175 | * Sets the preferred device limits for {@link WebGPUSinglePassDownsampler} in a given record of limits. 176 | * Existing preferred device limits are either increased or left untouched. 177 | * If {@link limits} is undefined, creates a new record of preferred device limits for {@link WebGPUSinglePassDownsampler}. 178 | * The result can be used to set {@link GPUDeviceDescriptor.requiredLimits} when requesting a device. 179 | * @param limits A record of device limits set to update with the preferred limits for {@link WebGPUSinglePassDownsampler} 180 | * @param adapter If this is set, the preferred limits that are set by this function will be clamped to {@link GPUAdapter.limits}. 181 | * @returns The updated or created set of device limits with all preferred limits for {@link WebGPUSinglePassDownsampler} set 182 | */ 183 | static setPreferredLimits(limits?: Record, adapter?: GPUAdapter): Record; 184 | /** 185 | * Creates a new {@link WebGPUSinglePassDownsampler}. 186 | * On its own, {@link WebGPUSinglePassDownsampler} does not allocate any GPU resources. 187 | * Optionally, prepare GPU resources for a given {@link SPDPrepareDeviceDescriptor}. 188 | * @param prepareDescriptor An optional descriptor for preparing GPU resources 189 | * @see WebGPUSinglePassDownsampler.prepareDeviceResources 190 | */ 191 | constructor(prepareDescriptor?: SPDPrepareDeviceDescriptor); 192 | /** 193 | * Prepares GPU resources required by {@link WebGPUSinglePassDownsampler} to downsample textures for a given {@link SPDPrepareDeviceDescriptor}. 194 | * @param prepareDescriptor a descriptor for preparing GPU resources 195 | */ 196 | prepareDeviceResources(prepareDescriptor: SPDPrepareDeviceDescriptor): void; 197 | private getOrCreateDevicePipelines; 198 | /** 199 | * Deregisters all resources stored for a given device. 
200 | * @param device The device resources should be deregistered for 201 | */ 202 | deregisterDevice(device: GPUDevice): void; 203 | /** 204 | * Registers a new downsampling filter operation that can be injected into the downsampling shader for new pipelines. 205 | * 206 | * The given WGSL code must (at least) specify a function to reduce four values into one with the following name and signature: 207 | * 208 | * `spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4` 209 | * 210 | * @param name The unique name of the filter operation 211 | * @param wgsl The WGSL code to inject into the downsampling shader as the filter operation 212 | */ 213 | registerFilter(name: string, wgsl: string): void; 214 | /** 215 | * Prepares a pass to downsample a 2d texture / 2d texture array. 216 | * The produced {@link SPDPass} can be used multiple times to repeatedly downsample a texture, e.g., for downsampling the depth buffer each frame. 217 | * For one-time use, {@link WebGPUSinglePassDownsampler.generateMipmaps} can be used instead. 218 | * 219 | * By default, the texture is downsampled `texture.mipLevelCount - 1` times using an averaging filter, i.e., 4 pixel values from the parent level are averaged to produce a single pixel in the current mip level. 220 | * This behavior can be configured using the optional {@link config} parameter. 221 | * For example, instead of writing the mip levels into the input texture itself, a separate target texture can be specified using {@link SPDPassConfig.target}. 222 | * Other configuration options include using a different (possibly custom) filter, only downsampling a subregion of the input texture, and limiting the number of mip levels to generate, e.g., if a min-max pyramid is only needed up to a certain tile resolution. 223 | * If the given filter does not exist, an averaging filter will be used as a fallback. 
224 | * The image region to downsample and the number of mip levels to generate are clamped to the input texture's size, and the output texture's `mipLevelCount`. 225 | * 226 | * Depending on the number of mip levels to generate and the device's `maxStorageTexturesPerShaderStage` limit, the {@link SPDPass} will internally consist of multiple passes, each generating up to `min(maxStorageTexturesPerShaderStage, 12)` mip levels. 227 | * 228 | * @param device The device the {@link SPDPass} should be prepared for 229 | * @param texture The texture that is to be processed by the {@link SPDPass}. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. Must support {@link GPUTextureUsage.TEXTURE_BINDING}, and, if no other target is given, {@link GPUTextureUsage.STORAGE_BINDING}. 230 | * @param config The config for the {@link SPDPass} 231 | * @returns The prepared {@link SPDPass} or undefined if preparation failed or if no mipmaps would be generated. 232 | * @throws If the {@link GPUTextureFormat} of {@link SPDPassConfig.target} is not supported (does not support {@link GPUStorageTextureAccess:"write-only"} on the given {@link device}). 233 | * @throws If the size of {@link SPDPassConfig.target} is too small to store the first mip level generated for {@link texture} 234 | * @throws If {@link texture} or {@link SPDPassConfig.target} is not a 2d texture. 235 | * @see WebGPUSinglePassDownsampler.generateMipmaps 236 | * @see WebGPUSinglePassDownsampler.registerFilter 237 | * @see WebGPUSinglePassDownsampler.setPreferredLimits 238 | */ 239 | preparePass(device: GPUDevice, texture: GPUTexture, config?: SPDPassConfig): SPDPass | undefined; 240 | /** 241 | * Generates mipmaps for the given texture. 242 | * For textures that will be downsampled more than once, consider generating a {@link SPDPass} using {@link WebGPUSinglePassDownsampler.preparePass} and calling its {@link SPDPass.encode} method. 
243 | * This way, allocated GPU resources for downsampling the texture can be reused. 244 | * @param device The device to use for downsampling the texture 245 | * @param texture The texture to generate mipmaps for. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 246 | * @param config The config for mipmap generation 247 | * @returns True if mipmaps were generated, false otherwise 248 | * @throws If {@link WebGPUSinglePassDownsampler.preparePass} threw an error. 249 | * @see WebGPUSinglePassDownsampler.preparePass 250 | */ 251 | generateMipmaps(device: GPUDevice, texture: GPUTexture, config?: SPDPassConfig): boolean; 252 | } 253 | export {}; 254 | -------------------------------------------------------------------------------- /demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | WebGPU SPD 7 | 8 | 9 | 10 |
11 | 92 | 93 |
94 | 95 | 338 | 339 | 340 | -------------------------------------------------------------------------------- /dist/index.js: -------------------------------------------------------------------------------- 1 | function makeShaderCode(outputFormat, filterOp = SPD_FILTER_AVERAGE, numMips, scalarType, hasSubgroups, mip6SupportsReadWrite) { 2 | const texelType = scalarType === SPDScalarType.I32 ? 'i32' : (scalarType === SPDScalarType.U32 ? 'u32' : 'f32'); 3 | const useF16 = scalarType === SPDScalarType.F16; 4 | const filterCode = filterOp === SPD_FILTER_AVERAGE && !['f32', 'f16'].includes(texelType) ? filterOp.replace('* SPDScalar(0.25)', '/ 4') : filterOp; 5 | const mipsBindings = Array(numMips).fill(0) 6 | .map((_, i) => { 7 | if (i == 5 && numMips > 6 && mip6SupportsReadWrite) { 8 | return `@group(0) @binding(6) var dst_mip_6: texture_storage_2d_array<${outputFormat}, read_write>;`; 9 | } 10 | return `@group(0) @binding(${i + 1}) var dst_mip_${i + 1}: texture_storage_2d_array<${outputFormat}, write>;`; 11 | }) 12 | .join('\n'); 13 | // todo: get rid of this branching as soon as WGSL supports arrays of texture_storage_2d_array 14 | const mipsAccessorBody = Array(numMips).fill(0) 15 | .map((_, i) => { 16 | if (i == 5 && numMips > 6 && !mip6SupportsReadWrite) { 17 | return ` else if mip == 6 { 18 | textureStore(dst_mip_6, uv, slice, ${useF16 ? `vec4<${texelType}>(value)` : 'value'}); 19 | mip_dst_6_buffer[slice][uv.y][uv.x] = value; 20 | }`; 21 | } 22 | return `${i === 0 ? '' : ' else '}if mip == ${i + 1} { 23 | textureStore(dst_mip_${i + 1}, uv, slice, ${useF16 ? `vec4<${texelType}>(value)` : 'value'}); 24 | }`; 25 | }) 26 | .join(''); 27 | const mipsAccessor = `fn store_dst_mip(value: vec4, uv: vec2, slice: u32, mip: u32) {\n${mipsAccessorBody}\n}`; 28 | const midMipAccessor = mip6SupportsReadWrite ? 
`return vec4(textureLoad(dst_mip_6, uv, slice));` : `return mip_dst_6_buffer[slice][uv.y][uv.x];`; 29 | return /* wgsl */ ` 30 | // This file is part of the FidelityFX SDK. 31 | // 32 | // Copyright (C) 2023 Advanced Micro Devices, Inc. 33 | // 34 | // Permission is hereby granted, free of charge, to any person obtaining a copy 35 | // of this software and associated documentation files(the “Software”), to deal 36 | // in the Software without restriction, including without limitation the rights 37 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 38 | // copies of the Software, and to permit persons to whom the Software is 39 | // furnished to do so, subject to the following conditions : 40 | // 41 | // The above copyright notice and this permission notice shall be included in 42 | // all copies or substantial portions of the Software. 43 | // 44 | // THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 47 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 49 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 50 | // THE SOFTWARE. 51 | 52 | 53 | // Definitions -------------------------------------------------------------------------------------------------------- 54 | 55 | ${useF16 ? 'enable f16;' : ''} 56 | ${hasSubgroups ? 'enable subgroups;' : ''} 57 | 58 | alias SPDScalar = ${scalarType}; 59 | 60 | // Helpers ------------------------------------------------------------------------------------------------------------ 61 | 62 | /** 63 | * A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. 
64 | * 65 | * The 64-wide lane indices to 8x8 remapping is performed as follows: 66 | * 00 01 08 09 10 11 18 19 67 | * 02 03 0a 0b 12 13 1a 1b 68 | * 04 05 0c 0d 14 15 1c 1d 69 | * 06 07 0e 0f 16 17 1e 1f 70 | * 20 21 28 29 30 31 38 39 71 | * 22 23 2a 2b 32 33 3a 3b 72 | * 24 25 2c 2d 34 35 3c 3d 73 | * 26 27 2e 2f 36 37 3e 3f 74 | * 75 | * @param a: The input 1D coordinate to remap. 76 | * 77 | * @returns The remapped 2D coordinates. 78 | */ 79 | fn remap_for_wave_reduction(a: u32) -> vec2 { 80 | return vec2( 81 | insertBits(extractBits(a, 2u, 3u), a, 0u, 1u), 82 | insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u) 83 | ); 84 | } 85 | 86 | fn map_to_xy(local_invocation_index: u32) -> vec2 { 87 | let sub_xy: vec2 = remap_for_wave_reduction(local_invocation_index % 64); 88 | return vec2( 89 | sub_xy.x + 8 * ((local_invocation_index >> 6) % 2), 90 | sub_xy.y + 8 * ((local_invocation_index >> 7)) 91 | ); 92 | } 93 | 94 | /* 95 | * Compute a linear value from a SRGB value. 96 | * 97 | * @param value: The value to convert to linear from SRGB. 98 | * 99 | * @returns A value in SRGB space. 
100 | */ 101 | /* 102 | fn srgb_to_linear(value: SPDScalar) -> SPDScalar { 103 | let j = vec3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); 104 | let k = vec2(1.055, -0.055); 105 | return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); 106 | } 107 | */ 108 | 109 | // Resources & Accessors ----------------------------------------------------------------------------------------------- 110 | struct DownsamplePassMeta { 111 | work_group_offset: vec2, 112 | num_work_groups: u32, 113 | mips: u32, 114 | } 115 | 116 | // In the original version dst_mip_i is an image2Darray [SPD_MAX_MIP_LEVELS+1], i.e., 12+1, but WGSL doesn't support arrays of textures yet 117 | // Also these are read_write because for mips 7-13, the workgroup reads from mip level 6 - since most formats don't support read_write access in WGSL yet, we use a single read_write buffer in such cases instead 118 | @group(0) @binding(0) var src_mip_0: texture_2d_array<${texelType}>; 119 | ${mipsBindings} 120 | 121 | @group(1) @binding(0) var downsample_pass_meta : DownsamplePassMeta; 122 | @group(1) @binding(1) var spd_global_counter: array>; 123 | // this is only used if read_write access is not supported for the texture format 124 | @group(1) @binding(2) var mip_dst_6_buffer: array, 64>, 64>>; 125 | 126 | fn get_mips() -> u32 { 127 | return downsample_pass_meta.mips; 128 | } 129 | 130 | fn get_num_work_groups() -> u32 { 131 | return downsample_pass_meta.num_work_groups; 132 | } 133 | 134 | fn get_work_group_offset() -> vec2 { 135 | return downsample_pass_meta.work_group_offset; 136 | } 137 | 138 | fn load_src_image(uv: vec2, slice: u32) -> vec4 { 139 | return vec4(textureLoad(src_mip_0, uv, slice, 0)); 140 | } 141 | 142 | fn load_mid_mip_image(uv: vec2, slice: u32) -> vec4 { 143 | ${numMips > 6 ? 
midMipAccessor : 'return vec4();'} 144 | } 145 | 146 | ${mipsAccessor} 147 | 148 | // Workgroup ----------------------------------------------------------------------------------------------------------- 149 | 150 | ${useF16 ? ` 151 | var spd_intermediate_rg: array, 16>, 16>; 152 | var spd_intermediate_bg: array, 16>, 16>; 153 | ` : ` 154 | var spd_intermediate_r: array, 16>; 155 | var spd_intermediate_g: array, 16>; 156 | var spd_intermediate_b: array, 16>; 157 | var spd_intermediate_a: array, 16>; 158 | `} 159 | var spd_counter: atomic; 160 | 161 | fn spd_increase_atomic_counter(slice: u32) { 162 | atomicStore(&spd_counter, atomicAdd(&spd_global_counter[slice], 1)); 163 | } 164 | 165 | fn spd_get_atomic_counter() -> u32 { 166 | return workgroupUniformLoad(&spd_counter); 167 | } 168 | 169 | fn spd_reset_atomic_counter(slice: u32) { 170 | atomicStore(&spd_global_counter[slice], 0); 171 | } 172 | 173 | // Cotnrol flow -------------------------------------------------------------------------------------------------------- 174 | 175 | fn spd_barrier() { 176 | // in glsl this does: groupMemoryBarrier(); barrier(); 177 | workgroupBarrier(); 178 | } 179 | 180 | // Only last active workgroup should proceed 181 | fn spd_exit_workgroup(num_work_groups: u32, local_invocation_index: u32, slice: u32) -> bool { 182 | // global atomic counter 183 | if (local_invocation_index == 0) { 184 | spd_increase_atomic_counter(slice); 185 | } 186 | storageBarrier(); 187 | return spd_get_atomic_counter() != (num_work_groups - 1); 188 | } 189 | 190 | // Pixel access -------------------------------------------------------------------------------------------------------- 191 | 192 | ${filterCode} 193 | 194 | ${hasSubgroups ? 
` 195 | fn spd_reduce_quad(value: vec4) -> vec4 { 196 | let v0 = value; 197 | let v1 = quadSwapX(value); 198 | let v2 = quadSwapY(value); 199 | let v3 = quadSwapDiagonal(value); 200 | return spd_reduce_4(v0, v1, v2, v3); 201 | } 202 | ` : ''} 203 | 204 | fn spd_store(pix: vec2, out_value: vec4, mip: u32, slice: u32) { 205 | store_dst_mip(out_value, pix, slice, mip + 1); 206 | } 207 | 208 | fn spd_load_intermediate(x: u32, y: u32) -> vec4 { 209 | return vec4(${useF16 ? ` 210 | spd_intermediate_rg[x][y], 211 | spd_intermediate_ba[x][y],` : ` 212 | spd_intermediate_r[x][y], 213 | spd_intermediate_g[x][y], 214 | spd_intermediate_b[x][y], 215 | spd_intermediate_a[x][y],`}); 216 | } 217 | 218 | fn spd_store_intermediate(x: u32, y: u32, value: vec4) { 219 | ${useF16 ? ` 220 | spd_intermediate_rg[x][y] = value.rg; 221 | spd_intermediate_ba[x][y] = value.ba;` : ` 222 | spd_intermediate_r[x][y] = value.r; 223 | spd_intermediate_g[x][y] = value.g; 224 | spd_intermediate_b[x][y] = value.b; 225 | spd_intermediate_a[x][y] = value.a;`} 226 | } 227 | 228 | fn spd_reduce_intermediate(i0: vec2, i1: vec2, i2: vec2, i3: vec2) -> vec4 { 229 | let v0 = spd_load_intermediate(i0.x, i0.y); 230 | let v1 = spd_load_intermediate(i1.x, i1.y); 231 | let v2 = spd_load_intermediate(i2.x, i2.y); 232 | let v3 = spd_load_intermediate(i3.x, i3.y); 233 | return spd_reduce_4(v0, v1, v2, v3); 234 | } 235 | 236 | fn spd_reduce_load_4(base: vec2, slice: u32) -> vec4 { 237 | let v0 = load_src_image(base + vec2(0, 0), slice); 238 | let v1 = load_src_image(base + vec2(0, 1), slice); 239 | let v2 = load_src_image(base + vec2(1, 0), slice); 240 | let v3 = load_src_image(base + vec2(1, 1), slice); 241 | return spd_reduce_4(v0, v1, v2, v3); 242 | } 243 | 244 | fn spd_reduce_load_mid_mip_4(base: vec2, slice: u32) -> vec4 { 245 | let v0 = load_mid_mip_image(base + vec2(0, 0), slice); 246 | let v1 = load_mid_mip_image(base + vec2(0, 1), slice); 247 | let v2 = load_mid_mip_image(base + vec2(1, 0), slice); 248 | let 
v3 = load_mid_mip_image(base + vec2(1, 1), slice); 249 | return spd_reduce_4(v0, v1, v2, v3); 250 | } 251 | 252 | // Main logic --------------------------------------------------------------------------------------------------------- 253 | 254 | fn spd_downsample_mips_0_1(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 255 | var v: array, 4>; 256 | 257 | let workgroup64 = workgroup_id.xy * 64; 258 | let workgroup32 = workgroup_id.xy * 32; 259 | let workgroup16 = workgroup_id.xy * 16; 260 | 261 | var tex = workgroup64 + vec2(x * 2, y * 2); 262 | var pix = workgroup32 + vec2(x, y); 263 | v[0] = spd_reduce_load_4(tex, slice); 264 | spd_store(pix, v[0], 0, slice); 265 | 266 | tex = workgroup64 + vec2(x * 2 + 32, y * 2); 267 | pix = workgroup32 + vec2(x + 16, y); 268 | v[1] = spd_reduce_load_4(tex, slice); 269 | spd_store(pix, v[1], 0, slice); 270 | 271 | tex = workgroup64 + vec2(x * 2, y * 2 + 32); 272 | pix = workgroup32 + vec2(x, y + 16); 273 | v[2] = spd_reduce_load_4(tex, slice); 274 | spd_store(pix, v[2], 0, slice); 275 | 276 | tex = workgroup64 + vec2(x * 2 + 32, y * 2 + 32); 277 | pix = workgroup32 + vec2(x + 16, y + 16); 278 | v[3] = spd_reduce_load_4(tex, slice); 279 | spd_store(pix, v[3], 0, slice); 280 | 281 | if mip <= 1 { 282 | return; 283 | } 284 | 285 | ${hasSubgroups ? 
` 286 | v[0] = spd_reduce_quad(v[0]); 287 | v[1] = spd_reduce_quad(v[1]); 288 | v[2] = spd_reduce_quad(v[2]); 289 | v[3] = spd_reduce_quad(v[3]); 290 | 291 | if (local_invocation_index % 4) == 0 { 292 | spd_store(workgroup16 + vec2(x / 2, y / 2), v[0], 1, slice); 293 | spd_store_intermediate(x / 2, y / 2, v[0]); 294 | 295 | spd_store(workgroup16 + vec2(x / 2 + 8, y / 2), v[1], 1, slice); 296 | spd_store_intermediate(x / 2 + 8, y / 2, v[1]); 297 | 298 | spd_store(workgroup16 + vec2(x / 2, y / 2 + 8), v[2], 1, slice); 299 | spd_store_intermediate(x / 2, y / 2 + 8, v[2]); 300 | 301 | spd_store(workgroup16 + vec2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); 302 | spd_store_intermediate(x / 2 + 8, y / 2 + 8, v[3]); 303 | } 304 | ` : ` 305 | for (var i = 0u; i < 4u; i++) { 306 | spd_store_intermediate(x, y, v[i]); 307 | spd_barrier(); 308 | if local_invocation_index < 64 { 309 | v[i] = spd_reduce_intermediate( 310 | vec2(x * 2 + 0, y * 2 + 0), 311 | vec2(x * 2 + 1, y * 2 + 0), 312 | vec2(x * 2 + 0, y * 2 + 1), 313 | vec2(x * 2 + 1, y * 2 + 1) 314 | ); 315 | spd_store(workgroup16 + vec2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); 316 | } 317 | spd_barrier(); 318 | } 319 | 320 | if local_invocation_index < 64 { 321 | spd_store_intermediate(x + 0, y + 0, v[0]); 322 | spd_store_intermediate(x + 8, y + 0, v[1]); 323 | spd_store_intermediate(x + 0, y + 8, v[2]); 324 | spd_store_intermediate(x + 8, y + 8, v[3]); 325 | } 326 | `} 327 | } 328 | 329 | fn spd_downsample_mip_2(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 330 | ${hasSubgroups ? 
` 331 | var v = spd_load_intermediate(x, y); 332 | v = spd_reduce_quad(v); 333 | // quad index 0 stores result 334 | if (local_invocation_index % 4) == 0 { 335 | spd_store(workgroup_id.xy * 8 + vec2(x / 2, y / 2), v, mip, slice); 336 | spd_store_intermediate(x + (y / 2) % 2, y, v); 337 | } 338 | ` : ` 339 | if local_invocation_index < 64u { 340 | let v = spd_reduce_intermediate( 341 | vec2(x * 2 + 0, y * 2 + 0), 342 | vec2(x * 2 + 1, y * 2 + 0), 343 | vec2(x * 2 + 0, y * 2 + 1), 344 | vec2(x * 2 + 1, y * 2 + 1) 345 | ); 346 | spd_store(workgroup_id.xy * 8 + vec2(x, y), v, mip, slice); 347 | // store to LDS, try to reduce bank conflicts 348 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 349 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 350 | // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 351 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 352 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 353 | // ... 354 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 355 | spd_store_intermediate(x * 2 + y % 2, y * 2, v); 356 | } 357 | `} 358 | } 359 | 360 | fn spd_downsample_mip_3(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 361 | ${hasSubgroups ? 
` 362 | // subgroup ops must be in uniform control flow 363 | var v = spd_load_intermediate(x * 2 + y % 2, y * 2); 364 | v = spd_reduce_quad(v); 365 | 366 | // quad index 0 stores result 367 | if local_invocation_index < 64u && (local_invocation_index % 4) == 0 { 368 | spd_store(workgroup_id.xy * 4 + vec2(x / 2, y / 2), v, mip, slice); 369 | spd_store_intermediate(x * 2 + y / 2, y * 2, v); 370 | } 371 | ` : ` 372 | if local_invocation_index < 16u { 373 | // x 0 x 0 374 | // 0 0 0 0 375 | // 0 x 0 x 376 | // 0 0 0 0 377 | let v = spd_reduce_intermediate( 378 | vec2(x * 4 + 0 + 0, y * 4 + 0), 379 | vec2(x * 4 + 2 + 0, y * 4 + 0), 380 | vec2(x * 4 + 0 + 1, y * 4 + 2), 381 | vec2(x * 4 + 2 + 1, y * 4 + 2) 382 | ); 383 | spd_store(workgroup_id.xy * 4 + vec2(x, y), v, mip, slice); 384 | // store to LDS 385 | // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 386 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 387 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 388 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 389 | // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 390 | // ... 391 | // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 392 | // ... 393 | // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 394 | // ... 395 | spd_store_intermediate(x * 4 + y, y * 4, v); 396 | } 397 | `} 398 | } 399 | 400 | fn spd_downsample_mip_4(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 401 | ${hasSubgroups ? ` 402 | // subgroup ops must be in uniform control flow 403 | var v = spd_load_intermediate(x * 4 + y, y * 4); 404 | v = spd_reduce_quad(v); 405 | 406 | // quad index 0 stores result 407 | if local_invocation_index < 16u && (local_invocation_index % 4) == 0 { 408 | spd_store(workgroup_id.xy * 2 + vec2(x / 2, y / 2), v, mip, slice); 409 | spd_store_intermediate(x / 2 + y, 0, v); 410 | } 411 | ` : ` 412 | if local_invocation_index < 4u { 413 | // x 0 0 0 x 0 0 0 414 | // ... 
415 | // 0 x 0 0 0 x 0 0 416 | let v = spd_reduce_intermediate( 417 | vec2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), 418 | vec2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), 419 | vec2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), 420 | vec2(x * 8 + 4 + 1 + y * 2, y * 8 + 4) 421 | ); 422 | spd_store(workgroup_id.xy * 2 + vec2(x, y), v, mip, slice); 423 | // store to LDS 424 | // x x x x 0 ... 425 | // 0 ... 426 | spd_store_intermediate(x + y * 2, 0, v); 427 | } 428 | `} 429 | } 430 | 431 | fn spd_downsample_mip_5(workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 432 | ${hasSubgroups ? ` 433 | // subgroup ops must be in uniform control flow 434 | var v = spd_load_intermediate(local_invocation_index, 0); 435 | v = spd_reduce_quad(v); 436 | 437 | // quad index 0 stores result 438 | if local_invocation_index < 4u && (local_invocation_index % 4) == 0 { 439 | spd_store(workgroup_id.xy, v, mip, slice); 440 | } 441 | ` : ` 442 | if local_invocation_index < 1u { 443 | // x x x x 0 ... 444 | // 0 ... 445 | let v = spd_reduce_intermediate(vec2(0, 0), vec2(1, 0), vec2(2, 0), vec2(3, 0)); 446 | spd_store(workgroup_id.xy, v, mip, slice); 447 | } 448 | `} 449 | } 450 | 451 | fn spd_downsample_next_four(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) { 452 | if mips <= base_mip { 453 | return; 454 | } 455 | spd_barrier(); 456 | spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice); 457 | 458 | if mips <= base_mip + 1 { 459 | return; 460 | } 461 | spd_barrier(); 462 | spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice); 463 | 464 | if mips <= base_mip + 2 { 465 | return; 466 | } 467 | spd_barrier(); 468 | spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice); 469 | 470 | if mips <= base_mip + 3 { 471 | return; 472 | } 473 | spd_barrier(); 474 | spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice); 475 | } 476 | 477 
| fn spd_downsample_last_four(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) { 478 | if mips <= base_mip { 479 | return; 480 | } 481 | spd_barrier(); 482 | spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice); 483 | 484 | if mips <= base_mip + 1 { 485 | return; 486 | } 487 | spd_barrier(); 488 | spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice); 489 | 490 | if mips <= base_mip + 2 { 491 | return; 492 | } 493 | spd_barrier(); 494 | spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice); 495 | 496 | if mips <= base_mip + 3 { 497 | return; 498 | } 499 | spd_barrier(); 500 | spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice); 501 | } 502 | 503 | fn spd_downsample_mips_6_7(x: u32, y: u32, mips: u32, slice: u32) { 504 | ${mip6SupportsReadWrite ? 'textureBarrier();' : ''} 505 | 506 | var tex = vec2(x * 4 + 0, y * 4 + 0); 507 | var pix = vec2(x * 2 + 0, y * 2 + 0); 508 | let v0 = spd_reduce_load_mid_mip_4(tex, slice); 509 | spd_store(pix, v0, 6, slice); 510 | 511 | tex = vec2(x * 4 + 2, y * 4 + 0); 512 | pix = vec2(x * 2 + 1, y * 2 + 0); 513 | let v1 = spd_reduce_load_mid_mip_4(tex, slice); 514 | spd_store(pix, v1, 6, slice); 515 | 516 | tex = vec2(x * 4 + 0, y * 4 + 2); 517 | pix = vec2(x * 2 + 0, y * 2 + 1); 518 | let v2 = spd_reduce_load_mid_mip_4(tex, slice); 519 | spd_store(pix, v2, 6, slice); 520 | 521 | tex = vec2(x * 4 + 2, y * 4 + 2); 522 | pix = vec2(x * 2 + 1, y * 2 + 1); 523 | let v3 = spd_reduce_load_mid_mip_4(tex, slice); 524 | spd_store(pix, v3, 6, slice); 525 | 526 | if mips <= 7 { 527 | return; 528 | } 529 | // no barrier needed, working on values only from the same thread 530 | 531 | let v = spd_reduce_4(v0, v1, v2, v3); 532 | spd_store(vec2(x, y), v, 7, slice); 533 | spd_store_intermediate(x, y, v); 534 | } 535 | 536 | fn spd_downsample_last_6(x: u32, y: u32, 
local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) { 537 | if mips <= 6 { 538 | return; 539 | } 540 | 541 | // increase the global atomic counter for the given slice and check if it's the last remaining thread group: 542 | // terminate if not, continue if yes. 543 | if spd_exit_workgroup(num_work_groups, local_invocation_index, slice) { 544 | return; 545 | } 546 | 547 | // reset the global atomic counter back to 0 for the next spd dispatch 548 | spd_reset_atomic_counter(slice); 549 | 550 | // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. 551 | // compute MIP level 6 and 7 552 | spd_downsample_mips_6_7(x, y, mips, slice); 553 | 554 | // compute MIP level 8, 9, 10, 11 555 | spd_downsample_last_four(x, y, vec2(0, 0), local_invocation_index, 8, mips, slice); 556 | } 557 | 558 | /// Downsamples a 64x64 tile based on the work group id. 559 | /// If after downsampling it's the last active thread group, computes the remaining MIP levels. 560 | /// 561 | /// @param [in] workGroupID index of the work group / thread group 562 | /// @param [in] localInvocationIndex index of the thread within the thread group in 1D 563 | /// @param [in] mips the number of total MIP levels to compute for the input texture 564 | /// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice 565 | /// @param [in] slice the slice of the input texture 566 | fn spd_downsample(workgroup_id: vec2, local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) { 567 | let xy = map_to_xy(local_invocation_index); 568 | spd_downsample_mips_0_1(xy.x, xy.y, workgroup_id, local_invocation_index, mips, slice); 569 | spd_downsample_next_four(xy.x, xy.y, workgroup_id, local_invocation_index, 2, mips, slice); 570 | ${numMips > 6 ? 
'spd_downsample_last_6(xy.x, xy.y, local_invocation_index, mips, num_work_groups, slice);' : ''} 571 | } 572 | 573 | // Entry points ------------------------------------------------------------------------------------------------------- 574 | 575 | @compute 576 | @workgroup_size(256, 1, 1) 577 | fn downsample(@builtin(local_invocation_index) local_invocation_index: u32, @builtin(workgroup_id) workgroup_id: vec3) { 578 | spd_downsample( 579 | workgroup_id.xy + get_work_group_offset(), 580 | local_invocation_index, 581 | get_mips(), 582 | get_num_work_groups(), 583 | workgroup_id.z 584 | ); 585 | } 586 | `; 587 | } 588 | const SPD_FILTER_AVERAGE = /* wgsl */ ` 589 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 590 | return (v0 + v1 + v2 + v3) * SPDScalar(0.25); 591 | } 592 | `; 593 | const SPD_FILTER_MIN = /* wgsl */ ` 594 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 595 | return min(min(v0, v1), min(v2, v3)); 596 | } 597 | `; 598 | const SPD_FILTER_MAX = /* wgsl */ ` 599 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 600 | return max(max(v0, v1), max(v2, v3)); 601 | } 602 | `; 603 | const SPD_FILTER_MINMAX = /* wgsl */ ` 604 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 605 | let max4 = max(max(v0.xy, v1.xy), max(v2.xy, v3.xy)); 606 | return vec4(min(min(v0.x, v1.x), min(v2.x, v3.x)), max(max4.x, max4.y), 0, 0); 607 | } 608 | `; 609 | /** 610 | * The names of all predefined filters of {@link WebGPUSinglePassDownsampler}. 611 | * Custom ones can be registered with an instance of {@link WebGPUSinglePassDownsampler} using {@link WebGPUSinglePassDownsampler.registerFilter}. 612 | */ 613 | export var SPDFilters; 614 | (function (SPDFilters) { 615 | /** 616 | * Takes the channel-wise average of 4 pixels. 617 | */ 618 | SPDFilters["Average"] = "average"; 619 | /** 620 | * Takes the channel-wise minimum of 4 pixels. 
 */
    SPDFilters["Min"] = "min";
    /**
     * Takes the channel-wise maximum of 4 pixels.
     */
    SPDFilters["Max"] = "max";
    /**
     * Takes the minimum of the red channel and the maximum of the red and green channel and stores the result in the red and green channel respectively.
     * This really only makes sense for single-channel input textures (where only the red channel holds any data), e.g., for generating a min-max pyramid of a depth buffer.
     */
    SPDFilters["MinMax"] = "minmax";
})(SPDFilters || (SPDFilters = {}));
// Internal: one compute dispatch of the downsampler — the pipeline to run, the
// bind groups it needs, and the 3D workgroup grid to dispatch. Instances are
// created by DevicePipelines.preparePass and replayed by SPDPass.encode.
class SPDPassInner {
    pipeline;
    bindGroups;
    dispatchDimensions;
    constructor(pipeline, bindGroups, dispatchDimensions) {
        this.pipeline = pipeline;
        this.bindGroups = bindGroups;
        this.dispatchDimensions = dispatchDimensions;
    }
    // Records this dispatch into the given compute pass: sets the pipeline,
    // binds each bind group at its index, and dispatches the workgroup grid
    // (x, y = 64x64 tiles of the base mip, z = array layers).
    encode(computePass) {
        computePass.setPipeline(this.pipeline);
        this.bindGroups.forEach((bindGroup, index) => {
            computePass.setBindGroup(index, bindGroup);
        });
        computePass.dispatchWorkgroups(this.dispatchDimensions[0], this.dispatchDimensions[1], this.dispatchDimensions[2]);
    }
}
/**
 * A compute pass for downsampling a texture.
 */
export class SPDPass {
    passes;
    /**
     * The texture the mipmaps will be written to by this {@link SPDPass}, once {@link SPDPass.encode} is called.
     */
    target;
    /** @ignore */
    constructor(passes, target) {
        this.passes = passes;
        this.target = target;
    }
    /**
     * Encodes the configured mipmap generation pass(es) with the given {@link GPUComputePassEncoder}.
     * All bind groups indices used by {@link SPDPass} are reset to `null` to prevent unintentional bindings of internal bind groups for subsequent pipelines encoded in the same {@link GPUComputePassEncoder}.
     * @param computePassEncoder The {@link GPUComputePassEncoder} to encode this mipmap generation pass with.
668 | * @returns The {@link computePassEncoder} 669 | */ 670 | encode(computePassEncoder) { 671 | this.passes.forEach(p => p.encode(computePassEncoder)); 672 | computePassEncoder.setBindGroup(0, null); 673 | computePassEncoder.setBindGroup(1, null); 674 | return computePassEncoder; 675 | } 676 | /** 677 | * Returns the number of passes that will be encoded by calling this instance's {@link SPDPass.encode} method. 678 | */ 679 | get numPasses() { 680 | return this.passes.length; 681 | } 682 | } 683 | var SPDScalarType; 684 | (function (SPDScalarType) { 685 | SPDScalarType["F32"] = "f32"; 686 | SPDScalarType["F16"] = "f16"; 687 | SPDScalarType["I32"] = "i32"; 688 | SPDScalarType["U32"] = "u32"; 689 | })(SPDScalarType || (SPDScalarType = {})); 690 | class SPDPipeline { 691 | mipsLayout; 692 | pipelines; 693 | constructor(mipsLayout, pipelines) { 694 | this.mipsLayout = mipsLayout; 695 | this.pipelines = pipelines; 696 | } 697 | } 698 | function sanitizeScalarType(device, format, halfPrecision) { 699 | const texelType = format.toLocaleLowerCase().includes('sint') ? SPDScalarType.I32 : (format.toLocaleLowerCase().includes('uint') ? SPDScalarType.U32 : SPDScalarType.F32); 700 | if (halfPrecision && !device.features.has('shader-f16')) { 701 | console.warn(`[sanitizeScalarType]: half precision requested but the device feature 'shader-f16' is not enabled, falling back to full precision`); 702 | } 703 | if (halfPrecision && texelType !== SPDScalarType.F32) { 704 | console.warn(`[sanitizeScalarType]: half precision requested for non-float format (${format}, uses ${texelType}), falling back to full precision`); 705 | } 706 | return halfPrecision && !device.features.has('shader-f16') && texelType === SPDScalarType.F32 ? 
SPDScalarType.F16 : texelType;
}
// Per-device cache of SPD compute pipelines and internal GPU resources
// (bind group layouts, atomic counter buffers, mid-mip buffers).
class DevicePipelines {
    device;
    maxMipsPerPass;
    maxArrayLayers;
    disableSubgroups;
    internalResourcesBindGroupLayout;
    internalResourcesBindGroupLayout12;
    internalResourcesBindGroupLayout12RW;
    atomicCounters;
    midMipBuffers;
    pipelines;
    constructor(device, maxArrayLayers, maxMipsPerPass, disableSubgroups) {
        // Weak reference: this cache must not keep the device alive. All users
        // call deref() and bail out if the device has been collected.
        this.device = new WeakRef(device);
        // Each generated mip needs its own storage texture binding, so mips per
        // pass are capped by the device's storage-texture limit (and by SPD's
        // maximum of 12).
        this.maxMipsPerPass = Math.min(device.limits.maxStorageTexturesPerShaderStage, maxMipsPerPass ?? 12);
        this.maxArrayLayers = Math.min(device.limits.maxTextureArrayLayers, maxArrayLayers ?? device.limits.maxTextureArrayLayers);
        this.disableSubgroups = disableSubgroups ?? false;
        this.pipelines = new Map();
        this.atomicCounters = new Map();
        this.midMipBuffers = new Map();
        // Layout for passes generating <= 6 mips: only the 16-byte meta uniform
        // (workgroup offset, num workgroups, num mips).
        this.internalResourcesBindGroupLayout = device.createBindGroupLayout({
            entries: [{
                    binding: 0,
                    visibility: GPUShaderStage.COMPUTE,
                    buffer: {
                        type: 'uniform',
                        hasDynamicOffset: false,
                        minBindingSize: 16,
                    },
                }],
        });
        if (this.maxMipsPerPass > 6) {
            // Layout for passes generating > 6 mips WITHOUT read-write storage
            // texture support: meta uniform + atomic workgroup counter + a
            // storage buffer holding the intermediate 64x64 mid mip
            // (16 bytes per texel, hence 16 * 64 * 64).
            this.internalResourcesBindGroupLayout12 = device.createBindGroupLayout({
                entries: [
                    {
                        binding: 0,
                        visibility: GPUShaderStage.COMPUTE,
                        buffer: {
                            type: 'uniform',
                            hasDynamicOffset: false,
                            minBindingSize: 16,
                        },
                    },
                    {
                        binding: 1,
                        visibility: GPUShaderStage.COMPUTE,
                        buffer: {
                            type: 'storage',
                            hasDynamicOffset: false,
                            minBindingSize: 4,
                        },
                    },
                    {
                        binding: 2,
                        visibility: GPUShaderStage.COMPUTE,
                        buffer: {
                            type: 'storage',
                            hasDynamicOffset: false,
                            minBindingSize: 16 * 64 * 64,
                        },
                    },
                ],
            });
            // Read-write variant: the mid mip is read back through a read-write
            // storage texture (see createPipeline), so no mid-mip buffer is
            // needed — only the meta uniform and the atomic counter.
            this.internalResourcesBindGroupLayout12RW = device.createBindGroupLayout({
                entries: [
                    {
                        binding: 0,
                        visibility:
GPUShaderStage.COMPUTE, 775 | buffer: { 776 | type: 'uniform', 777 | hasDynamicOffset: false, 778 | minBindingSize: 16, 779 | }, 780 | }, 781 | { 782 | binding: 1, 783 | visibility: GPUShaderStage.COMPUTE, 784 | buffer: { 785 | type: 'storage', 786 | hasDynamicOffset: false, 787 | minBindingSize: 4, 788 | }, 789 | }, 790 | ], 791 | }); 792 | } 793 | } 794 | preparePipelines(pipelineConfigs) { 795 | const device = this.device.deref(); 796 | if (device) { 797 | pipelineConfigs?.forEach(c => { 798 | const scalarType = sanitizeScalarType(device, c.format, c.halfPrecision ?? false); 799 | Array.from(c.filters ?? [SPD_FILTER_AVERAGE]).map(filter => { 800 | for (let i = 0; i < this.maxMipsPerPass; ++i) { 801 | this.getOrCreatePipeline(c.format, filter, i + 1, scalarType); 802 | } 803 | }); 804 | }); 805 | } 806 | } 807 | supportsReadWrite(targetFormat) { 808 | const device = this.device.deref(); 809 | if (!device) { 810 | return false; 811 | } 812 | return WebGPUSinglePassDownsampler.supportedReadWriteFormats.has(targetFormat) || (device.features.has('texture-formats-tier2') && WebGPUSinglePassDownsampler.supportedReadWriteFormatsTier2.has(targetFormat)); 813 | } 814 | createPipeline(targetFormat, filterCode, numMips, scalarType) { 815 | const device = this.device.deref(); 816 | if (!device) { 817 | return undefined; 818 | } 819 | const rwSupport = this.supportsReadWrite(targetFormat); 820 | const mipsBindGroupLayout = device.createBindGroupLayout({ 821 | entries: Array(Math.min(numMips, this.maxMipsPerPass) + 1).fill(0).map((_, i) => { 822 | const entry = { 823 | binding: i, 824 | visibility: GPUShaderStage.COMPUTE, 825 | }; 826 | if (i === 0) { 827 | entry.texture = { 828 | sampleType: scalarType === SPDScalarType.I32 ? 'sint' : (scalarType === SPDScalarType.U32 ? 'uint' : 'unfilterable-float'), 829 | viewDimension: '2d-array', 830 | multisampled: false, 831 | }; 832 | } 833 | else { 834 | entry.storageTexture = { 835 | access: (i === 6 && numMips > 6 && rwSupport) ? 
'read-write' : 'write-only', 836 | format: targetFormat, 837 | viewDimension: '2d-array', 838 | }; 839 | } 840 | return entry; 841 | }) 842 | }); 843 | return new SPDPipeline(mipsBindGroupLayout, device.createComputePipeline({ 844 | compute: { 845 | module: device.createShaderModule({ 846 | code: makeShaderCode(targetFormat, filterCode, Math.min(numMips, this.maxMipsPerPass), scalarType, device.features.has('subgroups') && !this.disableSubgroups, rwSupport), 847 | }), 848 | entryPoint: 'downsample', 849 | }, 850 | layout: device.createPipelineLayout({ 851 | bindGroupLayouts: [ 852 | mipsBindGroupLayout, 853 | numMips > 6 ? (rwSupport ? this.internalResourcesBindGroupLayout12RW : this.internalResourcesBindGroupLayout12) : this.internalResourcesBindGroupLayout, 854 | ], 855 | }), 856 | })); 857 | } 858 | getOrCreatePipeline(targetFormat, filterCode, numMipsToCreate, scalarType) { 859 | if (!this.pipelines.has(targetFormat)) { 860 | this.pipelines.set(targetFormat, new Map()); 861 | } 862 | if (!this.pipelines.get(targetFormat)?.has(scalarType)) { 863 | this.pipelines.get(targetFormat)?.set(scalarType, new Map()); 864 | } 865 | if (!this.pipelines.get(targetFormat)?.get(scalarType)?.has(filterCode)) { 866 | this.pipelines.get(targetFormat)?.get(scalarType)?.set(filterCode, new Map()); 867 | } 868 | if (!this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.has(numMipsToCreate)) { 869 | const pipelines = this.createPipeline(targetFormat, filterCode, numMipsToCreate, scalarType); 870 | if (pipelines) { 871 | this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.set(numMipsToCreate, pipelines); 872 | } 873 | } 874 | return this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.get(numMipsToCreate); 875 | } 876 | getOrCreateAtomicCountersBuffer(device, numArrayLayers) { 877 | if (!this.atomicCounters.has(numArrayLayers)) { 878 | const atomicCountersBuffer = device.createBuffer({ 879 | size: 4 * numArrayLayers, 880 | usage: 
GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, 881 | }); 882 | device.queue.writeBuffer(atomicCountersBuffer, 0, new Uint32Array(Array(numArrayLayers).fill(0))); 883 | this.atomicCounters.set(numArrayLayers, atomicCountersBuffer); 884 | } 885 | return this.atomicCounters.get(numArrayLayers); 886 | } 887 | getOrCreateMidMipBuffer(device, numArrayLayers) { 888 | if (!this.midMipBuffers.has(numArrayLayers)) { 889 | this.midMipBuffers.set(numArrayLayers, device.createBuffer({ 890 | size: 16 * 64 * 64 * numArrayLayers, 891 | usage: GPUBufferUsage.STORAGE, 892 | })); 893 | } 894 | return this.midMipBuffers.get(numArrayLayers); 895 | } 896 | createMetaBindGroup(device, meta, halfPrecision, readWriteSupport) { 897 | const metaBuffer = device.createBuffer({ 898 | size: 16, 899 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST 900 | }); 901 | device.queue.writeBuffer(metaBuffer, 0, new Uint32Array([ 902 | ...meta.workgroupOffset, 903 | meta.numWorkGroups, 904 | meta.numMips, 905 | ])); 906 | if (meta.numMips > 6) { 907 | const numArrayLayersForPrecision = halfPrecision ? 
Math.ceil(meta.numArrayLayers / 2) : meta.numArrayLayers; 908 | if (readWriteSupport) { 909 | return device.createBindGroup({ 910 | layout: this.internalResourcesBindGroupLayout12RW, 911 | entries: [ 912 | { 913 | binding: 0, 914 | resource: { 915 | buffer: metaBuffer, 916 | }, 917 | }, 918 | { 919 | binding: 1, 920 | resource: { 921 | buffer: this.getOrCreateAtomicCountersBuffer(device, numArrayLayersForPrecision), 922 | }, 923 | }, 924 | ] 925 | }); 926 | } 927 | else { 928 | return device.createBindGroup({ 929 | layout: this.internalResourcesBindGroupLayout12, 930 | entries: [ 931 | { 932 | binding: 0, 933 | resource: { 934 | buffer: metaBuffer, 935 | }, 936 | }, 937 | { 938 | binding: 1, 939 | resource: { 940 | buffer: this.getOrCreateAtomicCountersBuffer(device, numArrayLayersForPrecision), 941 | }, 942 | }, 943 | { 944 | binding: 2, 945 | resource: { 946 | buffer: this.getOrCreateMidMipBuffer(device, numArrayLayersForPrecision), 947 | }, 948 | }, 949 | ] 950 | }); 951 | } 952 | } 953 | else { 954 | return device.createBindGroup({ 955 | layout: this.internalResourcesBindGroupLayout, 956 | entries: [{ 957 | binding: 0, 958 | resource: { 959 | buffer: metaBuffer, 960 | }, 961 | }] 962 | }); 963 | } 964 | } 965 | preparePass(texture, target, filterCode, offset, size, numMipsTotal, scalarType) { 966 | const device = this.device.deref(); 967 | if (!device) { 968 | return undefined; 969 | } 970 | const rwSupport = this.supportsReadWrite(target.format); 971 | const passes = []; 972 | for (let baseArrayLayer = 0; baseArrayLayer < target.depthOrArrayLayers; baseArrayLayer += this.maxArrayLayers) { 973 | const numArrayLayersThisPass = Math.min(target.depthOrArrayLayers - baseArrayLayer, this.maxArrayLayers); 974 | for (let baseMip = 0; baseMip < numMipsTotal - 1; baseMip += this.maxMipsPerPass) { 975 | const numMipsThisPass = Math.min(numMipsTotal - 1 - baseMip, this.maxMipsPerPass); 976 | const baseMipOffset = offset.map(o => Math.trunc(o / Math.pow(2, baseMip))); 977 
| const baseMipSize = size.map(s => Math.max(Math.trunc(s / Math.pow(2, baseMip)), 1)); 978 | const workgroupOffset = baseMipOffset.map(o => Math.trunc(o / 64)); 979 | const dispatchDimensions = baseMipOffset.map((o, i) => Math.trunc((o + baseMipSize[i] - 1) / 64) + 1 - workgroupOffset[i]); 980 | const numWorkGroups = dispatchDimensions.reduce((product, v) => v * product, 1); 981 | const metaBindGroup = this.createMetaBindGroup(device, { 982 | workgroupOffset, 983 | numWorkGroups, 984 | numMips: numMipsThisPass, 985 | numArrayLayers: numArrayLayersThisPass, 986 | }, scalarType === SPDScalarType.F16, rwSupport); 987 | // todo: handle missing pipeline 988 | const pipeline = this.getOrCreatePipeline(target.format, filterCode, numMipsThisPass, scalarType); 989 | const mipViews = Array(numMipsThisPass + 1).fill(0).map((_, i) => { 990 | if (baseMip === 0 && i === 0) { 991 | return texture.createView({ 992 | dimension: '2d-array', 993 | baseMipLevel: 0, 994 | mipLevelCount: 1, 995 | baseArrayLayer, 996 | arrayLayerCount: numArrayLayersThisPass, 997 | }); 998 | } 999 | else { 1000 | const mip = baseMip + i; 1001 | return target.createView({ 1002 | dimension: '2d-array', 1003 | baseMipLevel: texture === target ? mip : mip - 1, 1004 | mipLevelCount: 1, 1005 | baseArrayLayer, 1006 | arrayLayerCount: numArrayLayersThisPass, 1007 | }); 1008 | } 1009 | }); 1010 | const mipsBindGroup = device.createBindGroup({ 1011 | layout: pipeline.mipsLayout, 1012 | entries: mipViews.map((v, i) => { 1013 | return { 1014 | binding: i, 1015 | resource: v, 1016 | }; 1017 | }), 1018 | }); 1019 | passes.push(new SPDPassInner(pipeline.pipelines, [mipsBindGroup, metaBindGroup], [...dispatchDimensions, numArrayLayersThisPass])); 1020 | } 1021 | } 1022 | return new SPDPass(passes, target); 1023 | } 1024 | } 1025 | /** 1026 | * Returns the maximum number of mip levels for a given n-dimensional size. 
1027 | * @param size The size to compute the maximum number of mip levels for 1028 | * @returns The maximum number of mip levels for the given size 1029 | */ 1030 | export function maxMipLevelCount(...size) { 1031 | return 1 + Math.trunc(Math.log2(Math.max(0, ...size))); 1032 | } 1033 | /** 1034 | * A helper class for downsampling 2D {@link GPUTexture} (& arrays) using as few passes as possible on a {@link GPUDevice} depending on its {@link GPUSupportedLimits}. 1035 | * Up to 12 mip levels can be generated within a single pass, if {@link GPUSupportedLimits.maxStorageTexturesPerShaderStage} supports it. 1036 | */ 1037 | export class WebGPUSinglePassDownsampler { 1038 | filters; 1039 | devicePipelines; 1040 | /** 1041 | * The set of formats supported by WebGPU SPD. 1042 | */ 1043 | static supportedFormats = new Set([ 1044 | 'rgba8unorm', 1045 | 'rgba8snorm', 1046 | 'rgba8uint', 1047 | 'rgba8sint', 1048 | 'rgba16uint', 1049 | 'rgba16sint', 1050 | 'rgba16float', 1051 | 'r32uint', 1052 | 'r32sint', 1053 | 'r32float', 1054 | 'rg32uint', 1055 | 'rg32sint', 1056 | 'rg32float', 1057 | 'rgba32uint', 1058 | 'rgba32sint', 1059 | 'rgba32float', 1060 | ]); 1061 | /** 1062 | * The set of additionally supported formats supported if the feature 'bgra8unorm-storage' is enabled. 1063 | */ 1064 | static supportedFormatsBgra8UnormStorage = new Set([ 1065 | 'bgra8unorm', 1066 | ]); 1067 | /** 1068 | * The set of additionally supported formats if the feature 'texture-formats-tier1' is enabled. 
1069 | */ 1070 | static supportedFormatsTier1 = new Set([ 1071 | 'r8unorm', 1072 | 'r8snorm', 1073 | 'r8uint', 1074 | 'r8sint', 1075 | 'rg8unorm', 1076 | 'rg8snorm', 1077 | 'rg8uint', 1078 | 'rg8sint', 1079 | 'r16unorm', 1080 | 'r16snorm', 1081 | 'r16uint', 1082 | 'r16sint', 1083 | 'r16float', 1084 | 'rg16unorm', 1085 | 'rg16snorm', 1086 | 'rg16uint', 1087 | 'rg16sint', 1088 | 'rg16float', 1089 | 'rgba16unorm', 1090 | 'rgba16snorm', 1091 | 'rgb10a2uint', 1092 | 'rgb10a2unorm', 1093 | 'rg11b10ufloat', 1094 | ]); 1095 | /** 1096 | * The set of formats that support read-write access. 1097 | */ 1098 | static supportedReadWriteFormats = new Set([ 1099 | 'r32uint', 1100 | 'r32sint', 1101 | 'r32float', 1102 | ]); 1103 | /** 1104 | * The set of formats that support read-write access if the feature 'texture-formats-tier2' is enabled. 1105 | */ 1106 | static supportedReadWriteFormatsTier2 = new Set([ 1107 | 'r8unorm', 1108 | 'r8uint', 1109 | 'r8sint', 1110 | 'rgba8unorm', 1111 | 'rgba8uint', 1112 | 'rgba8sint', 1113 | 'r16uint', 1114 | 'r16sint', 1115 | 'r16float', 1116 | 'rgba16uint', 1117 | 'rgba16sint', 1118 | 'rgba16float', 1119 | 'rgba32uint', 1120 | 'rgba32sint', 1121 | 'rgba32float', 1122 | ]); 1123 | /** 1124 | * Sets the preferred device limits for {@link WebGPUSinglePassDownsampler} in a given record of limits. 1125 | * Existing preferred device limits are either increased or left untouched. 1126 | * If {@link limits} is undefined, creates a new record of preferred device limits for {@link WebGPUSinglePassDownsampler}. 1127 | * The result can be used to set {@link GPUDeviceDescriptor.requiredLimits} when requesting a device. 1128 | * @param limits A record of device limits set to update with the preferred limits for {@link WebGPUSinglePassDownsampler} 1129 | * @param adapter If this is set, the preferred limits that are set by this function will be clamped to {@link GPUAdapter.limits}. 
1130 | * @returns The updated or created set of device limits with all preferred limits for {@link WebGPUSinglePassDownsampler} set 1131 | */ 1132 | static setPreferredLimits(limits, adapter) { 1133 | if (!limits) { 1134 | limits = {}; 1135 | } 1136 | const maxStorageTexturesPerShaderStage = Math.min(adapter?.limits.maxStorageTexturesPerShaderStage ?? 6, 6); 1137 | limits.maxStorageTexturesPerShaderStage = Math.max(limits.maxStorageTexturesPerShaderStage ?? maxStorageTexturesPerShaderStage, maxStorageTexturesPerShaderStage); 1138 | return limits; 1139 | } 1140 | /** 1141 | * Creates a new {@link WebGPUSinglePassDownsampler}. 1142 | * On its own, {@link WebGPUSinglePassDownsampler} does not allocate any GPU resources. 1143 | * Optionally, prepare GPU resources for a given {@link SPDPrepareDeviceDescriptor}. 1144 | * @param prepareDescriptor An optional descriptor for preparing GPU resources 1145 | * @see WebGPUSinglePassDownsampler.prepareDeviceResources 1146 | */ 1147 | constructor(prepareDescriptor) { 1148 | this.filters = new Map([ 1149 | [SPDFilters.Average, SPD_FILTER_AVERAGE], 1150 | [SPDFilters.Min, SPD_FILTER_MIN], 1151 | [SPDFilters.Max, SPD_FILTER_MAX], 1152 | [SPDFilters.MinMax, SPD_FILTER_MINMAX], 1153 | ]); 1154 | this.devicePipelines = new Map(); 1155 | if (prepareDescriptor) { 1156 | this.prepareDeviceResources(prepareDescriptor); 1157 | } 1158 | } 1159 | /** 1160 | * Prepares GPU resources required by {@link WebGPUSinglePassDownsampler} to downsample textures for a given {@link SPDPrepareDeviceDescriptor}. 
1161 | * @param prepareDescriptor a descriptor for preparing GPU resources 1162 | */ 1163 | prepareDeviceResources(prepareDescriptor) { 1164 | this.getOrCreateDevicePipelines(prepareDescriptor.device, prepareDescriptor.maxArrayLayersPerPass, prepareDescriptor.maxMipsPerPass, prepareDescriptor.disableSubgroups)?.preparePipelines(prepareDescriptor?.formats?.map(format => { 1165 | return { 1166 | ...format, 1167 | filters: new Set(Array.from(format.filters ?? []).map(filter => this.filters.get(filter) ?? SPD_FILTER_AVERAGE)), 1168 | }; 1169 | })); 1170 | } 1171 | getOrCreateDevicePipelines(device, maxArrayLayers, maxMipsPerPass, disableSubgroups) { 1172 | if (!this.devicePipelines.has(device)) { 1173 | this.devicePipelines.set(device, new DevicePipelines(device, maxArrayLayers, maxMipsPerPass, disableSubgroups)); 1174 | } 1175 | return this.devicePipelines.get(device); 1176 | } 1177 | /** 1178 | * Deregisters all resources stored for a given device. 1179 | * @param device The device resources should be deregistered for 1180 | */ 1181 | deregisterDevice(device) { 1182 | this.devicePipelines.delete(device); 1183 | } 1184 | /** 1185 | * Registers a new downsampling filter operation that can be injected into the downsampling shader for new pipelines. 1186 | * 1187 | * The given WGSL code must (at least) specify a function to reduce four values into one with the following name and signature: 1188 | * 1189 | * `spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4` 1190 | * 1191 | * @param name The unique name of the filter operation 1192 | * @param wgsl The WGSL code to inject into the downsampling shader as the filter operation 1193 | */ 1194 | registerFilter(name, wgsl) { 1195 | if (this.filters.has(name)) { 1196 | console.warn(`[WebGPUSinglePassDownsampler::registerFilter]: overriding existing filter '${name}'. 
Previously generated pipelines are not affected.`); 1197 | } 1198 | this.filters.set(name, wgsl); 1199 | } 1200 | /** 1201 | * Prepares a pass to downsample a 2d texture / 2d texture array. 1202 | * The produced {@link SPDPass} can be used multiple times to repeatedly downsampling a texture, e.g., for downsampling the depth buffer each frame. 1203 | * For one-time use, {@link WebGPUSinglePassDownsampler.generateMipmaps} can be used instead. 1204 | * 1205 | * By default, the texture is downsampled `texture.mipLevelCount - 1` times using an averaging filter, i.e., 4 pixel values from the parent level are averaged to produce a single pixel in the current mip level. 1206 | * This behavior can be configured using the optional {@link config} parameter. 1207 | * For example, instead of writing the mip levels into the input texture itself, a separate target texture can be specified using {@link SPDPassConfig.target}. 1208 | * Other configuration options include using a different (possibly custom) filter, only downsampling a subregion of the input texture, and limiting the number of mip levels to generate, e.g., if a min-max pyramid is only needed up to a certain tile resolution. 1209 | * If the given filter does not exist, an averaging filter will be used as a fallback. 1210 | * The image region to downsample and the number of mip levels to generate are clamped to the input texture's size, and the output texture's `mipLevelCount`. 1211 | * 1212 | * Depending on the number of mip levels to generate and the device's `maxStorageTexturesPerShaderStage` limit, the {@link SPDPass} will internally consist of multiple passes, each generating up to `min(maxStorageTexturesPerShaderStage, 12)` mip levels. 1213 | * 1214 | * @param device The device the {@link SPDPass} should be prepared for 1215 | * @param texture The texture that is to be processed by the {@link SPDPass}. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 
Must support {@link GPUTextureUsage.TEXTURE_BINDING}, and, if no other target is given, {@link GPUTextureUsage.STORAGE_BINDING}. 1216 | * @param config The config for the {@link SPDPass} 1217 | * @returns The prepared {@link SPDPass} or undefined if preparation failed or if no mipmaps would be generated. 1218 | * @throws If the {@link GPUTextureFormat} of {@link SPDPassConfig.target} is not supported (does not support {@link GPUStorageTextureAccess:"write-only"} on the given {@link device}). 1219 | * @throws If the size of {@link SPDPassConfig.target} is too small to store the first mip level generated for {@link texture} 1220 | * @throws If {@link texture} or {@link SPDPassConfig.target} is not a 2d texture. 1221 | * @see WebGPUSinglePassDownsampler.generateMipmaps 1222 | * @see WebGPUSinglePassDownsampler.registerFilter 1223 | * @see WebGPUSinglePassDownsampler.setPreferredLimits 1224 | */ 1225 | preparePass(device, texture, config) { 1226 | const target = config?.target ?? texture; 1227 | const filter = config?.filter ?? SPDFilters.Average; 1228 | const offset = (config?.offset ?? [0, 0]).map((o, d) => Math.max(0, Math.min(o, (d === 0 ? texture.width : texture.height) - 1))); 1229 | const size = (config?.size ?? [texture.width, texture.height]).map((s, d) => Math.max(0, Math.min(s, (d === 0 ? texture.width : texture.height) - offset[d]))); 1230 | const numMips = Math.min(Math.max(config?.numMips ?? 
target.mipLevelCount, 0), maxMipLevelCount(...size)); 1231 | if (numMips < 2) { 1232 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: no mips to create (numMips = ${numMips})`); 1233 | return undefined; 1234 | } 1235 | if (!(WebGPUSinglePassDownsampler.supportedFormats.has(target.format) || 1236 | (device.features.has('bgra8unorm-storage') && WebGPUSinglePassDownsampler.supportedFormatsBgra8UnormStorage.has(target.format)) || 1237 | ((device.features.has('texture-formats-tier1') || device.features.has('texture-formats-tier2')) && WebGPUSinglePassDownsampler.supportedFormatsTier1.has(target.format)))) { 1238 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: format ${target.format} not supported. (Supported formats: ${WebGPUSinglePassDownsampler.supportedFormats}, and ${WebGPUSinglePassDownsampler.supportedFormatsBgra8UnormStorage} (if 'bgra8unorm-storage' is enabled), and ${WebGPUSinglePassDownsampler.supportedFormatsTier1} (if 'texture-formats-tier1' is enabled))`); 1239 | } 1240 | if (target.format === 'bgra8unorm' && !device.features.has('bgra8unorm-storage')) { 1241 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: format ${target.format} not supported without feature 'bgra8unorm-storage' enabled`); 1242 | } 1243 | if (target.width < Math.max(1, Math.floor(size[0] / 2)) || target.height < Math.max(1, Math.floor(size[1] / 2))) { 1244 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: target too small (${[target.width, target.height]}) for input size ${size}`); 1245 | } 1246 | if (target.dimension !== '2d' || texture.dimension !== '2d') { 1247 | throw new Error('[WebGPUSinglePassDownsampler::prepare]: texture or target is not a 2d texture'); 1248 | } 1249 | if (!this.filters.has(filter)) { 1250 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: unknown filter ${filter}, falling back to average`); 1251 | } 1252 | if (filter === SPD_FILTER_MINMAX && target.format.includes('r32')) { 1253 | 
console.warn(`[WebGPUSinglePassDownsampler::prepare]: filter ${filter} makes no sense for one-component target format ${target.format}`); 1254 | } 1255 | const filterCode = this.filters.get(filter) ?? SPD_FILTER_AVERAGE; 1256 | const scalarType = sanitizeScalarType(device, target.format, config?.halfPrecision ?? false); 1257 | return this.getOrCreateDevicePipelines(device)?.preparePass(texture, target, filterCode, offset, size, numMips, scalarType); 1258 | } 1259 | /** 1260 | * Generates mipmaps for the given texture. 1261 | * For textures that will be downsampled more than once, consider generating a {@link SPDPass} using {@link WebGPUSinglePassDownsampler.preparePass} and calling its {@link SPDPass.encode} method. 1262 | * This way, allocated GPU resources for downsampling the texture can be reused. 1263 | * @param device The device to use for downsampling the texture 1264 | * @param texture The texture to generate mipmaps for. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 1265 | * @param config The config for mipmap generation 1266 | * @returns True if mipmaps were generated, false otherwise 1267 | * @throws If {@link WebGPUSinglePassDownsampler.preparePass} threw an error. 
1268 | * @see WebGPUSinglePassDownsampler.preparePass 1269 | */ 1270 | generateMipmaps(device, texture, config) { 1271 | const pass = this.preparePass(device, texture, config); 1272 | if (!pass) { 1273 | return false; 1274 | } 1275 | else { 1276 | const commandEncoder = device.createCommandEncoder(); 1277 | pass?.encode(commandEncoder.beginComputePass()).end(); 1278 | device.queue.submit([commandEncoder.finish()]); 1279 | return true; 1280 | } 1281 | } 1282 | } 1283 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | function makeShaderCode(outputFormat: string, filterOp: string = SPD_FILTER_AVERAGE, numMips: number, scalarType: SPDScalarType, hasSubgroups: boolean, mip6SupportsReadWrite: boolean): string { 2 | const texelType = scalarType === SPDScalarType.I32 ? 'i32' : (scalarType === SPDScalarType.U32 ? 'u32' : 'f32'); 3 | const useF16 = scalarType === SPDScalarType.F16; 4 | 5 | const filterCode = filterOp === SPD_FILTER_AVERAGE && !['f32', 'f16'].includes(texelType) ? filterOp.replace('* SPDScalar(0.25)', '/ 4') : filterOp; 6 | 7 | const mipsBindings = Array(numMips).fill(0) 8 | .map((_, i) => { 9 | if (i == 5 && numMips > 6 && mip6SupportsReadWrite) { 10 | return `@group(0) @binding(6) var dst_mip_6: texture_storage_2d_array<${outputFormat}, read_write>;`; 11 | } 12 | return `@group(0) @binding(${i + 1}) var dst_mip_${i + 1}: texture_storage_2d_array<${outputFormat}, write>;`; 13 | }) 14 | .join('\n'); 15 | 16 | // todo: get rid of this branching as soon as WGSL supports arrays of texture_storage_2d_array 17 | const mipsAccessorBody = Array(numMips).fill(0) 18 | .map((_, i) => { 19 | if (i == 5 && numMips > 6 && !mip6SupportsReadWrite) { 20 | return ` else if mip == 6 { 21 | textureStore(dst_mip_6, uv, slice, ${useF16 ? 
`vec4<${texelType}>(value)` : 'value'}); 22 | mip_dst_6_buffer[slice][uv.y][uv.x] = value; 23 | }` 24 | } 25 | return `${i === 0 ? '' : ' else '}if mip == ${i + 1} { 26 | textureStore(dst_mip_${i + 1}, uv, slice, ${useF16 ? `vec4<${texelType}>(value)` : 'value'}); 27 | }`; 28 | }) 29 | .join(''); 30 | 31 | const mipsAccessor = `fn store_dst_mip(value: vec4, uv: vec2, slice: u32, mip: u32) {\n${mipsAccessorBody}\n}` 32 | const midMipAccessor = mip6SupportsReadWrite ? `return vec4(textureLoad(dst_mip_6, uv, slice));` : `return mip_dst_6_buffer[slice][uv.y][uv.x];`; 33 | 34 | return /* wgsl */` 35 | // This file is part of the FidelityFX SDK. 36 | // 37 | // Copyright (C) 2023 Advanced Micro Devices, Inc. 38 | // 39 | // Permission is hereby granted, free of charge, to any person obtaining a copy 40 | // of this software and associated documentation files(the “Software”), to deal 41 | // in the Software without restriction, including without limitation the rights 42 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 43 | // copies of the Software, and to permit persons to whom the Software is 44 | // furnished to do so, subject to the following conditions : 45 | // 46 | // The above copyright notice and this permission notice shall be included in 47 | // all copies or substantial portions of the Software. 48 | // 49 | // THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 50 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 51 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 52 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 53 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 54 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 55 | // THE SOFTWARE. 
56 | 57 | 58 | // Definitions -------------------------------------------------------------------------------------------------------- 59 | 60 | ${useF16 ? 'enable f16;' : ''} 61 | ${hasSubgroups ? 'enable subgroups;' : ''} 62 | 63 | alias SPDScalar = ${scalarType}; 64 | 65 | // Helpers ------------------------------------------------------------------------------------------------------------ 66 | 67 | /** 68 | * A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. 69 | * 70 | * The 64-wide lane indices to 8x8 remapping is performed as follows: 71 | * 00 01 08 09 10 11 18 19 72 | * 02 03 0a 0b 12 13 1a 1b 73 | * 04 05 0c 0d 14 15 1c 1d 74 | * 06 07 0e 0f 16 17 1e 1f 75 | * 20 21 28 29 30 31 38 39 76 | * 22 23 2a 2b 32 33 3a 3b 77 | * 24 25 2c 2d 34 35 3c 3d 78 | * 26 27 2e 2f 36 37 3e 3f 79 | * 80 | * @param a: The input 1D coordinate to remap. 81 | * 82 | * @returns The remapped 2D coordinates. 83 | */ 84 | fn remap_for_wave_reduction(a: u32) -> vec2 { 85 | return vec2( 86 | insertBits(extractBits(a, 2u, 3u), a, 0u, 1u), 87 | insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u) 88 | ); 89 | } 90 | 91 | fn map_to_xy(local_invocation_index: u32) -> vec2 { 92 | let sub_xy: vec2 = remap_for_wave_reduction(local_invocation_index % 64); 93 | return vec2( 94 | sub_xy.x + 8 * ((local_invocation_index >> 6) % 2), 95 | sub_xy.y + 8 * ((local_invocation_index >> 7)) 96 | ); 97 | } 98 | 99 | /* 100 | * Compute a linear value from a SRGB value. 101 | * 102 | * @param value: The value to convert to linear from SRGB. 103 | * 104 | * @returns A value in SRGB space. 
105 | */ 106 | /* 107 | fn srgb_to_linear(value: SPDScalar) -> SPDScalar { 108 | let j = vec3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); 109 | let k = vec2(1.055, -0.055); 110 | return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); 111 | } 112 | */ 113 | 114 | // Resources & Accessors ----------------------------------------------------------------------------------------------- 115 | struct DownsamplePassMeta { 116 | work_group_offset: vec2, 117 | num_work_groups: u32, 118 | mips: u32, 119 | } 120 | 121 | // In the original version dst_mip_i is an image2Darray [SPD_MAX_MIP_LEVELS+1], i.e., 12+1, but WGSL doesn't support arrays of textures yet 122 | // Also these are read_write because for mips 7-13, the workgroup reads from mip level 6 - since most formats don't support read_write access in WGSL yet, we use a single read_write buffer in such cases instead 123 | @group(0) @binding(0) var src_mip_0: texture_2d_array<${texelType}>; 124 | ${mipsBindings} 125 | 126 | @group(1) @binding(0) var downsample_pass_meta : DownsamplePassMeta; 127 | @group(1) @binding(1) var spd_global_counter: array>; 128 | // this is only used if read_write access is not supported for the texture format 129 | @group(1) @binding(2) var mip_dst_6_buffer: array, 64>, 64>>; 130 | 131 | fn get_mips() -> u32 { 132 | return downsample_pass_meta.mips; 133 | } 134 | 135 | fn get_num_work_groups() -> u32 { 136 | return downsample_pass_meta.num_work_groups; 137 | } 138 | 139 | fn get_work_group_offset() -> vec2 { 140 | return downsample_pass_meta.work_group_offset; 141 | } 142 | 143 | fn load_src_image(uv: vec2, slice: u32) -> vec4 { 144 | return vec4(textureLoad(src_mip_0, uv, slice, 0)); 145 | } 146 | 147 | fn load_mid_mip_image(uv: vec2, slice: u32) -> vec4 { 148 | ${numMips > 6 ? 
midMipAccessor : 'return vec4();'} 149 | } 150 | 151 | ${mipsAccessor} 152 | 153 | // Workgroup ----------------------------------------------------------------------------------------------------------- 154 | 155 | ${useF16 ? ` 156 | var spd_intermediate_rg: array, 16>, 16>; 157 | var spd_intermediate_ba: array, 16>, 16>; 158 | `: ` 159 | var spd_intermediate_r: array, 16>; 160 | var spd_intermediate_g: array, 16>; 161 | var spd_intermediate_b: array, 16>; 162 | var spd_intermediate_a: array, 16>; 163 | `} 164 | var spd_counter: atomic; 165 | 166 | fn spd_increase_atomic_counter(slice: u32) { 167 | atomicStore(&spd_counter, atomicAdd(&spd_global_counter[slice], 1)); 168 | } 169 | 170 | fn spd_get_atomic_counter() -> u32 { 171 | return workgroupUniformLoad(&spd_counter); 172 | } 173 | 174 | fn spd_reset_atomic_counter(slice: u32) { 175 | atomicStore(&spd_global_counter[slice], 0); 176 | } 177 | 178 | // Cotnrol flow -------------------------------------------------------------------------------------------------------- 179 | 180 | fn spd_barrier() { 181 | // in glsl this does: groupMemoryBarrier(); barrier(); 182 | workgroupBarrier(); 183 | } 184 | 185 | // Only last active workgroup should proceed 186 | fn spd_exit_workgroup(num_work_groups: u32, local_invocation_index: u32, slice: u32) -> bool { 187 | // global atomic counter 188 | if (local_invocation_index == 0) { 189 | spd_increase_atomic_counter(slice); 190 | } 191 | storageBarrier(); 192 | return spd_get_atomic_counter() != (num_work_groups - 1); 193 | } 194 | 195 | // Pixel access -------------------------------------------------------------------------------------------------------- 196 | 197 | ${filterCode} 198 | 199 | ${hasSubgroups ?
` 200 | fn spd_reduce_quad(value: vec4) -> vec4 { 201 | let v0 = value; 202 | let v1 = quadSwapX(value); 203 | let v2 = quadSwapY(value); 204 | let v3 = quadSwapDiagonal(value); 205 | return spd_reduce_4(v0, v1, v2, v3); 206 | } 207 | ` : ''} 208 | 209 | fn spd_store(pix: vec2, out_value: vec4, mip: u32, slice: u32) { 210 | store_dst_mip(out_value, pix, slice, mip + 1); 211 | } 212 | 213 | fn spd_load_intermediate(x: u32, y: u32) -> vec4 { 214 | return vec4(${useF16 ? ` 215 | spd_intermediate_rg[x][y], 216 | spd_intermediate_ba[x][y],` : ` 217 | spd_intermediate_r[x][y], 218 | spd_intermediate_g[x][y], 219 | spd_intermediate_b[x][y], 220 | spd_intermediate_a[x][y],` 221 | }); 222 | } 223 | 224 | fn spd_store_intermediate(x: u32, y: u32, value: vec4) { 225 | ${useF16 ? ` 226 | spd_intermediate_rg[x][y] = value.rg; 227 | spd_intermediate_ba[x][y] = value.ba;` : ` 228 | spd_intermediate_r[x][y] = value.r; 229 | spd_intermediate_g[x][y] = value.g; 230 | spd_intermediate_b[x][y] = value.b; 231 | spd_intermediate_a[x][y] = value.a;`} 232 | } 233 | 234 | fn spd_reduce_intermediate(i0: vec2, i1: vec2, i2: vec2, i3: vec2) -> vec4 { 235 | let v0 = spd_load_intermediate(i0.x, i0.y); 236 | let v1 = spd_load_intermediate(i1.x, i1.y); 237 | let v2 = spd_load_intermediate(i2.x, i2.y); 238 | let v3 = spd_load_intermediate(i3.x, i3.y); 239 | return spd_reduce_4(v0, v1, v2, v3); 240 | } 241 | 242 | fn spd_reduce_load_4(base: vec2, slice: u32) -> vec4 { 243 | let v0 = load_src_image(base + vec2(0, 0), slice); 244 | let v1 = load_src_image(base + vec2(0, 1), slice); 245 | let v2 = load_src_image(base + vec2(1, 0), slice); 246 | let v3 = load_src_image(base + vec2(1, 1), slice); 247 | return spd_reduce_4(v0, v1, v2, v3); 248 | } 249 | 250 | fn spd_reduce_load_mid_mip_4(base: vec2, slice: u32) -> vec4 { 251 | let v0 = load_mid_mip_image(base + vec2(0, 0), slice); 252 | let v1 = load_mid_mip_image(base + vec2(0, 1), slice); 253 | let v2 = load_mid_mip_image(base + vec2(1, 0), slice); 254 
| let v3 = load_mid_mip_image(base + vec2(1, 1), slice); 255 | return spd_reduce_4(v0, v1, v2, v3); 256 | } 257 | 258 | // Main logic --------------------------------------------------------------------------------------------------------- 259 | 260 | fn spd_downsample_mips_0_1(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 261 | var v: array, 4>; 262 | 263 | let workgroup64 = workgroup_id.xy * 64; 264 | let workgroup32 = workgroup_id.xy * 32; 265 | let workgroup16 = workgroup_id.xy * 16; 266 | 267 | var tex = workgroup64 + vec2(x * 2, y * 2); 268 | var pix = workgroup32 + vec2(x, y); 269 | v[0] = spd_reduce_load_4(tex, slice); 270 | spd_store(pix, v[0], 0, slice); 271 | 272 | tex = workgroup64 + vec2(x * 2 + 32, y * 2); 273 | pix = workgroup32 + vec2(x + 16, y); 274 | v[1] = spd_reduce_load_4(tex, slice); 275 | spd_store(pix, v[1], 0, slice); 276 | 277 | tex = workgroup64 + vec2(x * 2, y * 2 + 32); 278 | pix = workgroup32 + vec2(x, y + 16); 279 | v[2] = spd_reduce_load_4(tex, slice); 280 | spd_store(pix, v[2], 0, slice); 281 | 282 | tex = workgroup64 + vec2(x * 2 + 32, y * 2 + 32); 283 | pix = workgroup32 + vec2(x + 16, y + 16); 284 | v[3] = spd_reduce_load_4(tex, slice); 285 | spd_store(pix, v[3], 0, slice); 286 | 287 | if mip <= 1 { 288 | return; 289 | } 290 | 291 | ${hasSubgroups ? 
` 292 | v[0] = spd_reduce_quad(v[0]); 293 | v[1] = spd_reduce_quad(v[1]); 294 | v[2] = spd_reduce_quad(v[2]); 295 | v[3] = spd_reduce_quad(v[3]); 296 | 297 | if (local_invocation_index % 4) == 0 { 298 | spd_store(workgroup16 + vec2(x / 2, y / 2), v[0], 1, slice); 299 | spd_store_intermediate(x / 2, y / 2, v[0]); 300 | 301 | spd_store(workgroup16 + vec2(x / 2 + 8, y / 2), v[1], 1, slice); 302 | spd_store_intermediate(x / 2 + 8, y / 2, v[1]); 303 | 304 | spd_store(workgroup16 + vec2(x / 2, y / 2 + 8), v[2], 1, slice); 305 | spd_store_intermediate(x / 2, y / 2 + 8, v[2]); 306 | 307 | spd_store(workgroup16 + vec2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); 308 | spd_store_intermediate(x / 2 + 8, y / 2 + 8, v[3]); 309 | } 310 | ` : ` 311 | for (var i = 0u; i < 4u; i++) { 312 | spd_store_intermediate(x, y, v[i]); 313 | spd_barrier(); 314 | if local_invocation_index < 64 { 315 | v[i] = spd_reduce_intermediate( 316 | vec2(x * 2 + 0, y * 2 + 0), 317 | vec2(x * 2 + 1, y * 2 + 0), 318 | vec2(x * 2 + 0, y * 2 + 1), 319 | vec2(x * 2 + 1, y * 2 + 1) 320 | ); 321 | spd_store(workgroup16 + vec2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); 322 | } 323 | spd_barrier(); 324 | } 325 | 326 | if local_invocation_index < 64 { 327 | spd_store_intermediate(x + 0, y + 0, v[0]); 328 | spd_store_intermediate(x + 8, y + 0, v[1]); 329 | spd_store_intermediate(x + 0, y + 8, v[2]); 330 | spd_store_intermediate(x + 8, y + 8, v[3]); 331 | } 332 | `} 333 | } 334 | 335 | fn spd_downsample_mip_2(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 336 | ${hasSubgroups ? 
` 337 | var v = spd_load_intermediate(x, y); 338 | v = spd_reduce_quad(v); 339 | // quad index 0 stores result 340 | if (local_invocation_index % 4) == 0 { 341 | spd_store(workgroup_id.xy * 8 + vec2(x / 2, y / 2), v, mip, slice); 342 | spd_store_intermediate(x + (y / 2) % 2, y, v); 343 | } 344 | ` : ` 345 | if local_invocation_index < 64u { 346 | let v = spd_reduce_intermediate( 347 | vec2(x * 2 + 0, y * 2 + 0), 348 | vec2(x * 2 + 1, y * 2 + 0), 349 | vec2(x * 2 + 0, y * 2 + 1), 350 | vec2(x * 2 + 1, y * 2 + 1) 351 | ); 352 | spd_store(workgroup_id.xy * 8 + vec2(x, y), v, mip, slice); 353 | // store to LDS, try to reduce bank conflicts 354 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 355 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 356 | // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 357 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 358 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 359 | // ... 360 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 361 | spd_store_intermediate(x * 2 + y % 2, y * 2, v); 362 | } 363 | `} 364 | } 365 | 366 | fn spd_downsample_mip_3(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 367 | ${hasSubgroups ? 
` 368 | // subgroup ops must be in uniform control flow 369 | var v = spd_load_intermediate(x * 2 + y % 2, y * 2); 370 | v = spd_reduce_quad(v); 371 | 372 | // quad index 0 stores result 373 | if local_invocation_index < 64u && (local_invocation_index % 4) == 0 { 374 | spd_store(workgroup_id.xy * 4 + vec2(x / 2, y / 2), v, mip, slice); 375 | spd_store_intermediate(x * 2 + y / 2, y * 2, v); 376 | } 377 | ` : ` 378 | if local_invocation_index < 16u { 379 | // x 0 x 0 380 | // 0 0 0 0 381 | // 0 x 0 x 382 | // 0 0 0 0 383 | let v = spd_reduce_intermediate( 384 | vec2(x * 4 + 0 + 0, y * 4 + 0), 385 | vec2(x * 4 + 2 + 0, y * 4 + 0), 386 | vec2(x * 4 + 0 + 1, y * 4 + 2), 387 | vec2(x * 4 + 2 + 1, y * 4 + 2) 388 | ); 389 | spd_store(workgroup_id.xy * 4 + vec2(x, y), v, mip, slice); 390 | // store to LDS 391 | // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 392 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 393 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 394 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 395 | // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 396 | // ... 397 | // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 398 | // ... 399 | // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 400 | // ... 401 | spd_store_intermediate(x * 4 + y, y * 4, v); 402 | } 403 | `} 404 | } 405 | 406 | fn spd_downsample_mip_4(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 407 | ${hasSubgroups ? ` 408 | // subgroup ops must be in uniform control flow 409 | var v = spd_load_intermediate(x * 4 + y, y * 4); 410 | v = spd_reduce_quad(v); 411 | 412 | // quad index 0 stores result 413 | if local_invocation_index < 16u && (local_invocation_index % 4) == 0 { 414 | spd_store(workgroup_id.xy * 2 + vec2(x / 2, y / 2), v, mip, slice); 415 | spd_store_intermediate(x / 2 + y, 0, v); 416 | } 417 | ` : ` 418 | if local_invocation_index < 4u { 419 | // x 0 0 0 x 0 0 0 420 | // ... 
421 | // 0 x 0 0 0 x 0 0 422 | let v = spd_reduce_intermediate( 423 | vec2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), 424 | vec2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), 425 | vec2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), 426 | vec2(x * 8 + 4 + 1 + y * 2, y * 8 + 4) 427 | ); 428 | spd_store(workgroup_id.xy * 2 + vec2(x, y), v, mip, slice); 429 | // store to LDS 430 | // x x x x 0 ... 431 | // 0 ... 432 | spd_store_intermediate(x + y * 2, 0, v); 433 | } 434 | `} 435 | } 436 | 437 | fn spd_downsample_mip_5(workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 438 | ${hasSubgroups ? ` 439 | // subgroup ops must be in uniform control flow 440 | var v = spd_load_intermediate(local_invocation_index, 0); 441 | v = spd_reduce_quad(v); 442 | 443 | // quad index 0 stores result 444 | if local_invocation_index < 4u && (local_invocation_index % 4) == 0 { 445 | spd_store(workgroup_id.xy, v, mip, slice); 446 | } 447 | ` : ` 448 | if local_invocation_index < 1u { 449 | // x x x x 0 ... 450 | // 0 ... 451 | let v = spd_reduce_intermediate(vec2(0, 0), vec2(1, 0), vec2(2, 0), vec2(3, 0)); 452 | spd_store(workgroup_id.xy, v, mip, slice); 453 | } 454 | `} 455 | } 456 | 457 | fn spd_downsample_next_four(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) { 458 | if mips <= base_mip { 459 | return; 460 | } 461 | spd_barrier(); 462 | spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice); 463 | 464 | if mips <= base_mip + 1 { 465 | return; 466 | } 467 | spd_barrier(); 468 | spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice); 469 | 470 | if mips <= base_mip + 2 { 471 | return; 472 | } 473 | spd_barrier(); 474 | spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice); 475 | 476 | if mips <= base_mip + 3 { 477 | return; 478 | } 479 | spd_barrier(); 480 | spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice); 481 | } 482 | 483 
| fn spd_downsample_last_four(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) { 484 | if mips <= base_mip { 485 | return; 486 | } 487 | spd_barrier(); 488 | spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice); 489 | 490 | if mips <= base_mip + 1 { 491 | return; 492 | } 493 | spd_barrier(); 494 | spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice); 495 | 496 | if mips <= base_mip + 2 { 497 | return; 498 | } 499 | spd_barrier(); 500 | spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice); 501 | 502 | if mips <= base_mip + 3 { 503 | return; 504 | } 505 | spd_barrier(); 506 | spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice); 507 | } 508 | 509 | fn spd_downsample_mips_6_7(x: u32, y: u32, mips: u32, slice: u32) { 510 | ${mip6SupportsReadWrite ? 'textureBarrier();' : ''} 511 | 512 | var tex = vec2(x * 4 + 0, y * 4 + 0); 513 | var pix = vec2(x * 2 + 0, y * 2 + 0); 514 | let v0 = spd_reduce_load_mid_mip_4(tex, slice); 515 | spd_store(pix, v0, 6, slice); 516 | 517 | tex = vec2(x * 4 + 2, y * 4 + 0); 518 | pix = vec2(x * 2 + 1, y * 2 + 0); 519 | let v1 = spd_reduce_load_mid_mip_4(tex, slice); 520 | spd_store(pix, v1, 6, slice); 521 | 522 | tex = vec2(x * 4 + 0, y * 4 + 2); 523 | pix = vec2(x * 2 + 0, y * 2 + 1); 524 | let v2 = spd_reduce_load_mid_mip_4(tex, slice); 525 | spd_store(pix, v2, 6, slice); 526 | 527 | tex = vec2(x * 4 + 2, y * 4 + 2); 528 | pix = vec2(x * 2 + 1, y * 2 + 1); 529 | let v3 = spd_reduce_load_mid_mip_4(tex, slice); 530 | spd_store(pix, v3, 6, slice); 531 | 532 | if mips <= 7 { 533 | return; 534 | } 535 | // no barrier needed, working on values only from the same thread 536 | 537 | let v = spd_reduce_4(v0, v1, v2, v3); 538 | spd_store(vec2(x, y), v, 7, slice); 539 | spd_store_intermediate(x, y, v); 540 | } 541 | 542 | fn spd_downsample_last_6(x: u32, y: u32, 
local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) { 543 | if mips <= 6 { 544 | return; 545 | } 546 | 547 | // increase the global atomic counter for the given slice and check if it's the last remaining thread group: 548 | // terminate if not, continue if yes. 549 | if spd_exit_workgroup(num_work_groups, local_invocation_index, slice) { 550 | return; 551 | } 552 | 553 | // reset the global atomic counter back to 0 for the next spd dispatch 554 | spd_reset_atomic_counter(slice); 555 | 556 | // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. 557 | // compute MIP level 6 and 7 558 | spd_downsample_mips_6_7(x, y, mips, slice); 559 | 560 | // compute MIP level 8, 9, 10, 11 561 | spd_downsample_last_four(x, y, vec2(0, 0), local_invocation_index, 8, mips, slice); 562 | } 563 | 564 | /// Downsamples a 64x64 tile based on the work group id. 565 | /// If after downsampling it's the last active thread group, computes the remaining MIP levels. 566 | /// 567 | /// @param [in] workGroupID index of the work group / thread group 568 | /// @param [in] localInvocationIndex index of the thread within the thread group in 1D 569 | /// @param [in] mips the number of total MIP levels to compute for the input texture 570 | /// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice 571 | /// @param [in] slice the slice of the input texture 572 | fn spd_downsample(workgroup_id: vec2, local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) { 573 | let xy = map_to_xy(local_invocation_index); 574 | spd_downsample_mips_0_1(xy.x, xy.y, workgroup_id, local_invocation_index, mips, slice); 575 | spd_downsample_next_four(xy.x, xy.y, workgroup_id, local_invocation_index, 2, mips, slice); 576 | ${numMips > 6 ? 
'spd_downsample_last_6(xy.x, xy.y, local_invocation_index, mips, num_work_groups, slice);' : ''} 577 | } 578 | 579 | // Entry points ------------------------------------------------------------------------------------------------------- 580 | 581 | @compute 582 | @workgroup_size(256, 1, 1) 583 | fn downsample(@builtin(local_invocation_index) local_invocation_index: u32, @builtin(workgroup_id) workgroup_id: vec3) { 584 | spd_downsample( 585 | workgroup_id.xy + get_work_group_offset(), 586 | local_invocation_index, 587 | get_mips(), 588 | get_num_work_groups(), 589 | workgroup_id.z 590 | ); 591 | } 592 | `; 593 | } 594 | 595 | const SPD_FILTER_AVERAGE: string = /* wgsl */` 596 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 597 | return (v0 + v1 + v2 + v3) * SPDScalar(0.25); 598 | } 599 | `; 600 | 601 | const SPD_FILTER_MIN = /* wgsl */` 602 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 603 | return min(min(v0, v1), min(v2, v3)); 604 | } 605 | `; 606 | 607 | const SPD_FILTER_MAX = /* wgsl */` 608 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 609 | return max(max(v0, v1), max(v2, v3)); 610 | } 611 | `; 612 | 613 | const SPD_FILTER_MINMAX = /* wgsl */` 614 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 615 | let max4 = max(max(v0.xy, v1.xy), max(v2.xy, v3.xy)); 616 | return vec4(min(min(v0.x, v1.x), min(v2.x, v3.x)), max(max4.x, max4.y), 0, 0); 617 | } 618 | `; 619 | 620 | /** 621 | * The names of all predefined filters of {@link WebGPUSinglePassDownsampler}. 622 | * Custom ones can be registered with an instance of {@link WebGPUSinglePassDownsampler} using {@link WebGPUSinglePassDownsampler.registerFilter}. 623 | */ 624 | export enum SPDFilters { 625 | /** 626 | * Takes the channel-wise average of 4 pixels. 627 | */ 628 | Average = 'average', 629 | 630 | /** 631 | * Takes the channel-wise minimum of 4 pixels. 
632 | */ 633 | Min = 'min', 634 | 635 | /** 636 | * Takes the channel-wise maximum of 4 pixels. 637 | */ 638 | Max = 'max', 639 | 640 | /** 641 | * Takes the minimum of the red channel and the maximum of the red and green channel and stores the result in the red and green channel respectively. 642 | * This really only makes sense for single-channel input textures (where only the red channel holds any data), e.g., for generating a min-max pyramid of a depth buffer. 643 | */ 644 | MinMax = 'minmax', 645 | } 646 | 647 | class SPDPassInner { 648 | constructor(private pipeline: GPUComputePipeline, private bindGroups: Array, private dispatchDimensions: [GPUSize32, GPUSize32, GPUSize32]) {} 649 | encode(computePass: GPUComputePassEncoder) { 650 | computePass.setPipeline(this.pipeline); 651 | this.bindGroups.forEach((bindGroup, index) => { 652 | computePass.setBindGroup(index, bindGroup); 653 | }); 654 | computePass.dispatchWorkgroups(this.dispatchDimensions[0], this.dispatchDimensions[1], this.dispatchDimensions[2]); 655 | } 656 | } 657 | 658 | /** 659 | * A compute pass for downsampling a texture. 660 | */ 661 | export class SPDPass { 662 | /** 663 | * The texture the mipmaps will be written to by this {@link SPDPass}, once {@link SPDPass.encode} is called. 664 | */ 665 | readonly target: GPUTexture 666 | 667 | /** @ignore */ 668 | constructor(private passes: Array, target: GPUTexture) { 669 | this.target = target; 670 | } 671 | /** 672 | * Encodes the configured mipmap generation pass(es) with the given {@link GPUComputePassEncoder}. 673 | * All bind groups indices used by {@link SPDPass} are reset to `null` to prevent unintentional bindings of internal bind groups for subsequent pipelines encoded in the same {@link GPUComputePassEncoder}. 674 | * @param computePassEncoder The {@link GPUComputePassEncoder} to encode this mipmap generation pass with. 
675 | * @returns The {@link computePassEncoder} 676 | */ 677 | encode(computePassEncoder: GPUComputePassEncoder): GPUComputePassEncoder { 678 | this.passes.forEach(p => p.encode(computePassEncoder)); 679 | computePassEncoder.setBindGroup(0, null); 680 | computePassEncoder.setBindGroup(1, null); 681 | return computePassEncoder; 682 | } 683 | 684 | /** 685 | * Returns the number of passes that will be encoded by calling this instance's {@link SPDPass.encode} method. 686 | */ 687 | get numPasses(): number { 688 | return this.passes.length 689 | } 690 | } 691 | 692 | enum SPDScalarType { 693 | F32 = 'f32', 694 | F16 = 'f16', 695 | I32 = 'i32', 696 | U32 = 'u32', 697 | } 698 | 699 | /** 700 | * Configuration for {@link WebGPUSinglePassDownsampler.preparePass}. 701 | */ 702 | export interface SPDPassConfig { 703 | /** 704 | * The name of the filter to use for downsampling the given texture. 705 | * Should be one of the filters registered with {@link WebGPUSinglePassDownsampler}. 706 | * Defaults to {@link SPDFilters.Average}. 707 | */ 708 | filter?: string, 709 | 710 | /** 711 | * The target texture the generated mipmaps are written to. 712 | * Its usage must include {@link GPUTextureUsage.STORAGE_BINDING}. 713 | * Its format must support {@link GPUStorageTextureAccess:"write-only"}. 714 | * Its size must be big enough to store the first mip level generated for the input texture. 715 | * It must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 716 | * Defaults to the given input texture. 717 | */ 718 | target?: GPUTexture, 719 | 720 | /** 721 | * The upper left corner of the image region mipmaps should be generated for. 722 | * Defaults to [0,0]. 723 | */ 724 | offset?: [number, number], 725 | 726 | /** 727 | * The size of the image region mipmaps should be generated for. 728 | * Defaults to [texture.width - offset[0], texture.height - offset[1]].
729 | */ 730 | size?: [number, number], 731 | 732 | /** 733 | * The number of mipmaps to generate. 734 | * Defaults to target.mipLevelCount. 735 | */ 736 | numMips?: number, 737 | 738 | /** 739 | * If set to true, will try to use half-precision floats (`f16`) for this combination of texture format and filters. 740 | * Falls back to full precision, if half precision is requested but not supported by the device (feature 'shader-f16' not enabled). 741 | * Falls back to full precision, if the texture format is not a float format. 742 | * Defaults to false. 743 | */ 744 | halfPrecision?: boolean; 745 | } 746 | 747 | interface GPUDownsamplingMeta { 748 | workgroupOffset: [number, number], 749 | numWorkGroups: number, 750 | numMips: number, 751 | numArrayLayers: number, 752 | } 753 | 754 | class SPDPipeline { 755 | constructor(readonly mipsLayout: GPUBindGroupLayout, readonly pipelines: GPUComputePipeline) {} 756 | } 757 | 758 | export interface SPDPrepareFormatDescriptor { 759 | /** 760 | * The texture format to prepare downsampling pipelines for. 761 | */ 762 | format: GPUTextureFormat, 763 | 764 | /** 765 | * The names of downsampling filters that to prepare downsampling pipelines for the given {@link format} for. 766 | * Defaults to {@link SPDFilters.Average}. 767 | */ 768 | filters?: Set, 769 | 770 | /** 771 | * If set to true, will try to use half-precision floats (`f16`) for this combination of texture format and filters. 772 | * Falls back to full precision, if half precision is requested but not supported by the device (feature 'shader-f16' not enabled). 773 | * Falls back to full precision, if the texture format is not a float format. 774 | * Defaults to false. 775 | */ 776 | halfPrecision?: boolean, 777 | } 778 | 779 | export interface SPDPrepareDeviceDescriptor { 780 | /** 781 | * The device to prepare downsampling pipelines for. 782 | */ 783 | device: GPUDevice, 784 | 785 | /** 786 | * The formats to prepare downsampling pipelines for. 
787 | */ 788 | formats?: Array, 789 | 790 | /** 791 | * The maximum number of array layers will be downsampled on the {@link device} within a single pass. 792 | * If a texture has more, downsampling will be split up into multiple passes handling up to this limit of array layers each. 793 | * Defaults to device.limits.maxTextureArrayLayers. 794 | */ 795 | maxArrayLayersPerPass?: number, 796 | 797 | /** 798 | * The maximum number of mip levels that can be generated on the {@link device} within a single pass. 799 | * Note that generating more than 6 mip levels per pass is currently not supported on all platforms. 800 | * Defaults to `Math.min(device.limits.maxStorageTexturesPerShaderStage, 12)`. 801 | */ 802 | maxMipsPerPass?: number, 803 | 804 | /** 805 | * If true, disables all uses of subgroup built-ins by the downsampler even if the `'subgroups'` feature is enabled on the {@link device}. 806 | */ 807 | disableSubgroups?: boolean, 808 | } 809 | 810 | function sanitizeScalarType(device: GPUDevice, format: GPUTextureFormat, halfPrecision: boolean): SPDScalarType { 811 | const texelType = format.toLocaleLowerCase().includes('sint') ? SPDScalarType.I32 : (format.toLocaleLowerCase().includes('uint') ? SPDScalarType.U32 : SPDScalarType.F32); 812 | if (halfPrecision && !device.features.has('shader-f16')) { 813 | console.warn(`[sanitizeScalarType]: half precision requested but the device feature 'shader-f16' is not enabled, falling back to full precision`); 814 | } 815 | if (halfPrecision && texelType !== SPDScalarType.F32) { 816 | console.warn(`[sanitizeScalarType]: half precision requested for non-float format (${format}, uses ${texelType}), falling back to full precision`); 817 | } 818 | return halfPrecision && !device.features.has('shader-f16') && texelType === SPDScalarType.F32 ? 
SPDScalarType.F16 : texelType; 819 | } 820 | 821 | class DevicePipelines { 822 | private device: WeakRef; 823 | private readonly maxMipsPerPass: number; 824 | private readonly maxArrayLayers: number; 825 | private readonly disableSubgroups: boolean; 826 | private readonly internalResourcesBindGroupLayout: GPUBindGroupLayout; 827 | private readonly internalResourcesBindGroupLayout12?: GPUBindGroupLayout; 828 | private readonly internalResourcesBindGroupLayout12RW?: GPUBindGroupLayout; 829 | private atomicCounters: Map; 830 | private midMipBuffers: Map; 831 | private pipelines: Map>>>; 832 | 833 | constructor(device: GPUDevice, maxArrayLayers?: number, maxMipsPerPass?: number, disableSubgroups?: boolean) { 834 | this.device = new WeakRef(device); 835 | this.maxMipsPerPass = Math.min(device.limits.maxStorageTexturesPerShaderStage, maxMipsPerPass ?? 12); 836 | this.maxArrayLayers = Math.min(device.limits.maxTextureArrayLayers, maxArrayLayers ?? device.limits.maxTextureArrayLayers); 837 | this.disableSubgroups = disableSubgroups ?? 
false; 838 | this.pipelines = new Map(); 839 | this.atomicCounters = new Map(); 840 | this.midMipBuffers = new Map(); 841 | 842 | this.internalResourcesBindGroupLayout = device.createBindGroupLayout({ 843 | entries: [{ 844 | binding: 0, 845 | visibility: GPUShaderStage.COMPUTE, 846 | buffer: { 847 | type: 'uniform', 848 | hasDynamicOffset: false, 849 | minBindingSize: 16, 850 | }, 851 | }], 852 | }); 853 | 854 | if (this.maxMipsPerPass > 6) { 855 | this.internalResourcesBindGroupLayout12 = device.createBindGroupLayout({ 856 | entries: [ 857 | { 858 | binding: 0, 859 | visibility: GPUShaderStage.COMPUTE, 860 | buffer: { 861 | type: 'uniform', 862 | hasDynamicOffset: false, 863 | minBindingSize: 16, 864 | }, 865 | }, 866 | { 867 | binding: 1, 868 | visibility: GPUShaderStage.COMPUTE, 869 | buffer: { 870 | type: 'storage', 871 | hasDynamicOffset: false, 872 | minBindingSize: 4, 873 | }, 874 | }, 875 | { 876 | binding: 2, 877 | visibility: GPUShaderStage.COMPUTE, 878 | buffer: { 879 | type: 'storage', 880 | hasDynamicOffset: false, 881 | minBindingSize: 16 * 64 * 64, 882 | }, 883 | }, 884 | ], 885 | }); 886 | this.internalResourcesBindGroupLayout12RW = device.createBindGroupLayout({ 887 | entries: [ 888 | { 889 | binding: 0, 890 | visibility: GPUShaderStage.COMPUTE, 891 | buffer: { 892 | type: 'uniform', 893 | hasDynamicOffset: false, 894 | minBindingSize: 16, 895 | }, 896 | }, 897 | { 898 | binding: 1, 899 | visibility: GPUShaderStage.COMPUTE, 900 | buffer: { 901 | type: 'storage', 902 | hasDynamicOffset: false, 903 | minBindingSize: 4, 904 | }, 905 | }, 906 | ], 907 | }); 908 | } 909 | } 910 | 911 | preparePipelines(pipelineConfigs?: Array) { 912 | const device = this.device.deref(); 913 | if (device) { 914 | pipelineConfigs?.forEach(c => { 915 | const scalarType = sanitizeScalarType(device, c.format, c.halfPrecision ?? false); 916 | Array.from(c.filters ?? 
[SPD_FILTER_AVERAGE]).map(filter => { 917 | for (let i = 0; i < this.maxMipsPerPass; ++i) { 918 | this.getOrCreatePipeline(c.format, filter, i + 1, scalarType); 919 | } 920 | }); 921 | }); 922 | } 923 | } 924 | 925 | private supportsReadWrite(targetFormat: GPUTextureFormat): boolean { 926 | const device = this.device.deref(); 927 | if (!device) { 928 | return false; 929 | } 930 | return WebGPUSinglePassDownsampler.supportedReadWriteFormats.has(targetFormat) || (device.features.has('texture-formats-tier2') && WebGPUSinglePassDownsampler.supportedReadWriteFormatsTier2.has(targetFormat)); 931 | } 932 | 933 | private createPipeline(targetFormat: GPUTextureFormat, filterCode: string, numMips: number, scalarType: SPDScalarType): SPDPipeline | undefined { 934 | const device = this.device.deref(); 935 | if (!device) { 936 | return undefined; 937 | } 938 | 939 | const rwSupport = this.supportsReadWrite(targetFormat); 940 | 941 | const mipsBindGroupLayout = device.createBindGroupLayout({ 942 | entries: Array(Math.min(numMips, this.maxMipsPerPass) + 1).fill(0).map((_, i) => { 943 | const entry: GPUBindGroupLayoutEntry = { 944 | binding: i, 945 | visibility: GPUShaderStage.COMPUTE, 946 | }; 947 | if (i === 0) { 948 | entry.texture = { 949 | sampleType: scalarType === SPDScalarType.I32 ? 'sint' : (scalarType === SPDScalarType.U32 ? 'uint' : 'unfilterable-float'), 950 | viewDimension: '2d-array', 951 | multisampled: false, 952 | }; 953 | } else { 954 | entry.storageTexture = { 955 | access: (i === 6 && numMips > 6 && rwSupport) ? 
'read-write' : 'write-only', 956 | format: targetFormat, 957 | viewDimension: '2d-array', 958 | }; 959 | } 960 | return entry; 961 | }) 962 | }); 963 | 964 | return new SPDPipeline( 965 | mipsBindGroupLayout, 966 | device.createComputePipeline({ 967 | compute: { 968 | module: device.createShaderModule({ 969 | code: makeShaderCode(targetFormat, filterCode, Math.min(numMips, this.maxMipsPerPass), scalarType, device.features.has('subgroups') && !this.disableSubgroups, rwSupport), 970 | }), 971 | entryPoint: 'downsample', 972 | }, 973 | layout: device.createPipelineLayout({ 974 | bindGroupLayouts: [ 975 | mipsBindGroupLayout, 976 | numMips > 6 ? (rwSupport ? this.internalResourcesBindGroupLayout12RW! : this.internalResourcesBindGroupLayout12!) : this.internalResourcesBindGroupLayout, 977 | ], 978 | }), 979 | }), 980 | ); 981 | } 982 | 983 | private getOrCreatePipeline(targetFormat: GPUTextureFormat, filterCode: string, numMipsToCreate: number, scalarType: SPDScalarType): SPDPipeline | undefined { 984 | if (!this.pipelines.has(targetFormat)) { 985 | this.pipelines.set(targetFormat, new Map()); 986 | } 987 | if (!this.pipelines.get(targetFormat)?.has(scalarType)) { 988 | this.pipelines.get(targetFormat)?.set(scalarType, new Map()); 989 | } 990 | if (!this.pipelines.get(targetFormat)?.get(scalarType)?.has(filterCode)) { 991 | this.pipelines.get(targetFormat)?.get(scalarType)?.set(filterCode, new Map()); 992 | } 993 | if (!this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.has(numMipsToCreate)) { 994 | const pipelines = this.createPipeline(targetFormat, filterCode, numMipsToCreate, scalarType); 995 | if (pipelines) { 996 | this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.set(numMipsToCreate, pipelines); 997 | } 998 | } 999 | return this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.get(numMipsToCreate); 1000 | } 1001 | 1002 | private getOrCreateAtomicCountersBuffer(device: GPUDevice, numArrayLayers: number): GPUBuffer { 
1003 | if (!this.atomicCounters.has(numArrayLayers)) { 1004 | const atomicCountersBuffer = device.createBuffer({ 1005 | size: 4 * numArrayLayers, 1006 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, 1007 | }); 1008 | device.queue.writeBuffer(atomicCountersBuffer, 0, new Uint32Array(Array(numArrayLayers).fill(0))); 1009 | this.atomicCounters.set(numArrayLayers, atomicCountersBuffer); 1010 | } 1011 | return this.atomicCounters.get(numArrayLayers)! 1012 | } 1013 | 1014 | private getOrCreateMidMipBuffer(device: GPUDevice, numArrayLayers: number): GPUBuffer { 1015 | if (!this.midMipBuffers.has(numArrayLayers)) { 1016 | this.midMipBuffers.set(numArrayLayers, device.createBuffer({ 1017 | size: 16 * 64 * 64 * numArrayLayers, 1018 | usage: GPUBufferUsage.STORAGE, 1019 | })); 1020 | } 1021 | return this.midMipBuffers.get(numArrayLayers)! 1022 | } 1023 | 1024 | 1025 | private createMetaBindGroup(device: GPUDevice, meta: GPUDownsamplingMeta, halfPrecision: boolean, readWriteSupport: boolean): GPUBindGroup { 1026 | const metaBuffer = device.createBuffer({ 1027 | size: 16, 1028 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST 1029 | }); 1030 | device.queue.writeBuffer(metaBuffer, 0, new Uint32Array([ 1031 | ...meta.workgroupOffset, 1032 | meta.numWorkGroups, 1033 | meta.numMips, 1034 | ])); 1035 | if (meta.numMips > 6) { 1036 | const numArrayLayersForPrecision = halfPrecision ? 
Math.ceil(meta.numArrayLayers / 2) : meta.numArrayLayers; 1037 | if (readWriteSupport) { 1038 | return device.createBindGroup({ 1039 | layout: this.internalResourcesBindGroupLayout12RW!, 1040 | entries: [ 1041 | { 1042 | binding: 0, 1043 | resource: { 1044 | buffer: metaBuffer, 1045 | }, 1046 | }, 1047 | { 1048 | binding: 1, 1049 | resource: { 1050 | buffer: this.getOrCreateAtomicCountersBuffer(device, numArrayLayersForPrecision), 1051 | }, 1052 | }, 1053 | ] 1054 | }); 1055 | } else { 1056 | return device.createBindGroup({ 1057 | layout: this.internalResourcesBindGroupLayout12!, 1058 | entries: [ 1059 | { 1060 | binding: 0, 1061 | resource: { 1062 | buffer: metaBuffer, 1063 | }, 1064 | }, 1065 | { 1066 | binding: 1, 1067 | resource: { 1068 | buffer: this.getOrCreateAtomicCountersBuffer(device, numArrayLayersForPrecision), 1069 | }, 1070 | }, 1071 | { 1072 | binding: 2, 1073 | resource: { 1074 | buffer: this.getOrCreateMidMipBuffer(device, numArrayLayersForPrecision), 1075 | }, 1076 | }, 1077 | ] 1078 | }); 1079 | } 1080 | } else { 1081 | return device.createBindGroup({ 1082 | layout: this.internalResourcesBindGroupLayout, 1083 | entries: [{ 1084 | binding: 0, 1085 | resource: { 1086 | buffer: metaBuffer, 1087 | }, 1088 | }] 1089 | }); 1090 | } 1091 | } 1092 | 1093 | preparePass(texture: GPUTexture, target: GPUTexture, filterCode: string, offset: [number, number], size: [number, number], numMipsTotal: number, scalarType: SPDScalarType): SPDPass | undefined { 1094 | const device = this.device.deref(); 1095 | if (!device) { 1096 | return undefined; 1097 | } 1098 | 1099 | const rwSupport = this.supportsReadWrite(target.format); 1100 | 1101 | const passes = []; 1102 | for (let baseArrayLayer = 0; baseArrayLayer < target.depthOrArrayLayers; baseArrayLayer += this.maxArrayLayers) { 1103 | const numArrayLayersThisPass = Math.min(target.depthOrArrayLayers - baseArrayLayer, this.maxArrayLayers); 1104 | for (let baseMip = 0; baseMip < numMipsTotal - 1; baseMip += 
this.maxMipsPerPass) { 1105 | const numMipsThisPass = Math.min(numMipsTotal - 1 - baseMip, this.maxMipsPerPass); 1106 | 1107 | const baseMipOffset = offset.map(o => Math.trunc(o / Math.pow(2, baseMip))); 1108 | const baseMipSize = size.map(s => Math.max(Math.trunc(s / Math.pow(2, baseMip)), 1)); 1109 | const workgroupOffset = baseMipOffset.map(o => Math.trunc(o / 64)) as [number, number]; 1110 | const dispatchDimensions = baseMipOffset.map((o, i) => Math.trunc((o + baseMipSize[i] - 1) / 64) + 1 - workgroupOffset[i]) as [number, number]; 1111 | const numWorkGroups = dispatchDimensions.reduce((product, v) => v * product, 1); 1112 | 1113 | const metaBindGroup = this.createMetaBindGroup( 1114 | device, 1115 | { 1116 | workgroupOffset, 1117 | numWorkGroups, 1118 | numMips: numMipsThisPass, 1119 | numArrayLayers: numArrayLayersThisPass, 1120 | }, 1121 | scalarType === SPDScalarType.F16, 1122 | rwSupport, 1123 | ); 1124 | 1125 | // todo: handle missing pipeline 1126 | const pipeline = this.getOrCreatePipeline(target.format, filterCode, numMipsThisPass, scalarType)!; 1127 | 1128 | const mipViews = Array(numMipsThisPass + 1).fill(0).map((_, i) => { 1129 | if (baseMip === 0 && i === 0) { 1130 | return texture.createView({ 1131 | dimension: '2d-array', 1132 | baseMipLevel: 0, 1133 | mipLevelCount: 1, 1134 | baseArrayLayer, 1135 | arrayLayerCount: numArrayLayersThisPass, 1136 | }); 1137 | } else { 1138 | const mip = baseMip + i; 1139 | return target.createView({ 1140 | dimension: '2d-array', 1141 | baseMipLevel: texture === target ? 
mip : mip - 1, 1142 | mipLevelCount: 1, 1143 | baseArrayLayer, 1144 | arrayLayerCount: numArrayLayersThisPass, 1145 | }); 1146 | } 1147 | }); 1148 | 1149 | const mipsBindGroup = device.createBindGroup({ 1150 | layout: pipeline.mipsLayout, 1151 | entries: mipViews.map((v, i) => { 1152 | return { 1153 | binding: i, 1154 | resource: v, 1155 | }; 1156 | }), 1157 | }); 1158 | passes.push(new SPDPassInner(pipeline.pipelines, [mipsBindGroup, metaBindGroup], [...dispatchDimensions, numArrayLayersThisPass])); 1159 | } 1160 | } 1161 | return new SPDPass(passes, target); 1162 | } 1163 | } 1164 | 1165 | /** 1166 | * Returns the maximum number of mip levels for a given n-dimensional size. 1167 | * @param size The size to compute the maximum number of mip levels for 1168 | * @returns The maximum number of mip levels for the given size 1169 | */ 1170 | export function maxMipLevelCount(...size: number[]): number { 1171 | return 1 + Math.trunc(Math.log2(Math.max(0, ...size))); 1172 | } 1173 | 1174 | /** 1175 | * A helper class for downsampling 2D {@link GPUTexture} (& arrays) using as few passes as possible on a {@link GPUDevice} depending on its {@link GPUSupportedLimits}. 1176 | * Up to 12 mip levels can be generated within a single pass, if {@link GPUSupportedLimits.maxStorageTexturesPerShaderStage} supports it. 1177 | */ 1178 | export class WebGPUSinglePassDownsampler { 1179 | private filters: Map; 1180 | private devicePipelines: WeakMap; 1181 | 1182 | /** 1183 | * The set of formats supported by WebGPU SPD. 
1184 | */ 1185 | static readonly supportedFormats: Set = new Set([ 1186 | 'rgba8unorm', 1187 | 'rgba8snorm', 1188 | 'rgba8uint', 1189 | 'rgba8sint', 1190 | 'rgba16uint', 1191 | 'rgba16sint', 1192 | 'rgba16float', 1193 | 'r32uint', 1194 | 'r32sint', 1195 | 'r32float', 1196 | 'rg32uint', 1197 | 'rg32sint', 1198 | 'rg32float', 1199 | 'rgba32uint', 1200 | 'rgba32sint', 1201 | 'rgba32float', 1202 | ]); 1203 | 1204 | /** 1205 | * The set of additionally supported formats supported if the feature 'bgra8unorm-storage' is enabled. 1206 | */ 1207 | static readonly supportedFormatsBgra8UnormStorage: Set = new Set([ 1208 | 'bgra8unorm', 1209 | ]); 1210 | 1211 | /** 1212 | * The set of additionally supported formats if the feature 'texture-formats-tier1' is enabled. 1213 | */ 1214 | static readonly supportedFormatsTier1: Set = new Set([ 1215 | 'r8unorm', 1216 | 'r8snorm', 1217 | 'r8uint', 1218 | 'r8sint', 1219 | 'rg8unorm', 1220 | 'rg8snorm', 1221 | 'rg8uint', 1222 | 'rg8sint', 1223 | 'r16unorm', 1224 | 'r16snorm', 1225 | 'r16uint', 1226 | 'r16sint', 1227 | 'r16float', 1228 | 'rg16unorm', 1229 | 'rg16snorm', 1230 | 'rg16uint', 1231 | 'rg16sint', 1232 | 'rg16float', 1233 | 'rgba16unorm', 1234 | 'rgba16snorm', 1235 | 'rgb10a2uint', 1236 | 'rgb10a2unorm', 1237 | 'rg11b10ufloat', 1238 | ]); 1239 | 1240 | /** 1241 | * The set of formats that support read-write access. 1242 | */ 1243 | static readonly supportedReadWriteFormats: Set = new Set([ 1244 | 'r32uint', 1245 | 'r32sint', 1246 | 'r32float', 1247 | ]); 1248 | 1249 | /** 1250 | * The set of formats that support read-write access if the feature 'texture-formats-tier2' is enabled. 
1251 | */ 1252 | static readonly supportedReadWriteFormatsTier2: Set = new Set([ 1253 | 'r8unorm', 1254 | 'r8uint', 1255 | 'r8sint', 1256 | 'rgba8unorm', 1257 | 'rgba8uint', 1258 | 'rgba8sint', 1259 | 'r16uint', 1260 | 'r16sint', 1261 | 'r16float', 1262 | 'rgba16uint', 1263 | 'rgba16sint', 1264 | 'rgba16float', 1265 | 'rgba32uint', 1266 | 'rgba32sint', 1267 | 'rgba32float', 1268 | ]); 1269 | 1270 | /** 1271 | * Sets the preferred device limits for {@link WebGPUSinglePassDownsampler} in a given record of limits. 1272 | * Existing preferred device limits are either increased or left untouched. 1273 | * If {@link limits} is undefined, creates a new record of preferred device limits for {@link WebGPUSinglePassDownsampler}. 1274 | * The result can be used to set {@link GPUDeviceDescriptor.requiredLimits} when requesting a device. 1275 | * @param limits A record of device limits set to update with the preferred limits for {@link WebGPUSinglePassDownsampler} 1276 | * @param adapter If this is set, the preferred limits that are set by this function will be clamped to {@link GPUAdapter.limits}. 1277 | * @returns The updated or created set of device limits with all preferred limits for {@link WebGPUSinglePassDownsampler} set 1278 | */ 1279 | static setPreferredLimits(limits?: Record, adapter?: GPUAdapter): Record { 1280 | if (!limits) { 1281 | limits = {}; 1282 | } 1283 | const maxStorageTexturesPerShaderStage = Math.min(adapter?.limits.maxStorageTexturesPerShaderStage ?? 6, 6); 1284 | limits.maxStorageTexturesPerShaderStage = Math.max(limits.maxStorageTexturesPerShaderStage ?? maxStorageTexturesPerShaderStage, maxStorageTexturesPerShaderStage); 1285 | return limits; 1286 | } 1287 | 1288 | /** 1289 | * Creates a new {@link WebGPUSinglePassDownsampler}. 1290 | * On its own, {@link WebGPUSinglePassDownsampler} does not allocate any GPU resources. 1291 | * Optionally, prepare GPU resources for a given {@link SPDPrepareDeviceDescriptor}. 
1292 | * @param prepareDescriptor An optional descriptor for preparing GPU resources 1293 | * @see WebGPUSinglePassDownsampler.prepareDeviceResources 1294 | */ 1295 | constructor(prepareDescriptor?: SPDPrepareDeviceDescriptor) { 1296 | this.filters = new Map([ 1297 | [SPDFilters.Average, SPD_FILTER_AVERAGE], 1298 | [SPDFilters.Min, SPD_FILTER_MIN], 1299 | [SPDFilters.Max, SPD_FILTER_MAX], 1300 | [SPDFilters.MinMax, SPD_FILTER_MINMAX], 1301 | ]); 1302 | this.devicePipelines = new Map(); 1303 | 1304 | if (prepareDescriptor) { 1305 | this.prepareDeviceResources(prepareDescriptor); 1306 | } 1307 | } 1308 | 1309 | /** 1310 | * Prepares GPU resources required by {@link WebGPUSinglePassDownsampler} to downsample textures for a given {@link SPDPrepareDeviceDescriptor}. 1311 | * @param prepareDescriptor a descriptor for preparing GPU resources 1312 | */ 1313 | prepareDeviceResources(prepareDescriptor: SPDPrepareDeviceDescriptor) { 1314 | this.getOrCreateDevicePipelines(prepareDescriptor.device, prepareDescriptor.maxArrayLayersPerPass, prepareDescriptor.maxMipsPerPass, prepareDescriptor.disableSubgroups)?.preparePipelines(prepareDescriptor?.formats?.map(format => { 1315 | return { 1316 | ...format, 1317 | filters: new Set(Array.from(format.filters ?? []).map(filter => this.filters.get(filter) ?? SPD_FILTER_AVERAGE)), 1318 | }; 1319 | })); 1320 | } 1321 | 1322 | private getOrCreateDevicePipelines(device: GPUDevice, maxArrayLayers?: number, maxMipsPerPass?: number, disableSubgroups?: boolean): DevicePipelines | undefined { 1323 | if (!this.devicePipelines.has(device)) { 1324 | this.devicePipelines.set(device, new DevicePipelines(device, maxArrayLayers, maxMipsPerPass, disableSubgroups)); 1325 | } 1326 | return this.devicePipelines.get(device); 1327 | } 1328 | 1329 | /** 1330 | * Deregisters all resources stored for a given device. 
1331 | * @param device The device resources should be deregistered for 1332 | */ 1333 | deregisterDevice(device: GPUDevice) { 1334 | this.devicePipelines.delete(device); 1335 | } 1336 | 1337 | /** 1338 | * Registers a new downsampling filter operation that can be injected into the downsampling shader for new pipelines. 1339 | * 1340 | * The given WGSL code must (at least) specify a function to reduce four values into one with the following name and signature: 1341 | * 1342 | * `spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4` 1343 | * 1344 | * @param name The unique name of the filter operation 1345 | * @param wgsl The WGSL code to inject into the downsampling shader as the filter operation 1346 | */ 1347 | registerFilter(name: string, wgsl: string) { 1348 | if (this.filters.has(name)) { 1349 | console.warn(`[WebGPUSinglePassDownsampler::registerFilter]: overriding existing filter '${name}'. Previously generated pipelines are not affected.`); 1350 | } 1351 | this.filters.set(name, wgsl); 1352 | } 1353 | 1354 | /** 1355 | * Prepares a pass to downsample a 2d texture / 2d texture array. 1356 | * The produced {@link SPDPass} can be used multiple times to repeatedly downsampling a texture, e.g., for downsampling the depth buffer each frame. 1357 | * For one-time use, {@link WebGPUSinglePassDownsampler.generateMipmaps} can be used instead. 1358 | * 1359 | * By default, the texture is downsampled `texture.mipLevelCount - 1` times using an averaging filter, i.e., 4 pixel values from the parent level are averaged to produce a single pixel in the current mip level. 1360 | * This behavior can be configured using the optional {@link config} parameter. 1361 | * For example, instead of writing the mip levels into the input texture itself, a separate target texture can be specified using {@link SPDPassConfig.target}. 
1362 | * Other configuration options include using a different (possibly custom) filter, only downsampling a subregion of the input texture, and limiting the number of mip levels to generate, e.g., if a min-max pyramid is only needed up to a certain tile resolution. 1363 | * If the given filter does not exist, an averaging filter will be used as a fallback. 1364 | * The image region to downsample and the number of mip levels to generate are clamped to the input texture's size, and the output texture's `mipLevelCount`. 1365 | * 1366 | * Depending on the number of mip levels to generate and the device's `maxStorageTexturesPerShaderStage` limit, the {@link SPDPass} will internally consist of multiple passes, each generating up to `min(maxStorageTexturesPerShaderStage, 12)` mip levels. 1367 | * 1368 | * @param device The device the {@link SPDPass} should be prepared for 1369 | * @param texture The texture that is to be processed by the {@link SPDPass}. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. Must support {@link GPUTextureUsage.TEXTURE_BINDING}, and, if no other target is given, {@link GPUTextureUsage.STORAGE_BINDING}. 1370 | * @param config The config for the {@link SPDPass} 1371 | * @returns The prepared {@link SPDPass} or undefined if preparation failed or if no mipmaps would be generated. 1372 | * @throws If the {@link GPUTextureFormat} of {@link SPDPassConfig.target} is not supported (does not support {@link GPUStorageTextureAccess:"write-only"} on the given {@link device}). 1373 | * @throws If the size of {@link SPDPassConfig.target} is too small to store the first mip level generated for {@link texture} 1374 | * @throws If {@link texture} or {@link SPDPassConfig.target} is not a 2d texture. 
1375 | * @see WebGPUSinglePassDownsampler.generateMipmaps 1376 | * @see WebGPUSinglePassDownsampler.registerFilter 1377 | * @see WebGPUSinglePassDownsampler.setPreferredLimits 1378 | */ 1379 | preparePass(device: GPUDevice, texture: GPUTexture, config?: SPDPassConfig): SPDPass | undefined { 1380 | const target = config?.target ?? texture; 1381 | const filter = config?.filter ?? SPDFilters.Average; 1382 | const offset = (config?.offset ?? [0, 0]).map((o, d) => Math.max(0, Math.min(o, (d === 0 ? texture.width : texture.height) - 1))) as [number, number]; 1383 | const size = (config?.size ?? [texture.width, texture.height]).map((s, d) => Math.max(0, Math.min(s, (d === 0 ? texture.width : texture.height) - offset[d]))) as [number, number]; 1384 | const numMips = Math.min(Math.max(config?.numMips ?? target.mipLevelCount, 0), maxMipLevelCount(...size)); 1385 | 1386 | if (numMips < 2) { 1387 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: no mips to create (numMips = ${numMips})`); 1388 | return undefined; 1389 | } 1390 | if (!(WebGPUSinglePassDownsampler.supportedFormats.has(target.format) || 1391 | (device.features.has('bgra8unorm-storage') && WebGPUSinglePassDownsampler.supportedFormatsBgra8UnormStorage.has(target.format)) || 1392 | ((device.features.has('texture-formats-tier1') || device.features.has('texture-formats-tier2')) && WebGPUSinglePassDownsampler.supportedFormatsTier1.has(target.format)))) 1393 | { 1394 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: format ${target.format} not supported. 
(Supported formats: ${WebGPUSinglePassDownsampler.supportedFormats}, and ${WebGPUSinglePassDownsampler.supportedFormatsBgra8UnormStorage} (if 'bgra8unorm-storage' is enabled), and ${WebGPUSinglePassDownsampler.supportedFormatsTier1} (if 'texture-formats-tier1' is enabled))`); 1395 | } 1396 | if (target.format === 'bgra8unorm' && !device.features.has('bgra8unorm-storage')) { 1397 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: format ${target.format} not supported without feature 'bgra8unorm-storage' enabled`); 1398 | } 1399 | if (target.width < Math.max(1, Math.floor(size[0] / 2)) || target.height < Math.max(1, Math.floor(size[1] / 2))) { 1400 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: target too small (${[target.width, target.height]}) for input size ${size}`); 1401 | } 1402 | if (target.dimension !== '2d' || texture.dimension !== '2d') { 1403 | throw new Error('[WebGPUSinglePassDownsampler::prepare]: texture or target is not a 2d texture'); 1404 | } 1405 | if (!this.filters.has(filter)) { 1406 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: unknown filter ${filter}, falling back to average`); 1407 | } 1408 | if (filter === SPD_FILTER_MINMAX && target.format.includes('r32')) { 1409 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: filter ${filter} makes no sense for one-component target format ${target.format}`); 1410 | } 1411 | const filterCode = this.filters.get(filter) ?? SPD_FILTER_AVERAGE; 1412 | const scalarType = sanitizeScalarType(device, target.format, config?.halfPrecision ?? false); 1413 | 1414 | return this.getOrCreateDevicePipelines(device)?.preparePass(texture, target, filterCode, offset, size, numMips, scalarType); 1415 | } 1416 | 1417 | /** 1418 | * Generates mipmaps for the given texture. 1419 | * For textures that will be downsampled more than once, consider generating a {@link SPDPass} using {@link WebGPUSinglePassDownsampler.preparePass} and calling its {@link SPDPass.encode} method. 
1420 | * This way, allocated GPU resources for downsampling the texture can be reused. 1421 | * @param device The device to use for downsampling the texture 1422 | * @param texture The texture to generate mipmaps for. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 1423 | * @param config The config for mipmap generation 1424 | * @returns True if mipmaps were generated, false otherwise 1425 | * @throws If {@link WebGPUSinglePassDownsampler.preparePass} threw an error. 1426 | * @see WebGPUSinglePassDownsampler.preparePass 1427 | */ 1428 | generateMipmaps(device: GPUDevice, texture: GPUTexture, config?: SPDPassConfig): boolean { 1429 | const pass = this.preparePass(device, texture, config); 1430 | if (!pass) { 1431 | return false; 1432 | } else { 1433 | const commandEncoder = device.createCommandEncoder(); 1434 | pass?.encode(commandEncoder.beginComputePass()).end(); 1435 | device.queue.submit([commandEncoder.finish()]); 1436 | return true; 1437 | } 1438 | } 1439 | } 1440 | 1441 | --------------------------------------------------------------------------------