├── .github └── workflows │ ├── npm-publish.yml │ └── deploy-docs.yml ├── package.json ├── LICENSE ├── CHANGELOG.md ├── .gitignore ├── demo ├── styles.css └── index.html ├── README.md ├── tsconfig.json ├── dist ├── index.d.ts └── index.js └── src └── index.ts /.github/workflows/npm-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run tests using node and then publish a package to GitHub Packages when a release is created 2 | # For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages 3 | 4 | name: Publish Package to npmjs 5 | on: 6 | release: 7 | types: [published] 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | # Setup .npmrc file to publish to npm 14 | - uses: actions/setup-node@v4 15 | with: 16 | node-version: '20.x' 17 | registry-url: 'https://registry.npmjs.org' 18 | - run: npm ci 19 | - run: npm publish 20 | env: 21 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webgpu-spd", 3 | "version": "3.0.0", 4 | "description": "A port of AMD's Single Pass Downsampler for WebGPU", 5 | "type": "module", 6 | "main": "dist/index.js", 7 | "types": "dist/index.d.ts", 8 | "files": [ 9 | "/dist/**" 10 | ], 11 | "scripts": { 12 | "docs": "typedoc src/index.ts", 13 | "build": "tsc" 14 | }, 15 | "repository": { 16 | "type": "git", 17 | "url": "git+https://github.com/JolifantoBambla/webgpu-spd.git" 18 | }, 19 | "keywords": [ 20 | "WebGPU", 21 | "Mipmap", 22 | "GPU", 23 | "Graphics" 24 | ], 25 | "author": "Lukas Herzberger", 26 | "license": "MIT", 27 | "bugs": { 28 | "url": "https://github.com/JolifantoBambla/webgpu-spd/issues" 29 | }, 30 | "homepage": "https://github.com/JolifantoBambla/webgpu-spd#readme", 31 | 
"devDependencies": { 32 | "@webgpu/types": "^0.1.40", 33 | "typedoc": "^0.25.13", 34 | "typescript": "^5.4.5" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Lukas Herzberger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] 4 | 5 | ## [v3.0.0] - 2025-08-03 6 | 7 | ### Added 8 | 9 | - Add support for texture formats enabled by the device feature [texture-formats-tier1](https://www.w3.org/TR/webgpu/#texture-formats-tier1). 
10 | 11 | ### Changed 12 | 13 | - Use subgroup built-ins for downsampling by default if the device feature [subgroups](https://www.w3.org/TR/webgpu/#subgroups) is enabled. 14 | - Move texture format `bgra8unorm` out of `WebGPUSinglePassDownsampler::supportedFormats`. 15 | - If the texture format supports it, bind mip 6 as `'read-write'` storage texture instead of duplicating texture data in an extra buffer in case more than 6 mips are generated per pass. 16 | 17 | ### Fixed 18 | 19 | - Fix handling of barriers for active workgroup counter. 20 | - Cast downsampling weight to concrete scalar type for average filter. 21 | - Fix minor typing issues. 22 | 23 | ## [v2.0.1] - 2024-06-20 24 | 25 | ### Fixed 26 | 27 | - Fix handling of cases where a texture's number of array layers exceeds the maximum number of array layers per pass. 28 | 29 | ## [v2.0.0] - 2024-04-25 30 | 31 | ### Added 32 | 33 | - Add support for specifying the maximum number of array layers that can be downsampled per pass when configuring the device using `SPDPrepareDeviceDescriptor.maxArrayLayersPerPass`. 34 | - Add support for specifying the maximum number of mip levels that can be downsampled per pass when configuring the device using `SPDPrepareDeviceDescriptor.maxMipsPerPass`. 35 | - Add support for using `f16` instead of `f32` during downsampling. 36 | 37 | ### Changed 38 | 39 | - Depending on the limit supported by a device, up to 12 mip levels can be generated within a single pass now. 40 | - `WebGPUSinglePassDownsampler.setPreferredLimits` now accepts an optional `GPUAdapter` as input to clamp this limit to what the adapter allows. 41 | 42 | ### Fixed 43 | 44 | - Fix handling of integer formats (`i32` and `u32`). 
45 | 46 | -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yml: -------------------------------------------------------------------------------- 1 | # Simple workflow for deploying static content to GitHub Pages 2 | name: Deploy docs 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | tags: 8 | - '*' 9 | 10 | # Allows you to run this workflow manually from the Actions tab 11 | workflow_dispatch: 12 | 13 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 14 | permissions: 15 | contents: read 16 | pages: write 17 | id-token: write 18 | 19 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 20 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 21 | concurrency: 22 | group: "pages" 23 | cancel-in-progress: false 24 | 25 | jobs: 26 | # Single deploy job since we're just deploying 27 | deploy: 28 | environment: 29 | name: github-pages 30 | url: ${{ steps.deployment.outputs.page_url }} 31 | runs-on: ubuntu-latest 32 | steps: 33 | - name: Checkout 34 | uses: actions/checkout@v4 35 | - name: Setup Node.js environment 36 | uses: actions/setup-node@v4.0.2 37 | - name: Install Dependencies 38 | run: npm install 39 | - name: Build Docs 40 | run: npm run docs 41 | - name: Publish dist folder with docs 42 | run: | 43 | mkdir -p docs/1.0.0 44 | npm install webgpu-spd@1.0.0 45 | cp -r node_modules/webgpu-spd/dist docs/1.0.0/dist 46 | mkdir -p docs/2.x 47 | npm install webgpu-spd@2.0.1 48 | cp -r node_modules/webgpu-spd/dist docs/2.x/dist 49 | mkdir -p docs/3.x 50 | cp -r dist docs/3.x/dist 51 | cp -r demo docs/demo 52 | - name: Setup Pages 53 | uses: actions/configure-pages@v5 54 | - name: Upload artifact 55 | uses: actions/upload-pages-artifact@v3 56 | with: 57 | path: 'docs/' 58 | - name: Deploy to GitHub Pages 59 | id: deployment 60 | uses: 
actions/deploy-pages@v4 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | 
out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | 93 | # Gatsby files 94 | .cache/ 95 | # Comment in the public line in if your project uses Gatsby and not Next.js 96 | # https://nextjs.org/blog/next-9-1#public-directory-support 97 | # public 98 | 99 | # vuepress build output 100 | .vuepress/dist 101 | 102 | # vuepress v2.x temp and cache directory 103 | .temp 104 | .cache 105 | 106 | # Docusaurus cache and generated files 107 | .docusaurus 108 | 109 | # Serverless directories 110 | .serverless/ 111 | 112 | # FuseBox cache 113 | .fusebox/ 114 | 115 | # DynamoDB Local files 116 | .dynamodb/ 117 | 118 | # TernJS port file 119 | .tern-port 120 | 121 | # Stores VSCode versions used for testing VSCode extensions 122 | .vscode-test 123 | 124 | # yarn v2 125 | .yarn/cache 126 | .yarn/unplugged 127 | .yarn/build-state.yml 128 | .yarn/install-state.gz 129 | .pnp.* 130 | 131 | docs -------------------------------------------------------------------------------- /demo/styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: Arial, sans-serif; 4 | } 5 | 6 | .container { 7 | display: flex; 8 | } 9 | 10 | .sidebar { 11 | position: fixed; 12 | left: 0; 13 | top: 0; 14 | width: 250px; 15 | height: 100%; 16 | background-color: #2c3e50; /* Dark blue background color */ 17 | color: #fff; 18 | padding: 20px; 19 | } 20 | 21 | .sidebar h2, 22 | .sidebar h3 { 23 | color: #fff; 24 | } 25 | 26 | .sidebar h2 { 27 | margin-bottom: 20px; 28 | } 29 | 30 | .sidebar h3 { 31 | margin-bottom: 10px; 32 | } 33 | 34 | .form-section { 35 | margin-bottom: 20px; /* Increased margin for better separation */ 36 | } 37 | 38 | .sidebar label { 39 | margin-bottom: 5px; 40 | } 41 | 42 | .sidebar select, 43 | .sidebar textarea, 44 | .sidebar input[type="number"], 45 | .sidebar input[type="file"], 46 | .sidebar input[type="range"], 47 | .sidebar button[type="button"] { 48 | padding: 8px; 49 | border: none; 50 | 
border-radius: 5px; 51 | width: 100%; 52 | } 53 | 54 | .sidebar textarea { 55 | resize: vertical; /* Allow vertical resizing */ 56 | } 57 | 58 | .sidebar button[type="button"] { 59 | padding: 10px 20px; /* Increased padding for better clickability */ 60 | background-color: #3498db; /* Blue button color */ 61 | color: white; 62 | border: none; 63 | border-radius: 5px; 64 | cursor: pointer; 65 | transition: background-color 0.3s; /* Smooth transition on hover */ 66 | } 67 | 68 | .sidebar button[type="button"]:hover { 69 | background-color: #2980b9; /* Darker blue on hover */ 70 | } 71 | 72 | .main-content { 73 | margin-left: 250px; /* Adjust according to sidebar width */ 74 | padding: 20px; 75 | } 76 | 77 | .additional-coordinates { 78 | display: none; /* Initially hidden */ 79 | } 80 | 81 | .sidebar input[type="checkbox"]:checked ~ .additional-coordinates { 82 | display: block; /* Display when checkbox is checked */ 83 | } 84 | 85 | .input-group { 86 | display: flex; 87 | } 88 | 89 | .input-group input { 90 | flex: 1; 91 | } 92 | 93 | .form-section { 94 | margin-bottom: 20px; /* Increased margin for better separation */ 95 | position: relative; /* Relative positioning for the form section */ 96 | } 97 | 98 | .form-section input[type="checkbox"] { 99 | position: absolute; /* Absolute positioning for the checkbox */ 100 | right: 0; /* Position the checkbox to the right */ 101 | top: 0; /* Align checkbox vertically with the label */ 102 | margin-top: 0; /* Reset margin */ 103 | } 104 | 105 | .output-value { 106 | position: absolute; 107 | top: 0; 108 | right: 0; 109 | font-size: 0.8em; 110 | color: #aaa; 111 | } 112 | 113 | .form-section input[type="text"] { 114 | width: 100%; /* Set width to 100% of the parent container */ 115 | height: 5em; /* Set height to 5 lines */ 116 | padding: 8px; /* Add padding */ 117 | box-sizing: border-box; /* Include padding and border in width/height calculation */ 118 | } 119 | 120 | canvas { 121 | position: fixed; 122 | margin-left: 
290px; 123 | } 124 | 125 | output { 126 | position: absolute; 127 | top: 0; 128 | left: calc(100% - 10px); /* Adjust as needed */ 129 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WebGPU SPD 2 | 3 | A utility library for generating up to 12 mip levels for 2d textures & texture arrays in a single WebGPU compute pass. 4 | 5 | ## Docs 6 | 7 | Find the docs [here](https://jolifantobambla.github.io/webgpu-spd). 8 | 9 | Try it out [here](https://jolifantobambla.github.io/webgpu-spd/demo). 10 | 11 | ## Installation 12 | 13 | ### NPM 14 | ```bash 15 | npm install webgpu-spd 16 | ``` 17 | 18 | ### From GitHub 19 | ```js 20 | import { WebGPUSinglePassDownsampler } from 'https://jolifantobambla.github.io/webgpu-spd/3.x/dist/index.js'; 21 | ``` 22 | 23 | ### From UNPKG 24 | ```js 25 | import { WebGPUSinglePassDownsampler } from 'https://unpkg.com/webgpu-spd@3.0.0/dist/index.js'; 26 | ``` 27 | 28 | ## Usage 29 | 30 | WebGPU SPD downsamples 2d textures and 2d texture arrays using compute pipelines generating up to 12 mip levels in a single pass (all array layers are processed in the same pass). The maximum number of mip levels that can be generated within a single pass depends on the `maxStorageTexturesPerShaderStage` limit supported by the device used. 31 | Should the number of mip levels requested for a texture exceed this limit, multiple passes, generating up to `min(maxStorageTexturesPerShaderStage, 12)` mip levels each, will be used instead. 32 | The mip levels generated for a given input texture are stored either in the input texture or in a separate target texture if specified. 33 | This output texture must support `GPUTextureUsage.STORAGE_BINDING` with access mode `"write-only"`. 
34 | 35 | #### Generate mipmaps 36 | ```js 37 | import { WebGPUSinglePassDownsampler, maxMipLevelCount } from 'webgpu-spd'; 38 | 39 | const downsampler = new WebGPUSinglePassDownsampler(); 40 | 41 | const size = [/* size + array layers */]; 42 | const texture = device.createTexture({ 43 | size, 44 | mipLevelCount: maxMipLevelCount(size[0], size[1]), 45 | format: 'rgba8unorm', 46 | usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING, 47 | }); 48 | 49 | // write mip level 0 50 | 51 | downsampler.generateMipmaps(device, texture); 52 | ``` 53 | 54 | #### Downsample a texture each frame 55 | ```js 56 | import { WebGPUSinglePassDownsampler, SPDFilters } from 'webgpu-spd'; 57 | 58 | // during setup 59 | const downsampler = new WebGPUSinglePassDownsampler(); 60 | const downsampleDepthPass = downsampler.preparePass(device, linearDepthTexture, { filter: SPDFilters.Min }); 61 | 62 | // in render loop 63 | const commandEncoder = device.createCommandEncoder(); 64 | 65 | const computePassEncoder = commandEncoder.beginComputePass(); 66 | downsampleDepthPass.encode(computePassEncoder); 67 | computePassEncoder.end(); 68 | 69 | device.queue.submit([commandEncoder.finish()]); 70 | ``` 71 | 72 | #### Downsample into target 73 | ```js 74 | import { WebGPUSinglePassDownsampler, maxMipLevelCount } from 'webgpu-spd'; 75 | 76 | const downsampler = new WebGPUSinglePassDownsampler(); 77 | 78 | const size = [/* width, height, array layers */]; 79 | const texture = device.createTexture({ 80 | size, 81 | mipLevelCount: 1, 82 | format: 'rgba8unorm', 83 | usage: GPUTextureUsage.TEXTURE_BINDING, 84 | }); 85 | const target = device.createTexture({ 86 | size: [size[0] / 2, size[1] / 2, size[2]], 87 | mipLevelCount: maxMipLevelCount(size[0], size[1]) - 1, 88 | format: 'rgba8unorm', 89 | usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING, 90 | }); 91 | 92 | // write mip level 0 93 | 94 | downsampler.generateMipmaps(device, texture, { target }); 95 | ``` 96 | 
97 | #### Use min-max filter to generate a min-max pyramid for single-channel textures 98 | 99 | The `SPDFilters.MinMax` filter provided by WebGPU SPD is a special filter that is meant to be used with input textures using single-channel formats like `"r32float"`, and a target texture using a two-channel format like `"rg32float"`. 100 | After the downsampling pass, the target texture will contain the minimum values in the red channel and the maximum values in the green channel. 101 | 102 | ```js 103 | import { WebGPUSinglePassDownsampler, SPDFilters, maxMipLevelCount } from 'webgpu-spd'; 104 | 105 | // during setup 106 | const downsampler = new WebGPUSinglePassDownsampler(); 107 | const linearDepth = device.createTexture({ 108 | size: [/* gBuffer size */], 109 | mipLevelCount: 1, 110 | format: 'r32float', 111 | usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING, 112 | }); 113 | const minMaxDepthPyramid = device.createTexture({ 114 | size: [linearDepth.width / 2, linearDepth.height / 2], 115 | mipLevelCount: maxMipLevelCount(linearDepth.width, linearDepth.height) - 1, 116 | format: 'rg32float', 117 | usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.STORAGE_BINDING, 118 | }); 119 | const minMaxDepthPass = downsampler.preparePass(device, linearDepth, { 120 | target: minMaxDepthPyramid, 121 | filter: SPDFilters.MinMax, 122 | }); 123 | 124 | // in render loop 125 | 126 | // ... write mip level 0 of linearDepth 127 | 128 | minMaxDepthPass.encode(computePassEncoder); 129 | ``` 130 | 131 | #### Prepare pipelines for expected formats 132 | 133 | In the above examples, GPU resources, like compute pipelines and bind group layouts etc., are created on the fly the first time a new configuration of `GPUDevice`, `GPUTextureFormat`, filter, and precision is needed. 
134 | 135 | WebGPU SPD also supports allocating resources during setup, like this: 136 | 137 | ```js 138 | import { WebGPUSinglePassDownsampler, SPDFilters, SPDPrecision } from 'webgpu-spd'; 139 | 140 | const downsampler = new WebGPUSinglePassDownsampler({ device, formats: [ 141 | { format: 'rgba8unorm', halfPrecision: true }, 142 | { format: 'r32float', filters: [ SPDFilters.Min ] }, 143 | ]}); 144 | 145 | // alternatively call 146 | downsampler.prepareDeviceResources({ device, formats: [ 147 | { format: 'rgba8unorm', halfPrecision: true }, 148 | { format: 'r32float', filters: [ SPDFilters.Min ] }, 149 | ]}); 150 | ``` 151 | 152 | #### Limit the number of mip levels and array layers per pass 153 | 154 | Generating more than 6 mip levels per pass might not be supported on each platform due to buffers not being coherent by default yet. 155 | WebGPU SPD uses `min(device.limits.maxStorageTexturesPerShaderStage, 12)` by default and can thus be implicitly configured using the device's limit. 156 | However, this might not be desirable in all cases, so WebGPU SPD can be configured to use a different limit by setting the corresponding option when preparing device resources. 157 | 158 | If more than 6 mip levels are downsampled per pass, WebGPU SPD allocates additional internal resources to store intermediate texture data (`16 * 64 * 64 * maxArrayLayersPerPass` bytes) and for control flow purposes (`4 * maxArrayLayersPerPass` bytes). 159 | The size of these resources depends on the number of array layers that can be downsampled each pass. 160 | If a texture's number of array layers exceeds the number of array layers per pass, multiple passes will be used instead. 161 | By default, WebGPU SPD uses the device's `maxTextureArrayLayers` limit. 
162 | 163 | WebGPU SPD can be configured to use different limits like this: 164 | 165 | ```js 166 | import { WebGPUSinglePassDownsampler, SPDFilters } from 'webgpu-spd'; 167 | 168 | const downsampler = new WebGPUSinglePassDownsampler({ device, maxMipsPerPass: 6, maxArrayLayersPerPass: 1 }); 169 | 170 | // alternatively call 171 | downsampler.prepareDeviceResources({ device, maxMipsPerPass: 6, maxArrayLayersPerPass: 1 }); 172 | ``` 173 | 174 | #### Handling device loss 175 | ```js 176 | import { WebGPUSinglePassDownsampler, SPDFilters } from 'webgpu-spd'; 177 | 178 | const formatConfigs = [ 179 | { format: 'rgba8unorm' }, 180 | { format: 'r32float', filters: [ SPDFilters.Min ] }, 181 | ]; 182 | 183 | // on new device 184 | downsampler.deregisterDevice(oldDevice); 185 | downsampler.prepareDeviceResources({ device: newDevice, formats: formatConfigs }); 186 | downsampleTexturePass = downsampler.preparePass(newDevice, texture); 187 | ``` 188 | 189 | #### Use custom filters 190 | 191 | Custom filters for downsampling a quad to a single pixel can be registered with WebGPU SPD using `registerFilter`. 192 | The given WGSL code must at least define a reduction function with the following name and signature: 193 | 194 | ```wgsl 195 | fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> 196 | ``` 197 | 198 | If a filter is known to be only used with a single scalar type (e.g., `u32`), uses of `SPDScalar` can also be replaced by that scalar type. 199 | 200 | For example, a custom filter that only takes a single pixel value out of the four given ones could be implemented and used like this: 201 | 202 | ```js 203 | import { WebGPUSinglePassDownsampler } from 'webgpu-spd'; 204 | 205 | const downsampler = new WebGPUSinglePassDownsampler(); 206 | downsampler.registerFilter('upperLeft', ` 207 | fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> { 208 | return v0; 209 | } 210 | `); 211 | 212 | // ... 
213 | 214 | downsampler.generateMipmaps(device, texture, { filter: 'upperLeft' }); 215 | ``` 216 | 217 | #### Downsample image region 218 | 219 | ```js 220 | import { WebGPUSinglePassDownsampler } from 'webgpu-spd'; 221 | 222 | const downsampler = new WebGPUSinglePassDownsampler(); 223 | 224 | const sizeHalf = [texture.width / 2, texture.height / 2]; 225 | downsampler.generateMipmaps(device, texture, { offset: sizeHalf, size: sizeHalf}); 226 | ``` 227 | 228 | ## Contributions 229 | 230 | Contributions are very welcome. If you find a bug or think some important functionality is missing, please file an issue [here](https://github.com/JolifantoBambla/webgpu-spd/issues). If you want to help out yourself, feel free to submit a pull request [here](https://github.com/JolifantoBambla/webgpu-spd/pulls). 231 | 232 | ## Acknowledgements 233 | 234 | This library is a WebGPU port of the FidelityFX Single Pass Downsampler (SPD) included in AMD's [FidelityFX-SDK](https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK). 235 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig to read more about this file */ 4 | 5 | /* Projects */ 6 | // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */ 7 | // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ 8 | // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */ 9 | // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */ 10 | // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. 
*/ 11 | // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ 12 | 13 | /* Language and Environment */ 14 | "target": "ESNext", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ 15 | //"lib": ["ESNext", "DOM"], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ 16 | // "jsx": "preserve", /* Specify what JSX code is generated. */ 17 | // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ 18 | // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ 19 | // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ 20 | // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ 21 | // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ 22 | // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ 23 | // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ 24 | // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ 25 | // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ 26 | 27 | /* Modules */ 28 | "module": "ESNext", 29 | //"rootDir": "", /* Specify the root folder within your source files. */ 30 | //"moduleResolution": "Node", /* Specify how TypeScript looks up a file from a given module specifier. */ 31 | // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. 
*/ 32 | // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ 33 | // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ 34 | "typeRoots": [ 35 | "./node_modules/@webgpu/types", 36 | "./node_modules/@types", 37 | ], /* Specify multiple folders that act like './node_modules/@types'. */ 38 | // "types": [], /* Specify type package names to be included without being referenced in a source file. */ 39 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ 40 | // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */ 41 | // "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */ 42 | // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */ 43 | // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */ 44 | // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */ 45 | // "resolveJsonModule": true, /* Enable importing .json files. */ 46 | // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */ 47 | // "noResolve": true, /* Disallow 'import's, 'require's or ''s from expanding the number of files TypeScript should add to a project. */ 48 | 49 | /* JavaScript Support */ 50 | // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */ 51 | // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ 52 | // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. 
*/ 53 | 54 | /* Emit */ 55 | "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ 56 | // "declarationMap": true, /* Create sourcemaps for d.ts files. */ 57 | // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ 58 | //"sourceMap": true, /* Create source map files for emitted JavaScript files. */ 59 | // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ 60 | // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */ 61 | "outDir": "dist/", /* Specify an output folder for all emitted files. */ 62 | // "removeComments": true, /* Disable emitting comments. */ 63 | // "noEmit": true, /* Disable emitting files from a compilation. */ 64 | // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ 65 | // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */ 66 | // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ 67 | // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ 68 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ 69 | // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ 70 | // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ 71 | // "newLine": "crlf", /* Set the newline character for emitting files. */ 72 | // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ 73 | // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. 
*/ 74 | // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ 75 | // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ 76 | // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ 77 | // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ 78 | 79 | /* Interop Constraints */ 80 | // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ 81 | // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */ 82 | // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ 83 | "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */ 84 | // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ 85 | "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ 86 | 87 | /* Type Checking */ 88 | "strict": true, /* Enable all strict type-checking options. */ 89 | // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ 90 | // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ 91 | // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ 92 | // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. 
*/ 93 | // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ 94 | // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ 95 | // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ 96 | // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ 97 | // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */ 98 | // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ 99 | // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ 100 | // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ 101 | // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ 102 | // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ 103 | // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ 104 | // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ 105 | // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ 106 | // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ 107 | 108 | /* Completeness */ 109 | // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ 110 | "skipLibCheck": true /* Skip type checking all .d.ts files. 
*/ 111 | }, 112 | "include": [ 113 | "src/*" 114 | ] 115 | } 116 | -------------------------------------------------------------------------------- /dist/index.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /** 3 | * The names of all predefined filters of {@link WebGPUSinglePassDownsampler}. 4 | * Custom ones can be registered with an instance of {@link WebGPUSinglePassDownsampler} using {@link WebGPUSinglePassDownsampler.registerFilter}. 5 | */ 6 | export declare enum SPDFilters { 7 | /** 8 | * Takes the channel-wise average of 4 pixels. 9 | */ 10 | Average = "average", 11 | /** 12 | * Takes the channel-wise minimum of 4 pixels. 13 | */ 14 | Min = "min", 15 | /** 16 | * Takes the channel-wise maximum of 4 pixels. 17 | */ 18 | Max = "max", 19 | /** 20 | * Takes the minimum of the red channel and the maximum of the red and green channel and stores the result in the red and green channel respectively. 21 | * This really only makes sense for single-channel input textures (where only the red channel holds any data), e.g., for generating a min-max pyramid of a depth buffer. 22 | */ 23 | MinMax = "minmax" 24 | } 25 | declare class SPDPassInner { 26 | private pipeline; 27 | private bindGroups; 28 | private dispatchDimensions; 29 | constructor(pipeline: GPUComputePipeline, bindGroups: Array, dispatchDimensions: [GPUSize32, GPUSize32, GPUSize32]); 30 | encode(computePass: GPUComputePassEncoder): void; 31 | } 32 | /** 33 | * A compute pass for downsampling a texture. 34 | */ 35 | export declare class SPDPass { 36 | private passes; 37 | /** 38 | * The texture the mipmaps will be written to by this {@link SPDPass}, once {@link SPDPass.encode} is called. 39 | */ 40 | readonly target: GPUTexture; 41 | /** @ignore */ 42 | constructor(passes: Array, target: GPUTexture); 43 | /** 44 | * Encodes the configured mipmap generation pass(es) with the given {@link GPUComputePassEncoder}. 
45 | * All bind group indices used by {@link SPDPass} are reset to `null` to prevent unintentional bindings of internal bind groups for subsequent pipelines encoded in the same {@link GPUComputePassEncoder}. 46 | * @param computePassEncoder The {@link GPUComputePassEncoder} to encode this mipmap generation pass with. 47 | * @returns The {@link computePassEncoder} 48 | */ 49 | encode(computePassEncoder: GPUComputePassEncoder): GPUComputePassEncoder; 50 | /** 51 | * Returns the number of passes that will be encoded by calling this instance's {@link SPDPass.encode} method. 52 | */ 53 | get numPasses(): number; 54 | } 55 | /** 56 | * Configuration for {@link WebGPUSinglePassDownsampler.preparePass}. 57 | */ 58 | export interface SPDPassConfig { 59 | /** 60 | * The name of the filter to use for downsampling the given texture. 61 | * Should be one of the filters registered with {@link WebGPUSinglePassDownsampler}. 62 | * Defaults to {@link SPDFilters.Average}. 63 | */ 64 | filter?: string; 65 | /** 66 | * The target texture the generated mipmaps are written to. 67 | * Its usage must include {@link GPUTextureUsage.STORAGE_BINDING}. 68 | * Its format must support {@link GPUStorageTextureAccess:"write-only"}. 69 | * Its size must be big enough to store the first mip level generated for the input texture. 70 | * It must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 71 | * Defaults to the given input texture. 72 | */ 73 | target?: GPUTexture; 74 | /** 75 | * The upper left corner of the image region mipmaps should be generated for. 76 | * Defaults to [0,0]. 77 | */ 78 | offset?: [number, number]; 79 | /** 80 | * The size of the image region mipmaps should be generated for. 81 | * Defaults to [texture.width - 1 - offset[0], texture.height - 1 - offset[1]]. 82 | */ 83 | size?: [number, number]; 84 | /** 85 | * The number of mipmaps to generate. 86 | * Defaults to target.mipLevelCount. 
87 | */ 88 | numMips?: number; 89 | /** 90 | * If set to true, will try to use half-precision floats (`f16`) for this combination of texture format and filters. 91 | * Falls back to full precision, if half precision is requested but not supported by the device (feature 'shader-f16' not enabled). 92 | * Falls back to full precision, if the texture format is not a float format. 93 | * Defaults to false. 94 | */ 95 | halfPrecision?: boolean; 96 | } 97 | export interface SPDPrepareFormatDescriptor { 98 | /** 99 | * The texture format to prepare downsampling pipelines for. 100 | */ 101 | format: GPUTextureFormat; 102 | /** 103 | * The names of the downsampling filters to prepare downsampling pipelines for the given {@link format}. 104 | * Defaults to {@link SPDFilters.Average}. 105 | */ 106 | filters?: Set; 107 | /** 108 | * If set to true, will try to use half-precision floats (`f16`) for this combination of texture format and filters. 109 | * Falls back to full precision, if half precision is requested but not supported by the device (feature 'shader-f16' not enabled). 110 | * Falls back to full precision, if the texture format is not a float format. 111 | * Defaults to false. 112 | */ 113 | halfPrecision?: boolean; 114 | } 115 | export interface SPDPrepareDeviceDescriptor { 116 | /** 117 | * The device to prepare downsampling pipelines for. 118 | */ 119 | device: GPUDevice; 120 | /** 121 | * The formats to prepare downsampling pipelines for. 122 | */ 123 | formats?: Array; 124 | /** 125 | * The maximum number of array layers that will be downsampled on the {@link device} within a single pass. 126 | * If a texture has more, downsampling will be split up into multiple passes handling up to this limit of array layers each. 127 | * Defaults to device.limits.maxTextureArrayLayers. 128 | */ 129 | maxArrayLayersPerPass?: number; 130 | /** 131 | * The maximum number of mip levels that can be generated on the {@link device} within a single pass. 
132 | * Note that generating more than 6 mip levels per pass is currently not supported on all platforms. 133 | * Defaults to `Math.min(device.limits.maxStorageTexturesPerShaderStage, 12)`. 134 | */ 135 | maxMipsPerPass?: number; 136 | /** 137 | * If true, disables all uses of subgroup built-ins by the downsampler even if the `'subgroups'` feature is enabled on the {@link device}. 138 | */ 139 | disableSubgroups?: boolean; 140 | } 141 | /** 142 | * Returns the maximum number of mip levels for a given n-dimensional size. 143 | * @param size The size to compute the maximum number of mip levels for 144 | * @returns The maximum number of mip levels for the given size 145 | */ 146 | export declare function maxMipLevelCount(...size: number[]): number; 147 | /** 148 | * A helper class for downsampling 2D {@link GPUTexture} (& arrays) using as few passes as possible on a {@link GPUDevice} depending on its {@link GPUSupportedLimits}. 149 | * Up to 12 mip levels can be generated within a single pass, if {@link GPUSupportedLimits.maxStorageTexturesPerShaderStage} supports it. 150 | */ 151 | export declare class WebGPUSinglePassDownsampler { 152 | private filters; 153 | private devicePipelines; 154 | /** 155 | * The set of formats supported by WebGPU SPD. 156 | */ 157 | static readonly supportedFormats: Set; 158 | /** 159 | * The set of additionally supported formats supported if the feature 'bgra8unorm-storage' is enabled. 160 | */ 161 | static readonly supportedFormatsBgra8UnormStorage: Set; 162 | /** 163 | * The set of additionally supported formats if the feature 'texture-formats-tier1' is enabled. 164 | */ 165 | static readonly supportedFormatsTier1: Set; 166 | /** 167 | * The set of formats that support read-write access. 168 | */ 169 | static readonly supportedReadWriteFormats: Set; 170 | /** 171 | * The set of formats that support read-write access if the feature 'texture-formats-tier2' is enabled. 
172 | */ 173 | static readonly supportedReadWriteFormatsTier2: Set; 174 | /** 175 | * Sets the preferred device limits for {@link WebGPUSinglePassDownsampler} in a given record of limits. 176 | * Existing preferred device limits are either increased or left untouched. 177 | * If {@link limits} is undefined, creates a new record of preferred device limits for {@link WebGPUSinglePassDownsampler}. 178 | * The result can be used to set {@link GPUDeviceDescriptor.requiredLimits} when requesting a device. 179 | * @param limits A record of device limits set to update with the preferred limits for {@link WebGPUSinglePassDownsampler} 180 | * @param adapter If this is set, the preferred limits that are set by this function will be clamped to {@link GPUAdapter.limits}. 181 | * @returns The updated or created set of device limits with all preferred limits for {@link WebGPUSinglePassDownsampler} set 182 | */ 183 | static setPreferredLimits(limits?: Record, adapter?: GPUAdapter): Record; 184 | /** 185 | * Creates a new {@link WebGPUSinglePassDownsampler}. 186 | * On its own, {@link WebGPUSinglePassDownsampler} does not allocate any GPU resources. 187 | * Optionally, prepare GPU resources for a given {@link SPDPrepareDeviceDescriptor}. 188 | * @param prepareDescriptor An optional descriptor for preparing GPU resources 189 | * @see WebGPUSinglePassDownsampler.prepareDeviceResources 190 | */ 191 | constructor(prepareDescriptor?: SPDPrepareDeviceDescriptor); 192 | /** 193 | * Prepares GPU resources required by {@link WebGPUSinglePassDownsampler} to downsample textures for a given {@link SPDPrepareDeviceDescriptor}. 194 | * @param prepareDescriptor a descriptor for preparing GPU resources 195 | */ 196 | prepareDeviceResources(prepareDescriptor: SPDPrepareDeviceDescriptor): void; 197 | private getOrCreateDevicePipelines; 198 | /** 199 | * Deregisters all resources stored for a given device. 
200 | * @param device The device resources should be deregistered for 201 | */ 202 | deregisterDevice(device: GPUDevice): void; 203 | /** 204 | * Registers a new downsampling filter operation that can be injected into the downsampling shader for new pipelines. 205 | * 206 | * The given WGSL code must (at least) specify a function to reduce four values into one with the following name and signature: 207 | * 208 | * `spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4` 209 | * 210 | * @param name The unique name of the filter operation 211 | * @param wgsl The WGSL code to inject into the downsampling shader as the filter operation 212 | */ 213 | registerFilter(name: string, wgsl: string): void; 214 | /** 215 | * Prepares a pass to downsample a 2d texture / 2d texture array. 216 | * The produced {@link SPDPass} can be used multiple times to repeatedly downsample a texture, e.g., for downsampling the depth buffer each frame. 217 | * For one-time use, {@link WebGPUSinglePassDownsampler.generateMipmaps} can be used instead. 218 | * 219 | * By default, the texture is downsampled `texture.mipLevelCount - 1` times using an averaging filter, i.e., 4 pixel values from the parent level are averaged to produce a single pixel in the current mip level. 220 | * This behavior can be configured using the optional {@link config} parameter. 221 | * For example, instead of writing the mip levels into the input texture itself, a separate target texture can be specified using {@link SPDPassConfig.target}. 222 | * Other configuration options include using a different (possibly custom) filter, only downsampling a subregion of the input texture, and limiting the number of mip levels to generate, e.g., if a min-max pyramid is only needed up to a certain tile resolution. 223 | * If the given filter does not exist, an averaging filter will be used as a fallback. 
224 | * The image region to downsample and the number of mip levels to generate are clamped to the input texture's size, and the output texture's `mipLevelCount`. 225 | * 226 | * Depending on the number of mip levels to generate and the device's `maxStorageTexturesPerShaderStage` limit, the {@link SPDPass} will internally consist of multiple passes, each generating up to `min(maxStorageTexturesPerShaderStage, 12)` mip levels. 227 | * 228 | * @param device The device the {@link SPDPass} should be prepared for 229 | * @param texture The texture that is to be processed by the {@link SPDPass}. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. Must support {@link GPUTextureUsage.TEXTURE_BINDING}, and, if no other target is given, {@link GPUTextureUsage.STORAGE_BINDING}. 230 | * @param config The config for the {@link SPDPass} 231 | * @returns The prepared {@link SPDPass} or undefined if preparation failed or if no mipmaps would be generated. 232 | * @throws If the {@link GPUTextureFormat} of {@link SPDPassConfig.target} is not supported (does not support {@link GPUStorageTextureAccess:"write-only"} on the given {@link device}). 233 | * @throws If the size of {@link SPDPassConfig.target} is too small to store the first mip level generated for {@link texture} 234 | * @throws If {@link texture} or {@link SPDPassConfig.target} is not a 2d texture. 235 | * @see WebGPUSinglePassDownsampler.generateMipmaps 236 | * @see WebGPUSinglePassDownsampler.registerFilter 237 | * @see WebGPUSinglePassDownsampler.setPreferredLimits 238 | */ 239 | preparePass(device: GPUDevice, texture: GPUTexture, config?: SPDPassConfig): SPDPass | undefined; 240 | /** 241 | * Generates mipmaps for the given texture. 242 | * For textures that will be downsampled more than once, consider generating a {@link SPDPass} using {@link WebGPUSinglePassDownsampler.preparePass} and calling its {@link SPDPass.encode} method. 
243 | * This way, allocated GPU resources for downsampling the texture can be reused. 244 | * @param device The device to use for downsampling the texture 245 | * @param texture The texture to generate mipmaps for. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 246 | * @param config The config for mipmap generation 247 | * @returns True if mipmaps were generated, false otherwise 248 | * @throws If {@link WebGPUSinglePassDownsampler.preparePass} threw an error. 249 | * @see WebGPUSinglePassDownsampler.preparePass 250 | */ 251 | generateMipmaps(device: GPUDevice, texture: GPUTexture, config?: SPDPassConfig): boolean; 252 | } 253 | export {}; 254 | -------------------------------------------------------------------------------- /demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | WebGPU SPD 7 | 8 | 9 | 10 |
11 | 92 | 93 |
94 | 95 | 338 | 339 | 340 | -------------------------------------------------------------------------------- /dist/index.js: -------------------------------------------------------------------------------- 1 | function makeShaderCode(outputFormat, filterOp = SPD_FILTER_AVERAGE, numMips, scalarType, hasSubgroups, mip6SupportsReadWrite) { 2 | const texelType = scalarType === SPDScalarType.I32 ? 'i32' : (scalarType === SPDScalarType.U32 ? 'u32' : 'f32'); 3 | const useF16 = scalarType === SPDScalarType.F16; 4 | const filterCode = filterOp === SPD_FILTER_AVERAGE && !['f32', 'f16'].includes(texelType) ? filterOp.replace('* SPDScalar(0.25)', '/ 4') : filterOp; 5 | const mipsBindings = Array(numMips).fill(0) 6 | .map((_, i) => { 7 | if (i == 5 && numMips > 6 && mip6SupportsReadWrite) { 8 | return `@group(0) @binding(6) var dst_mip_6: texture_storage_2d_array<${outputFormat}, read_write>;`; 9 | } 10 | return `@group(0) @binding(${i + 1}) var dst_mip_${i + 1}: texture_storage_2d_array<${outputFormat}, write>;`; 11 | }) 12 | .join('\n'); 13 | // todo: get rid of this branching as soon as WGSL supports arrays of texture_storage_2d_array 14 | const mipsAccessorBody = Array(numMips).fill(0) 15 | .map((_, i) => { 16 | if (i == 5 && numMips > 6 && !mip6SupportsReadWrite) { 17 | return ` else if mip == 6 { 18 | textureStore(dst_mip_6, uv, slice, ${useF16 ? `vec4<${texelType}>(value)` : 'value'}); 19 | mip_dst_6_buffer[slice][uv.y][uv.x] = value; 20 | }`; 21 | } 22 | return `${i === 0 ? '' : ' else '}if mip == ${i + 1} { 23 | textureStore(dst_mip_${i + 1}, uv, slice, ${useF16 ? `vec4<${texelType}>(value)` : 'value'}); 24 | }`; 25 | }) 26 | .join(''); 27 | const mipsAccessor = `fn store_dst_mip(value: vec4, uv: vec2, slice: u32, mip: u32) {\n${mipsAccessorBody}\n}`; 28 | const midMipAccessor = mip6SupportsReadWrite ? 
`return vec4(textureLoad(dst_mip_6, uv, slice));` : `return mip_dst_6_buffer[slice][uv.y][uv.x];`; 29 | return /* wgsl */ ` 30 | // This file is part of the FidelityFX SDK. 31 | // 32 | // Copyright (C) 2023 Advanced Micro Devices, Inc. 33 | // 34 | // Permission is hereby granted, free of charge, to any person obtaining a copy 35 | // of this software and associated documentation files(the “Software”), to deal 36 | // in the Software without restriction, including without limitation the rights 37 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 38 | // copies of the Software, and to permit persons to whom the Software is 39 | // furnished to do so, subject to the following conditions : 40 | // 41 | // The above copyright notice and this permission notice shall be included in 42 | // all copies or substantial portions of the Software. 43 | // 44 | // THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 47 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 49 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 50 | // THE SOFTWARE. 51 | 52 | 53 | // Definitions -------------------------------------------------------------------------------------------------------- 54 | 55 | ${useF16 ? 'enable f16;' : ''} 56 | ${hasSubgroups ? 'enable subgroups;' : ''} 57 | 58 | alias SPDScalar = ${scalarType}; 59 | 60 | // Helpers ------------------------------------------------------------------------------------------------------------ 61 | 62 | /** 63 | * A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. 
64 | * 65 | * The 64-wide lane indices to 8x8 remapping is performed as follows: 66 | * 00 01 08 09 10 11 18 19 67 | * 02 03 0a 0b 12 13 1a 1b 68 | * 04 05 0c 0d 14 15 1c 1d 69 | * 06 07 0e 0f 16 17 1e 1f 70 | * 20 21 28 29 30 31 38 39 71 | * 22 23 2a 2b 32 33 3a 3b 72 | * 24 25 2c 2d 34 35 3c 3d 73 | * 26 27 2e 2f 36 37 3e 3f 74 | * 75 | * @param a: The input 1D coordinate to remap. 76 | * 77 | * @returns The remapped 2D coordinates. 78 | */ 79 | fn remap_for_wave_reduction(a: u32) -> vec2 { 80 | return vec2( 81 | insertBits(extractBits(a, 2u, 3u), a, 0u, 1u), 82 | insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u) 83 | ); 84 | } 85 | 86 | fn map_to_xy(local_invocation_index: u32) -> vec2 { 87 | let sub_xy: vec2 = remap_for_wave_reduction(local_invocation_index % 64); 88 | return vec2( 89 | sub_xy.x + 8 * ((local_invocation_index >> 6) % 2), 90 | sub_xy.y + 8 * ((local_invocation_index >> 7)) 91 | ); 92 | } 93 | 94 | /* 95 | * Compute a linear value from a SRGB value. 96 | * 97 | * @param value: The value to convert to linear from SRGB. 98 | * 99 | * @returns A value in SRGB space. 
100 | */ 101 | /* 102 | fn srgb_to_linear(value: SPDScalar) -> SPDScalar { 103 | let j = vec3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); 104 | let k = vec2(1.055, -0.055); 105 | return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); 106 | } 107 | */ 108 | 109 | // Resources & Accessors ----------------------------------------------------------------------------------------------- 110 | struct DownsamplePassMeta { 111 | work_group_offset: vec2, 112 | num_work_groups: u32, 113 | mips: u32, 114 | } 115 | 116 | // In the original version dst_mip_i is an image2Darray [SPD_MAX_MIP_LEVELS+1], i.e., 12+1, but WGSL doesn't support arrays of textures yet 117 | // Also these are read_write because for mips 7-13, the workgroup reads from mip level 6 - since most formats don't support read_write access in WGSL yet, we use a single read_write buffer in such cases instead 118 | @group(0) @binding(0) var src_mip_0: texture_2d_array<${texelType}>; 119 | ${mipsBindings} 120 | 121 | @group(1) @binding(0) var downsample_pass_meta : DownsamplePassMeta; 122 | @group(1) @binding(1) var spd_global_counter: array>; 123 | // this is only used if read_write access is not supported for the texture format 124 | @group(1) @binding(2) var mip_dst_6_buffer: array, 64>, 64>>; 125 | 126 | fn get_mips() -> u32 { 127 | return downsample_pass_meta.mips; 128 | } 129 | 130 | fn get_num_work_groups() -> u32 { 131 | return downsample_pass_meta.num_work_groups; 132 | } 133 | 134 | fn get_work_group_offset() -> vec2 { 135 | return downsample_pass_meta.work_group_offset; 136 | } 137 | 138 | fn load_src_image(uv: vec2, slice: u32) -> vec4 { 139 | return vec4(textureLoad(src_mip_0, uv, slice, 0)); 140 | } 141 | 142 | fn load_mid_mip_image(uv: vec2, slice: u32) -> vec4 { 143 | ${numMips > 6 ? 
midMipAccessor : 'return vec4();'} 144 | } 145 | 146 | ${mipsAccessor} 147 | 148 | // Workgroup ----------------------------------------------------------------------------------------------------------- 149 | 150 | ${useF16 ? ` 151 | var spd_intermediate_rg: array, 16>, 16>; 152 | var spd_intermediate_bg: array, 16>, 16>; 153 | ` : ` 154 | var spd_intermediate_r: array, 16>; 155 | var spd_intermediate_g: array, 16>; 156 | var spd_intermediate_b: array, 16>; 157 | var spd_intermediate_a: array, 16>; 158 | `} 159 | var spd_counter: atomic; 160 | 161 | fn spd_increase_atomic_counter(slice: u32) { 162 | atomicStore(&spd_counter, atomicAdd(&spd_global_counter[slice], 1)); 163 | } 164 | 165 | fn spd_get_atomic_counter() -> u32 { 166 | return workgroupUniformLoad(&spd_counter); 167 | } 168 | 169 | fn spd_reset_atomic_counter(slice: u32) { 170 | atomicStore(&spd_global_counter[slice], 0); 171 | } 172 | 173 | // Cotnrol flow -------------------------------------------------------------------------------------------------------- 174 | 175 | fn spd_barrier() { 176 | // in glsl this does: groupMemoryBarrier(); barrier(); 177 | workgroupBarrier(); 178 | } 179 | 180 | // Only last active workgroup should proceed 181 | fn spd_exit_workgroup(num_work_groups: u32, local_invocation_index: u32, slice: u32) -> bool { 182 | // global atomic counter 183 | if (local_invocation_index == 0) { 184 | spd_increase_atomic_counter(slice); 185 | } 186 | storageBarrier(); 187 | return spd_get_atomic_counter() != (num_work_groups - 1); 188 | } 189 | 190 | // Pixel access -------------------------------------------------------------------------------------------------------- 191 | 192 | ${filterCode} 193 | 194 | ${hasSubgroups ? 
` 195 | fn spd_reduce_quad(value: vec4) -> vec4 { 196 | let v0 = value; 197 | let v1 = quadSwapX(value); 198 | let v2 = quadSwapY(value); 199 | let v3 = quadSwapDiagonal(value); 200 | return spd_reduce_4(v0, v1, v2, v3); 201 | } 202 | ` : ''} 203 | 204 | fn spd_store(pix: vec2, out_value: vec4, mip: u32, slice: u32) { 205 | store_dst_mip(out_value, pix, slice, mip + 1); 206 | } 207 | 208 | fn spd_load_intermediate(x: u32, y: u32) -> vec4 { 209 | return vec4(${useF16 ? ` 210 | spd_intermediate_rg[x][y], 211 | spd_intermediate_ba[x][y],` : ` 212 | spd_intermediate_r[x][y], 213 | spd_intermediate_g[x][y], 214 | spd_intermediate_b[x][y], 215 | spd_intermediate_a[x][y],`}); 216 | } 217 | 218 | fn spd_store_intermediate(x: u32, y: u32, value: vec4) { 219 | ${useF16 ? ` 220 | spd_intermediate_rg[x][y] = value.rg; 221 | spd_intermediate_ba[x][y] = value.ba;` : ` 222 | spd_intermediate_r[x][y] = value.r; 223 | spd_intermediate_g[x][y] = value.g; 224 | spd_intermediate_b[x][y] = value.b; 225 | spd_intermediate_a[x][y] = value.a;`} 226 | } 227 | 228 | fn spd_reduce_intermediate(i0: vec2, i1: vec2, i2: vec2, i3: vec2) -> vec4 { 229 | let v0 = spd_load_intermediate(i0.x, i0.y); 230 | let v1 = spd_load_intermediate(i1.x, i1.y); 231 | let v2 = spd_load_intermediate(i2.x, i2.y); 232 | let v3 = spd_load_intermediate(i3.x, i3.y); 233 | return spd_reduce_4(v0, v1, v2, v3); 234 | } 235 | 236 | fn spd_reduce_load_4(base: vec2, slice: u32) -> vec4 { 237 | let v0 = load_src_image(base + vec2(0, 0), slice); 238 | let v1 = load_src_image(base + vec2(0, 1), slice); 239 | let v2 = load_src_image(base + vec2(1, 0), slice); 240 | let v3 = load_src_image(base + vec2(1, 1), slice); 241 | return spd_reduce_4(v0, v1, v2, v3); 242 | } 243 | 244 | fn spd_reduce_load_mid_mip_4(base: vec2, slice: u32) -> vec4 { 245 | let v0 = load_mid_mip_image(base + vec2(0, 0), slice); 246 | let v1 = load_mid_mip_image(base + vec2(0, 1), slice); 247 | let v2 = load_mid_mip_image(base + vec2(1, 0), slice); 248 | let 
v3 = load_mid_mip_image(base + vec2(1, 1), slice); 249 | return spd_reduce_4(v0, v1, v2, v3); 250 | } 251 | 252 | // Main logic --------------------------------------------------------------------------------------------------------- 253 | 254 | fn spd_downsample_mips_0_1(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 255 | var v: array, 4>; 256 | 257 | let workgroup64 = workgroup_id.xy * 64; 258 | let workgroup32 = workgroup_id.xy * 32; 259 | let workgroup16 = workgroup_id.xy * 16; 260 | 261 | var tex = workgroup64 + vec2(x * 2, y * 2); 262 | var pix = workgroup32 + vec2(x, y); 263 | v[0] = spd_reduce_load_4(tex, slice); 264 | spd_store(pix, v[0], 0, slice); 265 | 266 | tex = workgroup64 + vec2(x * 2 + 32, y * 2); 267 | pix = workgroup32 + vec2(x + 16, y); 268 | v[1] = spd_reduce_load_4(tex, slice); 269 | spd_store(pix, v[1], 0, slice); 270 | 271 | tex = workgroup64 + vec2(x * 2, y * 2 + 32); 272 | pix = workgroup32 + vec2(x, y + 16); 273 | v[2] = spd_reduce_load_4(tex, slice); 274 | spd_store(pix, v[2], 0, slice); 275 | 276 | tex = workgroup64 + vec2(x * 2 + 32, y * 2 + 32); 277 | pix = workgroup32 + vec2(x + 16, y + 16); 278 | v[3] = spd_reduce_load_4(tex, slice); 279 | spd_store(pix, v[3], 0, slice); 280 | 281 | if mip <= 1 { 282 | return; 283 | } 284 | 285 | ${hasSubgroups ? 
` 286 | v[0] = spd_reduce_quad(v[0]); 287 | v[1] = spd_reduce_quad(v[1]); 288 | v[2] = spd_reduce_quad(v[2]); 289 | v[3] = spd_reduce_quad(v[3]); 290 | 291 | if (local_invocation_index % 4) == 0 { 292 | spd_store(workgroup16 + vec2(x / 2, y / 2), v[0], 1, slice); 293 | spd_store_intermediate(x / 2, y / 2, v[0]); 294 | 295 | spd_store(workgroup16 + vec2(x / 2 + 8, y / 2), v[1], 1, slice); 296 | spd_store_intermediate(x / 2 + 8, y / 2, v[1]); 297 | 298 | spd_store(workgroup16 + vec2(x / 2, y / 2 + 8), v[2], 1, slice); 299 | spd_store_intermediate(x / 2, y / 2 + 8, v[2]); 300 | 301 | spd_store(workgroup16 + vec2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); 302 | spd_store_intermediate(x / 2 + 8, y / 2 + 8, v[3]); 303 | } 304 | ` : ` 305 | for (var i = 0u; i < 4u; i++) { 306 | spd_store_intermediate(x, y, v[i]); 307 | spd_barrier(); 308 | if local_invocation_index < 64 { 309 | v[i] = spd_reduce_intermediate( 310 | vec2(x * 2 + 0, y * 2 + 0), 311 | vec2(x * 2 + 1, y * 2 + 0), 312 | vec2(x * 2 + 0, y * 2 + 1), 313 | vec2(x * 2 + 1, y * 2 + 1) 314 | ); 315 | spd_store(workgroup16 + vec2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); 316 | } 317 | spd_barrier(); 318 | } 319 | 320 | if local_invocation_index < 64 { 321 | spd_store_intermediate(x + 0, y + 0, v[0]); 322 | spd_store_intermediate(x + 8, y + 0, v[1]); 323 | spd_store_intermediate(x + 0, y + 8, v[2]); 324 | spd_store_intermediate(x + 8, y + 8, v[3]); 325 | } 326 | `} 327 | } 328 | 329 | fn spd_downsample_mip_2(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 330 | ${hasSubgroups ? 
` 331 | var v = spd_load_intermediate(x, y); 332 | v = spd_reduce_quad(v); 333 | // quad index 0 stores result 334 | if (local_invocation_index % 4) == 0 { 335 | spd_store(workgroup_id.xy * 8 + vec2(x / 2, y / 2), v, mip, slice); 336 | spd_store_intermediate(x + (y / 2) % 2, y, v); 337 | } 338 | ` : ` 339 | if local_invocation_index < 64u { 340 | let v = spd_reduce_intermediate( 341 | vec2(x * 2 + 0, y * 2 + 0), 342 | vec2(x * 2 + 1, y * 2 + 0), 343 | vec2(x * 2 + 0, y * 2 + 1), 344 | vec2(x * 2 + 1, y * 2 + 1) 345 | ); 346 | spd_store(workgroup_id.xy * 8 + vec2(x, y), v, mip, slice); 347 | // store to LDS, try to reduce bank conflicts 348 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 349 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 350 | // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 351 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 352 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 353 | // ... 354 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 355 | spd_store_intermediate(x * 2 + y % 2, y * 2, v); 356 | } 357 | `} 358 | } 359 | 360 | fn spd_downsample_mip_3(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 361 | ${hasSubgroups ? 
` 362 | // subgroup ops must be in uniform control flow 363 | var v = spd_load_intermediate(x * 2 + y % 2, y * 2); 364 | v = spd_reduce_quad(v); 365 | 366 | // quad index 0 stores result 367 | if local_invocation_index < 64u && (local_invocation_index % 4) == 0 { 368 | spd_store(workgroup_id.xy * 4 + vec2(x / 2, y / 2), v, mip, slice); 369 | spd_store_intermediate(x * 2 + y / 2, y * 2, v); 370 | } 371 | ` : ` 372 | if local_invocation_index < 16u { 373 | // x 0 x 0 374 | // 0 0 0 0 375 | // 0 x 0 x 376 | // 0 0 0 0 377 | let v = spd_reduce_intermediate( 378 | vec2(x * 4 + 0 + 0, y * 4 + 0), 379 | vec2(x * 4 + 2 + 0, y * 4 + 0), 380 | vec2(x * 4 + 0 + 1, y * 4 + 2), 381 | vec2(x * 4 + 2 + 1, y * 4 + 2) 382 | ); 383 | spd_store(workgroup_id.xy * 4 + vec2(x, y), v, mip, slice); 384 | // store to LDS 385 | // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 386 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 387 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 388 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 389 | // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 390 | // ... 391 | // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 392 | // ... 393 | // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 394 | // ... 395 | spd_store_intermediate(x * 4 + y, y * 4, v); 396 | } 397 | `} 398 | } 399 | 400 | fn spd_downsample_mip_4(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 401 | ${hasSubgroups ? ` 402 | // subgroup ops must be in uniform control flow 403 | var v = spd_load_intermediate(x * 4 + y, y * 4); 404 | v = spd_reduce_quad(v); 405 | 406 | // quad index 0 stores result 407 | if local_invocation_index < 16u && (local_invocation_index % 4) == 0 { 408 | spd_store(workgroup_id.xy * 2 + vec2(x / 2, y / 2), v, mip, slice); 409 | spd_store_intermediate(x / 2 + y, 0, v); 410 | } 411 | ` : ` 412 | if local_invocation_index < 4u { 413 | // x 0 0 0 x 0 0 0 414 | // ... 
415 | // 0 x 0 0 0 x 0 0 416 | let v = spd_reduce_intermediate( 417 | vec2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), 418 | vec2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), 419 | vec2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), 420 | vec2(x * 8 + 4 + 1 + y * 2, y * 8 + 4) 421 | ); 422 | spd_store(workgroup_id.xy * 2 + vec2(x, y), v, mip, slice); 423 | // store to LDS 424 | // x x x x 0 ... 425 | // 0 ... 426 | spd_store_intermediate(x + y * 2, 0, v); 427 | } 428 | `} 429 | } 430 | 431 | fn spd_downsample_mip_5(workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 432 | ${hasSubgroups ? ` 433 | // subgroup ops must be in uniform control flow 434 | var v = spd_load_intermediate(local_invocation_index, 0); 435 | v = spd_reduce_quad(v); 436 | 437 | // quad index 0 stores result 438 | if local_invocation_index < 4u && (local_invocation_index % 4) == 0 { 439 | spd_store(workgroup_id.xy, v, mip, slice); 440 | } 441 | ` : ` 442 | if local_invocation_index < 1u { 443 | // x x x x 0 ... 444 | // 0 ... 445 | let v = spd_reduce_intermediate(vec2(0, 0), vec2(1, 0), vec2(2, 0), vec2(3, 0)); 446 | spd_store(workgroup_id.xy, v, mip, slice); 447 | } 448 | `} 449 | } 450 | 451 | fn spd_downsample_next_four(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) { 452 | if mips <= base_mip { 453 | return; 454 | } 455 | spd_barrier(); 456 | spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice); 457 | 458 | if mips <= base_mip + 1 { 459 | return; 460 | } 461 | spd_barrier(); 462 | spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice); 463 | 464 | if mips <= base_mip + 2 { 465 | return; 466 | } 467 | spd_barrier(); 468 | spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice); 469 | 470 | if mips <= base_mip + 3 { 471 | return; 472 | } 473 | spd_barrier(); 474 | spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice); 475 | } 476 | 477 
| fn spd_downsample_last_four(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) { 478 | if mips <= base_mip { 479 | return; 480 | } 481 | spd_barrier(); 482 | spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice); 483 | 484 | if mips <= base_mip + 1 { 485 | return; 486 | } 487 | spd_barrier(); 488 | spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice); 489 | 490 | if mips <= base_mip + 2 { 491 | return; 492 | } 493 | spd_barrier(); 494 | spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice); 495 | 496 | if mips <= base_mip + 3 { 497 | return; 498 | } 499 | spd_barrier(); 500 | spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice); 501 | } 502 | 503 | fn spd_downsample_mips_6_7(x: u32, y: u32, mips: u32, slice: u32) { 504 | ${mip6SupportsReadWrite ? 'textureBarrier();' : ''} 505 | 506 | var tex = vec2(x * 4 + 0, y * 4 + 0); 507 | var pix = vec2(x * 2 + 0, y * 2 + 0); 508 | let v0 = spd_reduce_load_mid_mip_4(tex, slice); 509 | spd_store(pix, v0, 6, slice); 510 | 511 | tex = vec2(x * 4 + 2, y * 4 + 0); 512 | pix = vec2(x * 2 + 1, y * 2 + 0); 513 | let v1 = spd_reduce_load_mid_mip_4(tex, slice); 514 | spd_store(pix, v1, 6, slice); 515 | 516 | tex = vec2(x * 4 + 0, y * 4 + 2); 517 | pix = vec2(x * 2 + 0, y * 2 + 1); 518 | let v2 = spd_reduce_load_mid_mip_4(tex, slice); 519 | spd_store(pix, v2, 6, slice); 520 | 521 | tex = vec2(x * 4 + 2, y * 4 + 2); 522 | pix = vec2(x * 2 + 1, y * 2 + 1); 523 | let v3 = spd_reduce_load_mid_mip_4(tex, slice); 524 | spd_store(pix, v3, 6, slice); 525 | 526 | if mips <= 7 { 527 | return; 528 | } 529 | // no barrier needed, working on values only from the same thread 530 | 531 | let v = spd_reduce_4(v0, v1, v2, v3); 532 | spd_store(vec2(x, y), v, 7, slice); 533 | spd_store_intermediate(x, y, v); 534 | } 535 | 536 | fn spd_downsample_last_6(x: u32, y: u32, 
local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) { 537 | if mips <= 6 { 538 | return; 539 | } 540 | 541 | // increase the global atomic counter for the given slice and check if it's the last remaining thread group: 542 | // terminate if not, continue if yes. 543 | if spd_exit_workgroup(num_work_groups, local_invocation_index, slice) { 544 | return; 545 | } 546 | 547 | // reset the global atomic counter back to 0 for the next spd dispatch 548 | spd_reset_atomic_counter(slice); 549 | 550 | // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. 551 | // compute MIP level 6 and 7 552 | spd_downsample_mips_6_7(x, y, mips, slice); 553 | 554 | // compute MIP level 8, 9, 10, 11 555 | spd_downsample_last_four(x, y, vec2(0, 0), local_invocation_index, 8, mips, slice); 556 | } 557 | 558 | /// Downsamples a 64x64 tile based on the work group id. 559 | /// If after downsampling it's the last active thread group, computes the remaining MIP levels. 560 | /// 561 | /// @param [in] workGroupID index of the work group / thread group 562 | /// @param [in] localInvocationIndex index of the thread within the thread group in 1D 563 | /// @param [in] mips the number of total MIP levels to compute for the input texture 564 | /// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice 565 | /// @param [in] slice the slice of the input texture 566 | fn spd_downsample(workgroup_id: vec2, local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) { 567 | let xy = map_to_xy(local_invocation_index); 568 | spd_downsample_mips_0_1(xy.x, xy.y, workgroup_id, local_invocation_index, mips, slice); 569 | spd_downsample_next_four(xy.x, xy.y, workgroup_id, local_invocation_index, 2, mips, slice); 570 | ${numMips > 6 ? 
'spd_downsample_last_6(xy.x, xy.y, local_invocation_index, mips, num_work_groups, slice);' : ''} 571 | } 572 | 573 | // Entry points ------------------------------------------------------------------------------------------------------- 574 | 575 | @compute 576 | @workgroup_size(256, 1, 1) 577 | fn downsample(@builtin(local_invocation_index) local_invocation_index: u32, @builtin(workgroup_id) workgroup_id: vec3) { 578 | spd_downsample( 579 | workgroup_id.xy + get_work_group_offset(), 580 | local_invocation_index, 581 | get_mips(), 582 | get_num_work_groups(), 583 | workgroup_id.z 584 | ); 585 | } 586 | `; 587 | } 588 | const SPD_FILTER_AVERAGE = /* wgsl */ ` 589 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 590 | return (v0 + v1 + v2 + v3) * SPDScalar(0.25); 591 | } 592 | `; 593 | const SPD_FILTER_MIN = /* wgsl */ ` 594 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 595 | return min(min(v0, v1), min(v2, v3)); 596 | } 597 | `; 598 | const SPD_FILTER_MAX = /* wgsl */ ` 599 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 600 | return max(max(v0, v1), max(v2, v3)); 601 | } 602 | `; 603 | const SPD_FILTER_MINMAX = /* wgsl */ ` 604 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 605 | let max4 = max(max(v0.xy, v1.xy), max(v2.xy, v3.xy)); 606 | return vec4(min(min(v0.x, v1.x), min(v2.x, v3.x)), max(max4.x, max4.y), 0, 0); 607 | } 608 | `; 609 | /** 610 | * The names of all predefined filters of {@link WebGPUSinglePassDownsampler}. 611 | * Custom ones can be registered with an instance of {@link WebGPUSinglePassDownsampler} using {@link WebGPUSinglePassDownsampler.registerFilter}. 612 | */ 613 | export var SPDFilters; 614 | (function (SPDFilters) { 615 | /** 616 | * Takes the channel-wise average of 4 pixels. 617 | */ 618 | SPDFilters["Average"] = "average"; 619 | /** 620 | * Takes the channel-wise minimum of 4 pixels. 
 */
    SPDFilters["Min"] = "min";
    /**
     * Takes the channel-wise maximum of 4 pixels.
     */
    SPDFilters["Max"] = "max";
    /**
     * Takes the minimum of the red channel and the maximum of the red and green channel and stores the result in the red and green channel respectively.
     * This really only makes sense for single-channel input textures (where only the red channel holds any data), e.g., for generating a min-max pyramid of a depth buffer.
     */
    SPDFilters["MinMax"] = "minmax";
})(SPDFilters || (SPDFilters = {}));
// Internal: one compute dispatch of the downsampler — the pipeline to run, the
// bind groups it needs, and the 3D workgroup grid to dispatch. Instances are
// created by DevicePipelines.preparePass and replayed by SPDPass.encode.
class SPDPassInner {
    pipeline;
    bindGroups;
    dispatchDimensions;
    constructor(pipeline, bindGroups, dispatchDimensions) {
        this.pipeline = pipeline;
        this.bindGroups = bindGroups;
        this.dispatchDimensions = dispatchDimensions;
    }
    // Records this dispatch into the given compute pass: sets the pipeline,
    // binds each bind group at its index, and dispatches the workgroup grid
    // (x, y = 64x64 tiles of the base mip, z = array layers).
    encode(computePass) {
        computePass.setPipeline(this.pipeline);
        this.bindGroups.forEach((bindGroup, index) => {
            computePass.setBindGroup(index, bindGroup);
        });
        computePass.dispatchWorkgroups(this.dispatchDimensions[0], this.dispatchDimensions[1], this.dispatchDimensions[2]);
    }
}
/**
 * A compute pass for downsampling a texture.
 */
export class SPDPass {
    passes;
    /**
     * The texture the mipmaps will be written to by this {@link SPDPass}, once {@link SPDPass.encode} is called.
     */
    target;
    /** @ignore */
    constructor(passes, target) {
        this.passes = passes;
        this.target = target;
    }
    /**
     * Encodes the configured mipmap generation pass(es) with the given {@link GPUComputePassEncoder}.
     * All bind groups indices used by {@link SPDPass} are reset to `null` to prevent unintentional bindings of internal bind groups for subsequent pipelines encoded in the same {@link GPUComputePassEncoder}.
     * @param computePassEncoder The {@link GPUComputePassEncoder} to encode this mipmap generation pass with.
668 | * @returns The {@link computePassEncoder} 669 | */ 670 | encode(computePassEncoder) { 671 | this.passes.forEach(p => p.encode(computePassEncoder)); 672 | computePassEncoder.setBindGroup(0, null); 673 | computePassEncoder.setBindGroup(1, null); 674 | return computePassEncoder; 675 | } 676 | /** 677 | * Returns the number of passes that will be encoded by calling this instance's {@link SPDPass.encode} method. 678 | */ 679 | get numPasses() { 680 | return this.passes.length; 681 | } 682 | } 683 | var SPDScalarType; 684 | (function (SPDScalarType) { 685 | SPDScalarType["F32"] = "f32"; 686 | SPDScalarType["F16"] = "f16"; 687 | SPDScalarType["I32"] = "i32"; 688 | SPDScalarType["U32"] = "u32"; 689 | })(SPDScalarType || (SPDScalarType = {})); 690 | class SPDPipeline { 691 | mipsLayout; 692 | pipelines; 693 | constructor(mipsLayout, pipelines) { 694 | this.mipsLayout = mipsLayout; 695 | this.pipelines = pipelines; 696 | } 697 | } 698 | function sanitizeScalarType(device, format, halfPrecision) { 699 | const texelType = format.toLocaleLowerCase().includes('sint') ? SPDScalarType.I32 : (format.toLocaleLowerCase().includes('uint') ? SPDScalarType.U32 : SPDScalarType.F32); 700 | if (halfPrecision && !device.features.has('shader-f16')) { 701 | console.warn(`[sanitizeScalarType]: half precision requested but the device feature 'shader-f16' is not enabled, falling back to full precision`); 702 | } 703 | if (halfPrecision && texelType !== SPDScalarType.F32) { 704 | console.warn(`[sanitizeScalarType]: half precision requested for non-float format (${format}, uses ${texelType}), falling back to full precision`); 705 | } 706 | return halfPrecision && !device.features.has('shader-f16') && texelType === SPDScalarType.F32 ? 
SPDScalarType.F16 : texelType;
}
// Per-device cache of SPD compute pipelines and internal GPU resources
// (bind group layouts, atomic counter buffers, mid-mip buffers).
class DevicePipelines {
    device;
    maxMipsPerPass;
    maxArrayLayers;
    disableSubgroups;
    internalResourcesBindGroupLayout;
    internalResourcesBindGroupLayout12;
    internalResourcesBindGroupLayout12RW;
    atomicCounters;
    midMipBuffers;
    pipelines;
    constructor(device, maxArrayLayers, maxMipsPerPass, disableSubgroups) {
        // Weak reference: this cache must not keep the device alive. All users
        // call deref() and bail out if the device has been collected.
        this.device = new WeakRef(device);
        // Each generated mip needs its own storage texture binding, so mips per
        // pass are capped by the device's storage-texture limit (and by SPD's
        // maximum of 12).
        this.maxMipsPerPass = Math.min(device.limits.maxStorageTexturesPerShaderStage, maxMipsPerPass ?? 12);
        this.maxArrayLayers = Math.min(device.limits.maxTextureArrayLayers, maxArrayLayers ?? device.limits.maxTextureArrayLayers);
        this.disableSubgroups = disableSubgroups ?? false;
        this.pipelines = new Map();
        this.atomicCounters = new Map();
        this.midMipBuffers = new Map();
        // Layout for passes generating <= 6 mips: only the 16-byte meta uniform
        // (workgroup offset, num workgroups, num mips).
        this.internalResourcesBindGroupLayout = device.createBindGroupLayout({
            entries: [{
                    binding: 0,
                    visibility: GPUShaderStage.COMPUTE,
                    buffer: {
                        type: 'uniform',
                        hasDynamicOffset: false,
                        minBindingSize: 16,
                    },
                }],
        });
        if (this.maxMipsPerPass > 6) {
            // Layout for passes generating > 6 mips WITHOUT read-write storage
            // texture support: meta uniform + atomic workgroup counter + a
            // storage buffer holding the intermediate 64x64 mid mip
            // (16 bytes per texel, hence 16 * 64 * 64).
            this.internalResourcesBindGroupLayout12 = device.createBindGroupLayout({
                entries: [
                    {
                        binding: 0,
                        visibility: GPUShaderStage.COMPUTE,
                        buffer: {
                            type: 'uniform',
                            hasDynamicOffset: false,
                            minBindingSize: 16,
                        },
                    },
                    {
                        binding: 1,
                        visibility: GPUShaderStage.COMPUTE,
                        buffer: {
                            type: 'storage',
                            hasDynamicOffset: false,
                            minBindingSize: 4,
                        },
                    },
                    {
                        binding: 2,
                        visibility: GPUShaderStage.COMPUTE,
                        buffer: {
                            type: 'storage',
                            hasDynamicOffset: false,
                            minBindingSize: 16 * 64 * 64,
                        },
                    },
                ],
            });
            // Read-write variant: the mid mip is read back through a read-write
            // storage texture (see createPipeline), so no mid-mip buffer is
            // needed — only the meta uniform and the atomic counter.
            this.internalResourcesBindGroupLayout12RW = device.createBindGroupLayout({
                entries: [
                    {
                        binding: 0,
                        visibility:
GPUShaderStage.COMPUTE, 775 | buffer: { 776 | type: 'uniform', 777 | hasDynamicOffset: false, 778 | minBindingSize: 16, 779 | }, 780 | }, 781 | { 782 | binding: 1, 783 | visibility: GPUShaderStage.COMPUTE, 784 | buffer: { 785 | type: 'storage', 786 | hasDynamicOffset: false, 787 | minBindingSize: 4, 788 | }, 789 | }, 790 | ], 791 | }); 792 | } 793 | } 794 | preparePipelines(pipelineConfigs) { 795 | const device = this.device.deref(); 796 | if (device) { 797 | pipelineConfigs?.forEach(c => { 798 | const scalarType = sanitizeScalarType(device, c.format, c.halfPrecision ?? false); 799 | Array.from(c.filters ?? [SPD_FILTER_AVERAGE]).map(filter => { 800 | for (let i = 0; i < this.maxMipsPerPass; ++i) { 801 | this.getOrCreatePipeline(c.format, filter, i + 1, scalarType); 802 | } 803 | }); 804 | }); 805 | } 806 | } 807 | supportsReadWrite(targetFormat) { 808 | const device = this.device.deref(); 809 | if (!device) { 810 | return false; 811 | } 812 | return WebGPUSinglePassDownsampler.supportedReadWriteFormats.has(targetFormat) || (device.features.has('texture-formats-tier2') && WebGPUSinglePassDownsampler.supportedReadWriteFormatsTier2.has(targetFormat)); 813 | } 814 | createPipeline(targetFormat, filterCode, numMips, scalarType) { 815 | const device = this.device.deref(); 816 | if (!device) { 817 | return undefined; 818 | } 819 | const rwSupport = this.supportsReadWrite(targetFormat); 820 | const mipsBindGroupLayout = device.createBindGroupLayout({ 821 | entries: Array(Math.min(numMips, this.maxMipsPerPass) + 1).fill(0).map((_, i) => { 822 | const entry = { 823 | binding: i, 824 | visibility: GPUShaderStage.COMPUTE, 825 | }; 826 | if (i === 0) { 827 | entry.texture = { 828 | sampleType: scalarType === SPDScalarType.I32 ? 'sint' : (scalarType === SPDScalarType.U32 ? 'uint' : 'unfilterable-float'), 829 | viewDimension: '2d-array', 830 | multisampled: false, 831 | }; 832 | } 833 | else { 834 | entry.storageTexture = { 835 | access: (i === 6 && numMips > 6 && rwSupport) ? 
'read-write' : 'write-only', 836 | format: targetFormat, 837 | viewDimension: '2d-array', 838 | }; 839 | } 840 | return entry; 841 | }) 842 | }); 843 | return new SPDPipeline(mipsBindGroupLayout, device.createComputePipeline({ 844 | compute: { 845 | module: device.createShaderModule({ 846 | code: makeShaderCode(targetFormat, filterCode, Math.min(numMips, this.maxMipsPerPass), scalarType, device.features.has('subgroups') && !this.disableSubgroups, rwSupport), 847 | }), 848 | entryPoint: 'downsample', 849 | }, 850 | layout: device.createPipelineLayout({ 851 | bindGroupLayouts: [ 852 | mipsBindGroupLayout, 853 | numMips > 6 ? (rwSupport ? this.internalResourcesBindGroupLayout12RW : this.internalResourcesBindGroupLayout12) : this.internalResourcesBindGroupLayout, 854 | ], 855 | }), 856 | })); 857 | } 858 | getOrCreatePipeline(targetFormat, filterCode, numMipsToCreate, scalarType) { 859 | if (!this.pipelines.has(targetFormat)) { 860 | this.pipelines.set(targetFormat, new Map()); 861 | } 862 | if (!this.pipelines.get(targetFormat)?.has(scalarType)) { 863 | this.pipelines.get(targetFormat)?.set(scalarType, new Map()); 864 | } 865 | if (!this.pipelines.get(targetFormat)?.get(scalarType)?.has(filterCode)) { 866 | this.pipelines.get(targetFormat)?.get(scalarType)?.set(filterCode, new Map()); 867 | } 868 | if (!this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.has(numMipsToCreate)) { 869 | const pipelines = this.createPipeline(targetFormat, filterCode, numMipsToCreate, scalarType); 870 | if (pipelines) { 871 | this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.set(numMipsToCreate, pipelines); 872 | } 873 | } 874 | return this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.get(numMipsToCreate); 875 | } 876 | getOrCreateAtomicCountersBuffer(device, numArrayLayers) { 877 | if (!this.atomicCounters.has(numArrayLayers)) { 878 | const atomicCountersBuffer = device.createBuffer({ 879 | size: 4 * numArrayLayers, 880 | usage: 
GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, 881 | }); 882 | device.queue.writeBuffer(atomicCountersBuffer, 0, new Uint32Array(Array(numArrayLayers).fill(0))); 883 | this.atomicCounters.set(numArrayLayers, atomicCountersBuffer); 884 | } 885 | return this.atomicCounters.get(numArrayLayers); 886 | } 887 | getOrCreateMidMipBuffer(device, numArrayLayers) { 888 | if (!this.midMipBuffers.has(numArrayLayers)) { 889 | this.midMipBuffers.set(numArrayLayers, device.createBuffer({ 890 | size: 16 * 64 * 64 * numArrayLayers, 891 | usage: GPUBufferUsage.STORAGE, 892 | })); 893 | } 894 | return this.midMipBuffers.get(numArrayLayers); 895 | } 896 | createMetaBindGroup(device, meta, halfPrecision, readWriteSupport) { 897 | const metaBuffer = device.createBuffer({ 898 | size: 16, 899 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST 900 | }); 901 | device.queue.writeBuffer(metaBuffer, 0, new Uint32Array([ 902 | ...meta.workgroupOffset, 903 | meta.numWorkGroups, 904 | meta.numMips, 905 | ])); 906 | if (meta.numMips > 6) { 907 | const numArrayLayersForPrecision = halfPrecision ? 
Math.ceil(meta.numArrayLayers / 2) : meta.numArrayLayers; 908 | if (readWriteSupport) { 909 | return device.createBindGroup({ 910 | layout: this.internalResourcesBindGroupLayout12RW, 911 | entries: [ 912 | { 913 | binding: 0, 914 | resource: { 915 | buffer: metaBuffer, 916 | }, 917 | }, 918 | { 919 | binding: 1, 920 | resource: { 921 | buffer: this.getOrCreateAtomicCountersBuffer(device, numArrayLayersForPrecision), 922 | }, 923 | }, 924 | ] 925 | }); 926 | } 927 | else { 928 | return device.createBindGroup({ 929 | layout: this.internalResourcesBindGroupLayout12, 930 | entries: [ 931 | { 932 | binding: 0, 933 | resource: { 934 | buffer: metaBuffer, 935 | }, 936 | }, 937 | { 938 | binding: 1, 939 | resource: { 940 | buffer: this.getOrCreateAtomicCountersBuffer(device, numArrayLayersForPrecision), 941 | }, 942 | }, 943 | { 944 | binding: 2, 945 | resource: { 946 | buffer: this.getOrCreateMidMipBuffer(device, numArrayLayersForPrecision), 947 | }, 948 | }, 949 | ] 950 | }); 951 | } 952 | } 953 | else { 954 | return device.createBindGroup({ 955 | layout: this.internalResourcesBindGroupLayout, 956 | entries: [{ 957 | binding: 0, 958 | resource: { 959 | buffer: metaBuffer, 960 | }, 961 | }] 962 | }); 963 | } 964 | } 965 | preparePass(texture, target, filterCode, offset, size, numMipsTotal, scalarType) { 966 | const device = this.device.deref(); 967 | if (!device) { 968 | return undefined; 969 | } 970 | const rwSupport = this.supportsReadWrite(target.format); 971 | const passes = []; 972 | for (let baseArrayLayer = 0; baseArrayLayer < target.depthOrArrayLayers; baseArrayLayer += this.maxArrayLayers) { 973 | const numArrayLayersThisPass = Math.min(target.depthOrArrayLayers - baseArrayLayer, this.maxArrayLayers); 974 | for (let baseMip = 0; baseMip < numMipsTotal - 1; baseMip += this.maxMipsPerPass) { 975 | const numMipsThisPass = Math.min(numMipsTotal - 1 - baseMip, this.maxMipsPerPass); 976 | const baseMipOffset = offset.map(o => Math.trunc(o / Math.pow(2, baseMip))); 977 
| const baseMipSize = size.map(s => Math.max(Math.trunc(s / Math.pow(2, baseMip)), 1)); 978 | const workgroupOffset = baseMipOffset.map(o => Math.trunc(o / 64)); 979 | const dispatchDimensions = baseMipOffset.map((o, i) => Math.trunc((o + baseMipSize[i] - 1) / 64) + 1 - workgroupOffset[i]); 980 | const numWorkGroups = dispatchDimensions.reduce((product, v) => v * product, 1); 981 | const metaBindGroup = this.createMetaBindGroup(device, { 982 | workgroupOffset, 983 | numWorkGroups, 984 | numMips: numMipsThisPass, 985 | numArrayLayers: numArrayLayersThisPass, 986 | }, scalarType === SPDScalarType.F16, rwSupport); 987 | // todo: handle missing pipeline 988 | const pipeline = this.getOrCreatePipeline(target.format, filterCode, numMipsThisPass, scalarType); 989 | const mipViews = Array(numMipsThisPass + 1).fill(0).map((_, i) => { 990 | if (baseMip === 0 && i === 0) { 991 | return texture.createView({ 992 | dimension: '2d-array', 993 | baseMipLevel: 0, 994 | mipLevelCount: 1, 995 | baseArrayLayer, 996 | arrayLayerCount: numArrayLayersThisPass, 997 | }); 998 | } 999 | else { 1000 | const mip = baseMip + i; 1001 | return target.createView({ 1002 | dimension: '2d-array', 1003 | baseMipLevel: texture === target ? mip : mip - 1, 1004 | mipLevelCount: 1, 1005 | baseArrayLayer, 1006 | arrayLayerCount: numArrayLayersThisPass, 1007 | }); 1008 | } 1009 | }); 1010 | const mipsBindGroup = device.createBindGroup({ 1011 | layout: pipeline.mipsLayout, 1012 | entries: mipViews.map((v, i) => { 1013 | return { 1014 | binding: i, 1015 | resource: v, 1016 | }; 1017 | }), 1018 | }); 1019 | passes.push(new SPDPassInner(pipeline.pipelines, [mipsBindGroup, metaBindGroup], [...dispatchDimensions, numArrayLayersThisPass])); 1020 | } 1021 | } 1022 | return new SPDPass(passes, target); 1023 | } 1024 | } 1025 | /** 1026 | * Returns the maximum number of mip levels for a given n-dimensional size. 
1027 | * @param size The size to compute the maximum number of mip levels for 1028 | * @returns The maximum number of mip levels for the given size 1029 | */ 1030 | export function maxMipLevelCount(...size) { 1031 | return 1 + Math.trunc(Math.log2(Math.max(0, ...size))); 1032 | } 1033 | /** 1034 | * A helper class for downsampling 2D {@link GPUTexture} (& arrays) using as few passes as possible on a {@link GPUDevice} depending on its {@link GPUSupportedLimits}. 1035 | * Up to 12 mip levels can be generated within a single pass, if {@link GPUSupportedLimits.maxStorageTexturesPerShaderStage} supports it. 1036 | */ 1037 | export class WebGPUSinglePassDownsampler { 1038 | filters; 1039 | devicePipelines; 1040 | /** 1041 | * The set of formats supported by WebGPU SPD. 1042 | */ 1043 | static supportedFormats = new Set([ 1044 | 'rgba8unorm', 1045 | 'rgba8snorm', 1046 | 'rgba8uint', 1047 | 'rgba8sint', 1048 | 'rgba16uint', 1049 | 'rgba16sint', 1050 | 'rgba16float', 1051 | 'r32uint', 1052 | 'r32sint', 1053 | 'r32float', 1054 | 'rg32uint', 1055 | 'rg32sint', 1056 | 'rg32float', 1057 | 'rgba32uint', 1058 | 'rgba32sint', 1059 | 'rgba32float', 1060 | ]); 1061 | /** 1062 | * The set of additionally supported formats supported if the feature 'bgra8unorm-storage' is enabled. 1063 | */ 1064 | static supportedFormatsBgra8UnormStorage = new Set([ 1065 | 'bgra8unorm', 1066 | ]); 1067 | /** 1068 | * The set of additionally supported formats if the feature 'texture-formats-tier1' is enabled. 
1069 | */ 1070 | static supportedFormatsTier1 = new Set([ 1071 | 'r8unorm', 1072 | 'r8snorm', 1073 | 'r8uint', 1074 | 'r8sint', 1075 | 'rg8unorm', 1076 | 'rg8snorm', 1077 | 'rg8uint', 1078 | 'rg8sint', 1079 | 'r16unorm', 1080 | 'r16snorm', 1081 | 'r16uint', 1082 | 'r16sint', 1083 | 'r16float', 1084 | 'rg16unorm', 1085 | 'rg16snorm', 1086 | 'rg16uint', 1087 | 'rg16sint', 1088 | 'rg16float', 1089 | 'rgba16unorm', 1090 | 'rgba16snorm', 1091 | 'rgb10a2uint', 1092 | 'rgb10a2unorm', 1093 | 'rg11b10ufloat', 1094 | ]); 1095 | /** 1096 | * The set of formats that support read-write access. 1097 | */ 1098 | static supportedReadWriteFormats = new Set([ 1099 | 'r32uint', 1100 | 'r32sint', 1101 | 'r32float', 1102 | ]); 1103 | /** 1104 | * The set of formats that support read-write access if the feature 'texture-formats-tier2' is enabled. 1105 | */ 1106 | static supportedReadWriteFormatsTier2 = new Set([ 1107 | 'r8unorm', 1108 | 'r8uint', 1109 | 'r8sint', 1110 | 'rgba8unorm', 1111 | 'rgba8uint', 1112 | 'rgba8sint', 1113 | 'r16uint', 1114 | 'r16sint', 1115 | 'r16float', 1116 | 'rgba16uint', 1117 | 'rgba16sint', 1118 | 'rgba16float', 1119 | 'rgba32uint', 1120 | 'rgba32sint', 1121 | 'rgba32float', 1122 | ]); 1123 | /** 1124 | * Sets the preferred device limits for {@link WebGPUSinglePassDownsampler} in a given record of limits. 1125 | * Existing preferred device limits are either increased or left untouched. 1126 | * If {@link limits} is undefined, creates a new record of preferred device limits for {@link WebGPUSinglePassDownsampler}. 1127 | * The result can be used to set {@link GPUDeviceDescriptor.requiredLimits} when requesting a device. 1128 | * @param limits A record of device limits set to update with the preferred limits for {@link WebGPUSinglePassDownsampler} 1129 | * @param adapter If this is set, the preferred limits that are set by this function will be clamped to {@link GPUAdapter.limits}. 
1130 | * @returns The updated or created set of device limits with all preferred limits for {@link WebGPUSinglePassDownsampler} set 1131 | */ 1132 | static setPreferredLimits(limits, adapter) { 1133 | if (!limits) { 1134 | limits = {}; 1135 | } 1136 | const maxStorageTexturesPerShaderStage = Math.min(adapter?.limits.maxStorageTexturesPerShaderStage ?? 6, 6); 1137 | limits.maxStorageTexturesPerShaderStage = Math.max(limits.maxStorageTexturesPerShaderStage ?? maxStorageTexturesPerShaderStage, maxStorageTexturesPerShaderStage); 1138 | return limits; 1139 | } 1140 | /** 1141 | * Creates a new {@link WebGPUSinglePassDownsampler}. 1142 | * On its own, {@link WebGPUSinglePassDownsampler} does not allocate any GPU resources. 1143 | * Optionally, prepare GPU resources for a given {@link SPDPrepareDeviceDescriptor}. 1144 | * @param prepareDescriptor An optional descriptor for preparing GPU resources 1145 | * @see WebGPUSinglePassDownsampler.prepareDeviceResources 1146 | */ 1147 | constructor(prepareDescriptor) { 1148 | this.filters = new Map([ 1149 | [SPDFilters.Average, SPD_FILTER_AVERAGE], 1150 | [SPDFilters.Min, SPD_FILTER_MIN], 1151 | [SPDFilters.Max, SPD_FILTER_MAX], 1152 | [SPDFilters.MinMax, SPD_FILTER_MINMAX], 1153 | ]); 1154 | this.devicePipelines = new Map(); 1155 | if (prepareDescriptor) { 1156 | this.prepareDeviceResources(prepareDescriptor); 1157 | } 1158 | } 1159 | /** 1160 | * Prepares GPU resources required by {@link WebGPUSinglePassDownsampler} to downsample textures for a given {@link SPDPrepareDeviceDescriptor}. 
1161 | * @param prepareDescriptor a descriptor for preparing GPU resources 1162 | */ 1163 | prepareDeviceResources(prepareDescriptor) { 1164 | this.getOrCreateDevicePipelines(prepareDescriptor.device, prepareDescriptor.maxArrayLayersPerPass, prepareDescriptor.maxMipsPerPass, prepareDescriptor.disableSubgroups)?.preparePipelines(prepareDescriptor?.formats?.map(format => { 1165 | return { 1166 | ...format, 1167 | filters: new Set(Array.from(format.filters ?? []).map(filter => this.filters.get(filter) ?? SPD_FILTER_AVERAGE)), 1168 | }; 1169 | })); 1170 | } 1171 | getOrCreateDevicePipelines(device, maxArrayLayers, maxMipsPerPass, disableSubgroups) { 1172 | if (!this.devicePipelines.has(device)) { 1173 | this.devicePipelines.set(device, new DevicePipelines(device, maxArrayLayers, maxMipsPerPass, disableSubgroups)); 1174 | } 1175 | return this.devicePipelines.get(device); 1176 | } 1177 | /** 1178 | * Deregisters all resources stored for a given device. 1179 | * @param device The device resources should be deregistered for 1180 | */ 1181 | deregisterDevice(device) { 1182 | this.devicePipelines.delete(device); 1183 | } 1184 | /** 1185 | * Registers a new downsampling filter operation that can be injected into the downsampling shader for new pipelines. 1186 | * 1187 | * The given WGSL code must (at least) specify a function to reduce four values into one with the following name and signature: 1188 | * 1189 | * `spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4` 1190 | * 1191 | * @param name The unique name of the filter operation 1192 | * @param wgsl The WGSL code to inject into the downsampling shader as the filter operation 1193 | */ 1194 | registerFilter(name, wgsl) { 1195 | if (this.filters.has(name)) { 1196 | console.warn(`[WebGPUSinglePassDownsampler::registerFilter]: overriding existing filter '${name}'. 
Previously generated pipelines are not affected.`); 1197 | } 1198 | this.filters.set(name, wgsl); 1199 | } 1200 | /** 1201 | * Prepares a pass to downsample a 2d texture / 2d texture array. 1202 | * The produced {@link SPDPass} can be used multiple times to repeatedly downsampling a texture, e.g., for downsampling the depth buffer each frame. 1203 | * For one-time use, {@link WebGPUSinglePassDownsampler.generateMipmaps} can be used instead. 1204 | * 1205 | * By default, the texture is downsampled `texture.mipLevelCount - 1` times using an averaging filter, i.e., 4 pixel values from the parent level are averaged to produce a single pixel in the current mip level. 1206 | * This behavior can be configured using the optional {@link config} parameter. 1207 | * For example, instead of writing the mip levels into the input texture itself, a separate target texture can be specified using {@link SPDPassConfig.target}. 1208 | * Other configuration options include using a different (possibly custom) filter, only downsampling a subregion of the input texture, and limiting the number of mip levels to generate, e.g., if a min-max pyramid is only needed up to a certain tile resolution. 1209 | * If the given filter does not exist, an averaging filter will be used as a fallback. 1210 | * The image region to downsample and the number of mip levels to generate are clamped to the input texture's size, and the output texture's `mipLevelCount`. 1211 | * 1212 | * Depending on the number of mip levels to generate and the device's `maxStorageTexturesPerShaderStage` limit, the {@link SPDPass} will internally consist of multiple passes, each generating up to `min(maxStorageTexturesPerShaderStage, 12)` mip levels. 1213 | * 1214 | * @param device The device the {@link SPDPass} should be prepared for 1215 | * @param texture The texture that is to be processed by the {@link SPDPass}. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 
Must support {@link GPUTextureUsage.TEXTURE_BINDING}, and, if no other target is given, {@link GPUTextureUsage.STORAGE_BINDING}. 1216 | * @param config The config for the {@link SPDPass} 1217 | * @returns The prepared {@link SPDPass} or undefined if preparation failed or if no mipmaps would be generated. 1218 | * @throws If the {@link GPUTextureFormat} of {@link SPDPassConfig.target} is not supported (does not support {@link GPUStorageTextureAccess:"write-only"} on the given {@link device}). 1219 | * @throws If the size of {@link SPDPassConfig.target} is too small to store the first mip level generated for {@link texture} 1220 | * @throws If {@link texture} or {@link SPDPassConfig.target} is not a 2d texture. 1221 | * @see WebGPUSinglePassDownsampler.generateMipmaps 1222 | * @see WebGPUSinglePassDownsampler.registerFilter 1223 | * @see WebGPUSinglePassDownsampler.setPreferredLimits 1224 | */ 1225 | preparePass(device, texture, config) { 1226 | const target = config?.target ?? texture; 1227 | const filter = config?.filter ?? SPDFilters.Average; 1228 | const offset = (config?.offset ?? [0, 0]).map((o, d) => Math.max(0, Math.min(o, (d === 0 ? texture.width : texture.height) - 1))); 1229 | const size = (config?.size ?? [texture.width, texture.height]).map((s, d) => Math.max(0, Math.min(s, (d === 0 ? texture.width : texture.height) - offset[d]))); 1230 | const numMips = Math.min(Math.max(config?.numMips ?? 
target.mipLevelCount, 0), maxMipLevelCount(...size)); 1231 | if (numMips < 2) { 1232 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: no mips to create (numMips = ${numMips})`); 1233 | return undefined; 1234 | } 1235 | if (!(WebGPUSinglePassDownsampler.supportedFormats.has(target.format) || 1236 | (device.features.has('bgra8unorm-storage') && WebGPUSinglePassDownsampler.supportedFormatsBgra8UnormStorage.has(target.format)) || 1237 | ((device.features.has('texture-formats-tier1') || device.features.has('texture-formats-tier2')) && WebGPUSinglePassDownsampler.supportedFormatsTier1.has(target.format)))) { 1238 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: format ${target.format} not supported. (Supported formats: ${WebGPUSinglePassDownsampler.supportedFormats}, and ${WebGPUSinglePassDownsampler.supportedFormatsBgra8UnormStorage} (if 'bgra8unorm-storage' is enabled), and ${WebGPUSinglePassDownsampler.supportedFormatsTier1} (if 'texture-formats-tier1' is enabled))`); 1239 | } 1240 | if (target.format === 'bgra8unorm' && !device.features.has('bgra8unorm-storage')) { 1241 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: format ${target.format} not supported without feature 'bgra8unorm-storage' enabled`); 1242 | } 1243 | if (target.width < Math.max(1, Math.floor(size[0] / 2)) || target.height < Math.max(1, Math.floor(size[1] / 2))) { 1244 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: target too small (${[target.width, target.height]}) for input size ${size}`); 1245 | } 1246 | if (target.dimension !== '2d' || texture.dimension !== '2d') { 1247 | throw new Error('[WebGPUSinglePassDownsampler::prepare]: texture or target is not a 2d texture'); 1248 | } 1249 | if (!this.filters.has(filter)) { 1250 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: unknown filter ${filter}, falling back to average`); 1251 | } 1252 | if (filter === SPD_FILTER_MINMAX && target.format.includes('r32')) { 1253 | 
console.warn(`[WebGPUSinglePassDownsampler::prepare]: filter ${filter} makes no sense for one-component target format ${target.format}`); 1254 | } 1255 | const filterCode = this.filters.get(filter) ?? SPD_FILTER_AVERAGE; 1256 | const scalarType = sanitizeScalarType(device, target.format, config?.halfPrecision ?? false); 1257 | return this.getOrCreateDevicePipelines(device)?.preparePass(texture, target, filterCode, offset, size, numMips, scalarType); 1258 | } 1259 | /** 1260 | * Generates mipmaps for the given texture. 1261 | * For textures that will be downsampled more than once, consider generating a {@link SPDPass} using {@link WebGPUSinglePassDownsampler.preparePass} and calling its {@link SPDPass.encode} method. 1262 | * This way, allocated GPU resources for downsampling the texture can be reused. 1263 | * @param device The device to use for downsampling the texture 1264 | * @param texture The texture to generate mipmaps for. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 1265 | * @param config The config for mipmap generation 1266 | * @returns True if mipmaps were generated, false otherwise 1267 | * @throws If {@link WebGPUSinglePassDownsampler.preparePass} threw an error. 
1268 | * @see WebGPUSinglePassDownsampler.preparePass 1269 | */ 1270 | generateMipmaps(device, texture, config) { 1271 | const pass = this.preparePass(device, texture, config); 1272 | if (!pass) { 1273 | return false; 1274 | } 1275 | else { 1276 | const commandEncoder = device.createCommandEncoder(); 1277 | pass?.encode(commandEncoder.beginComputePass()).end(); 1278 | device.queue.submit([commandEncoder.finish()]); 1279 | return true; 1280 | } 1281 | } 1282 | } 1283 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | function makeShaderCode(outputFormat: string, filterOp: string = SPD_FILTER_AVERAGE, numMips: number, scalarType: SPDScalarType, hasSubgroups: boolean, mip6SupportsReadWrite: boolean): string { 2 | const texelType = scalarType === SPDScalarType.I32 ? 'i32' : (scalarType === SPDScalarType.U32 ? 'u32' : 'f32'); 3 | const useF16 = scalarType === SPDScalarType.F16; 4 | 5 | const filterCode = filterOp === SPD_FILTER_AVERAGE && !['f32', 'f16'].includes(texelType) ? filterOp.replace('* SPDScalar(0.25)', '/ 4') : filterOp; 6 | 7 | const mipsBindings = Array(numMips).fill(0) 8 | .map((_, i) => { 9 | if (i == 5 && numMips > 6 && mip6SupportsReadWrite) { 10 | return `@group(0) @binding(6) var dst_mip_6: texture_storage_2d_array<${outputFormat}, read_write>;`; 11 | } 12 | return `@group(0) @binding(${i + 1}) var dst_mip_${i + 1}: texture_storage_2d_array<${outputFormat}, write>;`; 13 | }) 14 | .join('\n'); 15 | 16 | // todo: get rid of this branching as soon as WGSL supports arrays of texture_storage_2d_array 17 | const mipsAccessorBody = Array(numMips).fill(0) 18 | .map((_, i) => { 19 | if (i == 5 && numMips > 6 && !mip6SupportsReadWrite) { 20 | return ` else if mip == 6 { 21 | textureStore(dst_mip_6, uv, slice, ${useF16 ? 
`vec4<${texelType}>(value)` : 'value'}); 22 | mip_dst_6_buffer[slice][uv.y][uv.x] = value; 23 | }` 24 | } 25 | return `${i === 0 ? '' : ' else '}if mip == ${i + 1} { 26 | textureStore(dst_mip_${i + 1}, uv, slice, ${useF16 ? `vec4<${texelType}>(value)` : 'value'}); 27 | }`; 28 | }) 29 | .join(''); 30 | 31 | const mipsAccessor = `fn store_dst_mip(value: vec4, uv: vec2, slice: u32, mip: u32) {\n${mipsAccessorBody}\n}` 32 | const midMipAccessor = mip6SupportsReadWrite ? `return vec4(textureLoad(dst_mip_6, uv, slice));` : `return mip_dst_6_buffer[slice][uv.y][uv.x];`; 33 | 34 | return /* wgsl */` 35 | // This file is part of the FidelityFX SDK. 36 | // 37 | // Copyright (C) 2023 Advanced Micro Devices, Inc. 38 | // 39 | // Permission is hereby granted, free of charge, to any person obtaining a copy 40 | // of this software and associated documentation files(the “Software”), to deal 41 | // in the Software without restriction, including without limitation the rights 42 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 43 | // copies of the Software, and to permit persons to whom the Software is 44 | // furnished to do so, subject to the following conditions : 45 | // 46 | // The above copyright notice and this permission notice shall be included in 47 | // all copies or substantial portions of the Software. 48 | // 49 | // THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 50 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 51 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 52 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 53 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 54 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 55 | // THE SOFTWARE. 
56 | 57 | 58 | // Definitions -------------------------------------------------------------------------------------------------------- 59 | 60 | ${useF16 ? 'enable f16;' : ''} 61 | ${hasSubgroups ? 'enable subgroups;' : ''} 62 | 63 | alias SPDScalar = ${scalarType}; 64 | 65 | // Helpers ------------------------------------------------------------------------------------------------------------ 66 | 67 | /** 68 | * A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. 69 | * 70 | * The 64-wide lane indices to 8x8 remapping is performed as follows: 71 | * 00 01 08 09 10 11 18 19 72 | * 02 03 0a 0b 12 13 1a 1b 73 | * 04 05 0c 0d 14 15 1c 1d 74 | * 06 07 0e 0f 16 17 1e 1f 75 | * 20 21 28 29 30 31 38 39 76 | * 22 23 2a 2b 32 33 3a 3b 77 | * 24 25 2c 2d 34 35 3c 3d 78 | * 26 27 2e 2f 36 37 3e 3f 79 | * 80 | * @param a: The input 1D coordinate to remap. 81 | * 82 | * @returns The remapped 2D coordinates. 83 | */ 84 | fn remap_for_wave_reduction(a: u32) -> vec2 { 85 | return vec2( 86 | insertBits(extractBits(a, 2u, 3u), a, 0u, 1u), 87 | insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u) 88 | ); 89 | } 90 | 91 | fn map_to_xy(local_invocation_index: u32) -> vec2 { 92 | let sub_xy: vec2 = remap_for_wave_reduction(local_invocation_index % 64); 93 | return vec2( 94 | sub_xy.x + 8 * ((local_invocation_index >> 6) % 2), 95 | sub_xy.y + 8 * ((local_invocation_index >> 7)) 96 | ); 97 | } 98 | 99 | /* 100 | * Compute a linear value from a SRGB value. 101 | * 102 | * @param value: The value to convert to linear from SRGB. 103 | * 104 | * @returns A value in SRGB space. 
105 | */ 106 | /* 107 | fn srgb_to_linear(value: SPDScalar) -> SPDScalar { 108 | let j = vec3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); 109 | let k = vec2(1.055, -0.055); 110 | return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); 111 | } 112 | */ 113 | 114 | // Resources & Accessors ----------------------------------------------------------------------------------------------- 115 | struct DownsamplePassMeta { 116 | work_group_offset: vec2, 117 | num_work_groups: u32, 118 | mips: u32, 119 | } 120 | 121 | // In the original version dst_mip_i is an image2Darray [SPD_MAX_MIP_LEVELS+1], i.e., 12+1, but WGSL doesn't support arrays of textures yet 122 | // Also these are read_write because for mips 7-13, the workgroup reads from mip level 6 - since most formats don't support read_write access in WGSL yet, we use a single read_write buffer in such cases instead 123 | @group(0) @binding(0) var src_mip_0: texture_2d_array<${texelType}>; 124 | ${mipsBindings} 125 | 126 | @group(1) @binding(0) var downsample_pass_meta : DownsamplePassMeta; 127 | @group(1) @binding(1) var spd_global_counter: array>; 128 | // this is only used if read_write access is not supported for the texture format 129 | @group(1) @binding(2) var mip_dst_6_buffer: array, 64>, 64>>; 130 | 131 | fn get_mips() -> u32 { 132 | return downsample_pass_meta.mips; 133 | } 134 | 135 | fn get_num_work_groups() -> u32 { 136 | return downsample_pass_meta.num_work_groups; 137 | } 138 | 139 | fn get_work_group_offset() -> vec2 { 140 | return downsample_pass_meta.work_group_offset; 141 | } 142 | 143 | fn load_src_image(uv: vec2, slice: u32) -> vec4 { 144 | return vec4(textureLoad(src_mip_0, uv, slice, 0)); 145 | } 146 | 147 | fn load_mid_mip_image(uv: vec2, slice: u32) -> vec4 { 148 | ${numMips > 6 ? 
midMipAccessor : 'return vec4();'} 149 | } 150 | 151 | ${mipsAccessor} 152 | 153 | // Workgroup ----------------------------------------------------------------------------------------------------------- 154 | 155 | ${useF16 ? ` 156 | var spd_intermediate_rg: array, 16>, 16>; 157 | var spd_intermediate_ba: array, 16>, 16>; 158 | `: ` 159 | var spd_intermediate_r: array, 16>; 160 | var spd_intermediate_g: array, 16>; 161 | var spd_intermediate_b: array, 16>; 162 | var spd_intermediate_a: array, 16>; 163 | `} 164 | var spd_counter: atomic; 165 | 166 | fn spd_increase_atomic_counter(slice: u32) { 167 | atomicStore(&spd_counter, atomicAdd(&spd_global_counter[slice], 1)); 168 | } 169 | 170 | fn spd_get_atomic_counter() -> u32 { 171 | return workgroupUniformLoad(&spd_counter); 172 | } 173 | 174 | fn spd_reset_atomic_counter(slice: u32) { 175 | atomicStore(&spd_global_counter[slice], 0); 176 | } 177 | 178 | // Cotnrol flow -------------------------------------------------------------------------------------------------------- 179 | 180 | fn spd_barrier() { 181 | // in glsl this does: groupMemoryBarrier(); barrier(); 182 | workgroupBarrier(); 183 | } 184 | 185 | // Only last active workgroup should proceed 186 | fn spd_exit_workgroup(num_work_groups: u32, local_invocation_index: u32, slice: u32) -> bool { 187 | // global atomic counter 188 | if (local_invocation_index == 0) { 189 | spd_increase_atomic_counter(slice); 190 | } 191 | storageBarrier(); 192 | return spd_get_atomic_counter() != (num_work_groups - 1); 193 | } 194 | 195 | // Pixel access -------------------------------------------------------------------------------------------------------- 196 | 197 | ${filterCode} 198 | 199 | ${hasSubgroups ?
` 200 | fn spd_reduce_quad(value: vec4) -> vec4 { 201 | let v0 = value; 202 | let v1 = quadSwapX(value); 203 | let v2 = quadSwapY(value); 204 | let v3 = quadSwapDiagonal(value); 205 | return spd_reduce_4(v0, v1, v2, v3); 206 | } 207 | ` : ''} 208 | 209 | fn spd_store(pix: vec2, out_value: vec4, mip: u32, slice: u32) { 210 | store_dst_mip(out_value, pix, slice, mip + 1); 211 | } 212 | 213 | fn spd_load_intermediate(x: u32, y: u32) -> vec4 { 214 | return vec4(${useF16 ? ` 215 | spd_intermediate_rg[x][y], 216 | spd_intermediate_ba[x][y],` : ` 217 | spd_intermediate_r[x][y], 218 | spd_intermediate_g[x][y], 219 | spd_intermediate_b[x][y], 220 | spd_intermediate_a[x][y],` 221 | }); 222 | } 223 | 224 | fn spd_store_intermediate(x: u32, y: u32, value: vec4) { 225 | ${useF16 ? ` 226 | spd_intermediate_rg[x][y] = value.rg; 227 | spd_intermediate_ba[x][y] = value.ba;` : ` 228 | spd_intermediate_r[x][y] = value.r; 229 | spd_intermediate_g[x][y] = value.g; 230 | spd_intermediate_b[x][y] = value.b; 231 | spd_intermediate_a[x][y] = value.a;`} 232 | } 233 | 234 | fn spd_reduce_intermediate(i0: vec2, i1: vec2, i2: vec2, i3: vec2) -> vec4 { 235 | let v0 = spd_load_intermediate(i0.x, i0.y); 236 | let v1 = spd_load_intermediate(i1.x, i1.y); 237 | let v2 = spd_load_intermediate(i2.x, i2.y); 238 | let v3 = spd_load_intermediate(i3.x, i3.y); 239 | return spd_reduce_4(v0, v1, v2, v3); 240 | } 241 | 242 | fn spd_reduce_load_4(base: vec2, slice: u32) -> vec4 { 243 | let v0 = load_src_image(base + vec2(0, 0), slice); 244 | let v1 = load_src_image(base + vec2(0, 1), slice); 245 | let v2 = load_src_image(base + vec2(1, 0), slice); 246 | let v3 = load_src_image(base + vec2(1, 1), slice); 247 | return spd_reduce_4(v0, v1, v2, v3); 248 | } 249 | 250 | fn spd_reduce_load_mid_mip_4(base: vec2, slice: u32) -> vec4 { 251 | let v0 = load_mid_mip_image(base + vec2(0, 0), slice); 252 | let v1 = load_mid_mip_image(base + vec2(0, 1), slice); 253 | let v2 = load_mid_mip_image(base + vec2(1, 0), slice); 254 
| let v3 = load_mid_mip_image(base + vec2(1, 1), slice); 255 | return spd_reduce_4(v0, v1, v2, v3); 256 | } 257 | 258 | // Main logic --------------------------------------------------------------------------------------------------------- 259 | 260 | fn spd_downsample_mips_0_1(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 261 | var v: array, 4>; 262 | 263 | let workgroup64 = workgroup_id.xy * 64; 264 | let workgroup32 = workgroup_id.xy * 32; 265 | let workgroup16 = workgroup_id.xy * 16; 266 | 267 | var tex = workgroup64 + vec2(x * 2, y * 2); 268 | var pix = workgroup32 + vec2(x, y); 269 | v[0] = spd_reduce_load_4(tex, slice); 270 | spd_store(pix, v[0], 0, slice); 271 | 272 | tex = workgroup64 + vec2(x * 2 + 32, y * 2); 273 | pix = workgroup32 + vec2(x + 16, y); 274 | v[1] = spd_reduce_load_4(tex, slice); 275 | spd_store(pix, v[1], 0, slice); 276 | 277 | tex = workgroup64 + vec2(x * 2, y * 2 + 32); 278 | pix = workgroup32 + vec2(x, y + 16); 279 | v[2] = spd_reduce_load_4(tex, slice); 280 | spd_store(pix, v[2], 0, slice); 281 | 282 | tex = workgroup64 + vec2(x * 2 + 32, y * 2 + 32); 283 | pix = workgroup32 + vec2(x + 16, y + 16); 284 | v[3] = spd_reduce_load_4(tex, slice); 285 | spd_store(pix, v[3], 0, slice); 286 | 287 | if mip <= 1 { 288 | return; 289 | } 290 | 291 | ${hasSubgroups ? 
` 292 | v[0] = spd_reduce_quad(v[0]); 293 | v[1] = spd_reduce_quad(v[1]); 294 | v[2] = spd_reduce_quad(v[2]); 295 | v[3] = spd_reduce_quad(v[3]); 296 | 297 | if (local_invocation_index % 4) == 0 { 298 | spd_store(workgroup16 + vec2(x / 2, y / 2), v[0], 1, slice); 299 | spd_store_intermediate(x / 2, y / 2, v[0]); 300 | 301 | spd_store(workgroup16 + vec2(x / 2 + 8, y / 2), v[1], 1, slice); 302 | spd_store_intermediate(x / 2 + 8, y / 2, v[1]); 303 | 304 | spd_store(workgroup16 + vec2(x / 2, y / 2 + 8), v[2], 1, slice); 305 | spd_store_intermediate(x / 2, y / 2 + 8, v[2]); 306 | 307 | spd_store(workgroup16 + vec2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); 308 | spd_store_intermediate(x / 2 + 8, y / 2 + 8, v[3]); 309 | } 310 | ` : ` 311 | for (var i = 0u; i < 4u; i++) { 312 | spd_store_intermediate(x, y, v[i]); 313 | spd_barrier(); 314 | if local_invocation_index < 64 { 315 | v[i] = spd_reduce_intermediate( 316 | vec2(x * 2 + 0, y * 2 + 0), 317 | vec2(x * 2 + 1, y * 2 + 0), 318 | vec2(x * 2 + 0, y * 2 + 1), 319 | vec2(x * 2 + 1, y * 2 + 1) 320 | ); 321 | spd_store(workgroup16 + vec2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); 322 | } 323 | spd_barrier(); 324 | } 325 | 326 | if local_invocation_index < 64 { 327 | spd_store_intermediate(x + 0, y + 0, v[0]); 328 | spd_store_intermediate(x + 8, y + 0, v[1]); 329 | spd_store_intermediate(x + 0, y + 8, v[2]); 330 | spd_store_intermediate(x + 8, y + 8, v[3]); 331 | } 332 | `} 333 | } 334 | 335 | fn spd_downsample_mip_2(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 336 | ${hasSubgroups ? 
` 337 | var v = spd_load_intermediate(x, y); 338 | v = spd_reduce_quad(v); 339 | // quad index 0 stores result 340 | if (local_invocation_index % 4) == 0 { 341 | spd_store(workgroup_id.xy * 8 + vec2(x / 2, y / 2), v, mip, slice); 342 | spd_store_intermediate(x + (y / 2) % 2, y, v); 343 | } 344 | ` : ` 345 | if local_invocation_index < 64u { 346 | let v = spd_reduce_intermediate( 347 | vec2(x * 2 + 0, y * 2 + 0), 348 | vec2(x * 2 + 1, y * 2 + 0), 349 | vec2(x * 2 + 0, y * 2 + 1), 350 | vec2(x * 2 + 1, y * 2 + 1) 351 | ); 352 | spd_store(workgroup_id.xy * 8 + vec2(x, y), v, mip, slice); 353 | // store to LDS, try to reduce bank conflicts 354 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 355 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 356 | // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 357 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 358 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 359 | // ... 360 | // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 361 | spd_store_intermediate(x * 2 + y % 2, y * 2, v); 362 | } 363 | `} 364 | } 365 | 366 | fn spd_downsample_mip_3(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 367 | ${hasSubgroups ? 
` 368 | // subgroup ops must be in uniform control flow 369 | var v = spd_load_intermediate(x * 2 + y % 2, y * 2); 370 | v = spd_reduce_quad(v); 371 | 372 | // quad index 0 stores result 373 | if local_invocation_index < 64u && (local_invocation_index % 4) == 0 { 374 | spd_store(workgroup_id.xy * 4 + vec2(x / 2, y / 2), v, mip, slice); 375 | spd_store_intermediate(x * 2 + y / 2, y * 2, v); 376 | } 377 | ` : ` 378 | if local_invocation_index < 16u { 379 | // x 0 x 0 380 | // 0 0 0 0 381 | // 0 x 0 x 382 | // 0 0 0 0 383 | let v = spd_reduce_intermediate( 384 | vec2(x * 4 + 0 + 0, y * 4 + 0), 385 | vec2(x * 4 + 2 + 0, y * 4 + 0), 386 | vec2(x * 4 + 0 + 1, y * 4 + 2), 387 | vec2(x * 4 + 2 + 1, y * 4 + 2) 388 | ); 389 | spd_store(workgroup_id.xy * 4 + vec2(x, y), v, mip, slice); 390 | // store to LDS 391 | // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 392 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 393 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 394 | // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 395 | // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 396 | // ... 397 | // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 398 | // ... 399 | // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 400 | // ... 401 | spd_store_intermediate(x * 4 + y, y * 4, v); 402 | } 403 | `} 404 | } 405 | 406 | fn spd_downsample_mip_4(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 407 | ${hasSubgroups ? ` 408 | // subgroup ops must be in uniform control flow 409 | var v = spd_load_intermediate(x * 4 + y, y * 4); 410 | v = spd_reduce_quad(v); 411 | 412 | // quad index 0 stores result 413 | if local_invocation_index < 16u && (local_invocation_index % 4) == 0 { 414 | spd_store(workgroup_id.xy * 2 + vec2(x / 2, y / 2), v, mip, slice); 415 | spd_store_intermediate(x / 2 + y, 0, v); 416 | } 417 | ` : ` 418 | if local_invocation_index < 4u { 419 | // x 0 0 0 x 0 0 0 420 | // ... 
421 | // 0 x 0 0 0 x 0 0 422 | let v = spd_reduce_intermediate( 423 | vec2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), 424 | vec2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), 425 | vec2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), 426 | vec2(x * 8 + 4 + 1 + y * 2, y * 8 + 4) 427 | ); 428 | spd_store(workgroup_id.xy * 2 + vec2(x, y), v, mip, slice); 429 | // store to LDS 430 | // x x x x 0 ... 431 | // 0 ... 432 | spd_store_intermediate(x + y * 2, 0, v); 433 | } 434 | `} 435 | } 436 | 437 | fn spd_downsample_mip_5(workgroup_id: vec2, local_invocation_index: u32, mip: u32, slice: u32) { 438 | ${hasSubgroups ? ` 439 | // subgroup ops must be in uniform control flow 440 | var v = spd_load_intermediate(local_invocation_index, 0); 441 | v = spd_reduce_quad(v); 442 | 443 | // quad index 0 stores result 444 | if local_invocation_index < 4u && (local_invocation_index % 4) == 0 { 445 | spd_store(workgroup_id.xy, v, mip, slice); 446 | } 447 | ` : ` 448 | if local_invocation_index < 1u { 449 | // x x x x 0 ... 450 | // 0 ... 451 | let v = spd_reduce_intermediate(vec2(0, 0), vec2(1, 0), vec2(2, 0), vec2(3, 0)); 452 | spd_store(workgroup_id.xy, v, mip, slice); 453 | } 454 | `} 455 | } 456 | 457 | fn spd_downsample_next_four(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) { 458 | if mips <= base_mip { 459 | return; 460 | } 461 | spd_barrier(); 462 | spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice); 463 | 464 | if mips <= base_mip + 1 { 465 | return; 466 | } 467 | spd_barrier(); 468 | spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice); 469 | 470 | if mips <= base_mip + 2 { 471 | return; 472 | } 473 | spd_barrier(); 474 | spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice); 475 | 476 | if mips <= base_mip + 3 { 477 | return; 478 | } 479 | spd_barrier(); 480 | spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice); 481 | } 482 | 483 
| fn spd_downsample_last_four(x: u32, y: u32, workgroup_id: vec2, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) { 484 | if mips <= base_mip { 485 | return; 486 | } 487 | spd_barrier(); 488 | spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice); 489 | 490 | if mips <= base_mip + 1 { 491 | return; 492 | } 493 | spd_barrier(); 494 | spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice); 495 | 496 | if mips <= base_mip + 2 { 497 | return; 498 | } 499 | spd_barrier(); 500 | spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice); 501 | 502 | if mips <= base_mip + 3 { 503 | return; 504 | } 505 | spd_barrier(); 506 | spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice); 507 | } 508 | 509 | fn spd_downsample_mips_6_7(x: u32, y: u32, mips: u32, slice: u32) { 510 | ${mip6SupportsReadWrite ? 'textureBarrier();' : ''} 511 | 512 | var tex = vec2(x * 4 + 0, y * 4 + 0); 513 | var pix = vec2(x * 2 + 0, y * 2 + 0); 514 | let v0 = spd_reduce_load_mid_mip_4(tex, slice); 515 | spd_store(pix, v0, 6, slice); 516 | 517 | tex = vec2(x * 4 + 2, y * 4 + 0); 518 | pix = vec2(x * 2 + 1, y * 2 + 0); 519 | let v1 = spd_reduce_load_mid_mip_4(tex, slice); 520 | spd_store(pix, v1, 6, slice); 521 | 522 | tex = vec2(x * 4 + 0, y * 4 + 2); 523 | pix = vec2(x * 2 + 0, y * 2 + 1); 524 | let v2 = spd_reduce_load_mid_mip_4(tex, slice); 525 | spd_store(pix, v2, 6, slice); 526 | 527 | tex = vec2(x * 4 + 2, y * 4 + 2); 528 | pix = vec2(x * 2 + 1, y * 2 + 1); 529 | let v3 = spd_reduce_load_mid_mip_4(tex, slice); 530 | spd_store(pix, v3, 6, slice); 531 | 532 | if mips <= 7 { 533 | return; 534 | } 535 | // no barrier needed, working on values only from the same thread 536 | 537 | let v = spd_reduce_4(v0, v1, v2, v3); 538 | spd_store(vec2(x, y), v, 7, slice); 539 | spd_store_intermediate(x, y, v); 540 | } 541 | 542 | fn spd_downsample_last_6(x: u32, y: u32, 
local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) { 543 | if mips <= 6 { 544 | return; 545 | } 546 | 547 | // increase the global atomic counter for the given slice and check if it's the last remaining thread group: 548 | // terminate if not, continue if yes. 549 | if spd_exit_workgroup(num_work_groups, local_invocation_index, slice) { 550 | return; 551 | } 552 | 553 | // reset the global atomic counter back to 0 for the next spd dispatch 554 | spd_reset_atomic_counter(slice); 555 | 556 | // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. 557 | // compute MIP level 6 and 7 558 | spd_downsample_mips_6_7(x, y, mips, slice); 559 | 560 | // compute MIP level 8, 9, 10, 11 561 | spd_downsample_last_four(x, y, vec2(0, 0), local_invocation_index, 8, mips, slice); 562 | } 563 | 564 | /// Downsamples a 64x64 tile based on the work group id. 565 | /// If after downsampling it's the last active thread group, computes the remaining MIP levels. 566 | /// 567 | /// @param [in] workGroupID index of the work group / thread group 568 | /// @param [in] localInvocationIndex index of the thread within the thread group in 1D 569 | /// @param [in] mips the number of total MIP levels to compute for the input texture 570 | /// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice 571 | /// @param [in] slice the slice of the input texture 572 | fn spd_downsample(workgroup_id: vec2, local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) { 573 | let xy = map_to_xy(local_invocation_index); 574 | spd_downsample_mips_0_1(xy.x, xy.y, workgroup_id, local_invocation_index, mips, slice); 575 | spd_downsample_next_four(xy.x, xy.y, workgroup_id, local_invocation_index, 2, mips, slice); 576 | ${numMips > 6 ? 
'spd_downsample_last_6(xy.x, xy.y, local_invocation_index, mips, num_work_groups, slice);' : ''} 577 | } 578 | 579 | // Entry points ------------------------------------------------------------------------------------------------------- 580 | 581 | @compute 582 | @workgroup_size(256, 1, 1) 583 | fn downsample(@builtin(local_invocation_index) local_invocation_index: u32, @builtin(workgroup_id) workgroup_id: vec3) { 584 | spd_downsample( 585 | workgroup_id.xy + get_work_group_offset(), 586 | local_invocation_index, 587 | get_mips(), 588 | get_num_work_groups(), 589 | workgroup_id.z 590 | ); 591 | } 592 | `; 593 | } 594 | 595 | const SPD_FILTER_AVERAGE: string = /* wgsl */` 596 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 597 | return (v0 + v1 + v2 + v3) * SPDScalar(0.25); 598 | } 599 | `; 600 | 601 | const SPD_FILTER_MIN = /* wgsl */` 602 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 603 | return min(min(v0, v1), min(v2, v3)); 604 | } 605 | `; 606 | 607 | const SPD_FILTER_MAX = /* wgsl */` 608 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 609 | return max(max(v0, v1), max(v2, v3)); 610 | } 611 | `; 612 | 613 | const SPD_FILTER_MINMAX = /* wgsl */` 614 | fn spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4 { 615 | let max4 = max(max(v0.xy, v1.xy), max(v2.xy, v3.xy)); 616 | return vec4(min(min(v0.x, v1.x), min(v2.x, v3.x)), max(max4.x, max4.y), 0, 0); 617 | } 618 | `; 619 | 620 | /** 621 | * The names of all predefined filters of {@link WebGPUSinglePassDownsampler}. 622 | * Custom ones can be registered with an instance of {@link WebGPUSinglePassDownsampler} using {@link WebGPUSinglePassDownsampler.registerFilter}. 623 | */ 624 | export enum SPDFilters { 625 | /** 626 | * Takes the channel-wise average of 4 pixels. 627 | */ 628 | Average = 'average', 629 | 630 | /** 631 | * Takes the channel-wise minimum of 4 pixels. 
632 | */ 633 | Min = 'min', 634 | 635 | /** 636 | * Takes the channel-wise maximum of 4 pixels. 637 | */ 638 | Max = 'max', 639 | 640 | /** 641 | * Takes the minimum of the red channel and the maximum of the red and green channel and stores the result in the red and green channel respectively. 642 | * This really only makes sense for single-channel input textures (where only the red channel holds any data), e.g., for generating a min-max pyramid of a depth buffer. 643 | */ 644 | MinMax = 'minmax', 645 | } 646 | 647 | class SPDPassInner { 648 | constructor(private pipeline: GPUComputePipeline, private bindGroups: Array, private dispatchDimensions: [GPUSize32, GPUSize32, GPUSize32]) {} 649 | encode(computePass: GPUComputePassEncoder) { 650 | computePass.setPipeline(this.pipeline); 651 | this.bindGroups.forEach((bindGroup, index) => { 652 | computePass.setBindGroup(index, bindGroup); 653 | }); 654 | computePass.dispatchWorkgroups(this.dispatchDimensions[0], this.dispatchDimensions[1], this.dispatchDimensions[2]); 655 | } 656 | } 657 | 658 | /** 659 | * A compute pass for downsampling a texture. 660 | */ 661 | export class SPDPass { 662 | /** 663 | * The texture the mipmaps will be written to by this {@link SPDPass}, once {@link SPDPass.encode} is called. 664 | */ 665 | readonly target: GPUTexture 666 | 667 | /** @ignore */ 668 | constructor(private passes: Array, target: GPUTexture) { 669 | this.target = target; 670 | } 671 | /** 672 | * Encodes the configured mipmap generation pass(es) with the given {@link GPUComputePassEncoder}. 673 | * All bind groups indices used by {@link SPDPass} are reset to `null` to prevent unintentional bindings of internal bind groups for subsequent pipelines encoded in the same {@link GPUComputePassEncoder}. 674 | * @param computePassEncoder The {@link GPUComputePassEncoder} to encode this mipmap generation pass with. 
675 | * @returns The {@link computePassEncoder} 676 | */ 677 | encode(computePassEncoder: GPUComputePassEncoder): GPUComputePassEncoder { 678 | this.passes.forEach(p => p.encode(computePassEncoder)); 679 | computePassEncoder.setBindGroup(0, null); 680 | computePassEncoder.setBindGroup(1, null); 681 | return computePassEncoder; 682 | } 683 | 684 | /** 685 | * Returns the number of passes that will be encoded by calling this instance's {@link SPDPass.encode} method. 686 | */ 687 | get numPasses(): number { 688 | return this.passes.length 689 | } 690 | } 691 | 692 | enum SPDScalarType { 693 | F32 = 'f32', 694 | F16 = 'f16', 695 | I32 = 'i32', 696 | U32 = 'u32', 697 | } 698 | 699 | /** 700 | * Configuration for {@link WebGPUSinglePassDownsampler.preparePass}. 701 | */ 702 | export interface SPDPassConfig { 703 | /** 704 | * The name of the filter to use for downsampling the given texture. 705 | * Should be one of the filters registered with {@link WebGPUSinglePassDownsampler}. 706 | * Defaults to {@link SPDFilters.Average}. 707 | */ 708 | filter?: string, 709 | 710 | /** 711 | * The target texture the generated mipmaps are written to. 712 | * Its usage must include {@link GPUTextureUsage.STORAGE_BINDING}. 713 | * Its format must support {@link GPUStorageTextureAccess:"write-only"}. 714 | * Its size must be big enough to store the first mip level generated for the input texture. 715 | * It must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 716 | * Defaults to the given input texture. 717 | */ 718 | target?: GPUTexture, 719 | 720 | /** 721 | * The upper left corner of the image region mipmaps should be generated for. 722 | * Defaults to [0,0]. 723 | */ 724 | offset?: [number, number], 725 | 726 | /** 727 | * The size of the image region mipmaps should be generated for. 728 | * Defaults to [texture.width - offset[0], texture.height - offset[1]].
729 | */ 730 | size?: [number, number], 731 | 732 | /** 733 | * The number of mipmaps to generate. 734 | * Defaults to target.mipLevelCount. 735 | */ 736 | numMips?: number, 737 | 738 | /** 739 | * If set to true, will try to use half-precision floats (`f16`) for this combination of texture format and filters. 740 | * Falls back to full precision, if half precision is requested but not supported by the device (feature 'shader-f16' not enabled). 741 | * Falls back to full precision, if the texture format is not a float format. 742 | * Defaults to false. 743 | */ 744 | halfPrecision?: boolean; 745 | } 746 | 747 | interface GPUDownsamplingMeta { 748 | workgroupOffset: [number, number], 749 | numWorkGroups: number, 750 | numMips: number, 751 | numArrayLayers: number, 752 | } 753 | 754 | class SPDPipeline { 755 | constructor(readonly mipsLayout: GPUBindGroupLayout, readonly pipelines: GPUComputePipeline) {} 756 | } 757 | 758 | export interface SPDPrepareFormatDescriptor { 759 | /** 760 | * The texture format to prepare downsampling pipelines for. 761 | */ 762 | format: GPUTextureFormat, 763 | 764 | /** 765 | * The names of downsampling filters that to prepare downsampling pipelines for the given {@link format} for. 766 | * Defaults to {@link SPDFilters.Average}. 767 | */ 768 | filters?: Set, 769 | 770 | /** 771 | * If set to true, will try to use half-precision floats (`f16`) for this combination of texture format and filters. 772 | * Falls back to full precision, if half precision is requested but not supported by the device (feature 'shader-f16' not enabled). 773 | * Falls back to full precision, if the texture format is not a float format. 774 | * Defaults to false. 775 | */ 776 | halfPrecision?: boolean, 777 | } 778 | 779 | export interface SPDPrepareDeviceDescriptor { 780 | /** 781 | * The device to prepare downsampling pipelines for. 782 | */ 783 | device: GPUDevice, 784 | 785 | /** 786 | * The formats to prepare downsampling pipelines for. 
787 | */ 788 | formats?: Array, 789 | 790 | /** 791 | * The maximum number of array layers will be downsampled on the {@link device} within a single pass. 792 | * If a texture has more, downsampling will be split up into multiple passes handling up to this limit of array layers each. 793 | * Defaults to device.limits.maxTextureArrayLayers. 794 | */ 795 | maxArrayLayersPerPass?: number, 796 | 797 | /** 798 | * The maximum number of mip levels that can be generated on the {@link device} within a single pass. 799 | * Note that generating more than 6 mip levels per pass is currently not supported on all platforms. 800 | * Defaults to `Math.min(device.limits.maxStorageTexturesPerShaderStage, 12)`. 801 | */ 802 | maxMipsPerPass?: number, 803 | 804 | /** 805 | * If true, disables all uses of subgroup built-ins by the downsampler even if the `'subgroups'` feature is enabled on the {@link device}. 806 | */ 807 | disableSubgroups?: boolean, 808 | } 809 | 810 | function sanitizeScalarType(device: GPUDevice, format: GPUTextureFormat, halfPrecision: boolean): SPDScalarType { 811 | const texelType = format.toLocaleLowerCase().includes('sint') ? SPDScalarType.I32 : (format.toLocaleLowerCase().includes('uint') ? SPDScalarType.U32 : SPDScalarType.F32); 812 | if (halfPrecision && !device.features.has('shader-f16')) { 813 | console.warn(`[sanitizeScalarType]: half precision requested but the device feature 'shader-f16' is not enabled, falling back to full precision`); 814 | } 815 | if (halfPrecision && texelType !== SPDScalarType.F32) { 816 | console.warn(`[sanitizeScalarType]: half precision requested for non-float format (${format}, uses ${texelType}), falling back to full precision`); 817 | } 818 | return halfPrecision && !device.features.has('shader-f16') && texelType === SPDScalarType.F32 ? 
SPDScalarType.F16 : texelType; 819 | } 820 | 821 | class DevicePipelines { 822 | private device: WeakRef; 823 | private readonly maxMipsPerPass: number; 824 | private readonly maxArrayLayers: number; 825 | private readonly disableSubgroups: boolean; 826 | private readonly internalResourcesBindGroupLayout: GPUBindGroupLayout; 827 | private readonly internalResourcesBindGroupLayout12?: GPUBindGroupLayout; 828 | private readonly internalResourcesBindGroupLayout12RW?: GPUBindGroupLayout; 829 | private atomicCounters: Map; 830 | private midMipBuffers: Map; 831 | private pipelines: Map>>>; 832 | 833 | constructor(device: GPUDevice, maxArrayLayers?: number, maxMipsPerPass?: number, disableSubgroups?: boolean) { 834 | this.device = new WeakRef(device); 835 | this.maxMipsPerPass = Math.min(device.limits.maxStorageTexturesPerShaderStage, maxMipsPerPass ?? 12); 836 | this.maxArrayLayers = Math.min(device.limits.maxTextureArrayLayers, maxArrayLayers ?? device.limits.maxTextureArrayLayers); 837 | this.disableSubgroups = disableSubgroups ?? 
false; 838 | this.pipelines = new Map(); 839 | this.atomicCounters = new Map(); 840 | this.midMipBuffers = new Map(); 841 | 842 | this.internalResourcesBindGroupLayout = device.createBindGroupLayout({ 843 | entries: [{ 844 | binding: 0, 845 | visibility: GPUShaderStage.COMPUTE, 846 | buffer: { 847 | type: 'uniform', 848 | hasDynamicOffset: false, 849 | minBindingSize: 16, 850 | }, 851 | }], 852 | }); 853 | 854 | if (this.maxMipsPerPass > 6) { 855 | this.internalResourcesBindGroupLayout12 = device.createBindGroupLayout({ 856 | entries: [ 857 | { 858 | binding: 0, 859 | visibility: GPUShaderStage.COMPUTE, 860 | buffer: { 861 | type: 'uniform', 862 | hasDynamicOffset: false, 863 | minBindingSize: 16, 864 | }, 865 | }, 866 | { 867 | binding: 1, 868 | visibility: GPUShaderStage.COMPUTE, 869 | buffer: { 870 | type: 'storage', 871 | hasDynamicOffset: false, 872 | minBindingSize: 4, 873 | }, 874 | }, 875 | { 876 | binding: 2, 877 | visibility: GPUShaderStage.COMPUTE, 878 | buffer: { 879 | type: 'storage', 880 | hasDynamicOffset: false, 881 | minBindingSize: 16 * 64 * 64, 882 | }, 883 | }, 884 | ], 885 | }); 886 | this.internalResourcesBindGroupLayout12RW = device.createBindGroupLayout({ 887 | entries: [ 888 | { 889 | binding: 0, 890 | visibility: GPUShaderStage.COMPUTE, 891 | buffer: { 892 | type: 'uniform', 893 | hasDynamicOffset: false, 894 | minBindingSize: 16, 895 | }, 896 | }, 897 | { 898 | binding: 1, 899 | visibility: GPUShaderStage.COMPUTE, 900 | buffer: { 901 | type: 'storage', 902 | hasDynamicOffset: false, 903 | minBindingSize: 4, 904 | }, 905 | }, 906 | ], 907 | }); 908 | } 909 | } 910 | 911 | preparePipelines(pipelineConfigs?: Array) { 912 | const device = this.device.deref(); 913 | if (device) { 914 | pipelineConfigs?.forEach(c => { 915 | const scalarType = sanitizeScalarType(device, c.format, c.halfPrecision ?? false); 916 | Array.from(c.filters ?? 
[SPD_FILTER_AVERAGE]).map(filter => { 917 | for (let i = 0; i < this.maxMipsPerPass; ++i) { 918 | this.getOrCreatePipeline(c.format, filter, i + 1, scalarType); 919 | } 920 | }); 921 | }); 922 | } 923 | } 924 | 925 | private supportsReadWrite(targetFormat: GPUTextureFormat): boolean { 926 | const device = this.device.deref(); 927 | if (!device) { 928 | return false; 929 | } 930 | return WebGPUSinglePassDownsampler.supportedReadWriteFormats.has(targetFormat) || (device.features.has('texture-formats-tier2') && WebGPUSinglePassDownsampler.supportedReadWriteFormatsTier2.has(targetFormat)); 931 | } 932 | 933 | private createPipeline(targetFormat: GPUTextureFormat, filterCode: string, numMips: number, scalarType: SPDScalarType): SPDPipeline | undefined { 934 | const device = this.device.deref(); 935 | if (!device) { 936 | return undefined; 937 | } 938 | 939 | const rwSupport = this.supportsReadWrite(targetFormat); 940 | 941 | const mipsBindGroupLayout = device.createBindGroupLayout({ 942 | entries: Array(Math.min(numMips, this.maxMipsPerPass) + 1).fill(0).map((_, i) => { 943 | const entry: GPUBindGroupLayoutEntry = { 944 | binding: i, 945 | visibility: GPUShaderStage.COMPUTE, 946 | }; 947 | if (i === 0) { 948 | entry.texture = { 949 | sampleType: scalarType === SPDScalarType.I32 ? 'sint' : (scalarType === SPDScalarType.U32 ? 'uint' : 'unfilterable-float'), 950 | viewDimension: '2d-array', 951 | multisampled: false, 952 | }; 953 | } else { 954 | entry.storageTexture = { 955 | access: (i === 6 && numMips > 6 && rwSupport) ? 
'read-write' : 'write-only', 956 | format: targetFormat, 957 | viewDimension: '2d-array', 958 | }; 959 | } 960 | return entry; 961 | }) 962 | }); 963 | 964 | return new SPDPipeline( 965 | mipsBindGroupLayout, 966 | device.createComputePipeline({ 967 | compute: { 968 | module: device.createShaderModule({ 969 | code: makeShaderCode(targetFormat, filterCode, Math.min(numMips, this.maxMipsPerPass), scalarType, device.features.has('subgroups') && !this.disableSubgroups, rwSupport), 970 | }), 971 | entryPoint: 'downsample', 972 | }, 973 | layout: device.createPipelineLayout({ 974 | bindGroupLayouts: [ 975 | mipsBindGroupLayout, 976 | numMips > 6 ? (rwSupport ? this.internalResourcesBindGroupLayout12RW! : this.internalResourcesBindGroupLayout12!) : this.internalResourcesBindGroupLayout, 977 | ], 978 | }), 979 | }), 980 | ); 981 | } 982 | 983 | private getOrCreatePipeline(targetFormat: GPUTextureFormat, filterCode: string, numMipsToCreate: number, scalarType: SPDScalarType): SPDPipeline | undefined { 984 | if (!this.pipelines.has(targetFormat)) { 985 | this.pipelines.set(targetFormat, new Map()); 986 | } 987 | if (!this.pipelines.get(targetFormat)?.has(scalarType)) { 988 | this.pipelines.get(targetFormat)?.set(scalarType, new Map()); 989 | } 990 | if (!this.pipelines.get(targetFormat)?.get(scalarType)?.has(filterCode)) { 991 | this.pipelines.get(targetFormat)?.get(scalarType)?.set(filterCode, new Map()); 992 | } 993 | if (!this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.has(numMipsToCreate)) { 994 | const pipelines = this.createPipeline(targetFormat, filterCode, numMipsToCreate, scalarType); 995 | if (pipelines) { 996 | this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.set(numMipsToCreate, pipelines); 997 | } 998 | } 999 | return this.pipelines.get(targetFormat)?.get(scalarType)?.get(filterCode)?.get(numMipsToCreate); 1000 | } 1001 | 1002 | private getOrCreateAtomicCountersBuffer(device: GPUDevice, numArrayLayers: number): GPUBuffer { 
1003 | if (!this.atomicCounters.has(numArrayLayers)) { 1004 | const atomicCountersBuffer = device.createBuffer({ 1005 | size: 4 * numArrayLayers, 1006 | usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, 1007 | }); 1008 | device.queue.writeBuffer(atomicCountersBuffer, 0, new Uint32Array(Array(numArrayLayers).fill(0))); 1009 | this.atomicCounters.set(numArrayLayers, atomicCountersBuffer); 1010 | } 1011 | return this.atomicCounters.get(numArrayLayers)! 1012 | } 1013 | 1014 | private getOrCreateMidMipBuffer(device: GPUDevice, numArrayLayers: number): GPUBuffer { 1015 | if (!this.midMipBuffers.has(numArrayLayers)) { 1016 | this.midMipBuffers.set(numArrayLayers, device.createBuffer({ 1017 | size: 16 * 64 * 64 * numArrayLayers, 1018 | usage: GPUBufferUsage.STORAGE, 1019 | })); 1020 | } 1021 | return this.midMipBuffers.get(numArrayLayers)! 1022 | } 1023 | 1024 | 1025 | private createMetaBindGroup(device: GPUDevice, meta: GPUDownsamplingMeta, halfPrecision: boolean, readWriteSupport: boolean): GPUBindGroup { 1026 | const metaBuffer = device.createBuffer({ 1027 | size: 16, 1028 | usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST 1029 | }); 1030 | device.queue.writeBuffer(metaBuffer, 0, new Uint32Array([ 1031 | ...meta.workgroupOffset, 1032 | meta.numWorkGroups, 1033 | meta.numMips, 1034 | ])); 1035 | if (meta.numMips > 6) { 1036 | const numArrayLayersForPrecision = halfPrecision ? 
Math.ceil(meta.numArrayLayers / 2) : meta.numArrayLayers; 1037 | if (readWriteSupport) { 1038 | return device.createBindGroup({ 1039 | layout: this.internalResourcesBindGroupLayout12RW!, 1040 | entries: [ 1041 | { 1042 | binding: 0, 1043 | resource: { 1044 | buffer: metaBuffer, 1045 | }, 1046 | }, 1047 | { 1048 | binding: 1, 1049 | resource: { 1050 | buffer: this.getOrCreateAtomicCountersBuffer(device, numArrayLayersForPrecision), 1051 | }, 1052 | }, 1053 | ] 1054 | }); 1055 | } else { 1056 | return device.createBindGroup({ 1057 | layout: this.internalResourcesBindGroupLayout12!, 1058 | entries: [ 1059 | { 1060 | binding: 0, 1061 | resource: { 1062 | buffer: metaBuffer, 1063 | }, 1064 | }, 1065 | { 1066 | binding: 1, 1067 | resource: { 1068 | buffer: this.getOrCreateAtomicCountersBuffer(device, numArrayLayersForPrecision), 1069 | }, 1070 | }, 1071 | { 1072 | binding: 2, 1073 | resource: { 1074 | buffer: this.getOrCreateMidMipBuffer(device, numArrayLayersForPrecision), 1075 | }, 1076 | }, 1077 | ] 1078 | }); 1079 | } 1080 | } else { 1081 | return device.createBindGroup({ 1082 | layout: this.internalResourcesBindGroupLayout, 1083 | entries: [{ 1084 | binding: 0, 1085 | resource: { 1086 | buffer: metaBuffer, 1087 | }, 1088 | }] 1089 | }); 1090 | } 1091 | } 1092 | 1093 | preparePass(texture: GPUTexture, target: GPUTexture, filterCode: string, offset: [number, number], size: [number, number], numMipsTotal: number, scalarType: SPDScalarType): SPDPass | undefined { 1094 | const device = this.device.deref(); 1095 | if (!device) { 1096 | return undefined; 1097 | } 1098 | 1099 | const rwSupport = this.supportsReadWrite(target.format); 1100 | 1101 | const passes = []; 1102 | for (let baseArrayLayer = 0; baseArrayLayer < target.depthOrArrayLayers; baseArrayLayer += this.maxArrayLayers) { 1103 | const numArrayLayersThisPass = Math.min(target.depthOrArrayLayers - baseArrayLayer, this.maxArrayLayers); 1104 | for (let baseMip = 0; baseMip < numMipsTotal - 1; baseMip += 
this.maxMipsPerPass) { 1105 | const numMipsThisPass = Math.min(numMipsTotal - 1 - baseMip, this.maxMipsPerPass); 1106 | 1107 | const baseMipOffset = offset.map(o => Math.trunc(o / Math.pow(2, baseMip))); 1108 | const baseMipSize = size.map(s => Math.max(Math.trunc(s / Math.pow(2, baseMip)), 1)); 1109 | const workgroupOffset = baseMipOffset.map(o => Math.trunc(o / 64)) as [number, number]; 1110 | const dispatchDimensions = baseMipOffset.map((o, i) => Math.trunc((o + baseMipSize[i] - 1) / 64) + 1 - workgroupOffset[i]) as [number, number]; 1111 | const numWorkGroups = dispatchDimensions.reduce((product, v) => v * product, 1); 1112 | 1113 | const metaBindGroup = this.createMetaBindGroup( 1114 | device, 1115 | { 1116 | workgroupOffset, 1117 | numWorkGroups, 1118 | numMips: numMipsThisPass, 1119 | numArrayLayers: numArrayLayersThisPass, 1120 | }, 1121 | scalarType === SPDScalarType.F16, 1122 | rwSupport, 1123 | ); 1124 | 1125 | // todo: handle missing pipeline 1126 | const pipeline = this.getOrCreatePipeline(target.format, filterCode, numMipsThisPass, scalarType)!; 1127 | 1128 | const mipViews = Array(numMipsThisPass + 1).fill(0).map((_, i) => { 1129 | if (baseMip === 0 && i === 0) { 1130 | return texture.createView({ 1131 | dimension: '2d-array', 1132 | baseMipLevel: 0, 1133 | mipLevelCount: 1, 1134 | baseArrayLayer, 1135 | arrayLayerCount: numArrayLayersThisPass, 1136 | }); 1137 | } else { 1138 | const mip = baseMip + i; 1139 | return target.createView({ 1140 | dimension: '2d-array', 1141 | baseMipLevel: texture === target ? 
mip : mip - 1, 1142 | mipLevelCount: 1, 1143 | baseArrayLayer, 1144 | arrayLayerCount: numArrayLayersThisPass, 1145 | }); 1146 | } 1147 | }); 1148 | 1149 | const mipsBindGroup = device.createBindGroup({ 1150 | layout: pipeline.mipsLayout, 1151 | entries: mipViews.map((v, i) => { 1152 | return { 1153 | binding: i, 1154 | resource: v, 1155 | }; 1156 | }), 1157 | }); 1158 | passes.push(new SPDPassInner(pipeline.pipelines, [mipsBindGroup, metaBindGroup], [...dispatchDimensions, numArrayLayersThisPass])); 1159 | } 1160 | } 1161 | return new SPDPass(passes, target); 1162 | } 1163 | } 1164 | 1165 | /** 1166 | * Returns the maximum number of mip levels for a given n-dimensional size. 1167 | * @param size The size to compute the maximum number of mip levels for 1168 | * @returns The maximum number of mip levels for the given size 1169 | */ 1170 | export function maxMipLevelCount(...size: number[]): number { 1171 | return 1 + Math.trunc(Math.log2(Math.max(0, ...size))); 1172 | } 1173 | 1174 | /** 1175 | * A helper class for downsampling 2D {@link GPUTexture} (& arrays) using as few passes as possible on a {@link GPUDevice} depending on its {@link GPUSupportedLimits}. 1176 | * Up to 12 mip levels can be generated within a single pass, if {@link GPUSupportedLimits.maxStorageTexturesPerShaderStage} supports it. 1177 | */ 1178 | export class WebGPUSinglePassDownsampler { 1179 | private filters: Map; 1180 | private devicePipelines: WeakMap; 1181 | 1182 | /** 1183 | * The set of formats supported by WebGPU SPD. 
1184 | */ 1185 | static readonly supportedFormats: Set = new Set([ 1186 | 'rgba8unorm', 1187 | 'rgba8snorm', 1188 | 'rgba8uint', 1189 | 'rgba8sint', 1190 | 'rgba16uint', 1191 | 'rgba16sint', 1192 | 'rgba16float', 1193 | 'r32uint', 1194 | 'r32sint', 1195 | 'r32float', 1196 | 'rg32uint', 1197 | 'rg32sint', 1198 | 'rg32float', 1199 | 'rgba32uint', 1200 | 'rgba32sint', 1201 | 'rgba32float', 1202 | ]); 1203 | 1204 | /** 1205 | * The set of additionally supported formats supported if the feature 'bgra8unorm-storage' is enabled. 1206 | */ 1207 | static readonly supportedFormatsBgra8UnormStorage: Set = new Set([ 1208 | 'bgra8unorm', 1209 | ]); 1210 | 1211 | /** 1212 | * The set of additionally supported formats if the feature 'texture-formats-tier1' is enabled. 1213 | */ 1214 | static readonly supportedFormatsTier1: Set = new Set([ 1215 | 'r8unorm', 1216 | 'r8snorm', 1217 | 'r8uint', 1218 | 'r8sint', 1219 | 'rg8unorm', 1220 | 'rg8snorm', 1221 | 'rg8uint', 1222 | 'rg8sint', 1223 | 'r16unorm', 1224 | 'r16snorm', 1225 | 'r16uint', 1226 | 'r16sint', 1227 | 'r16float', 1228 | 'rg16unorm', 1229 | 'rg16snorm', 1230 | 'rg16uint', 1231 | 'rg16sint', 1232 | 'rg16float', 1233 | 'rgba16unorm', 1234 | 'rgba16snorm', 1235 | 'rgb10a2uint', 1236 | 'rgb10a2unorm', 1237 | 'rg11b10ufloat', 1238 | ]); 1239 | 1240 | /** 1241 | * The set of formats that support read-write access. 1242 | */ 1243 | static readonly supportedReadWriteFormats: Set = new Set([ 1244 | 'r32uint', 1245 | 'r32sint', 1246 | 'r32float', 1247 | ]); 1248 | 1249 | /** 1250 | * The set of formats that support read-write access if the feature 'texture-formats-tier2' is enabled. 
1251 | */ 1252 | static readonly supportedReadWriteFormatsTier2: Set = new Set([ 1253 | 'r8unorm', 1254 | 'r8uint', 1255 | 'r8sint', 1256 | 'rgba8unorm', 1257 | 'rgba8uint', 1258 | 'rgba8sint', 1259 | 'r16uint', 1260 | 'r16sint', 1261 | 'r16float', 1262 | 'rgba16uint', 1263 | 'rgba16sint', 1264 | 'rgba16float', 1265 | 'rgba32uint', 1266 | 'rgba32sint', 1267 | 'rgba32float', 1268 | ]); 1269 | 1270 | /** 1271 | * Sets the preferred device limits for {@link WebGPUSinglePassDownsampler} in a given record of limits. 1272 | * Existing preferred device limits are either increased or left untouched. 1273 | * If {@link limits} is undefined, creates a new record of preferred device limits for {@link WebGPUSinglePassDownsampler}. 1274 | * The result can be used to set {@link GPUDeviceDescriptor.requiredLimits} when requesting a device. 1275 | * @param limits A record of device limits set to update with the preferred limits for {@link WebGPUSinglePassDownsampler} 1276 | * @param adapter If this is set, the preferred limits that are set by this function will be clamped to {@link GPUAdapter.limits}. 1277 | * @returns The updated or created set of device limits with all preferred limits for {@link WebGPUSinglePassDownsampler} set 1278 | */ 1279 | static setPreferredLimits(limits?: Record, adapter?: GPUAdapter): Record { 1280 | if (!limits) { 1281 | limits = {}; 1282 | } 1283 | const maxStorageTexturesPerShaderStage = Math.min(adapter?.limits.maxStorageTexturesPerShaderStage ?? 6, 6); 1284 | limits.maxStorageTexturesPerShaderStage = Math.max(limits.maxStorageTexturesPerShaderStage ?? maxStorageTexturesPerShaderStage, maxStorageTexturesPerShaderStage); 1285 | return limits; 1286 | } 1287 | 1288 | /** 1289 | * Creates a new {@link WebGPUSinglePassDownsampler}. 1290 | * On its own, {@link WebGPUSinglePassDownsampler} does not allocate any GPU resources. 1291 | * Optionally, prepare GPU resources for a given {@link SPDPrepareDeviceDescriptor}. 
1292 | * @param prepareDescriptor An optional descriptor for preparing GPU resources 1293 | * @see WebGPUSinglePassDownsampler.prepareDeviceResources 1294 | */ 1295 | constructor(prepareDescriptor?: SPDPrepareDeviceDescriptor) { 1296 | this.filters = new Map([ 1297 | [SPDFilters.Average, SPD_FILTER_AVERAGE], 1298 | [SPDFilters.Min, SPD_FILTER_MIN], 1299 | [SPDFilters.Max, SPD_FILTER_MAX], 1300 | [SPDFilters.MinMax, SPD_FILTER_MINMAX], 1301 | ]); 1302 | this.devicePipelines = new Map(); 1303 | 1304 | if (prepareDescriptor) { 1305 | this.prepareDeviceResources(prepareDescriptor); 1306 | } 1307 | } 1308 | 1309 | /** 1310 | * Prepares GPU resources required by {@link WebGPUSinglePassDownsampler} to downsample textures for a given {@link SPDPrepareDeviceDescriptor}. 1311 | * @param prepareDescriptor a descriptor for preparing GPU resources 1312 | */ 1313 | prepareDeviceResources(prepareDescriptor: SPDPrepareDeviceDescriptor) { 1314 | this.getOrCreateDevicePipelines(prepareDescriptor.device, prepareDescriptor.maxArrayLayersPerPass, prepareDescriptor.maxMipsPerPass, prepareDescriptor.disableSubgroups)?.preparePipelines(prepareDescriptor?.formats?.map(format => { 1315 | return { 1316 | ...format, 1317 | filters: new Set(Array.from(format.filters ?? []).map(filter => this.filters.get(filter) ?? SPD_FILTER_AVERAGE)), 1318 | }; 1319 | })); 1320 | } 1321 | 1322 | private getOrCreateDevicePipelines(device: GPUDevice, maxArrayLayers?: number, maxMipsPerPass?: number, disableSubgroups?: boolean): DevicePipelines | undefined { 1323 | if (!this.devicePipelines.has(device)) { 1324 | this.devicePipelines.set(device, new DevicePipelines(device, maxArrayLayers, maxMipsPerPass, disableSubgroups)); 1325 | } 1326 | return this.devicePipelines.get(device); 1327 | } 1328 | 1329 | /** 1330 | * Deregisters all resources stored for a given device. 
1331 | * @param device The device resources should be deregistered for 1332 | */ 1333 | deregisterDevice(device: GPUDevice) { 1334 | this.devicePipelines.delete(device); 1335 | } 1336 | 1337 | /** 1338 | * Registers a new downsampling filter operation that can be injected into the downsampling shader for new pipelines. 1339 | * 1340 | * The given WGSL code must (at least) specify a function to reduce four values into one with the following name and signature: 1341 | * 1342 | * `spd_reduce_4(v0: vec4, v1: vec4, v2: vec4, v3: vec4) -> vec4` 1343 | * 1344 | * @param name The unique name of the filter operation 1345 | * @param wgsl The WGSL code to inject into the downsampling shader as the filter operation 1346 | */ 1347 | registerFilter(name: string, wgsl: string) { 1348 | if (this.filters.has(name)) { 1349 | console.warn(`[WebGPUSinglePassDownsampler::registerFilter]: overriding existing filter '${name}'. Previously generated pipelines are not affected.`); 1350 | } 1351 | this.filters.set(name, wgsl); 1352 | } 1353 | 1354 | /** 1355 | * Prepares a pass to downsample a 2d texture / 2d texture array. 1356 | * The produced {@link SPDPass} can be used multiple times to repeatedly downsampling a texture, e.g., for downsampling the depth buffer each frame. 1357 | * For one-time use, {@link WebGPUSinglePassDownsampler.generateMipmaps} can be used instead. 1358 | * 1359 | * By default, the texture is downsampled `texture.mipLevelCount - 1` times using an averaging filter, i.e., 4 pixel values from the parent level are averaged to produce a single pixel in the current mip level. 1360 | * This behavior can be configured using the optional {@link config} parameter. 1361 | * For example, instead of writing the mip levels into the input texture itself, a separate target texture can be specified using {@link SPDPassConfig.target}. 
1362 | * Other configuration options include using a different (possibly custom) filter, only downsampling a subregion of the input texture, and limiting the number of mip levels to generate, e.g., if a min-max pyramid is only needed up to a certain tile resolution. 1363 | * If the given filter does not exist, an averaging filter will be used as a fallback. 1364 | * The image region to downsample and the number of mip levels to generate are clamped to the input texture's size, and the output texture's `mipLevelCount`. 1365 | * 1366 | * Depending on the number of mip levels to generate and the device's `maxStorageTexturesPerShaderStage` limit, the {@link SPDPass} will internally consist of multiple passes, each generating up to `min(maxStorageTexturesPerShaderStage, 12)` mip levels. 1367 | * 1368 | * @param device The device the {@link SPDPass} should be prepared for 1369 | * @param texture The texture that is to be processed by the {@link SPDPass}. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. Must support {@link GPUTextureUsage.TEXTURE_BINDING}, and, if no other target is given, {@link GPUTextureUsage.STORAGE_BINDING}. 1370 | * @param config The config for the {@link SPDPass} 1371 | * @returns The prepared {@link SPDPass} or undefined if preparation failed or if no mipmaps would be generated. 1372 | * @throws If the {@link GPUTextureFormat} of {@link SPDPassConfig.target} is not supported (does not support {@link GPUStorageTextureAccess:"write-only"} on the given {@link device}). 1373 | * @throws If the size of {@link SPDPassConfig.target} is too small to store the first mip level generated for {@link texture} 1374 | * @throws If {@link texture} or {@link SPDPassConfig.target} is not a 2d texture. 
1375 | * @see WebGPUSinglePassDownsampler.generateMipmaps 1376 | * @see WebGPUSinglePassDownsampler.registerFilter 1377 | * @see WebGPUSinglePassDownsampler.setPreferredLimits 1378 | */ 1379 | preparePass(device: GPUDevice, texture: GPUTexture, config?: SPDPassConfig): SPDPass | undefined { 1380 | const target = config?.target ?? texture; 1381 | const filter = config?.filter ?? SPDFilters.Average; 1382 | const offset = (config?.offset ?? [0, 0]).map((o, d) => Math.max(0, Math.min(o, (d === 0 ? texture.width : texture.height) - 1))) as [number, number]; 1383 | const size = (config?.size ?? [texture.width, texture.height]).map((s, d) => Math.max(0, Math.min(s, (d === 0 ? texture.width : texture.height) - offset[d]))) as [number, number]; 1384 | const numMips = Math.min(Math.max(config?.numMips ?? target.mipLevelCount, 0), maxMipLevelCount(...size)); 1385 | 1386 | if (numMips < 2) { 1387 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: no mips to create (numMips = ${numMips})`); 1388 | return undefined; 1389 | } 1390 | if (!(WebGPUSinglePassDownsampler.supportedFormats.has(target.format) || 1391 | (device.features.has('bgra8unorm-storage') && WebGPUSinglePassDownsampler.supportedFormatsBgra8UnormStorage.has(target.format)) || 1392 | ((device.features.has('texture-formats-tier1') || device.features.has('texture-formats-tier2')) && WebGPUSinglePassDownsampler.supportedFormatsTier1.has(target.format)))) 1393 | { 1394 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: format ${target.format} not supported. 
(Supported formats: ${WebGPUSinglePassDownsampler.supportedFormats}, and ${WebGPUSinglePassDownsampler.supportedFormatsBgra8UnormStorage} (if 'bgra8unorm-storage' is enabled), and ${WebGPUSinglePassDownsampler.supportedFormatsTier1} (if 'texture-formats-tier1' is enabled))`); 1395 | } 1396 | if (target.format === 'bgra8unorm' && !device.features.has('bgra8unorm-storage')) { 1397 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: format ${target.format} not supported without feature 'bgra8unorm-storage' enabled`); 1398 | } 1399 | if (target.width < Math.max(1, Math.floor(size[0] / 2)) || target.height < Math.max(1, Math.floor(size[1] / 2))) { 1400 | throw new Error(`[WebGPUSinglePassDownsampler::prepare]: target too small (${[target.width, target.height]}) for input size ${size}`); 1401 | } 1402 | if (target.dimension !== '2d' || texture.dimension !== '2d') { 1403 | throw new Error('[WebGPUSinglePassDownsampler::prepare]: texture or target is not a 2d texture'); 1404 | } 1405 | if (!this.filters.has(filter)) { 1406 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: unknown filter ${filter}, falling back to average`); 1407 | } 1408 | if (filter === SPD_FILTER_MINMAX && target.format.includes('r32')) { 1409 | console.warn(`[WebGPUSinglePassDownsampler::prepare]: filter ${filter} makes no sense for one-component target format ${target.format}`); 1410 | } 1411 | const filterCode = this.filters.get(filter) ?? SPD_FILTER_AVERAGE; 1412 | const scalarType = sanitizeScalarType(device, target.format, config?.halfPrecision ?? false); 1413 | 1414 | return this.getOrCreateDevicePipelines(device)?.preparePass(texture, target, filterCode, offset, size, numMips, scalarType); 1415 | } 1416 | 1417 | /** 1418 | * Generates mipmaps for the given texture. 1419 | * For textures that will be downsampled more than once, consider generating a {@link SPDPass} using {@link WebGPUSinglePassDownsampler.preparePass} and calling its {@link SPDPass.encode} method. 
1420 | * This way, allocated GPU resources for downsampling the texture can be reused. 1421 | * @param device The device to use for downsampling the texture 1422 | * @param texture The texture to generate mipmaps for. Must support generating a {@link GPUTextureView} with {@link GPUTextureViewDimension:"2d-array"}. 1423 | * @param config The config for mipmap generation 1424 | * @returns True if mipmaps were generated, false otherwise 1425 | * @throws If {@link WebGPUSinglePassDownsampler.preparePass} threw an error. 1426 | * @see WebGPUSinglePassDownsampler.preparePass 1427 | */ 1428 | generateMipmaps(device: GPUDevice, texture: GPUTexture, config?: SPDPassConfig): boolean { 1429 | const pass = this.preparePass(device, texture, config); 1430 | if (!pass) { 1431 | return false; 1432 | } else { 1433 | const commandEncoder = device.createCommandEncoder(); 1434 | pass?.encode(commandEncoder.beginComputePass()).end(); 1435 | device.queue.submit([commandEncoder.finish()]); 1436 | return true; 1437 | } 1438 | } 1439 | } 1440 | 1441 | --------------------------------------------------------------------------------