├── .gitignore ├── tests ├── images │ ├── brick.png │ ├── blender.png │ ├── marble.png │ ├── brick-alpha.png │ └── marble-alpha.png ├── multi_tasks.rs ├── common │ └── mod.rs └── metrics.rs ├── rustfmt.toml ├── compressor ├── Cargo.toml └── src │ └── main.rs ├── README.md ├── LICENSE ├── Cargo.toml ├── CHANGELOG.md └── src ├── lib.rs ├── settings.rs ├── encode ├── bc1_to_5.rs ├── common.rs └── bc7.rs ├── encode.rs ├── decode.rs ├── shader └── bc1_to_5.wgsl └── block_compressor.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea 2 | /target 3 | Cargo.lock 4 | *.png 5 | *.dds 6 | -------------------------------------------------------------------------------- /tests/images/brick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasenbanck/block_compression/HEAD/tests/images/brick.png -------------------------------------------------------------------------------- /tests/images/blender.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasenbanck/block_compression/HEAD/tests/images/blender.png -------------------------------------------------------------------------------- /tests/images/marble.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasenbanck/block_compression/HEAD/tests/images/marble.png -------------------------------------------------------------------------------- /tests/images/brick-alpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasenbanck/block_compression/HEAD/tests/images/brick-alpha.png -------------------------------------------------------------------------------- /tests/images/marble-alpha.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hasenbanck/block_compression/HEAD/tests/images/marble-alpha.png -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | # These don't change the "standard" how Rust programs normaly look, but make things more consistent. 2 | format_code_in_doc_comments = true 3 | hex_literal_case = "Upper" 4 | imports_granularity = "Crate" 5 | group_imports = "StdExternalCrate" 6 | use_try_shorthand = true 7 | -------------------------------------------------------------------------------- /compressor/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "compressor" 3 | version = "0.1.0" 4 | publish = false 5 | authors.workspace = true 6 | edition.workspace = true 7 | rust-version.workspace = true 8 | 9 | [dependencies] 10 | block_compression = { path = ".." } 11 | bytemuck = { workspace = true } 12 | ddsfile = { workspace = true } 13 | image = { workspace = true, features = ["bmp", "png", "tga"] } 14 | pollster = { workspace = true } 15 | wgpu = { workspace = true, features = ["static-dxc"] } 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # block_compression 2 | 3 | [![Crate](https://img.shields.io/crates/v/block_compression.svg)](https://crates.io/crates/block_compression) 4 | [![API](https://docs.rs/block_compression/badge.svg)](https://docs.rs/block_compression) 5 | 6 | Texture block compression using WGPU compute shader. 7 | The shaders are a port of Intel's ISPC Texture Compressor's kernel to WGSL compute shader. 
8 | 9 | Tested with the following backends: 10 | 11 | * DX12 12 | * Metal 13 | * Vulkan 14 | 15 | ## Supported block compressions 16 | 17 | Currently supported block compressions are: 18 | 19 | * BC1 20 | * BC2 21 | * BC3 22 | * BC4 23 | * BC5 24 | * BC6H 25 | * BC7 26 | 27 | ## DX12 pipeline creation 28 | 29 | The pipeline creation for BC7 and especially BC6H takes a long time under DX12. The DXC compiler seems to take a very 30 | long time to compile the shader. For this reason we moved them behind features, which are included in the default 31 | features. 32 | 33 | ## License 34 | 35 | This project is licensed under the [MIT](LICENSE) license. 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025, Nils Hasenbanck 2 | Copyright (c) 2016-2024, Intel Corporation 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so, subject to the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of 10 | the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 13 | THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 15 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 16 | SOFTWARE. 17 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace.package] 2 | authors = ["Nils Hasenbanck "] 3 | edition = "2021" 4 | rust-version = "1.80" 5 | 6 | [package] 7 | name = "block_compression" 8 | description = "Texture block compression using WGPU compute shader" 9 | version = "0.7.0" 10 | license = "MIT" 11 | documentation = "https://docs.rs/block_compression" 12 | repository = "https://github.com/hasenbanck/block_compression" 13 | authors.workspace = true 14 | edition.workspace = true 15 | rust-version.workspace = true 16 | keywords = ["texture", "image", "compress", "wgpu"] 17 | categories = ["rendering", "rendering::engine"] 18 | exclude = ["tests/images/"] 19 | 20 | [badges] 21 | maintenance = { status = "actively-developed" } 22 | 23 | [features] 24 | default = ["bc15", "bc6h", "bc7", "wgpu"] 25 | bc15 = [] 26 | bc6h = ["half"] 27 | bc7 = [] 28 | 29 | [dependencies] 30 | bytemuck = { workspace = true, features = ["derive"] } 31 | half = { workspace = true, optional = true, features = ["bytemuck"] } 32 | wgpu = { workspace = true, optional = true } 33 | 34 | [dev-dependencies] 35 | image = { workspace = true, features = ["png"] } 36 | pollster = { workspace = true } 37 | wgpu = { workspace = true, features = ["static-dxc"] } 38 | 39 | [workspace.dependencies] 40 | bytemuck = "1" 41 | ddsfile = "0.5" 42 | half = "2" 43 | image = { version = "0.25", default-features = false } 44 | pollster = "0.4" 45 | wgpu = "27" 46 | 47 | [package.metadata.docs.rs] 48 | features = ["bc6h", "bc7", "half"] 49 | rustdoc-args = ["--cfg", "docsrs"] 50 | 51 | [workspace] 52 
| members = [ 53 | "compressor", 54 | ] 55 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.7.0] - 2025-10-02 9 | 10 | ### Updated 11 | 12 | - Target WGPU 27 13 | 14 | ## [0.6.0] - 2025-07-23 15 | 16 | ### Updated 17 | 18 | - Target WGPU 26 19 | 20 | ## [0.5.0] - 2025-06-02 21 | 22 | ### Changed 23 | 24 | - `GpuBlockCompressor::new()` takes the WGPU device and queue directly without an Arc wrapped around it. WGPU 25 25 | made the main structures clonable, since they are internally reference counted, so it's not needed anymore to wrap 26 | them in a smart pointer anymore. 27 | 28 | ### Fixed 29 | 30 | - Fix an issue with AMD integrated GPU's where WGPU's forced loop bounding in shaders made running the BC7 shader 31 | impossible. 32 | 33 | ## [0.4.0] - 2025-04-11 34 | 35 | ### Updated 36 | 37 | - Target WGPU 25 38 | 39 | ## [0.3.0] - 2025-02-21 40 | 41 | ### Updated 42 | 43 | - Allow the GPU compressor to use row based offsets into the texture to 44 | allow submitting smaller chunks of work. 45 | 46 | ## [0.2.1] - 2025-02-17 47 | 48 | ### Updated 49 | 50 | - Fix BC6H encoding for black pixels 51 | - Use adapter limits in the example compressor 52 | - Improve PSNR output CPU of when compared to the GPU versions of BC6H / BC7 53 | 54 | ## [0.2.0] - 2025-01-22 55 | 56 | ### Added 57 | 58 | - Provide more feature flags for optional features 59 | - Implemented CPU based BC6H encoding 60 | - Implemented CPU based BC7 encoding 61 | 62 | ## [0.1.1] - 2025-01-20 63 | 64 | ### Updated 65 | 66 | - Fix compilation with no default features. 
67 | 68 | ## [0.1.0] - 2025-01-20 69 | 70 | ### Added 71 | 72 | - Initial release. 73 | -------------------------------------------------------------------------------- /tests/multi_tasks.rs: -------------------------------------------------------------------------------- 1 | use block_compression::*; 2 | use wgpu::{CommandEncoderDescriptor, ComputePassDescriptor, TextureViewDescriptor}; 3 | 4 | use crate::common::{ 5 | create_blocks_buffer, create_wgpu_resources, download_blocks_data, 6 | read_image_and_create_texture, BRICK_FILE_PATH, MARBLE_FILE_PATH, 7 | }; 8 | 9 | mod common; 10 | 11 | fn test_multi_task_compression(variant: CompressionVariant) { 12 | let (device, queue) = create_wgpu_resources(); 13 | let mut block_compressor = GpuBlockCompressor::new(device.clone(), queue.clone()); 14 | 15 | let (brick_texture, _) = 16 | read_image_and_create_texture(&device, &queue, BRICK_FILE_PATH, variant); 17 | let (marble_texture, _) = 18 | read_image_and_create_texture(&device, &queue, MARBLE_FILE_PATH, variant); 19 | 20 | let brick_height = brick_texture.height(); 21 | let marble_height = marble_texture.height(); 22 | 23 | // Split heights in half (rounded to multiple of 4) 24 | let brick_half_height = (brick_height / 2) & !3; 25 | let marble_half_height = (marble_height / 2) & !3; 26 | 27 | let bricks_half_size = variant.blocks_byte_size(brick_texture.width(), brick_half_height); 28 | let marble_half_size = variant.blocks_byte_size(marble_texture.width(), marble_half_height); 29 | let total_size = (bricks_half_size * 2) + (marble_half_size * 2); 30 | 31 | let blocks = create_blocks_buffer(&device, total_size as u64); 32 | 33 | block_compressor.add_compression_task( 34 | variant, 35 | &brick_texture.create_view(&TextureViewDescriptor::default()), 36 | brick_texture.width(), 37 | brick_half_height, 38 | &blocks, 39 | None, 40 | None, 41 | ); 42 | block_compressor.add_compression_task( 43 | variant, 44 | &brick_texture.create_view(&TextureViewDescriptor::default()), 45 
| brick_texture.width(), 46 | brick_half_height, 47 | &blocks, 48 | Some(brick_half_height), 49 | Some(bricks_half_size as u32), 50 | ); 51 | 52 | block_compressor.add_compression_task( 53 | variant, 54 | &marble_texture.create_view(&TextureViewDescriptor::default()), 55 | marble_texture.width(), 56 | marble_half_height, 57 | &blocks, 58 | None, 59 | Some((bricks_half_size * 2) as u32), 60 | ); 61 | block_compressor.add_compression_task( 62 | variant, 63 | &marble_texture.create_view(&TextureViewDescriptor::default()), 64 | marble_texture.width(), 65 | marble_half_height, 66 | &blocks, 67 | Some(marble_half_height), 68 | Some((bricks_half_size * 2 + marble_half_size) as u32), 69 | ); 70 | 71 | let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor { 72 | label: Some("command encoder"), 73 | }); 74 | 75 | { 76 | let mut pass = encoder.begin_compute_pass(&ComputePassDescriptor { 77 | label: Some("compute pass"), 78 | timestamp_writes: None, 79 | }); 80 | 81 | block_compressor.compress(&mut pass); 82 | } 83 | 84 | queue.submit([encoder.finish()]); 85 | 86 | let blocks_data = download_blocks_data(&device, &queue, blocks); 87 | 88 | let brick_first_half_not_empty = !blocks_data[..bricks_half_size] 89 | .iter() 90 | .all(|&data| data == 0); 91 | let brick_second_half_not_empty = !blocks_data[bricks_half_size..bricks_half_size * 2] 92 | .iter() 93 | .all(|&data| data == 0); 94 | let marble_first_half_not_empty = !blocks_data 95 | [bricks_half_size * 2..bricks_half_size * 2 + marble_half_size] 96 | .iter() 97 | .all(|&data| data == 0); 98 | let marble_second_half_not_empty = !blocks_data[bricks_half_size * 2 + marble_half_size..] 
99 | .iter() 100 | .all(|&data| data == 0); 101 | 102 | assert!(brick_first_half_not_empty, "Brick first half is empty"); 103 | assert!(brick_second_half_not_empty, "Brick second half is empty"); 104 | assert!(marble_first_half_not_empty, "Marble first half is empty"); 105 | assert!(marble_second_half_not_empty, "Marble second half is empty"); 106 | } 107 | 108 | #[test] 109 | fn multi_task_compression_bc1() { 110 | test_multi_task_compression(CompressionVariant::BC1); 111 | } 112 | 113 | #[test] 114 | fn multi_task_compression_bc2() { 115 | test_multi_task_compression(CompressionVariant::BC2); 116 | } 117 | 118 | #[test] 119 | fn multi_task_compression_bc3() { 120 | test_multi_task_compression(CompressionVariant::BC3); 121 | } 122 | 123 | #[test] 124 | fn multi_task_compression_bc4() { 125 | test_multi_task_compression(CompressionVariant::BC4); 126 | } 127 | 128 | #[test] 129 | fn multi_task_compression_bc5() { 130 | test_multi_task_compression(CompressionVariant::BC5); 131 | } 132 | 133 | #[test] 134 | fn multi_task_compression_bc6h() { 135 | test_multi_task_compression(CompressionVariant::BC6H(BC6HSettings::very_fast())); 136 | } 137 | 138 | #[test] 139 | fn multi_task_compression_bc7() { 140 | test_multi_task_compression(CompressionVariant::BC7(BC7Settings::opaque_ultra_fast())); 141 | } 142 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # block_compression 2 | //! 3 | //! Texture block compression using WGPU compute shader. 4 | //! The shaders are a port of Intel's ISPC Texture Compressor's kernel to WGSL compute shader. 5 | //! 6 | //! Tested with the following backends: 7 | //! 8 | //! * DX12 9 | //! * Metal 10 | //! * Vulkan 11 | //! 12 | //! ## DX12 pipeline creation 13 | //! 14 | //! The pipeline creation for BC7 and especially BC6H takes a long time under DX12. The DXC compiler 15 | //! 
seems to take a very long time to compile the shader. For this reason we moved them behind 16 | //! features, which are included in the default features. 17 | //! 18 | //! ## Supported block compressions 19 | //! 20 | //! Currently supported block compressions are: 21 | //! 22 | //! * BC1 23 | //! * BC2 24 | //! * BC3 25 | //! * BC4 26 | //! * BC5 27 | //! * BC6H 28 | //! * BC7 29 | 30 | #![cfg_attr(docsrs, feature(doc_cfg))] 31 | 32 | #[cfg(all( 33 | feature = "wgpu", 34 | any(feature = "bc15", feature = "bc6h", feature = "bc7") 35 | ))] 36 | mod block_compressor; 37 | pub mod decode; 38 | pub mod encode; 39 | mod settings; 40 | 41 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 42 | use std::hash::{Hash, Hasher}; 43 | 44 | #[cfg(all( 45 | feature = "wgpu", 46 | any(feature = "bc15", feature = "bc6h", feature = "bc7") 47 | ))] 48 | #[cfg_attr( 49 | docsrs, 50 | doc(cfg(all( 51 | feature = "wgpu", 52 | any(feature = "bc15", feature = "bc6h", feature = "bc7") 53 | ))) 54 | )] 55 | pub use block_compressor::GpuBlockCompressor; 56 | pub use bytemuck; 57 | #[cfg(feature = "bc6h")] 58 | #[cfg_attr(docsrs, doc(cfg(feature = "bc6h")))] 59 | pub use half; 60 | #[cfg(feature = "bc6h")] 61 | #[cfg_attr(docsrs, doc(cfg(feature = "bc6h")))] 62 | pub use settings::BC6HSettings; 63 | #[cfg(feature = "bc7")] 64 | #[cfg_attr(docsrs, doc(cfg(feature = "bc7")))] 65 | pub use settings::BC7Settings; 66 | 67 | /// Block compression variants supported by this crate. 
68 | #[derive(Copy, Clone, Debug)] 69 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 70 | #[cfg_attr( 71 | docsrs, 72 | doc(cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))) 73 | )] 74 | pub enum CompressionVariant { 75 | #[cfg(feature = "bc15")] 76 | #[cfg_attr(docsrs, doc(cfg(feature = "bc15")))] 77 | /// BC1 compression (RGB) 78 | BC1, 79 | #[cfg(feature = "bc15")] 80 | #[cfg_attr(docsrs, doc(cfg(feature = "bc15")))] 81 | /// BC2 compression with sharp alpha (RGBA) 82 | BC2, 83 | #[cfg(feature = "bc15")] 84 | #[cfg_attr(docsrs, doc(cfg(feature = "bc15")))] 85 | /// BC3 compression with smooth alpha (RGBA) 86 | BC3, 87 | #[cfg(feature = "bc15")] 88 | #[cfg_attr(docsrs, doc(cfg(feature = "bc15")))] 89 | /// BC4 compression (R) 90 | BC4, 91 | #[cfg(feature = "bc15")] 92 | #[cfg_attr(docsrs, doc(cfg(feature = "bc15")))] 93 | /// BC5 compression (RG) 94 | BC5, 95 | #[cfg(feature = "bc6h")] 96 | #[cfg_attr(docsrs, doc(cfg(feature = "bc6h")))] 97 | /// BC6H compression (RGB) 98 | BC6H(BC6HSettings), 99 | #[cfg(feature = "bc7")] 100 | #[cfg_attr(docsrs, doc(cfg(feature = "bc7")))] 101 | /// BC7 compression with smooth alpha (RGBA) 102 | BC7(BC7Settings), 103 | } 104 | 105 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 106 | impl PartialEq for CompressionVariant { 107 | fn eq(&self, other: &Self) -> bool { 108 | std::mem::discriminant(self) == std::mem::discriminant(other) 109 | } 110 | } 111 | 112 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 113 | impl Eq for CompressionVariant {} 114 | 115 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 116 | impl Hash for CompressionVariant { 117 | fn hash(&self, state: &mut H) { 118 | std::mem::discriminant(self).hash(state); 119 | } 120 | } 121 | 122 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 123 | impl CompressionVariant { 124 | /// Returns the bytes per row for the given width. 
125 | /// 126 | /// The width is used to calculate how many blocks are needed per row, 127 | /// which is then multiplied by the block size. 128 | /// Width is rounded up to the nearest multiple of 4. 129 | pub const fn bytes_per_row(self, width: u32) -> u32 { 130 | let blocks_per_row = width.div_ceil(4); 131 | blocks_per_row * self.block_byte_size() 132 | } 133 | 134 | /// Returns the byte size required for storing compressed blocks for the given dimensions. 135 | /// 136 | /// The size is calculated based on the block compression format and rounded up dimensions. 137 | /// Width and height are rounded up to the nearest multiple of 4. 138 | pub const fn blocks_byte_size(self, width: u32, height: u32) -> usize { 139 | let block_width = (width as usize).div_ceil(4); 140 | let block_height = (height as usize).div_ceil(4); 141 | let block_count = block_width * block_height; 142 | let block_size = self.block_byte_size() as usize; 143 | block_count * block_size 144 | } 145 | 146 | const fn block_byte_size(self) -> u32 { 147 | match self { 148 | #[cfg(feature = "bc15")] 149 | Self::BC1 | Self::BC4 => 8, 150 | #[cfg(feature = "bc15")] 151 | Self::BC2 | Self::BC3 | Self::BC5 => 16, 152 | #[cfg(feature = "bc6h")] 153 | Self::BC6H(..) => 16, 154 | #[cfg(feature = "bc7")] 155 | Self::BC7(..) => 16, 156 | } 157 | } 158 | 159 | #[cfg(feature = "wgpu")] 160 | const fn name(self) -> &'static str { 161 | match self { 162 | #[cfg(feature = "bc15")] 163 | Self::BC1 => "bc1", 164 | #[cfg(feature = "bc15")] 165 | Self::BC2 => "bc2", 166 | #[cfg(feature = "bc15")] 167 | Self::BC3 => "bc3", 168 | #[cfg(feature = "bc15")] 169 | Self::BC4 => "bc4", 170 | #[cfg(feature = "bc15")] 171 | Self::BC5 => "bc5", 172 | #[cfg(feature = "bc6h")] 173 | Self::BC6H(..) => "bc6h", 174 | #[cfg(feature = "bc7")] 175 | Self::BC7(..) 
=> "bc7", 176 | } 177 | } 178 | 179 | #[cfg(feature = "wgpu")] 180 | const fn entry_point(self) -> &'static str { 181 | match self { 182 | #[cfg(feature = "bc15")] 183 | Self::BC1 => "compress_bc1", 184 | #[cfg(feature = "bc15")] 185 | Self::BC2 => "compress_bc2", 186 | #[cfg(feature = "bc15")] 187 | Self::BC3 => "compress_bc3", 188 | #[cfg(feature = "bc15")] 189 | Self::BC4 => "compress_bc4", 190 | #[cfg(feature = "bc15")] 191 | Self::BC5 => "compress_bc5", 192 | #[cfg(feature = "bc6h")] 193 | Self::BC6H(..) => "compress_bc6h", 194 | #[cfg(feature = "bc7")] 195 | Self::BC7(..) => "compress_bc7", 196 | } 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /tests/common/mod.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | sync::{Arc, LazyLock}, 3 | time::Duration, 4 | }; 5 | 6 | use block_compression::CompressionVariant; 7 | use half::f16; 8 | use image::ImageReader; 9 | use pollster::block_on; 10 | use wgpu::{ 11 | util::{DeviceExt, TextureDataOrder}, 12 | wgt::{Dx12SwapchainKind, Dx12UseFrameLatencyWaitableObject}, 13 | BackendOptions, Backends, Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, 14 | Device, DeviceDescriptor, Dx12BackendOptions, Dx12Compiler, Error, ExperimentalFeatures, 15 | Extent3d, Features, Instance, InstanceDescriptor, InstanceFlags, Limits, MapMode, MemoryHints, 16 | PollType, PowerPreference, Queue, Texture, TextureDescriptor, TextureDimension, TextureFormat, 17 | TextureUsages, Trace, 18 | }; 19 | 20 | #[inline] 21 | pub fn srgb_to_linear(srgb: u8) -> f64 { 22 | let v = (srgb as f64) / 255.0; 23 | if v <= 0.04045 { 24 | v / 12.92 25 | } else { 26 | ((v + 0.055) / 1.055).powf(2.4) 27 | } 28 | } 29 | 30 | pub const BRICK_FILE_PATH: &str = "tests/images/brick.png"; 31 | pub const MARBLE_FILE_PATH: &str = "tests/images/marble.png"; 32 | 33 | pub fn create_wgpu_resources() -> (Device, Queue) { 34 | static CACHE: 
LazyLock<(Device, Queue)> = LazyLock::new(|| { 35 | let instance = Instance::new(&InstanceDescriptor { 36 | backends: Backends::from_env().unwrap_or_default(), 37 | flags: InstanceFlags::from_build_config().with_env(), 38 | memory_budget_thresholds: Default::default(), 39 | backend_options: BackendOptions { 40 | dx12: Dx12BackendOptions { 41 | shader_compiler: Dx12Compiler::StaticDxc, 42 | presentation_system: Dx12SwapchainKind::DxgiFromHwnd, 43 | latency_waitable_object: Dx12UseFrameLatencyWaitableObject::Wait, 44 | } 45 | .with_env(), 46 | ..Default::default() 47 | }, 48 | }); 49 | 50 | let adapter = block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { 51 | power_preference: PowerPreference::HighPerformance, 52 | compatible_surface: None, 53 | force_fallback_adapter: false, 54 | })) 55 | .expect("Failed to find an appropriate adapter"); 56 | 57 | let (device, queue) = block_on(adapter.request_device(&DeviceDescriptor { 58 | label: Some("main device"), 59 | required_features: Features::default(), 60 | required_limits: Limits::default(), 61 | experimental_features: ExperimentalFeatures::disabled(), 62 | memory_hints: MemoryHints::Performance, 63 | trace: Trace::Off, 64 | })) 65 | .expect("Failed to create device"); 66 | device.on_uncaptured_error(Arc::new(error_handler)); 67 | 68 | (device, queue) 69 | }); 70 | 71 | CACHE.clone() 72 | } 73 | 74 | pub fn error_handler(error: Error) { 75 | let (message_type, message) = match error { 76 | Error::OutOfMemory { source } => ("OutOfMemory", source.to_string()), 77 | Error::Validation { 78 | source, 79 | description, 80 | } => ("Validation", format!("{source}: {description}")), 81 | Error::Internal { 82 | source, 83 | description, 84 | } => ("Internal", format!("{source}: {description}")), 85 | }; 86 | 87 | panic!("wgpu [{message_type}] [error]: {message}"); 88 | } 89 | 90 | pub fn read_image_and_create_texture( 91 | device: &Device, 92 | queue: &Queue, 93 | file_path: &str, 94 | variant: CompressionVariant, 95 
| ) -> (Texture, Vec) { 96 | let image = ImageReader::open(file_path) 97 | .expect("can't open input image") 98 | .decode() 99 | .expect("can't decode image"); 100 | 101 | let rgba_image = image.to_rgba8(); 102 | let width = rgba_image.width(); 103 | let height = rgba_image.height(); 104 | 105 | let texture = if matches!(variant, CompressionVariant::BC6H(..)) { 106 | let rgba_f16_data: Vec = rgba_image 107 | .iter() 108 | .flat_map(|color| f16::from_f64(srgb_to_linear(*color)).to_ne_bytes()) 109 | .collect(); 110 | 111 | device.create_texture_with_data( 112 | queue, 113 | &TextureDescriptor { 114 | label: Some(file_path), 115 | size: Extent3d { 116 | width, 117 | height, 118 | depth_or_array_layers: 1, 119 | }, 120 | mip_level_count: 1, 121 | sample_count: 1, 122 | dimension: TextureDimension::D2, 123 | format: TextureFormat::Rgba16Float, 124 | usage: TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING, 125 | view_formats: &[], 126 | }, 127 | TextureDataOrder::LayerMajor, 128 | rgba_f16_data.as_slice(), 129 | ) 130 | } else { 131 | device.create_texture_with_data( 132 | queue, 133 | &TextureDescriptor { 134 | label: Some(file_path), 135 | size: Extent3d { 136 | width, 137 | height, 138 | depth_or_array_layers: 1, 139 | }, 140 | mip_level_count: 1, 141 | sample_count: 1, 142 | dimension: TextureDimension::D2, 143 | format: TextureFormat::Rgba8Unorm, 144 | usage: TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING, 145 | view_formats: &[], 146 | }, 147 | TextureDataOrder::LayerMajor, 148 | &rgba_image, 149 | ) 150 | }; 151 | 152 | (texture, rgba_image.to_vec()) 153 | } 154 | 155 | pub fn create_blocks_buffer(device: &Device, size: u64) -> Buffer { 156 | device.create_buffer(&BufferDescriptor { 157 | label: Some("blocks buffer"), 158 | size, 159 | usage: BufferUsages::COPY_SRC | BufferUsages::STORAGE, 160 | mapped_at_creation: false, 161 | }) 162 | } 163 | 164 | pub fn download_blocks_data(device: &Device, queue: &Queue, block_buffer: Buffer) -> Vec { 165 | 
let size = block_buffer.size(); 166 | 167 | let staging_buffer = device.create_buffer(&BufferDescriptor { 168 | label: Some("staging buffer"), 169 | size, 170 | usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, 171 | mapped_at_creation: false, 172 | }); 173 | 174 | let mut copy_encoder = device.create_command_encoder(&CommandEncoderDescriptor { 175 | label: Some("copy encoder"), 176 | }); 177 | 178 | copy_encoder.copy_buffer_to_buffer(&block_buffer, 0, &staging_buffer, 0, size); 179 | 180 | queue.submit([copy_encoder.finish()]); 181 | 182 | let result; 183 | 184 | { 185 | let buffer_slice = staging_buffer.slice(..); 186 | 187 | let (tx, rx) = std::sync::mpsc::channel(); 188 | buffer_slice.map_async(MapMode::Read, move |v| tx.send(v).unwrap()); 189 | 190 | let _ = device.poll(PollType::Wait { 191 | submission_index: None, 192 | timeout: Some(Duration::from_secs(60)), 193 | }); 194 | 195 | match rx.recv() { 196 | Ok(Ok(())) => { 197 | result = buffer_slice.get_mapped_range().to_vec(); 198 | } 199 | _ => panic!("couldn't read from buffer"), 200 | } 201 | } 202 | 203 | staging_buffer.unmap(); 204 | 205 | result 206 | } 207 | -------------------------------------------------------------------------------- /src/settings.rs: -------------------------------------------------------------------------------- 1 | #[cfg(any(feature = "bc6h", feature = "bc7"))] 2 | use bytemuck::{Pod, Zeroable}; 3 | 4 | /// Encoding settings for BC6H. 5 | #[cfg(feature = "bc6h")] 6 | #[cfg_attr(docsrs, doc(cfg(feature = "bc6h")))] 7 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Pod, Zeroable)] 8 | #[repr(C)] 9 | pub struct BC6HSettings { 10 | pub(crate) slow_mode: u32, 11 | pub(crate) fast_mode: u32, 12 | pub(crate) refine_iterations_1p: u32, 13 | pub(crate) refine_iterations_2p: u32, 14 | pub(crate) fast_skip_threshold: u32, 15 | } 16 | 17 | #[cfg(feature = "bc6h")] 18 | impl BC6HSettings { 19 | /// Very fast settings. 
20 | pub const fn very_fast() -> Self { 21 | Self { 22 | slow_mode: false as _, 23 | fast_mode: true as _, 24 | fast_skip_threshold: 0, 25 | refine_iterations_1p: 0, 26 | refine_iterations_2p: 0, 27 | } 28 | } 29 | 30 | /// Fast settings. 31 | pub const fn fast() -> Self { 32 | Self { 33 | slow_mode: false as _, 34 | fast_mode: true as _, 35 | fast_skip_threshold: 2, 36 | refine_iterations_1p: 0, 37 | refine_iterations_2p: 1, 38 | } 39 | } 40 | 41 | /// Basic settings. 42 | pub const fn basic() -> Self { 43 | Self { 44 | slow_mode: false as _, 45 | fast_mode: false as _, 46 | fast_skip_threshold: 4, 47 | refine_iterations_1p: 2, 48 | refine_iterations_2p: 2, 49 | } 50 | } 51 | 52 | /// Slow settings. 53 | pub const fn slow() -> Self { 54 | Self { 55 | slow_mode: true as _, 56 | fast_mode: false as _, 57 | fast_skip_threshold: 10, 58 | refine_iterations_1p: 2, 59 | refine_iterations_2p: 2, 60 | } 61 | } 62 | 63 | /// Very slow settings. 64 | pub const fn very_slow() -> Self { 65 | Self { 66 | slow_mode: true as _, 67 | fast_mode: false as _, 68 | fast_skip_threshold: 32, 69 | refine_iterations_1p: 2, 70 | refine_iterations_2p: 2, 71 | } 72 | } 73 | } 74 | 75 | #[cfg(feature = "bc7")] 76 | #[cfg_attr(docsrs, doc(cfg(feature = "bc7")))] 77 | /// Encoding settings for BC7. 78 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Pod, Zeroable)] 79 | #[repr(C)] 80 | pub struct BC7Settings { 81 | pub(crate) refine_iterations: [u32; 8], 82 | pub(crate) mode_selection: [u32; 4], 83 | pub(crate) skip_mode2: u32, 84 | pub(crate) fast_skip_threshold_mode1: u32, 85 | pub(crate) fast_skip_threshold_mode3: u32, 86 | pub(crate) fast_skip_threshold_mode7: u32, 87 | pub(crate) mode45_channel0: u32, 88 | pub(crate) refine_iterations_channel: u32, 89 | pub(crate) channels: u32, 90 | } 91 | 92 | #[cfg(feature = "bc7")] 93 | #[cfg_attr(docsrs, doc(cfg(feature = "bc7")))] 94 | impl BC7Settings { 95 | /// Opaque ultra fast settings. 
96 | pub const fn opaque_ultra_fast() -> Self { 97 | Self { 98 | channels: 3, 99 | mode_selection: [false as _, false as _, false as _, true as _], 100 | skip_mode2: true as _, 101 | fast_skip_threshold_mode1: 3, 102 | fast_skip_threshold_mode3: 1, 103 | fast_skip_threshold_mode7: 0, 104 | mode45_channel0: 0, 105 | refine_iterations_channel: 0, 106 | refine_iterations: [2, 2, 2, 1, 2, 2, 1, 0], 107 | } 108 | } 109 | 110 | /// Opaque very fast settings. 111 | pub const fn opaque_very_fast() -> Self { 112 | Self { 113 | channels: 3, 114 | mode_selection: [false as _, true as _, false as _, true as _], 115 | skip_mode2: true as _, 116 | fast_skip_threshold_mode1: 3, 117 | fast_skip_threshold_mode3: 1, 118 | fast_skip_threshold_mode7: 0, 119 | mode45_channel0: 0, 120 | refine_iterations_channel: 0, 121 | refine_iterations: [2, 2, 2, 1, 2, 2, 1, 0], 122 | } 123 | } 124 | 125 | /// Opaque fast settings. 126 | pub const fn opaque_fast() -> Self { 127 | Self { 128 | channels: 3, 129 | mode_selection: [false as _, true as _, false as _, true as _], 130 | skip_mode2: true as _, 131 | fast_skip_threshold_mode1: 12, 132 | fast_skip_threshold_mode3: 4, 133 | fast_skip_threshold_mode7: 0, 134 | mode45_channel0: 0, 135 | refine_iterations_channel: 0, 136 | refine_iterations: [2, 2, 2, 1, 2, 2, 2, 0], 137 | } 138 | } 139 | 140 | /// Opaque basic settings. 141 | pub const fn opaque_basic() -> Self { 142 | Self { 143 | channels: 3, 144 | mode_selection: [true as _, true as _, true as _, true as _], 145 | skip_mode2: true as _, 146 | fast_skip_threshold_mode1: 12, 147 | fast_skip_threshold_mode3: 8, 148 | fast_skip_threshold_mode7: 0, 149 | mode45_channel0: 0, 150 | refine_iterations_channel: 2, 151 | refine_iterations: [2, 2, 2, 2, 2, 2, 2, 0], 152 | } 153 | } 154 | 155 | /// Opaque slow settings. 
156 | pub const fn opaque_slow() -> Self { 157 | Self { 158 | channels: 3, 159 | mode_selection: [true as _, true as _, true as _, true as _], 160 | skip_mode2: false as _, 161 | fast_skip_threshold_mode1: 64, 162 | fast_skip_threshold_mode3: 64, 163 | fast_skip_threshold_mode7: 0, 164 | mode45_channel0: 0, 165 | refine_iterations_channel: 4, 166 | refine_iterations: [4, 4, 4, 4, 4, 4, 4, 0], 167 | } 168 | } 169 | 170 | /// Alpha ultra fast settings. 171 | pub const fn alpha_ultrafast() -> Self { 172 | Self { 173 | channels: 4, 174 | mode_selection: [false as _, false as _, true as _, true as _], 175 | skip_mode2: true as _, 176 | fast_skip_threshold_mode1: 0, 177 | fast_skip_threshold_mode3: 0, 178 | fast_skip_threshold_mode7: 4, 179 | mode45_channel0: 3, 180 | refine_iterations_channel: 1, 181 | refine_iterations: [2, 1, 2, 1, 1, 1, 2, 2], 182 | } 183 | } 184 | 185 | /// Alpha very fast settings. 186 | pub const fn alpha_very_fast() -> Self { 187 | Self { 188 | channels: 4, 189 | mode_selection: [false as _, true as _, true as _, true as _], 190 | skip_mode2: true as _, 191 | fast_skip_threshold_mode1: 0, 192 | fast_skip_threshold_mode3: 0, 193 | fast_skip_threshold_mode7: 4, 194 | mode45_channel0: 3, 195 | refine_iterations_channel: 2, 196 | refine_iterations: [2, 1, 2, 1, 2, 2, 2, 2], 197 | } 198 | } 199 | 200 | /// Alpha fast settings. 201 | pub const fn alpha_fast() -> Self { 202 | Self { 203 | channels: 4, 204 | mode_selection: [false as _, true as _, true as _, true as _], 205 | skip_mode2: true as _, 206 | fast_skip_threshold_mode1: 4, 207 | fast_skip_threshold_mode3: 4, 208 | fast_skip_threshold_mode7: 8, 209 | mode45_channel0: 3, 210 | refine_iterations_channel: 2, 211 | refine_iterations: [2, 1, 2, 1, 2, 2, 2, 2], 212 | } 213 | } 214 | 215 | /// Alpha basic settings. 
216 | pub const fn alpha_basic() -> Self { 217 | Self { 218 | channels: 4, 219 | mode_selection: [true as _, true as _, true as _, true as _], 220 | skip_mode2: true as _, 221 | fast_skip_threshold_mode1: 12, 222 | fast_skip_threshold_mode3: 8, 223 | fast_skip_threshold_mode7: 8, 224 | mode45_channel0: 0, 225 | refine_iterations_channel: 2, 226 | refine_iterations: [2, 2, 2, 2, 2, 2, 2, 2], 227 | } 228 | } 229 | 230 | /// Alpha slow settings. 231 | pub const fn alpha_slow() -> Self { 232 | Self { 233 | channels: 4, 234 | mode_selection: [true as _, true as _, true as _, true as _], 235 | skip_mode2: false as _, 236 | fast_skip_threshold_mode1: 64, 237 | fast_skip_threshold_mode3: 64, 238 | fast_skip_threshold_mode7: 64, 239 | mode45_channel0: 0, 240 | refine_iterations_channel: 4, 241 | refine_iterations: [4, 4, 4, 4, 4, 4, 4, 4], 242 | } 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /src/encode/bc1_to_5.rs: -------------------------------------------------------------------------------- 1 | pub(crate) struct BlockCompressorBC15 { 2 | block: [f32; 64], 3 | } 4 | 5 | impl Default for BlockCompressorBC15 { 6 | fn default() -> Self { 7 | Self { block: [0.0; 64] } 8 | } 9 | } 10 | 11 | impl BlockCompressorBC15 { 12 | pub(crate) fn load_block_interleaved_rgba( 13 | &mut self, 14 | rgba_data: &[u8], 15 | xx: usize, 16 | yy: usize, 17 | stride: usize, 18 | ) { 19 | for y in 0..4 { 20 | for x in 0..4 { 21 | let pixel_x = xx * 4 + x; 22 | let pixel_y = yy * 4 + y; 23 | 24 | let offset = pixel_y * stride + pixel_x * 4; 25 | 26 | let red = rgba_data[offset] as f32; 27 | let green = rgba_data[offset + 1] as f32; 28 | let blue = rgba_data[offset + 2] as f32; 29 | let alpha = rgba_data[offset + 3] as f32; 30 | 31 | self.block[y * 4 + x] = red; 32 | self.block[16 + y * 4 + x] = green; 33 | self.block[32 + y * 4 + x] = blue; 34 | self.block[48 + y * 4 + x] = alpha; 35 | } 36 | } 37 | } 38 | 39 | pub(crate) fn 
load_block_r_8bit( 40 | &mut self, 41 | rgba_data: &[u8], 42 | xx: usize, 43 | yy: usize, 44 | stride: usize, 45 | ) { 46 | for y in 0..4 { 47 | for x in 0..4 { 48 | let pixel_x = xx * 4 + x; 49 | let pixel_y = yy * 4 + y; 50 | 51 | let offset = pixel_y * stride + pixel_x * 4; 52 | let red = rgba_data[offset] as f32; 53 | 54 | self.block[48 + y * 4 + x] = red; 55 | } 56 | } 57 | } 58 | 59 | pub(crate) fn load_block_g_8bit( 60 | &mut self, 61 | rgba_data: &[u8], 62 | xx: usize, 63 | yy: usize, 64 | stride: usize, 65 | ) { 66 | for y in 0..4 { 67 | for x in 0..4 { 68 | let pixel_x = xx * 4 + x; 69 | let pixel_y = yy * 4 + y; 70 | 71 | let offset = pixel_y * stride + pixel_x * 4; 72 | let green = rgba_data[offset + 1] as f32; 73 | 74 | self.block[48 + y * 4 + x] = green; 75 | } 76 | } 77 | } 78 | 79 | pub(crate) fn load_block_alpha_4bit( 80 | &mut self, 81 | rgba_data: &[u8], 82 | xx: usize, 83 | yy: usize, 84 | stride: usize, 85 | ) -> [u32; 2] { 86 | let mut alpha_bits = [0; 2]; 87 | 88 | for y in 0..4 { 89 | for x in 0..4 { 90 | let pixel_x = xx * 4 + x; 91 | let pixel_y = yy * 4 + y; 92 | 93 | let offset = pixel_y * stride + pixel_x * 4; 94 | let alpha = rgba_data[offset + 3] as f32 / 255.0; 95 | 96 | // Convert alpha to 4 bits (0-15) 97 | let alpha4 = (alpha * 15.0) as u32; 98 | let bit_position = y * 16 + x * 4; 99 | 100 | if bit_position < 32 { 101 | alpha_bits[0] |= alpha4 << bit_position; 102 | } else { 103 | alpha_bits[1] |= alpha4 << (bit_position - 32); 104 | } 105 | } 106 | } 107 | 108 | alpha_bits 109 | } 110 | 111 | pub(crate) fn store_data( 112 | &self, 113 | blocks_buffer: &mut [u8], 114 | block_width: usize, 115 | xx: usize, 116 | yy: usize, 117 | data: &[u32], 118 | ) { 119 | let offset = (yy * block_width + xx) * (data.len() * 4); 120 | 121 | for (index, &value) in data.iter().enumerate() { 122 | let byte_offset = offset + index * 4; 123 | blocks_buffer[byte_offset] = value as u8; 124 | blocks_buffer[byte_offset + 1] = (value >> 8) as u8; 125 | 
blocks_buffer[byte_offset + 2] = (value >> 16) as u8; 126 | blocks_buffer[byte_offset + 3] = (value >> 24) as u8; 127 | } 128 | } 129 | 130 | fn compute_covar_dc(&self, covar: &mut [f32; 6], dc: &mut [f32; 3]) { 131 | for (p, value) in dc.iter_mut().enumerate() { 132 | let mut acc = 0.0; 133 | for k in 0..16 { 134 | acc += self.block[k + p * 16]; 135 | } 136 | *value = acc / 16.0; 137 | } 138 | 139 | let mut covar0 = 0.0; 140 | let mut covar1 = 0.0; 141 | let mut covar2 = 0.0; 142 | let mut covar3 = 0.0; 143 | let mut covar4 = 0.0; 144 | let mut covar5 = 0.0; 145 | 146 | for k in 0..16 { 147 | let rgb0 = self.block[k] - dc[0]; 148 | let rgb1 = self.block[k + 16] - dc[1]; 149 | let rgb2 = self.block[k + 32] - dc[2]; 150 | 151 | covar0 += rgb0 * rgb0; 152 | covar1 += rgb0 * rgb1; 153 | covar2 += rgb0 * rgb2; 154 | covar3 += rgb1 * rgb1; 155 | covar4 += rgb1 * rgb2; 156 | covar5 += rgb2 * rgb2; 157 | } 158 | 159 | covar[0] = covar0; 160 | covar[1] = covar1; 161 | covar[2] = covar2; 162 | covar[3] = covar3; 163 | covar[4] = covar4; 164 | covar[5] = covar5; 165 | } 166 | 167 | fn ssymv(result: &mut [f32; 3], covar: &[f32; 6], a_vector: &[f32; 3]) { 168 | result[0] = covar[0] * a_vector[0] + covar[1] * a_vector[1] + covar[2] * a_vector[2]; 169 | result[1] = covar[1] * a_vector[0] + covar[3] * a_vector[1] + covar[4] * a_vector[2]; 170 | result[2] = covar[2] * a_vector[0] + covar[4] * a_vector[1] + covar[5] * a_vector[2]; 171 | } 172 | 173 | fn compute_axis3(axis: &mut [f32; 3], covar: &[f32; 6], power_iterations: i32) { 174 | let mut a_vector = [1.0; 3]; 175 | 176 | for i in 0..power_iterations { 177 | Self::ssymv(axis, covar, &a_vector); 178 | 179 | a_vector.copy_from_slice(&axis[..]); 180 | 181 | if i % 2 == 1 { 182 | let mut norm_sq = 0.0; 183 | for value in axis.iter() { 184 | norm_sq += value * value; 185 | } 186 | 187 | let rnorm = 1.0 / norm_sq.sqrt(); 188 | 189 | for value in a_vector.iter_mut() { 190 | *value *= rnorm; 191 | } 192 | } 193 | } 194 | 195 | 
axis.copy_from_slice(&a_vector); 196 | } 197 | 198 | fn pick_endpoints(&self, c0: &mut [f32; 3], c1: &mut [f32; 3], axis: &[f32; 3], dc: &[f32; 3]) { 199 | let mut min_dot: f32 = 256.0 * 256.0; 200 | let mut max_dot: f32 = 0.0; 201 | 202 | for y in 0..4 { 203 | for x in 0..4 { 204 | let mut dot = 0.0; 205 | for p in 0..3 { 206 | dot += (self.block[p * 16 + y * 4 + x] - dc[p]) * axis[p]; 207 | } 208 | 209 | min_dot = f32::min(min_dot, dot); 210 | max_dot = f32::max(max_dot, dot); 211 | } 212 | } 213 | 214 | if max_dot - min_dot < 1.0 { 215 | min_dot -= 0.5; 216 | max_dot += 0.5; 217 | } 218 | 219 | let mut norm_sq = 0.0; 220 | for value in axis.iter() { 221 | norm_sq += *value * *value; 222 | } 223 | 224 | let rnorm_sq = norm_sq.recip(); 225 | for p in 0..3 { 226 | c0[p] = f32::clamp(dc[p] + min_dot * rnorm_sq * axis[p], 0.0, 255.0); 227 | c1[p] = f32::clamp(dc[p] + max_dot * rnorm_sq * axis[p], 0.0, 255.0); 228 | } 229 | } 230 | 231 | fn dec_rgb565(c: &mut [f32; 3], p: i32) { 232 | let b5 = p & 31; 233 | let g6 = (p >> 5) & 63; 234 | let r5 = (p >> 11) & 31; 235 | 236 | c[0] = ((r5 << 3) + (r5 >> 2)) as f32; 237 | c[1] = ((g6 << 2) + (g6 >> 4)) as f32; 238 | c[2] = ((b5 << 3) + (b5 >> 2)) as f32; 239 | } 240 | 241 | fn enc_rgb565(c: &[f32; 3]) -> i32 { 242 | let r = c[0] as i32; 243 | let g = c[1] as i32; 244 | let b = c[2] as i32; 245 | 246 | let r5 = (r * 31 + 128 + ((r * 31) >> 8)) >> 8; 247 | let g6 = (g * 63 + 128 + ((g * 63) >> 8)) >> 8; 248 | let b5 = (b * 31 + 128 + ((b * 31) >> 8)) >> 8; 249 | 250 | (r5 << 11) + (g6 << 5) + b5 251 | } 252 | 253 | fn fast_quant(&self, p0: i32, p1: i32) -> u32 { 254 | let mut c0 = [0.0; 3]; 255 | let mut c1 = [0.0; 3]; 256 | Self::dec_rgb565(&mut c0, p0); 257 | Self::dec_rgb565(&mut c1, p1); 258 | 259 | let mut dir = [0.0; 3]; 260 | for p in 0..3 { 261 | dir[p] = c1[p] - c0[p]; 262 | } 263 | 264 | let mut sq_norm = 0.0; 265 | for value in dir.iter() { 266 | sq_norm += value.powi(2); 267 | } 268 | 269 | let rsq_norm = 
sq_norm.recip(); 270 | 271 | for value in dir.iter_mut() { 272 | *value *= rsq_norm * 3.0; 273 | } 274 | 275 | let mut bias = 0.5; 276 | for p in 0..3 { 277 | bias -= c0[p] * dir[p]; 278 | } 279 | 280 | let mut bits = 0; 281 | let mut scaler = 1; 282 | for k in 0..16 { 283 | let mut dot = 0.0; 284 | for (p, value) in dir.iter().enumerate() { 285 | dot += self.block[k + p * 16] * value; 286 | } 287 | 288 | let q = i32::clamp((dot + bias) as i32, 0, 3); 289 | bits += q as u32 * scaler; 290 | scaler = scaler.wrapping_mul(4); 291 | } 292 | 293 | bits 294 | } 295 | 296 | fn bc1_refine(&self, pe: &mut [i32; 2], bits: u32, dc: &[f32; 3]) { 297 | let mut c0 = [0.0; 3]; 298 | let mut c1 = [0.0; 3]; 299 | 300 | if (bits ^ (bits.wrapping_mul(4))) < 4 { 301 | c0.copy_from_slice(&dc[..]); 302 | c1.copy_from_slice(&dc[..]); 303 | } else { 304 | let mut atb1 = [0.0; 3]; 305 | let mut sum_q = 0.0; 306 | let mut sum_qq = 0.0; 307 | let mut shifted_bits = bits; 308 | 309 | for k in 0..16 { 310 | let q = (shifted_bits & 3) as f32; 311 | shifted_bits >>= 2; 312 | 313 | let x = 3.0 - q; 314 | 315 | sum_q += q; 316 | sum_qq += q * q; 317 | 318 | for (p, value) in atb1.iter_mut().enumerate() { 319 | *value += x * self.block[k + p * 16]; 320 | } 321 | } 322 | 323 | let mut sum = [0.0; 3]; 324 | let mut atb2 = [0.0; 3]; 325 | 326 | for p in 0..3 { 327 | sum[p] = dc[p] * 16.0; 328 | atb2[p] = 3.0 * sum[p] - atb1[p]; 329 | } 330 | 331 | let cxx = 16.0 * 9.0 - 2.0 * 3.0 * sum_q + sum_qq; 332 | let cyy = sum_qq; 333 | let cxy = 3.0 * sum_q - sum_qq; 334 | let scale = 3.0 * (cxx * cyy - cxy * cxy).recip(); 335 | 336 | for p in 0..3 { 337 | c0[p] = (atb1[p] * cyy - atb2[p] * cxy) * scale; 338 | c1[p] = (atb2[p] * cxx - atb1[p] * cxy) * scale; 339 | 340 | c0[p] = f32::clamp(c0[p], 0.0, 255.0); 341 | c1[p] = f32::clamp(c1[p], 0.0, 255.0); 342 | } 343 | } 344 | 345 | pe[0] = Self::enc_rgb565(&c0); 346 | pe[1] = Self::enc_rgb565(&c1); 347 | } 348 | 349 | fn fix_qbits(qbits: u32) -> u32 { 350 | const 
MASK_01B: u32 = 0x55555555; 351 | const MASK_10B: u32 = 0xAAAAAAAA; 352 | 353 | let qbits0 = qbits & MASK_01B; 354 | let qbits1 = qbits & MASK_10B; 355 | 356 | (qbits1 >> 1) + (qbits1 ^ (qbits0 << 1)) 357 | } 358 | 359 | pub(crate) fn compress_block_bc1_core(&self) -> [u32; 2] { 360 | let power_iterations = 4; 361 | let refine_iterations = 1; 362 | 363 | let mut covar = [0.0; 6]; 364 | let mut dc = [0.0; 3]; 365 | self.compute_covar_dc(&mut covar, &mut dc); 366 | 367 | const EPS: f32 = f32::EPSILON; 368 | covar[0] += EPS; 369 | covar[3] += EPS; 370 | covar[5] += EPS; 371 | 372 | let mut axis = [0.0; 3]; 373 | Self::compute_axis3(&mut axis, &covar, power_iterations); 374 | 375 | let mut c0 = [0.0; 3]; 376 | let mut c1 = [0.0; 3]; 377 | self.pick_endpoints(&mut c0, &mut c1, &axis, &dc); 378 | 379 | let mut p = [0; 2]; 380 | p[0] = Self::enc_rgb565(&c0); 381 | p[1] = Self::enc_rgb565(&c1); 382 | if p[0] < p[1] { 383 | p.swap(0, 1); 384 | } 385 | 386 | let mut data = [0; 2]; 387 | data[0] = ((p[1] as u32) << 16) | p[0] as u32; 388 | data[1] = self.fast_quant(p[0], p[1]); 389 | 390 | for _ in 0..refine_iterations { 391 | self.bc1_refine(&mut p, data[1], &dc); 392 | if p[0] < p[1] { 393 | p.swap(0, 1); 394 | } 395 | data[0] = ((p[1] as u32) << 16) | p[0] as u32; 396 | data[1] = self.fast_quant(p[0], p[1]); 397 | } 398 | 399 | data[1] = Self::fix_qbits(data[1]); 400 | 401 | data 402 | } 403 | 404 | pub(crate) fn compress_block_bc3_alpha(&self) -> [u32; 2] { 405 | let mut ep = [255.0, 0.0]; 406 | 407 | // Find min/max endpoints using block[48] to block[63] for alpha 408 | for k in 0..16 { 409 | ep[0] = f32::min(ep[0], self.block[48 + k]); 410 | ep[1] = f32::max(ep[1], self.block[48 + k]); 411 | } 412 | 413 | // Prevent division by zero 414 | if ep[0] == ep[1] { 415 | ep[1] = ep[0] + 0.1; 416 | } 417 | 418 | let mut qblock = [0; 2]; 419 | let scale = 7.0 / (ep[1] - ep[0]); 420 | 421 | for k in 0..16 { 422 | let v = self.block[48 + k]; 423 | let proj = (v - ep[0]) * scale + 
0.5; 424 | 425 | let mut q = i32::clamp(proj as i32, 0, 7); 426 | q = 7 - q; 427 | 428 | if q > 0 { 429 | q += 1; 430 | } 431 | if q == 8 { 432 | q = 1; 433 | } 434 | 435 | qblock[k / 8] |= (q as u32) << ((k % 8) * 3); 436 | } 437 | 438 | let mut data = [0; 2]; 439 | data[0] = (u32::clamp(ep[0] as u32, 0, 255) << 8) | u32::clamp(ep[1] as u32, 0, 255); 440 | data[0] |= qblock[0] << 16; 441 | data[1] = qblock[0] >> 16; 442 | data[1] |= qblock[1] << 8; 443 | 444 | data 445 | } 446 | } 447 | -------------------------------------------------------------------------------- /compressor/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fs::File, 3 | path::PathBuf, 4 | sync::Arc, 5 | time::{Duration, Instant}, 6 | }; 7 | 8 | use block_compression::{ 9 | half::f16, BC6HSettings, BC7Settings, CompressionVariant, GpuBlockCompressor, 10 | }; 11 | use bytemuck::cast_slice; 12 | use ddsfile::{AlphaMode, D3D10ResourceDimension, Dds, DxgiFormat, NewDxgiParams}; 13 | use image::ImageReader; 14 | use pollster::block_on; 15 | use wgpu::{ 16 | util::{DeviceExt, TextureDataOrder}, 17 | wgt::{Dx12SwapchainKind, Dx12UseFrameLatencyWaitableObject}, 18 | BackendOptions, Backends, Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, 19 | ComputePassDescriptor, ComputePassTimestampWrites, Device, DeviceDescriptor, 20 | Dx12BackendOptions, Dx12Compiler, Error, ExperimentalFeatures, Extent3d, Features, 21 | GlBackendOptions, Instance, InstanceDescriptor, InstanceFlags, MapMode, MemoryHints, 22 | NoopBackendOptions, PollType, PowerPreference, QueryType, Queue, Texture, TextureDescriptor, 23 | TextureDimension, TextureFormat, TextureUsages, TextureViewDescriptor, Trace, 24 | }; 25 | 26 | fn main() { 27 | let (variant, file_name) = match parse_args() { 28 | Some(args) => args, 29 | None => return, 30 | }; 31 | 32 | let (device, queue) = create_resources(); 33 | let mut compressor: GpuBlockCompressor = 
GpuBlockCompressor::new(device.clone(), queue.clone()); 34 | 35 | let start = Instant::now(); 36 | 37 | let texture = read_image_and_create_texture(&device, &queue, &file_name, variant); 38 | let texture_view = texture.create_view(&TextureViewDescriptor::default()); 39 | let width = texture.width(); 40 | let height = texture.height(); 41 | 42 | let duration = start.elapsed(); 43 | println!( 44 | "Image read and upload took: {:.3} ms", 45 | duration.as_secs_f64() * 1000.0 46 | ); 47 | 48 | let blocks_buffer = device.create_buffer(&BufferDescriptor { 49 | label: Some("blocks buffer"), 50 | size: variant.blocks_byte_size(width, height) as _, 51 | usage: BufferUsages::COPY_SRC | BufferUsages::STORAGE, 52 | mapped_at_creation: false, 53 | }); 54 | 55 | compressor.add_compression_task( 56 | variant, 57 | &texture_view, 58 | width, 59 | height, 60 | &blocks_buffer, 61 | None, 62 | None, 63 | ); 64 | 65 | compress(&mut compressor, &device, &queue); 66 | 67 | let start = Instant::now(); 68 | 69 | let block_data = download_blocks_data(&device, &queue, blocks_buffer); 70 | 71 | let duration = start.elapsed(); 72 | println!( 73 | "Block data download took: {:.3} ms", 74 | duration.as_secs_f64() * 1000.0 75 | ); 76 | 77 | let start = Instant::now(); 78 | 79 | write_dds_file(&file_name, variant, width, height, block_data); 80 | 81 | let duration = start.elapsed(); 82 | println!( 83 | "DDS output to disk took: {:.3} ms", 84 | duration.as_secs_f64() * 1000.0 85 | ); 86 | } 87 | 88 | fn create_resources() -> (Device, Queue) { 89 | let instance = Instance::new(&InstanceDescriptor { 90 | backends: Backends::from_env().unwrap_or_default(), 91 | flags: InstanceFlags::from_build_config().with_env(), 92 | memory_budget_thresholds: Default::default(), 93 | backend_options: BackendOptions { 94 | gl: GlBackendOptions::default(), 95 | dx12: Dx12BackendOptions { 96 | shader_compiler: Dx12Compiler::StaticDxc, 97 | presentation_system: Dx12SwapchainKind::DxgiFromHwnd, 98 | 
latency_waitable_object: Dx12UseFrameLatencyWaitableObject::Wait, 99 | } 100 | .with_env(), 101 | noop: NoopBackendOptions::default(), 102 | }, 103 | }); 104 | 105 | let adapter = block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { 106 | power_preference: PowerPreference::HighPerformance, 107 | compatible_surface: None, 108 | force_fallback_adapter: false, 109 | })) 110 | .expect("Failed to find an appropriate adapter"); 111 | 112 | let (device, queue) = block_on(adapter.request_device(&DeviceDescriptor { 113 | label: Some("main device"), 114 | required_features: Features::TIMESTAMP_QUERY, 115 | required_limits: adapter.limits(), 116 | experimental_features: ExperimentalFeatures::disabled(), 117 | memory_hints: MemoryHints::MemoryUsage, 118 | trace: Trace::Off, 119 | })) 120 | .expect("Failed to create device"); 121 | device.on_uncaptured_error(Arc::new(error_handler)); 122 | 123 | let info = adapter.get_info(); 124 | println!("Using backend: {:?}", info.backend); 125 | 126 | (device, queue) 127 | } 128 | 129 | fn read_image_and_create_texture( 130 | device: &Device, 131 | queue: &Queue, 132 | file_name: &str, 133 | variant: CompressionVariant, 134 | ) -> Texture { 135 | let image = ImageReader::open(file_name) 136 | .expect("can't open input image") 137 | .decode() 138 | .expect("can't decode image"); 139 | 140 | let rgba_image = image.to_rgba8(); 141 | let width = rgba_image.width(); 142 | let height = rgba_image.height(); 143 | 144 | if matches!(variant, CompressionVariant::BC6H(..)) { 145 | let rgba_f16_data: Vec = rgba_image 146 | .iter() 147 | .flat_map(|color| f16::from_f64(srgb_to_linear(*color)).to_le_bytes()) 148 | .collect(); 149 | 150 | device.create_texture_with_data( 151 | queue, 152 | &TextureDescriptor { 153 | label: Some(file_name), 154 | size: Extent3d { 155 | width, 156 | height, 157 | depth_or_array_layers: 1, 158 | }, 159 | mip_level_count: 1, 160 | sample_count: 1, 161 | dimension: TextureDimension::D2, 162 | format: 
TextureFormat::Rgba16Float, 163 | usage: TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING, 164 | view_formats: &[], 165 | }, 166 | TextureDataOrder::LayerMajor, 167 | rgba_f16_data.as_slice(), 168 | ) 169 | } else { 170 | device.create_texture_with_data( 171 | queue, 172 | &TextureDescriptor { 173 | label: Some(file_name), 174 | size: Extent3d { 175 | width, 176 | height, 177 | depth_or_array_layers: 1, 178 | }, 179 | mip_level_count: 1, 180 | sample_count: 1, 181 | dimension: TextureDimension::D2, 182 | format: TextureFormat::Rgba8Unorm, 183 | usage: TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING, 184 | view_formats: &[], 185 | }, 186 | TextureDataOrder::LayerMajor, 187 | &rgba_image, 188 | ) 189 | } 190 | } 191 | 192 | #[inline] 193 | pub fn srgb_to_linear(srgb: u8) -> f64 { 194 | let v = (srgb as f64) / 255.0; 195 | if v <= 0.04045 { 196 | v / 12.92 197 | } else { 198 | ((v + 0.055) / 1.055).powf(2.4) 199 | } 200 | } 201 | 202 | fn compress(compressor: &mut GpuBlockCompressor, device: &Device, queue: &Queue) { 203 | let timestamp_query_set = device.create_query_set(&wgpu::QuerySetDescriptor { 204 | label: Some("timestamp query set"), 205 | count: 2, 206 | ty: QueryType::Timestamp, 207 | }); 208 | 209 | let timestamp_resolve_buffer = device.create_buffer(&BufferDescriptor { 210 | label: Some("timestamp resolve buffer"), 211 | size: 16, 212 | usage: BufferUsages::COPY_DST | BufferUsages::COPY_SRC | BufferUsages::QUERY_RESOLVE, 213 | mapped_at_creation: false, 214 | }); 215 | 216 | let timestamp_readback_buffer = device.create_buffer(&BufferDescriptor { 217 | label: Some("timestamp read-back buffer"), 218 | size: 16, 219 | usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, 220 | mapped_at_creation: false, 221 | }); 222 | 223 | let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor { 224 | label: Some("command encoder"), 225 | }); 226 | 227 | { 228 | let mut pass = encoder.begin_compute_pass(&ComputePassDescriptor { 229 | 
label: Some("compute pass"), 230 | timestamp_writes: Some(ComputePassTimestampWrites { 231 | query_set: ×tamp_query_set, 232 | beginning_of_pass_write_index: Some(0), 233 | end_of_pass_write_index: Some(1), 234 | }), 235 | }); 236 | 237 | compressor.compress(&mut pass); 238 | } 239 | 240 | encoder.resolve_query_set(×tamp_query_set, 0..2, ×tamp_resolve_buffer, 0); 241 | 242 | encoder.copy_buffer_to_buffer( 243 | ×tamp_resolve_buffer, 244 | 0, 245 | ×tamp_readback_buffer, 246 | 0, 247 | 16, 248 | ); 249 | 250 | queue.submit([encoder.finish()]); 251 | 252 | { 253 | let buffer_slice = timestamp_readback_buffer.slice(..); 254 | 255 | let (tx, rx) = std::sync::mpsc::channel(); 256 | buffer_slice.map_async(MapMode::Read, move |v| tx.send(v).unwrap()); 257 | 258 | let _ = device.poll(PollType::Wait { 259 | submission_index: None, 260 | timeout: Some(Duration::from_secs(60)), 261 | }); 262 | 263 | match rx.recv() { 264 | Ok(Ok(())) => { 265 | let data = buffer_slice.get_mapped_range(); 266 | let timestamps: &[u64] = cast_slice(&data); 267 | 268 | let period = queue.get_timestamp_period() as f64; 269 | let start_ns = timestamps[0] as f64 * period; 270 | let end_ns = timestamps[1] as f64 * period; 271 | let duration_ms = (end_ns - start_ns) / 1_000_000.0; 272 | 273 | println!("Compression took: {duration_ms:.3} ms"); 274 | } 275 | _ => panic!("couldn't read from buffer"), 276 | } 277 | 278 | timestamp_readback_buffer.unmap(); 279 | } 280 | } 281 | 282 | fn download_blocks_data(device: &Device, queue: &Queue, block_buffer: Buffer) -> Vec { 283 | let size = block_buffer.size(); 284 | 285 | let staging_buffer = device.create_buffer(&BufferDescriptor { 286 | label: Some("staging buffer"), 287 | size, 288 | usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, 289 | mapped_at_creation: false, 290 | }); 291 | 292 | let mut copy_encoder = device.create_command_encoder(&CommandEncoderDescriptor { 293 | label: Some("copy encoder"), 294 | }); 295 | 296 | 
copy_encoder.copy_buffer_to_buffer(&block_buffer, 0, &staging_buffer, 0, size); 297 | 298 | queue.submit([copy_encoder.finish()]); 299 | 300 | let result; 301 | 302 | { 303 | let buffer_slice = staging_buffer.slice(..); 304 | 305 | let (tx, rx) = std::sync::mpsc::channel(); 306 | buffer_slice.map_async(MapMode::Read, move |v| tx.send(v).unwrap()); 307 | 308 | let _ = device.poll(PollType::Wait { 309 | submission_index: None, 310 | timeout: Some(Duration::from_secs(60)), 311 | }); 312 | 313 | match rx.recv() { 314 | Ok(Ok(())) => { 315 | result = buffer_slice.get_mapped_range().to_vec(); 316 | } 317 | _ => panic!("couldn't read from buffer"), 318 | } 319 | } 320 | 321 | staging_buffer.unmap(); 322 | 323 | result 324 | } 325 | 326 | fn write_dds_file( 327 | file_name: &str, 328 | variant: CompressionVariant, 329 | width: u32, 330 | height: u32, 331 | block_data: Vec, 332 | ) { 333 | let mut dds = Dds::new_dxgi(NewDxgiParams { 334 | height, 335 | width, 336 | depth: None, 337 | format: dxgi_format(variant), 338 | mipmap_levels: Some(1), 339 | array_layers: None, 340 | caps2: None, 341 | is_cubemap: false, 342 | resource_dimension: D3D10ResourceDimension::Texture2D, 343 | alpha_mode: AlphaMode::Straight, 344 | }) 345 | .expect("failed to create DDS header"); 346 | 347 | dds.data = block_data; 348 | 349 | let mut dds_name = PathBuf::from(file_name); 350 | dds_name.set_extension("dds"); 351 | 352 | let mut file = File::create(dds_name).expect("failed to create output file"); 353 | dds.write(&mut file).expect("failed to write DDS file"); 354 | } 355 | 356 | fn dxgi_format(variant: CompressionVariant) -> DxgiFormat { 357 | match variant { 358 | CompressionVariant::BC1 => DxgiFormat::BC1_UNorm_sRGB, 359 | CompressionVariant::BC2 => DxgiFormat::BC2_UNorm_sRGB, 360 | CompressionVariant::BC3 => DxgiFormat::BC3_UNorm_sRGB, 361 | CompressionVariant::BC4 => DxgiFormat::BC4_UNorm, 362 | CompressionVariant::BC5 => DxgiFormat::BC5_UNorm, 363 | CompressionVariant::BC6H(..) 
=> DxgiFormat::BC6H_UF16, 364 | CompressionVariant::BC7(..) => DxgiFormat::BC7_UNorm_sRGB, 365 | } 366 | } 367 | 368 | fn print_help() { 369 | println!("Usage: compressor "); 370 | println!("\nCompression variants:"); 371 | println!(" bc1 - BC1 compression (RGB)"); 372 | println!(" bc2 - BC2 compression with sharp alpha (RGBA)"); 373 | println!(" bc3 - BC3 compression with smooth alpha (RGBA)"); 374 | println!(" bc4 - BC4 compression (R)"); 375 | println!(" bc5 - BC5 compression (RG)"); 376 | println!(" bc6h - BC6H compression (RGB HDR)"); 377 | println!(" bc7 - BC7 compression with smooth alpha (RGBA)"); 378 | } 379 | 380 | fn parse_args() -> Option<(CompressionVariant, String)> { 381 | let args: Vec = std::env::args().collect(); 382 | 383 | if args.len() != 3 || args.contains(&"--help".to_string()) { 384 | print_help(); 385 | return None; 386 | } 387 | 388 | let variant = match args[1].to_lowercase().as_str() { 389 | "bc1" => CompressionVariant::BC1, 390 | "bc2" => CompressionVariant::BC2, 391 | "bc3" => CompressionVariant::BC3, 392 | "bc4" => CompressionVariant::BC4, 393 | "bc5" => CompressionVariant::BC5, 394 | "bc6h" => CompressionVariant::BC6H(BC6HSettings::very_slow()), 395 | "bc7" => CompressionVariant::BC7(BC7Settings::alpha_slow()), 396 | _ => { 397 | println!("Error: Invalid compression variant"); 398 | print_help(); 399 | return None; 400 | } 401 | }; 402 | 403 | let file_name = args[2].clone(); 404 | 405 | Some((variant, file_name)) 406 | } 407 | 408 | pub fn error_handler(error: Error) { 409 | let (message_type, message) = match error { 410 | Error::OutOfMemory { source } => ("OutOfMemory", source.to_string()), 411 | Error::Validation { 412 | source, 413 | description, 414 | } => ("Validation", format!("{source}: {description}")), 415 | Error::Internal { 416 | source, 417 | description, 418 | } => ("Internal", format!("{source}: {description}")), 419 | }; 420 | 421 | panic!("wgpu [{message_type}] [error]: {message}"); 422 | } 423 | 
--------------------------------------------------------------------------------
/tests/metrics.rs:
--------------------------------------------------------------------------------
use block_compression::{
    decode::decompress_blocks_as_rgba8, encode::compress_rgba8, BC6HSettings, BC7Settings,
    CompressionVariant, GpuBlockCompressor,
};
use wgpu::{CommandEncoderDescriptor, ComputePassDescriptor, TextureViewDescriptor};

use self::common::{
    create_blocks_buffer, create_wgpu_resources, download_blocks_data,
    read_image_and_create_texture, srgb_to_linear, BRICK_FILE_PATH, MARBLE_FILE_PATH,
};

mod common;

pub const BRICK_ALPHA_FILE_PATH: &str = "tests/images/brick-alpha.png";
pub const MARBLE_ALPHA_FILE_PATH: &str = "tests/images/marble-alpha.png";
pub const BLENDER_FILE_PATH: &str = "tests/images/blender.png";

/// Overall PSNR/MSE of an image comparison plus a per-channel breakdown.
#[derive(Debug, Clone)]
pub struct PsnrResult {
    pub overall_psnr: f64,
    pub overall_mse: f64,
    pub channel_results: ChannelResults,
}

/// Per-channel quality metrics, one entry for each RGBA channel.
#[derive(Debug, Clone)]
pub struct ChannelResults {
    pub red: ChannelMetrics,
    pub green: ChannelMetrics,
    pub blue: ChannelMetrics,
    pub alpha: ChannelMetrics,
}

/// PSNR and MSE of a single channel.
#[derive(Debug, Clone)]
pub struct ChannelMetrics {
    pub psnr: f64,
    pub mse: f64,
}

/// Calculates quality metrics for a given image. The input data and output data must be RGBA data.
40 | pub fn calculate_image_metrics_rgba8( 41 | original: &[u8], 42 | compressed: &[u8], 43 | width: u32, 44 | height: u32, 45 | channels: u32, 46 | ) -> PsnrResult { 47 | if original.len() != compressed.len() { 48 | panic!("Image buffers must have same length"); 49 | } 50 | if original.len() != (width * height * 4) as usize { 51 | panic!("Buffer size doesn't match dimensions"); 52 | } 53 | 54 | let mut channel_mse = [0.0; 4]; 55 | let pixel_count = (width * height) as f64; 56 | 57 | for index in (0..original.len()).step_by(4) { 58 | for channel in 0..channels as usize { 59 | let orig = if channel < 3 { 60 | srgb_to_linear(original[index + channel]) 61 | } else { 62 | (original[index + channel] as f64) / 255.0 63 | }; 64 | 65 | let comp = if channel < 3 { 66 | srgb_to_linear(compressed[index + channel]) 67 | } else { 68 | (compressed[index + channel] as f64) / 255.0 69 | }; 70 | 71 | let diff = orig - comp; 72 | channel_mse[channel] += diff * diff; 73 | } 74 | } 75 | 76 | // Normalize MSE values 77 | channel_mse.iter_mut().for_each(|mse| *mse /= pixel_count); 78 | 79 | let calculate_psnr = |mse: f64| -> f64 { 80 | if mse == 0.0 { 81 | 0.0 82 | } else { 83 | 20.0 * (1.0 / mse.sqrt()).log10() 84 | } 85 | }; 86 | 87 | let overall_mse = channel_mse.iter().sum::() / channels as f64; 88 | let overall_psnr = calculate_psnr(overall_mse); 89 | 90 | let channel_results = ChannelResults { 91 | red: ChannelMetrics { 92 | mse: channel_mse[0], 93 | psnr: calculate_psnr(channel_mse[0]), 94 | }, 95 | green: ChannelMetrics { 96 | mse: channel_mse[1], 97 | psnr: calculate_psnr(channel_mse[1]), 98 | }, 99 | blue: ChannelMetrics { 100 | mse: channel_mse[2], 101 | psnr: calculate_psnr(channel_mse[2]), 102 | }, 103 | alpha: ChannelMetrics { 104 | mse: channel_mse[3], 105 | psnr: calculate_psnr(channel_mse[3]), 106 | }, 107 | }; 108 | 109 | PsnrResult { 110 | overall_psnr, 111 | overall_mse, 112 | channel_results, 113 | } 114 | } 115 | 116 | fn print_metrics(name: &str, metrics: 
&PsnrResult) { 117 | println!("-----------------------"); 118 | println!("Image name: {name}"); 119 | println!("Overall PSNR: {:.4} dB", metrics.overall_psnr); 120 | println!("Overall MSE: {:.9}", metrics.overall_mse); 121 | println!( 122 | "Red channel PSNR: {:.4} dB", 123 | metrics.channel_results.red.psnr 124 | ); 125 | println!( 126 | "Green channel PSNR: {:.4} dB", 127 | metrics.channel_results.green.psnr 128 | ); 129 | println!( 130 | "Blue channel PSNR: {:.4} dB", 131 | metrics.channel_results.blue.psnr 132 | ); 133 | println!( 134 | "Alpha channel PSNR: {:.4} dB", 135 | metrics.channel_results.alpha.psnr 136 | ); 137 | println!("-----------------------"); 138 | } 139 | 140 | fn compress_image_reference( 141 | variant: CompressionVariant, 142 | width: u32, 143 | height: u32, 144 | data: &[u8], 145 | ) -> Vec { 146 | let output_size = variant.blocks_byte_size(width, height); 147 | let mut blocks = vec![0; output_size]; 148 | compress_rgba8(variant, data, &mut blocks, width, height, width * 4); 149 | blocks 150 | } 151 | 152 | fn compress_image(image_path: &str, variant: CompressionVariant) -> (u32, u32, Vec, Vec) { 153 | let (device, queue) = create_wgpu_resources(); 154 | let mut block_compressor = GpuBlockCompressor::new(device.clone(), queue.clone()); 155 | 156 | let (texture, original_data) = 157 | read_image_and_create_texture(&device, &queue, image_path, variant); 158 | let blocks_size = variant.blocks_byte_size(texture.width(), texture.height()); 159 | 160 | let blocks = create_blocks_buffer(&device, blocks_size as u64); 161 | 162 | block_compressor.add_compression_task( 163 | variant, 164 | &texture.create_view(&TextureViewDescriptor::default()), 165 | texture.width(), 166 | texture.height(), 167 | &blocks, 168 | None, 169 | None, 170 | ); 171 | 172 | let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor { 173 | label: Some("command encoder"), 174 | }); 175 | 176 | { 177 | let mut pass = 
encoder.begin_compute_pass(&ComputePassDescriptor { 178 | label: Some("compute pass"), 179 | timestamp_writes: None, 180 | }); 181 | 182 | block_compressor.compress(&mut pass); 183 | } 184 | 185 | queue.submit([encoder.finish()]); 186 | 187 | let blocks_data = download_blocks_data(&device, &queue, blocks); 188 | 189 | ( 190 | texture.width(), 191 | texture.height(), 192 | original_data, 193 | blocks_data, 194 | ) 195 | } 196 | 197 | fn calculate_psnr( 198 | variant: CompressionVariant, 199 | channels: u32, 200 | width: u32, 201 | height: u32, 202 | original_data: &[u8], 203 | blocks_data: &[u8], 204 | ) -> PsnrResult { 205 | let size = width * height * 4; 206 | 207 | let mut decompressed_data = vec![0; size as usize]; 208 | decompress_blocks_as_rgba8(variant, width, height, blocks_data, &mut decompressed_data); 209 | 210 | calculate_image_metrics_rgba8(original_data, &decompressed_data, width, height, channels) 211 | } 212 | 213 | fn compare_psnr(image_path: &str, variant: CompressionVariant, channels: u32) { 214 | let image_name = std::path::Path::new(image_path) 215 | .file_name() 216 | .unwrap() 217 | .to_str() 218 | .unwrap(); 219 | 220 | let (width, height, original_data, blocks_data) = compress_image(image_path, variant); 221 | 222 | let psnr = calculate_psnr( 223 | variant, 224 | channels, 225 | width, 226 | height, 227 | &original_data, 228 | &blocks_data, 229 | ); 230 | 231 | let reference_block_data = compress_image_reference(variant, width, height, &original_data); 232 | 233 | let reference_psnr = calculate_psnr( 234 | variant, 235 | channels, 236 | width, 237 | height, 238 | &original_data, 239 | &reference_block_data, 240 | ); 241 | 242 | print_metrics(image_name, &psnr); 243 | print_metrics(image_name, &reference_psnr); 244 | 245 | const DIFFERENCE: f64 = 0.0025; 246 | 247 | if f64::abs(reference_psnr.overall_psnr - psnr.overall_psnr) > DIFFERENCE { 248 | panic!( 249 | "Significant overall PSNR difference for image `{image_name}`: {:.3} != {:.3}", 250 
reference_psnr.overall_psnr, psnr.overall_psnr
        );
    }
}

#[test]
fn psnr_bc1() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC1, 3);
    }
}

#[test]
fn psnr_bc3() {
    for path in [BRICK_ALPHA_FILE_PATH, MARBLE_ALPHA_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC3, 4);
    }
}

#[test]
fn psnr_bc6h_very_fast() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC6H(BC6HSettings::very_fast()), 3);
    }
}

#[test]
fn psnr_bc6h_fast() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC6H(BC6HSettings::fast()), 3);
    }
}

#[test]
fn psnr_bc6h_basic() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC6H(BC6HSettings::basic()), 3);
    }
}

#[test]
fn psnr_bc6h_slow() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC6H(BC6HSettings::slow()), 3);
    }
}

#[test]
fn psnr_bc6h_very_slow() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC6H(BC6HSettings::very_slow()), 3);
    }
}

#[test]
fn psnr_bc7_alpha_ultra_fast() {
    for path in [BRICK_ALPHA_FILE_PATH, MARBLE_ALPHA_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::alpha_ultrafast()), 4);
    }
}

#[test]
fn psnr_bc7_alpha_very_fast() {
    for path in [BRICK_ALPHA_FILE_PATH, MARBLE_ALPHA_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::alpha_very_fast()), 4);
    }
}

#[test]
fn psnr_bc7_alpha_fast() {
    for path in [BRICK_ALPHA_FILE_PATH, MARBLE_ALPHA_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::alpha_fast()), 4);
    }
}

#[test]
fn psnr_bc7_alpha_basic() {
    for path in [BRICK_ALPHA_FILE_PATH, MARBLE_ALPHA_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::alpha_basic()), 4);
    }
}

#[test]
fn psnr_bc7_alpha_slow() {
    for path in [BRICK_ALPHA_FILE_PATH, MARBLE_ALPHA_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::alpha_slow()), 4);
    }
}

#[test]
fn psnr_bc7_opaque_ultra_fast() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::opaque_ultra_fast()), 3);
    }
}

#[test]
fn psnr_bc7_opaque_very_fast() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::opaque_very_fast()), 3);
    }
}

#[test]
fn psnr_bc7_opaque_fast() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::opaque_fast()), 3);
    }
}

#[test]
fn psnr_bc7_opaque_basic() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::opaque_basic()), 3);
    }
}

#[test]
fn psnr_bc7_opaque_slow() {
    for path in [BRICK_FILE_PATH, MARBLE_FILE_PATH, BLENDER_FILE_PATH] {
        compare_psnr(path, CompressionVariant::BC7(BC7Settings::opaque_slow()), 3);
    }
}
--------------------------------------------------------------------------------
/src/encode.rs:
--------------------------------------------------------------------------------
//! CPU based encoding.

#[cfg(feature = "bc15")]
mod bc1_to_5;
#[cfg(feature = "bc6h")]
mod bc6h;
#[cfg(feature = "bc7")]
mod bc7;
#[cfg(any(feature = "bc6h", feature = "bc7"))]
mod common;

#[cfg(feature = "bc15")]
use self::bc1_to_5::BlockCompressorBC15;
#[cfg(feature = "bc6h")]
use self::bc6h::BlockCompressorBC6H;
#[cfg(feature = "bc7")]
use self::bc7::BlockCompressorBC7;
#[cfg(feature = "bc6h")]
use crate::BC6HSettings;
#[cfg(feature = "bc7")]
use crate::BC7Settings;
#[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))]
use crate::CompressionVariant;

/// Compresses raw RGBA8 data using a texture block compression format.
///
/// It supports BC1 through BC7 compression formats and provides CPU-based texture compression
/// for RGBA8 data.
///
/// # Data Layout Requirements
/// The input data must be in RGBA8 format (8 bits per channel, 32 bits per pixel). The data is
/// expected to be in row-major order, with optional stride for padding between rows.
///
/// # Buffer Requirements
/// The destination buffer must have sufficient capacity to store the compressed blocks.
/// The required size can be calculated using [`CompressionVariant::blocks_byte_size()`].
37 | /// 38 | /// For example: 39 | /// ```ignore 40 | /// let required_size = variant.blocks_byte_size(width, height); 41 | /// assert!(blocks_buffer.len() >= required_size); 42 | /// ``` 43 | /// 44 | /// # Arguments 45 | /// * `variation` - The block compression format to use 46 | /// * `rgba_data` - Source RGBA8 pixel data 47 | /// * `blocks_buffer` - Destination buffer for the compressed blocks 48 | /// * `width` - Width of the image in pixels 49 | /// * `height` - Height of the image in pixels 50 | /// * `stride` - Number of bytes per row in the source data (for padding). 51 | /// Must be `width * 4` for tightly packed RGBA data. 52 | /// 53 | /// # Panics 54 | /// * If `width` or `height` is not a multiple of 4 55 | /// * If the destination `blocks_buffer` is too small to hold the compressed data 56 | /// 57 | /// # Example 58 | /// ``` 59 | /// use block_compression::{encode::compress_rgba8, CompressionVariant}; 60 | /// 61 | /// let rgba_data = vec![0u8; 256 * 256 * 4]; // Your RGBA data 62 | /// let width = 256; 63 | /// let height = 256; 64 | /// let stride = width * 4; // Tightly packed rows 65 | /// let variant = CompressionVariant::BC1; 66 | /// 67 | /// let mut blocks_buffer = vec![0u8; variant.blocks_byte_size(width, height)]; 68 | /// 69 | /// compress_rgba8( 70 | /// variant, 71 | /// &rgba_data, 72 | /// &mut blocks_buffer, 73 | /// width, 74 | /// height, 75 | /// stride, 76 | /// ); 77 | /// ``` 78 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 79 | #[cfg_attr( 80 | docsrs, 81 | doc(cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))) 82 | )] 83 | pub fn compress_rgba8( 84 | variation: CompressionVariant, 85 | rgba_data: &[u8], 86 | blocks_buffer: &mut [u8], 87 | width: u32, 88 | height: u32, 89 | stride: u32, 90 | ) { 91 | assert_eq!(height % 4, 0); 92 | assert_eq!(width % 4, 0); 93 | 94 | let required_size = variation.blocks_byte_size(width, height); 95 | 96 | assert!( 97 | blocks_buffer.len() >= required_size, 
98 | "blocks_buffer size ({}) is too small to hold compressed blocks. Required size: {}", 99 | blocks_buffer.len(), 100 | required_size 101 | ); 102 | 103 | let stride = stride as usize; 104 | let block_width = (width as usize).div_ceil(4); 105 | let block_height = (height as usize).div_ceil(4); 106 | 107 | match variation { 108 | #[cfg(feature = "bc15")] 109 | CompressionVariant::BC1 => { 110 | compress_bc1(rgba_data, blocks_buffer, block_width, block_height, stride); 111 | } 112 | #[cfg(feature = "bc15")] 113 | CompressionVariant::BC2 => { 114 | compress_bc2(rgba_data, blocks_buffer, block_width, block_height, stride); 115 | } 116 | #[cfg(feature = "bc15")] 117 | CompressionVariant::BC3 => { 118 | compress_bc3(rgba_data, blocks_buffer, block_width, block_height, stride); 119 | } 120 | #[cfg(feature = "bc15")] 121 | CompressionVariant::BC4 => { 122 | compress_bc4(rgba_data, blocks_buffer, block_width, block_height, stride); 123 | } 124 | #[cfg(feature = "bc15")] 125 | CompressionVariant::BC5 => { 126 | compress_bc5(rgba_data, blocks_buffer, block_width, block_height, stride); 127 | } 128 | #[cfg(feature = "bc6h")] 129 | CompressionVariant::BC6H(settings) => { 130 | compress_bc6h_8bit( 131 | rgba_data, 132 | blocks_buffer, 133 | block_width, 134 | block_height, 135 | stride, 136 | &settings, 137 | ); 138 | } 139 | #[cfg(feature = "bc7")] 140 | CompressionVariant::BC7(settings) => { 141 | compress_bc7( 142 | rgba_data, 143 | blocks_buffer, 144 | block_width, 145 | block_height, 146 | stride, 147 | &settings, 148 | ); 149 | } 150 | } 151 | } 152 | 153 | /// Compresses raw RGBA16 (half-float) data using the BC6H texture block compression format. 154 | /// 155 | /// It supports only BC6H compression format and provides CPU-based texture compression 156 | /// for RGBA16 (half-float) data. 157 | /// 158 | /// # Data Layout Requirements 159 | /// The input data must be in RGBA16 format (16 bits per channel using half-float). 
The data is 160 | /// expected to be in row-major order, with optional stride for padding between rows. 161 | /// 162 | /// # Buffer Requirements 163 | /// The destination buffer must have sufficient capacity to store the compressed blocks. 164 | /// The required size can be calculated using [`CompressionVariant::blocks_byte_size()`]. 165 | /// 166 | /// For example: 167 | /// ```ignore 168 | /// let required_size = variant.blocks_byte_size(width, height); 169 | /// assert!(blocks_buffer.len() >= required_size); 170 | /// ``` 171 | /// 172 | /// # Arguments 173 | /// * `variation` - The block compression format to use (must be BC6H) 174 | /// * `rgb_data` - Source RGBA16 pixel data in half-float format 175 | /// * `blocks_buffer` - Destination buffer for the compressed blocks 176 | /// * `width` - Width of the image in pixels 177 | /// * `height` - Height of the image in pixels 178 | /// * `stride` - Number of half-float elements per row in the source data (for padding). 179 | /// Must be `width * 4` for tightly packed RGBA data. 
180 | /// 181 | /// # Panics 182 | /// * If `width` or `height` is not a multiple of 4 183 | /// * If the destination `blocks_buffer` is too small to hold the compressed data 184 | /// * If `variation` is not `CompressionVariant::BC6H` 185 | /// 186 | /// # Example 187 | /// ``` 188 | /// use block_compression::{encode::compress_rgba16, BC6HSettings, CompressionVariant}; 189 | /// use half::f16; 190 | /// 191 | /// let rgba_data = vec![f16::ZERO; 256 * 256 * 4]; // Your RGBA16 data 192 | /// let width = 256; 193 | /// let height = 256; 194 | /// let stride = width * 4; // Tightly packed rows 195 | /// let settings = BC6HSettings::very_slow(); 196 | /// let variant = CompressionVariant::BC6H(settings); 197 | /// 198 | /// let mut blocks_buffer = vec![0u8; variant.blocks_byte_size(width, height)]; 199 | /// 200 | /// compress_rgba16( 201 | /// variant, 202 | /// &rgba_data, 203 | /// &mut blocks_buffer, 204 | /// width, 205 | /// height, 206 | /// stride, 207 | /// ); 208 | /// ``` 209 | #[cfg(feature = "bc6h")] 210 | #[cfg_attr(docsrs, doc(cfg(feature = "bc6h")))] 211 | pub fn compress_rgba16( 212 | variation: CompressionVariant, 213 | rgba_data: &[half::f16], 214 | blocks_buffer: &mut [u8], 215 | width: u32, 216 | height: u32, 217 | stride: u32, 218 | ) { 219 | assert_eq!(height % 4, 0); 220 | assert_eq!(width % 4, 0); 221 | 222 | let required_size = variation.blocks_byte_size(width, height); 223 | 224 | assert!( 225 | blocks_buffer.len() >= required_size, 226 | "blocks_buffer size ({}) is too small to hold compressed blocks. 
Required size: {}", 227 | blocks_buffer.len(), 228 | required_size 229 | ); 230 | 231 | let stride = stride as usize; 232 | let block_width = (width as usize).div_ceil(4); 233 | let block_height = (height as usize).div_ceil(4); 234 | 235 | match variation { 236 | CompressionVariant::BC6H(settings) => { 237 | compress_bc6h_16bit( 238 | rgba_data, 239 | blocks_buffer, 240 | block_width, 241 | block_height, 242 | stride, 243 | &settings, 244 | ); 245 | } 246 | #[allow(unreachable_patterns)] 247 | _ => { 248 | panic!("only BC6H is supported for calling compress_rgba16"); 249 | } 250 | } 251 | } 252 | 253 | #[cfg(feature = "bc15")] 254 | fn compress_bc1( 255 | rgba_data: &[u8], 256 | blocks_buffer: &mut [u8], 257 | block_width: usize, 258 | block_height: usize, 259 | stride: usize, 260 | ) { 261 | for yy in 0..block_height { 262 | for xx in 0..block_width { 263 | let mut block_compressor = BlockCompressorBC15::default(); 264 | 265 | block_compressor.load_block_interleaved_rgba(rgba_data, xx, yy, stride); 266 | let color_result = block_compressor.compress_block_bc1_core(); 267 | block_compressor.store_data(blocks_buffer, block_width, xx, yy, &color_result); 268 | } 269 | } 270 | } 271 | 272 | #[cfg(feature = "bc15")] 273 | fn compress_bc2( 274 | rgba_data: &[u8], 275 | blocks_buffer: &mut [u8], 276 | block_width: usize, 277 | block_height: usize, 278 | stride: usize, 279 | ) { 280 | for yy in 0..block_height { 281 | for xx in 0..block_width { 282 | let mut block_compressor = BlockCompressorBC15::default(); 283 | let mut compressed_data = [0; 4]; 284 | 285 | let alpha_result = block_compressor.load_block_alpha_4bit(rgba_data, xx, yy, stride); 286 | 287 | compressed_data[0] = alpha_result[0]; 288 | compressed_data[1] = alpha_result[1]; 289 | 290 | block_compressor.load_block_interleaved_rgba(rgba_data, xx, yy, stride); 291 | 292 | let color_result = block_compressor.compress_block_bc1_core(); 293 | compressed_data[2] = color_result[0]; 294 | compressed_data[3] = 
color_result[1];

            block_compressor.store_data(blocks_buffer, block_width, xx, yy, &compressed_data);
        }
    }
}

/// Compresses every 4x4 tile into a BC3 block (interpolated alpha block
/// followed by a BC1 color block).
#[cfg(feature = "bc15")]
fn compress_bc3(
    rgba_data: &[u8],
    blocks_buffer: &mut [u8],
    block_width: usize,
    block_height: usize,
    stride: usize,
) {
    for by in 0..block_height {
        for bx in 0..block_width {
            let mut compressor = BlockCompressorBC15::default();
            let mut block = [0; 4];

            compressor.load_block_interleaved_rgba(rgba_data, bx, by, stride);

            let alpha = compressor.compress_block_bc3_alpha();
            block[0] = alpha[0];
            block[1] = alpha[1];

            let color = compressor.compress_block_bc1_core();
            block[2] = color[0];
            block[3] = color[1];

            compressor.store_data(blocks_buffer, block_width, bx, by, &block);
        }
    }
}

/// Compresses every 4x4 tile into a BC4 block (single channel, encoded with
/// the BC3 alpha block codec).
#[cfg(feature = "bc15")]
fn compress_bc4(
    rgba_data: &[u8],
    blocks_buffer: &mut [u8],
    block_width: usize,
    block_height: usize,
    stride: usize,
) {
    for by in 0..block_height {
        for bx in 0..block_width {
            let mut compressor = BlockCompressorBC15::default();
            let mut block = [0; 2];

            compressor.load_block_r_8bit(rgba_data, bx, by, stride);

            let red = compressor.compress_block_bc3_alpha();
            block[0] = red[0];
            block[1] = red[1];

            compressor.store_data(blocks_buffer, block_width, bx, by, &block);
        }
    }
}

/// Compresses every 4x4 tile into a BC5 block (two channels, each encoded
/// with the BC3 alpha block codec).
#[cfg(feature = "bc15")]
fn compress_bc5(
    rgba_data: &[u8],
    blocks_buffer: &mut [u8],
    block_width: usize,
    block_height: usize,
    stride: usize,
) {
    for by in 0..block_height {
        for bx in 0..block_width {
            let mut compressor = BlockCompressorBC15::default();
            let mut block = [0; 4];

            compressor.load_block_r_8bit(rgba_data, bx, by, stride);

            let red = compressor.compress_block_bc3_alpha();
            block[0] = red[0];
            block[1] = red[1];

            compressor.load_block_g_8bit(rgba_data, bx, by, stride);

            let green = compressor.compress_block_bc3_alpha();
            block[2] = green[0];
            block[3] = green[1];

            compressor.store_data(blocks_buffer, block_width, bx, by, &block);
        }
    }
}

/// Compresses every 4x4 tile of 8-bit RGBA data into a BC6H block.
#[cfg(feature = "bc6h")]
fn compress_bc6h_8bit(
    rgba_data: &[u8],
    blocks_buffer: &mut [u8],
    block_width: usize,
    block_height: usize,
    stride: usize,
    settings: &BC6HSettings,
) {
    for by in 0..block_height {
        for bx in 0..block_width {
            let mut compressor = BlockCompressorBC6H::new(settings);
            compressor.load_block_interleaved_8bit(rgba_data, bx, by, stride);
            compressor.compress_bc6h_core();
            compressor.store_data(blocks_buffer, block_width, bx, by);
        }
    }
}

/// Compresses every 4x4 tile of half-float RGBA data into a BC6H block.
#[cfg(feature = "bc6h")]
fn compress_bc6h_16bit(
    rgba_data: &[half::f16],
    blocks_buffer: &mut [u8],
    block_width: usize,
    block_height: usize,
    stride: usize,
    settings: &BC6HSettings,
) {
    for by in 0..block_height {
        for bx in 0..block_width {
            let mut compressor = BlockCompressorBC6H::new(settings);
            compressor.load_block_interleaved_16bit(rgba_data, bx, by, stride);
            compressor.compress_bc6h_core();
            compressor.store_data(blocks_buffer, block_width, bx, by);
        }
    }
}

/// Compresses every 4x4 tile into a BC7 block.
#[cfg(feature = "bc7")]
fn
compress_bc7(
    rgba_data: &[u8],
    blocks_buffer: &mut [u8],
    block_width: usize,
    block_height: usize,
    stride: usize,
    settings: &BC7Settings,
) {
    for by in 0..block_height {
        for bx in 0..block_width {
            let mut compressor = BlockCompressorBC7::new(settings);

            compressor.load_block_interleaved_rgba(rgba_data, bx, by, stride);
            compressor.compute_opaque_err();
            compressor.compress_block_bc7_core();
            compressor.store_data(blocks_buffer, block_width, bx, by);
        }
    }
}
--------------------------------------------------------------------------------
/src/decode.rs:
--------------------------------------------------------------------------------
//! CPU based decoding.

#[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))]
mod block;

#[cfg(feature = "bc7")]
#[cfg_attr(docsrs, doc(cfg(feature = "bc7")))]
pub use self::block::decode_block_bc7;
#[cfg(feature = "bc15")]
#[cfg_attr(docsrs, doc(cfg(feature = "bc15")))]
pub use self::block::{
    decode_block_bc1, decode_block_bc2, decode_block_bc3, decode_block_bc4, decode_block_bc5,
};
#[cfg(feature = "bc6h")]
#[cfg_attr(docsrs, doc(cfg(feature = "bc6h")))]
pub use self::block::{decode_block_bc6h, decode_block_bc6h_float};
#[cfg(feature = "bc6h")]
use crate::BC6HSettings;
#[cfg(feature = "bc7")]
use crate::BC7Settings;
#[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))]
use crate::CompressionVariant;

/// Trait to decode a BC variant into RGBA8 data.
#[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))]
trait BlockRgba8Decoder {
    fn decode_block_rgba8(compressed: &[u8], decompressed: &mut [u8], pitch: usize);
    fn block_byte_size() -> u32;
}

/// Trait to decode a BC variant into RGBA16F data.
#[cfg(feature = "bc6h")]
trait BlockRgba16fDecoder {
    fn decode_block_rgba16f(compressed: &[u8], decompressed: &mut [half::f16], pitch: usize);
    fn block_byte_size() -> u32;
}

/// Trait to decode a BC variant into RGBA32F data.
#[cfg(feature = "bc6h")]
trait BlockRgba32fDecoder {
    fn decode_block_rgba32f(compressed: &[u8], decompressed: &mut [f32], pitch: usize);
    fn block_byte_size() -> u32;
}

// Zero-sized marker types; the decoder traits dispatch on them statically.
#[cfg(feature = "bc15")]
struct BC1Decoder;
#[cfg(feature = "bc15")]
struct BC2Decoder;
#[cfg(feature = "bc15")]
struct BC3Decoder;
#[cfg(feature = "bc15")]
struct BC4Decoder;
#[cfg(feature = "bc15")]
struct BC5Decoder;
#[cfg(feature = "bc6h")]
struct BC6HDecoder;
#[cfg(feature = "bc7")]
struct BC7Decoder;

#[cfg(feature = "bc15")]
impl BlockRgba8Decoder for BC1Decoder {
    #[inline(always)]
    fn decode_block_rgba8(compressed: &[u8], decompressed: &mut [u8], pitch: usize) {
        decode_block_bc1(compressed, decompressed, pitch)
    }

    fn block_byte_size() -> u32 {
        CompressionVariant::BC1.block_byte_size()
    }
}

#[cfg(feature = "bc15")]
impl BlockRgba8Decoder for BC2Decoder {
    #[inline(always)]
    fn decode_block_rgba8(compressed: &[u8], decompressed: &mut [u8], pitch: usize) {
        decode_block_bc2(compressed, decompressed, pitch)
    }

    fn block_byte_size() -> u32 {
        CompressionVariant::BC2.block_byte_size()
    }
}

#[cfg(feature = "bc15")]
impl BlockRgba8Decoder for BC3Decoder {
    #[inline(always)]
    fn decode_block_rgba8(compressed: &[u8], decompressed: &mut [u8], pitch: usize) {
        decode_block_bc3(compressed, decompressed, pitch)
    }

    fn block_byte_size() -> u32 {
        CompressionVariant::BC3.block_byte_size()
    }
}

#[cfg(feature = "bc15")]
impl BlockRgba8Decoder for BC4Decoder {
    #[inline(always)]
    fn decode_block_rgba8(compressed: &[u8], decompressed: &mut [u8], pitch: usize) {
        // Decode into a tightly packed 4x4 R8 scratch block first.
        const PITCH: usize = 4;
        let mut buffer = [0u8; 16];
        decode_block_bc4(compressed, &mut buffer, PITCH);

        // Expand R8 to RGBA8.
        // NOTE(review): G/B/A are cleared to 0, so the result is fully
        // transparent when read as RGBA — confirm this matches the
        // consumers of the decoded data.
        for y in 0..4 {
            for x in 0..4 {
                let dst = y * pitch + x * 4;
                let src = y * PITCH + x;

                decompressed[dst] = buffer[src];
                decompressed[dst + 1] = 0;
                decompressed[dst + 2] = 0;
                decompressed[dst + 3] = 0;
            }
        }
    }

    fn block_byte_size() -> u32 {
        CompressionVariant::BC4.block_byte_size()
    }
}

#[cfg(feature = "bc15")]
impl BlockRgba8Decoder for BC5Decoder {
    #[inline(always)]
    fn decode_block_rgba8(compressed: &[u8], decompressed: &mut [u8], pitch: usize) {
        // Decode into a tightly packed 4x4 RG8 scratch block first.
        const PITCH: usize = 8;
        let mut buffer = [0u8; 32];
        decode_block_bc5(compressed, &mut buffer, PITCH);

        // Expand RG8 to RGBA8 (B/A cleared to 0, matching the BC4 path).
        for y in 0..4 {
            for x in 0..4 {
                let dst = y * pitch + x * 4;
                let src = y * PITCH + x * 2;

                decompressed[dst] = buffer[src];
                decompressed[dst + 1] = buffer[src + 1];
                decompressed[dst + 2] = 0;
                decompressed[dst + 3] = 0;
            }
        }
    }

    fn block_byte_size() -> u32 {
        CompressionVariant::BC5.block_byte_size()
    }
}

/// Converts a linear color value to its 8-bit sRGB encoding, clamped to [0, 255].
#[cfg(feature = "bc6h")]
fn linear_to_srgb(linear: f32) -> u8 {
    // Standard sRGB transfer function with the linear toe segment.
    let v = if linear <= 0.0031308 {
        linear * 12.92
    } else {
        1.055 * linear.powf(1.0 / 2.4) - 0.055
    };

    (v.clamp(0.0, 1.0) * 255.0).round() as u8
}

#[cfg(feature = "bc6h")]
impl BlockRgba8Decoder for BC6HDecoder {
    #[inline(always)]
    fn decode_block_rgba8(compressed: &[u8], decompressed: &mut [u8], pitch: usize) {
        // Decode into a tightly packed 4x4 RGB32F scratch block, converted
        // to sRGB bytes below.
        const PITCH: usize = 12;
        let mut buffer = [0.0_f32; 48];
decode_block_bc6h_float(compressed, &mut buffer, PITCH, false); 168 | 169 | // Convert RGB16F to RGBA8 170 | for y in 0..4 { 171 | for x in 0..4 { 172 | let out_pos = y * pitch + x * 4; 173 | let in_pos = y * PITCH + x * 3; 174 | 175 | decompressed[out_pos] = linear_to_srgb(buffer[in_pos]) as _; 176 | decompressed[out_pos + 1] = linear_to_srgb(buffer[in_pos + 1]) as _; 177 | decompressed[out_pos + 2] = linear_to_srgb(buffer[in_pos + 2]) as _; 178 | decompressed[out_pos + 3] = 0; 179 | } 180 | } 181 | } 182 | 183 | fn block_byte_size() -> u32 { 184 | CompressionVariant::BC6H(BC6HSettings::basic()).block_byte_size() 185 | } 186 | } 187 | 188 | #[cfg(feature = "bc7")] 189 | impl BlockRgba8Decoder for BC7Decoder { 190 | #[inline(always)] 191 | fn decode_block_rgba8(compressed: &[u8], decompressed: &mut [u8], pitch: usize) { 192 | decode_block_bc7(compressed, decompressed, pitch) 193 | } 194 | 195 | fn block_byte_size() -> u32 { 196 | CompressionVariant::BC7(BC7Settings::alpha_basic()).block_byte_size() 197 | } 198 | } 199 | 200 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 201 | fn decompress_rgba8( 202 | width: u32, 203 | height: u32, 204 | blocks_data: &[u8], 205 | rgba_data: &mut [u8], 206 | ) { 207 | let blocks_x = width.div_ceil(4); 208 | let blocks_y = height.div_ceil(4); 209 | let block_byte_size = D::block_byte_size() as usize; 210 | let output_row_pitch = width as usize * 4; // Always RGBA 211 | 212 | for by in 0..blocks_y { 213 | for bx in 0..blocks_x { 214 | let block_index = (by * blocks_x + bx) as usize; 215 | let block_offset = block_index * block_byte_size; 216 | 217 | if block_offset + block_byte_size > blocks_data.len() { 218 | break; 219 | } 220 | 221 | let output_offset = (by * 4 * output_row_pitch as u32 + bx * 16) as usize; 222 | 223 | if output_offset < rgba_data.len() { 224 | D::decode_block_rgba8( 225 | &blocks_data[block_offset..block_offset + block_byte_size], 226 | &mut rgba_data[output_offset..], 227 | output_row_pitch, 
228 | ); 229 | } 230 | } 231 | } 232 | } 233 | 234 | #[cfg(feature = "bc6h")] 235 | impl BlockRgba16fDecoder for BC6HDecoder { 236 | #[inline(always)] 237 | fn decode_block_rgba16f(compressed: &[u8], decompressed: &mut [half::f16], pitch: usize) { 238 | decode_block_bc6h(compressed, decompressed, pitch, false); 239 | } 240 | 241 | fn block_byte_size() -> u32 { 242 | CompressionVariant::BC6H(BC6HSettings::basic()).block_byte_size() 243 | } 244 | } 245 | 246 | #[cfg(feature = "bc6h")] 247 | fn decompress_rgba16f( 248 | width: u32, 249 | height: u32, 250 | blocks_data: &[u8], 251 | rgba_data: &mut [half::f16], 252 | ) { 253 | let blocks_x = width.div_ceil(4); 254 | let blocks_y = height.div_ceil(4); 255 | let block_byte_size = D::block_byte_size() as usize; 256 | let output_row_pitch = width as usize * 4; // Always RGBA16f 257 | 258 | for by in 0..blocks_y { 259 | for bx in 0..blocks_x { 260 | let block_index = (by * blocks_x + bx) as usize; 261 | let block_offset = block_index * block_byte_size; 262 | 263 | if block_offset + block_byte_size > blocks_data.len() { 264 | break; 265 | } 266 | 267 | let output_offset = (by * 4 * output_row_pitch as u32 + bx * 16) as usize; 268 | 269 | if output_offset < rgba_data.len() { 270 | D::decode_block_rgba16f( 271 | &blocks_data[block_offset..block_offset + block_byte_size], 272 | &mut rgba_data[output_offset..], 273 | output_row_pitch, 274 | ); 275 | } 276 | } 277 | } 278 | } 279 | 280 | #[cfg(feature = "bc6h")] 281 | impl BlockRgba32fDecoder for BC6HDecoder { 282 | #[inline(always)] 283 | fn decode_block_rgba32f(compressed: &[u8], decompressed: &mut [f32], pitch: usize) { 284 | decode_block_bc6h_float(compressed, decompressed, pitch, false); 285 | } 286 | 287 | fn block_byte_size() -> u32 { 288 | CompressionVariant::BC6H(BC6HSettings::basic()).block_byte_size() 289 | } 290 | } 291 | 292 | #[cfg(feature = "bc6h")] 293 | fn decompress_rgba32f( 294 | width: u32, 295 | height: u32, 296 | blocks_data: &[u8], 297 | rgba_data: &mut 
[f32], 298 | ) { 299 | let blocks_x = width.div_ceil(4); 300 | let blocks_y = height.div_ceil(4); 301 | let block_byte_size = D::block_byte_size() as usize; 302 | let output_row_pitch = width as usize * 4; // Always RGBA32f 303 | 304 | for by in 0..blocks_y { 305 | for bx in 0..blocks_x { 306 | let block_index = (by * blocks_x + bx) as usize; 307 | let block_offset = block_index * block_byte_size; 308 | 309 | if block_offset + block_byte_size > blocks_data.len() { 310 | break; 311 | } 312 | 313 | let output_offset = (by * 4 * output_row_pitch as u32 + bx * 16) as usize; 314 | 315 | if output_offset < rgba_data.len() { 316 | D::decode_block_rgba32f( 317 | &blocks_data[block_offset..block_offset + block_byte_size], 318 | &mut rgba_data[output_offset..], 319 | output_row_pitch, 320 | ); 321 | } 322 | } 323 | } 324 | } 325 | 326 | /// Helper function to easily decompress block data into RGBA8 data. 327 | /// 328 | /// # Panics 329 | /// - The `blocks_data` has not the expected size (`variant.blocks_byte_size()`) 330 | /// - The `rgba_data` has not the expected size (`width * height * 4`) 331 | #[cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))] 332 | #[cfg_attr( 333 | docsrs, 334 | doc(cfg(any(feature = "bc15", feature = "bc6h", feature = "bc7"))) 335 | )] 336 | pub fn decompress_blocks_as_rgba8( 337 | variant: CompressionVariant, 338 | width: u32, 339 | height: u32, 340 | blocks_data: &[u8], 341 | rgba_data: &mut [u8], 342 | ) { 343 | let expected_input_size = variant.blocks_byte_size(width, height); 344 | assert_eq!( 345 | blocks_data.len(), 346 | expected_input_size, 347 | "the input bitstream slice has not the expected size" 348 | ); 349 | 350 | let expected_output_size = width as usize * height as usize * 4; 351 | assert_eq!( 352 | rgba_data.len(), 353 | expected_output_size, 354 | "the output slice has not the expected size" 355 | ); 356 | 357 | match variant { 358 | #[cfg(feature = "bc15")] 359 | CompressionVariant::BC1 => { 360 | 
decompress_rgba8::(width, height, blocks_data, rgba_data) 361 | } 362 | #[cfg(feature = "bc15")] 363 | CompressionVariant::BC2 => { 364 | decompress_rgba8::(width, height, blocks_data, rgba_data) 365 | } 366 | #[cfg(feature = "bc15")] 367 | CompressionVariant::BC3 => { 368 | decompress_rgba8::(width, height, blocks_data, rgba_data) 369 | } 370 | #[cfg(feature = "bc15")] 371 | CompressionVariant::BC4 => { 372 | decompress_rgba8::(width, height, blocks_data, rgba_data) 373 | } 374 | #[cfg(feature = "bc15")] 375 | CompressionVariant::BC5 => { 376 | decompress_rgba8::(width, height, blocks_data, rgba_data) 377 | } 378 | #[cfg(feature = "bc6h")] 379 | CompressionVariant::BC6H(..) => { 380 | decompress_rgba8::(width, height, blocks_data, rgba_data) 381 | } 382 | #[cfg(feature = "bc7")] 383 | CompressionVariant::BC7(..) => { 384 | decompress_rgba8::(width, height, blocks_data, rgba_data) 385 | } 386 | } 387 | } 388 | 389 | /// Helper function to easily decompress block data into RGBA16F data. Only BCH6 is currently supported. 390 | /// 391 | /// # Panics 392 | /// - The `blocks_data` has not the expected size (`variant.blocks_byte_size()`) 393 | /// - The `rgba_data` has not the expected size (`width * height * 4`) 394 | /// - If `variant` is any other value than BC6H. 
395 | #[cfg(feature = "bc6h")] 396 | #[cfg_attr(docsrs, doc(cfg(feature = "bc6h")))] 397 | pub fn decompress_blocks_as_rgba16f( 398 | variant: CompressionVariant, 399 | width: u32, 400 | height: u32, 401 | blocks_data: &[u8], 402 | rgba_data: &mut [half::f16], 403 | ) { 404 | let expected_input_size = variant.blocks_byte_size(width, height); 405 | 406 | assert_eq!( 407 | blocks_data.len(), 408 | expected_input_size, 409 | "the input bitstream slice has not the expected size" 410 | ); 411 | 412 | let expected_output_size = width as usize * height as usize * 4; 413 | assert_eq!( 414 | rgba_data.len(), 415 | expected_output_size, 416 | "the output slice has not the expected size" 417 | ); 418 | 419 | match variant { 420 | CompressionVariant::BC6H(..) => { 421 | decompress_rgba16f::(width, height, blocks_data, rgba_data) 422 | } 423 | #[allow(unreachable_patterns)] 424 | _ => { 425 | panic!("unsupported compression variant"); 426 | } 427 | } 428 | } 429 | 430 | /// Helper function to easily decompress block data into RGBA32F data. Only BCH6 is currently supported. 431 | /// 432 | /// # Panics 433 | /// - The `blocks_data` has not the expected size (`variant.blocks_byte_size()`) 434 | /// - The `rgba_data` has not the expected size (`width * height * 4`) 435 | /// - If `variant` is any other value than BC6H. 
436 | #[cfg(feature = "bc6h")] 437 | #[cfg_attr(docsrs, doc(cfg(feature = "bc6h")))] 438 | pub fn decompress_blocks_as_rgba32f( 439 | variant: CompressionVariant, 440 | width: u32, 441 | height: u32, 442 | blocks_data: &[u8], 443 | rgba_data: &mut [f32], 444 | ) { 445 | let expected_input_size = variant.blocks_byte_size(width, height); 446 | assert_eq!( 447 | blocks_data.len(), 448 | expected_input_size, 449 | "the input bitstream slice has not the expected size" 450 | ); 451 | 452 | let expected_output_size = width as usize * height as usize * 4; 453 | assert_eq!( 454 | rgba_data.len(), 455 | expected_output_size, 456 | "the output slice has not the expected size" 457 | ); 458 | 459 | match variant { 460 | CompressionVariant::BC6H(..) => { 461 | decompress_rgba32f::(width, height, blocks_data, rgba_data) 462 | } 463 | #[allow(unreachable_patterns)] 464 | _ => { 465 | panic!("unsupported compression variant"); 466 | } 467 | } 468 | } 469 | -------------------------------------------------------------------------------- /src/shader/bc1_to_5.wgsl: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025, Nils Hasenbanck 2 | // Copyright (c) 2016-2024, Intel Corporation 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 5 | // documentation files (the "Software"), to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | // permit persons to whom the Software is furnished to do so, subject to the following conditions: 8 | // 9 | // The above copyright notice and this permission notice shall be included in all copies or substantial portions of 10 | // the Software. 
11 | // 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 13 | // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 15 | // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 16 | // SOFTWARE. 17 | 18 | struct Uniforms { 19 | width: u32, 20 | height: u32, 21 | texture_y_offset: u32, 22 | blocks_offset: u32, 23 | } 24 | 25 | @group(0) @binding(0) var source_texture: texture_2d; 26 | @group(0) @binding(1) var block_buffer: array; 27 | @group(0) @binding(2) var uniforms: Uniforms; 28 | 29 | fn sq(x: f32) -> f32 { 30 | return x * x; 31 | } 32 | 33 | fn rsqrt(x: f32) -> f32 { 34 | return 1.0 / sqrt(x); 35 | } 36 | 37 | fn rcp(x: f32) -> f32 { 38 | return 1.0 / x; 39 | } 40 | 41 | fn load_block_interleaved_rgba(block: ptr>, xx: u32, yy: u32) { 42 | for (var y = 0u; y < 4u; y++) { 43 | for (var x = 0u; x < 4u; x++) { 44 | let pixel_x = xx * 4u + x; 45 | let pixel_y = yy * 4u + y; 46 | let rgba = textureLoad(source_texture, vec2(pixel_x, pixel_y), 0); 47 | 48 | (*block)[16u * 0u + y * 4u + x] = rgba.r * 255.0; 49 | (*block)[16u * 1u + y * 4u + x] = rgba.g * 255.0; 50 | (*block)[16u * 2u + y * 4u + x] = rgba.b * 255.0; 51 | (*block)[16u * 3u + y * 4u + x] = rgba.a * 255.0; 52 | } 53 | } 54 | } 55 | 56 | fn load_block_r_8bit(block: ptr>, xx: u32, yy: u32) { 57 | for (var y = 0u; y < 4u; y++) { 58 | for (var x = 0u; x < 4u; x++) { 59 | let pixel_x = xx * 4u + x; 60 | let pixel_y = yy * 4u + y; 61 | let red = textureLoad(source_texture, vec2(pixel_x, pixel_y), 0).r; 62 | 63 | (*block)[48u + y * 4u + x] = red * 255.0; 64 | } 65 | } 66 | } 67 | 68 | fn load_block_g_8bit(block: ptr>, xx: u32, yy: u32) { 69 | for (var y = 0u; y < 4u; y++) { 70 | for (var x = 0u; x < 4u; 
x++) { 71 | let pixel_x = xx * 4u + x; 72 | let pixel_y = yy * 4u + y; 73 | let green = textureLoad(source_texture, vec2(pixel_x, pixel_y), 0).g; 74 | 75 | (*block)[48u + y * 4u + x] = green * 255.0; 76 | } 77 | } 78 | } 79 | 80 | fn load_block_alpha_4bit(xx: u32, yy: u32) -> vec2 { 81 | var alpha_bits: vec2; 82 | 83 | for (var y = 0u; y < 4u; y++) { 84 | for (var x = 0u; x < 4u; x++) { 85 | let pixel_x = xx * 4u + x; 86 | let pixel_y = yy * 4u + y + uniforms.texture_y_offset;; 87 | let alpha = textureLoad(source_texture, vec2(pixel_x, pixel_y), 0).a; 88 | 89 | // Convert alpha to 4 bits (0-15) 90 | let alpha4 = u32(alpha * 15.0); 91 | let bit_position = y * 16u + x * 4u; 92 | 93 | if (bit_position < 32u) { 94 | alpha_bits[0] |= (alpha4 << bit_position); 95 | } else { 96 | alpha_bits[1] |= (alpha4 << (bit_position - 32u)); 97 | } 98 | } 99 | } 100 | 101 | return alpha_bits; 102 | } 103 | 104 | fn store_data_2(block_width: u32, xx: u32, yy: u32, data: vec2) { 105 | let offset = uniforms.blocks_offset + (yy * block_width * 2u + xx * 2u); 106 | 107 | block_buffer[offset + 0] = data[0]; 108 | block_buffer[offset + 1] = data[1]; 109 | } 110 | 111 | fn store_data_4(block_width: u32, xx: u32, yy: u32, data: vec4) { 112 | let offset = uniforms.blocks_offset + (yy * block_width * 4u + xx * 4u); 113 | 114 | block_buffer[offset + 0] = data[0]; 115 | block_buffer[offset + 1] = data[1]; 116 | block_buffer[offset + 2] = data[2]; 117 | block_buffer[offset + 3] = data[3]; 118 | } 119 | 120 | fn compute_covar_dc( 121 | covar: ptr>, 122 | dc: ptr>, 123 | block: ptr>, 124 | ) { 125 | for (var p = 0u; p < 3u; p++) { 126 | var acc = 0.0; 127 | for (var k = 0u; k < 16u; k++) { 128 | acc += (*block)[k + p * 16u]; 129 | } 130 | (*dc)[p] = acc / 16.0; 131 | } 132 | 133 | var covar0 = 0.0; 134 | var covar1 = 0.0; 135 | var covar2 = 0.0; 136 | var covar3 = 0.0; 137 | var covar4 = 0.0; 138 | var covar5 = 0.0; 139 | 140 | for (var k = 0u; k < 16u; k++) { 141 | let rgb0 = (*block)[k + 0u * 16u] 
- (*dc)[0]; 142 | let rgb1 = (*block)[k + 1u * 16u] - (*dc)[1]; 143 | let rgb2 = (*block)[k + 2u * 16u] - (*dc)[2]; 144 | 145 | covar0 += rgb0 * rgb0; 146 | covar1 += rgb0 * rgb1; 147 | covar2 += rgb0 * rgb2; 148 | covar3 += rgb1 * rgb1; 149 | covar4 += rgb1 * rgb2; 150 | covar5 += rgb2 * rgb2; 151 | } 152 | 153 | (*covar)[0] = covar0; 154 | (*covar)[1] = covar1; 155 | (*covar)[2] = covar2; 156 | (*covar)[3] = covar3; 157 | (*covar)[4] = covar4; 158 | (*covar)[5] = covar5; 159 | } 160 | 161 | fn ssymv(result: ptr>, covar: ptr>, a_vector: ptr>) { 162 | (*result)[0] = (*covar)[0] * (*a_vector)[0] + (*covar)[1] * (*a_vector)[1] + (*covar)[2] * (*a_vector)[2]; 163 | (*result)[1] = (*covar)[1] * (*a_vector)[0] + (*covar)[3] * (*a_vector)[1] + (*covar)[4] * (*a_vector)[2]; 164 | (*result)[2] = (*covar)[2] * (*a_vector)[0] + (*covar)[4] * (*a_vector)[1] + (*covar)[5] * (*a_vector)[2]; 165 | } 166 | 167 | fn compute_axis3(axis: ptr>, covar: ptr>, powerIterations: i32) { 168 | var a_vector = vec3(1.0, 1.0, 1.0); 169 | 170 | for (var i = 0; i < powerIterations; i++) { 171 | ssymv(axis, covar, &a_vector); 172 | 173 | for (var p = 0u; p < 3u; p++) { 174 | a_vector[p] = (*axis)[p]; 175 | } 176 | 177 | if (i % 2 == 1) { 178 | var norm_sq = 0.0; 179 | for (var p = 0u; p < 3u; p++) { 180 | norm_sq += (*axis)[p] * (*axis)[p]; 181 | } 182 | 183 | let rnorm = rsqrt(norm_sq); 184 | for (var p = 0u; p < 3u; p++) { 185 | a_vector[p] *= rnorm; 186 | } 187 | } 188 | } 189 | 190 | for (var p = 0u; p < 3u; p++) { 191 | (*axis)[p] = a_vector[p]; 192 | } 193 | } 194 | 195 | fn pick_endpoints( 196 | c0: ptr>, 197 | c1: ptr>, 198 | block: ptr>, 199 | axis: ptr>, 200 | dc: ptr> 201 | ) { 202 | var min_dot = 256.0 * 256.0; 203 | var max_dot = 0.0; 204 | 205 | for (var y = 0u; y < 4u; y++) { 206 | for (var x = 0u; x < 4u; x++) { 207 | var dot = 0.0; 208 | for (var p = 0u; p < 3u; p++) { 209 | dot += ((*block)[p * 16u + y * 4u + x] - (*dc)[p]) * (*axis)[p]; 210 | } 211 | 212 | min_dot = 
min(min_dot, dot); 213 | max_dot = max(max_dot, dot); 214 | } 215 | } 216 | 217 | if (max_dot - min_dot < 1.0) { 218 | min_dot -= 0.5; 219 | max_dot += 0.5; 220 | } 221 | 222 | var norm_sq = 0.0; 223 | for (var p = 0u; p < 3u; p++) { 224 | norm_sq += (*axis)[p] * (*axis)[p]; 225 | } 226 | 227 | let rnorm_sq = rcp(norm_sq); 228 | for (var p = 0u; p < 3u; p++) { 229 | (*c0)[p] = clamp((*dc)[p] + min_dot * rnorm_sq * (*axis)[p], 0.0, 255.0); 230 | (*c1)[p] = clamp((*dc)[p] + max_dot * rnorm_sq * (*axis)[p], 0.0, 255.0); 231 | } 232 | } 233 | 234 | fn dec_rgb565(c: ptr>, p: i32) { 235 | let b5 = (p >> 0) & 31; 236 | let g6 = (p >> 5) & 63; 237 | let r5 = (p >> 11) & 31; 238 | 239 | (*c)[0] = f32((r5 << 3) + (r5 >> 2)); 240 | (*c)[1] = f32((g6 << 2) + (g6 >> 4)); 241 | (*c)[2] = f32((b5 << 3) + (b5 >> 2)); 242 | } 243 | 244 | fn enc_rgb565(c: ptr>) -> i32 { 245 | let r = i32((*c)[0]); 246 | let g = i32((*c)[1]); 247 | let b = i32((*c)[2]); 248 | 249 | let r5 = (r * 31 + 128 + ((r * 31) >> 8)) >> 8; 250 | let g6 = (g * 63 + 128 + ((g * 63) >> 8)) >> 8; 251 | let b5 = (b * 31 + 128 + ((b * 31) >> 8)) >> 8; 252 | 253 | return (r5 << 11) + (g6 << 5) + b5; 254 | } 255 | 256 | fn fast_quant(block: ptr>, p0: i32, p1: i32) -> u32 { 257 | var c0: vec3; 258 | var c1: vec3; 259 | dec_rgb565(&c0, p0); 260 | dec_rgb565(&c1, p1); 261 | 262 | var dir: vec3; 263 | for (var p = 0u; p < 3u; p++) { 264 | dir[p] = c1[p] - c0[p]; 265 | } 266 | 267 | var sq_norm = 0.0; 268 | for (var p = 0u; p < 3u; p++) { 269 | sq_norm += sq(dir[p]); 270 | } 271 | 272 | let rsq_norm = rcp(sq_norm); 273 | 274 | for (var p = 0u; p < 3u; p++) { 275 | dir[p] *= rsq_norm * 3.0; 276 | } 277 | 278 | var bias = 0.5; 279 | for (var p = 0u; p < 3u; p++) { 280 | bias -= c0[p] * dir[p]; 281 | } 282 | 283 | var bits = 0u; 284 | var scaler = 1u; 285 | for (var k = 0u; k < 16u; k++) { 286 | var dot = 0.0; 287 | for (var p = 0u; p < 3u; p++) { 288 | dot += (*block)[k + p * 16u] * dir[p]; 289 | } 290 | 291 | let q = 
clamp(i32(dot + bias), 0, 3); 292 | bits += u32(q) * scaler; 293 | scaler *= 4u; 294 | } 295 | 296 | return bits; 297 | } 298 | 299 | fn bc1_refine(pe: ptr>, block: ptr>, bits: u32, dc: ptr>) { 300 | var c0: vec3; 301 | var c1: vec3; 302 | 303 | if ((bits ^ (bits * 4u)) < 4u) { 304 | for (var p = 0u; p < 3u; p++) { 305 | c0[p] = (*dc)[p]; 306 | c1[p] = (*dc)[p]; 307 | } 308 | } else { 309 | var atb1: vec3; 310 | var sum_q = 0.0; 311 | var sum_qq = 0.0; 312 | var shifted_bits = bits; 313 | 314 | for (var k = 0u; k < 16u; k++) { 315 | let q = f32(shifted_bits & 3u); 316 | shifted_bits = shifted_bits >> 2u; 317 | 318 | let x = 3.0 - q; 319 | 320 | sum_q += q; 321 | sum_qq += q * q; 322 | 323 | for (var p = 0u; p < 3u; p++) { 324 | atb1[p] += x * (*block)[k + p * 16u]; 325 | } 326 | } 327 | 328 | var sum: vec3; 329 | var atb2: vec3; 330 | 331 | for (var p = 0u; p < 3u; p++) { 332 | sum[p] = (*dc)[p] * 16.0; 333 | atb2[p] = 3.0 * sum[p] - atb1[p]; 334 | } 335 | 336 | let cxx = 16.0 * sq(3.0) - 2.0 * 3.0 * sum_q + sum_qq; 337 | let cyy = sum_qq; 338 | let cxy = 3.0 * sum_q - sum_qq; 339 | let scale = 3.0 * rcp(cxx * cyy - cxy * cxy); 340 | 341 | for (var p = 0u; p < 3u; p++) { 342 | c0[p] = (atb1[p] * cyy - atb2[p] * cxy) * scale; 343 | c1[p] = (atb2[p] * cxx - atb1[p] * cxy) * scale; 344 | 345 | c0[p] = clamp(c0[p], 0.0, 255.0); 346 | c1[p] = clamp(c1[p], 0.0, 255.0); 347 | } 348 | } 349 | 350 | (*pe)[0] = enc_rgb565(&c0); 351 | (*pe)[1] = enc_rgb565(&c1); 352 | } 353 | 354 | fn fix_qbits(qbits: u32) -> u32 { 355 | const MASK_01B: u32 = 0x55555555u; 356 | const MASK_10B: u32 = 0xAAAAAAAAu; 357 | 358 | let qbits0 = qbits & MASK_01B; 359 | let qbits1 = qbits & MASK_10B; 360 | return (qbits1 >> 1u) + (qbits1 ^ (qbits0 << 1u)); 361 | } 362 | 363 | fn compress_block_bc1_core(block: ptr>) -> vec2 { 364 | let power_iterations = 4; 365 | let refine_iterations = 1; 366 | 367 | var covar: array; 368 | var dc: vec3; 369 | compute_covar_dc(&covar, &dc, block); 370 | 371 | const eps 
= 0.001; 372 | covar[0] += eps; 373 | covar[3] += eps; 374 | covar[5] += eps; 375 | 376 | var axis: vec3; 377 | compute_axis3(&axis, &covar, power_iterations); 378 | 379 | var c0: vec3; 380 | var c1: vec3; 381 | pick_endpoints(&c0, &c1, block, &axis, &dc); 382 | 383 | var p: vec2; 384 | p[0] = enc_rgb565(&c0); 385 | p[1] = enc_rgb565(&c1); 386 | if (p[0] < p[1]) { 387 | let temp = p[0]; 388 | p[0] = p[1]; 389 | p[1] = temp; 390 | } 391 | 392 | var data: vec2; 393 | data[0] = (u32(p[1]) << 16u) | u32(p[0]); 394 | data[1] = fast_quant(block, p[0], p[1]); 395 | 396 | for (var i = 0; i < refine_iterations; i++) { 397 | bc1_refine(&p, block, data[1], &dc); 398 | if (p[0] < p[1]) { 399 | let temp = p[0]; 400 | p[0] = p[1]; 401 | p[1] = temp; 402 | } 403 | data[0] = (u32(p[1]) << 16u) | u32(p[0]); 404 | data[1] = fast_quant(block, p[0], p[1]); 405 | } 406 | 407 | data[1] = fix_qbits(data[1]); 408 | return data; 409 | } 410 | 411 | fn compress_block_bc3_alpha(block: ptr>) -> vec2 { 412 | var ep = vec2(255.0, 0.0); 413 | 414 | // Find min/max endpoints using block[48] to block[63] for alpha 415 | for (var k: u32 = 0u; k < 16u; k++) { 416 | ep[0] = min(ep[0], (*block)[48 + k]); 417 | ep[1] = max(ep[1], (*block)[48 + k]); 418 | } 419 | 420 | // Prevent division by zero 421 | if (ep[0] == ep[1]) { 422 | ep[1] = ep[0] + 0.1; 423 | } 424 | 425 | var qblock: vec2; 426 | let scale = 7.0 / (ep[1] - ep[0]); 427 | 428 | for (var k: u32 = 0u; k < 16u; k++) { 429 | let v = (*block)[48u + k]; 430 | let proj = (v - ep[0]) * scale + 0.5; 431 | 432 | var q = clamp(i32(proj), 0, 7); 433 | q = 7 - q; 434 | 435 | if (q > 0) { 436 | q += 1; 437 | } 438 | if (q == 8) { 439 | q = 1; 440 | } 441 | 442 | qblock[k / 8u] |= u32(q) << ((k % 8u) * 3u); 443 | } 444 | 445 | var data: vec2; 446 | data[0] = (clamp(u32(ep[0]), 0u, 255u) << 8u) | clamp(u32(ep[1]), 0u, 255u); 447 | data[0] |= qblock[0] << 16u; 448 | data[1] = qblock[0] >> 16u; 449 | data[1] |= qblock[1] << 8u; 450 | 451 | return data; 452 | 
} 453 | 454 | @compute 455 | @workgroup_size(8, 8) 456 | fn compress_bc1(@builtin(global_invocation_id) global_id: vec3) { 457 | let xx = global_id.x; 458 | let yy = global_id.y; 459 | 460 | let block_width = (uniforms.width + 3u) / 4u; 461 | let block_height = (uniforms.height + 3u) / 4u; 462 | 463 | if (xx >= block_width || yy >= block_height) { 464 | return; 465 | } 466 | 467 | var block: array; 468 | var compressed_data: vec2; 469 | 470 | load_block_interleaved_rgba(&block, xx, yy); 471 | 472 | let color_result = compress_block_bc1_core(&block); 473 | compressed_data[0] = color_result[0]; 474 | compressed_data[1] = color_result[1]; 475 | 476 | store_data_2(block_width, xx, yy, compressed_data); 477 | } 478 | 479 | @compute 480 | @workgroup_size(8, 8) 481 | fn compress_bc2(@builtin(global_invocation_id) global_id: vec3) { 482 | let xx = global_id.x; 483 | let yy = global_id.y; 484 | 485 | let block_width = (uniforms.width + 3u) / 4u; 486 | let block_height = (uniforms.height + 3u) / 4u; 487 | 488 | if (xx >= block_width || yy >= block_height) { 489 | return; 490 | } 491 | 492 | var block: array; 493 | var compressed_data: vec4; 494 | 495 | let alpha_result = load_block_alpha_4bit(xx, yy); 496 | compressed_data[0] = alpha_result[0]; 497 | compressed_data[1] = alpha_result[1]; 498 | 499 | load_block_interleaved_rgba(&block, xx, yy); 500 | 501 | let color_result = compress_block_bc1_core(&block); 502 | compressed_data[2] = color_result[0]; 503 | compressed_data[3] = color_result[1]; 504 | 505 | store_data_4(block_width, xx, yy, compressed_data); 506 | } 507 | 508 | @compute 509 | @workgroup_size(8, 8) 510 | fn compress_bc3(@builtin(global_invocation_id) global_id: vec3) { 511 | let xx = global_id.x; 512 | let yy = global_id.y; 513 | 514 | let block_width = (uniforms.width + 3u) / 4u; 515 | let block_height = (uniforms.height + 3u) / 4u; 516 | 517 | if (xx >= block_width || yy >= block_height) { 518 | return; 519 | } 520 | 521 | var block: array; 522 | var 
compressed_data: vec4; 523 | 524 | load_block_interleaved_rgba(&block, xx, yy); 525 | 526 | let alpha_result = compress_block_bc3_alpha(&block); 527 | compressed_data[0] = alpha_result[0]; 528 | compressed_data[1] = alpha_result[1]; 529 | 530 | let color_result = compress_block_bc1_core(&block); 531 | compressed_data[2] = color_result[0]; 532 | compressed_data[3] = color_result[1]; 533 | 534 | store_data_4(block_width, xx, yy, compressed_data); 535 | } 536 | 537 | @compute 538 | @workgroup_size(8, 8) 539 | fn compress_bc4(@builtin(global_invocation_id) global_id: vec3) { 540 | let xx = global_id.x; 541 | let yy = global_id.y; 542 | 543 | let block_width = (uniforms.width + 3u) / 4u; 544 | let block_height = (uniforms.height + 3u) / 4u; 545 | 546 | if (xx >= block_width || yy >= block_height) { 547 | return; 548 | } 549 | 550 | var block: array; 551 | var compressed_data: vec2; 552 | 553 | load_block_r_8bit(&block, xx, yy); 554 | 555 | let color_result = compress_block_bc3_alpha(&block); 556 | compressed_data[0] = color_result[0]; 557 | compressed_data[1] = color_result[1]; 558 | 559 | store_data_2(block_width, xx, yy, compressed_data); 560 | } 561 | 562 | @compute 563 | @workgroup_size(8, 8) 564 | fn compress_bc5(@builtin(global_invocation_id) global_id: vec3) { 565 | let xx = global_id.x; 566 | let yy = global_id.y; 567 | 568 | let block_width = (uniforms.width + 3u) / 4u; 569 | let block_height = (uniforms.height + 3u) / 4u; 570 | 571 | if (xx >= block_width || yy >= block_height) { 572 | return; 573 | } 574 | 575 | var block: array; 576 | var compressed_data: vec4; 577 | 578 | load_block_r_8bit(&block, xx, yy); 579 | 580 | let red_result = compress_block_bc3_alpha(&block); 581 | compressed_data[0] = red_result[0]; 582 | compressed_data[1] = red_result[1]; 583 | 584 | load_block_g_8bit(&block, xx, yy); 585 | 586 | let green_result = compress_block_bc3_alpha(&block); 587 | compressed_data[2] = green_result[0]; 588 | compressed_data[3] = green_result[1]; 589 | 590 
| store_data_4(block_width, xx, yy, compressed_data); 591 | } 592 | -------------------------------------------------------------------------------- /src/encode/common.rs: -------------------------------------------------------------------------------- 1 | #[inline(always)] 2 | pub(crate) const fn sq(x: f32) -> f32 { 3 | x * x 4 | } 5 | 6 | pub(crate) fn get_unquant_value(bits: u32, index: i32) -> i32 { 7 | match bits { 8 | 2 => { 9 | const TABLE: [i32; 16] = [0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; 10 | TABLE[index as usize] 11 | } 12 | 3 => { 13 | const TABLE: [i32; 16] = [0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0]; 14 | TABLE[index as usize] 15 | } 16 | _ => { 17 | const TABLE: [i32; 16] = [0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64]; 18 | TABLE[index as usize] 19 | } 20 | } 21 | } 22 | 23 | pub(crate) fn get_pattern(part_id: i32) -> u32 { 24 | const PATTERN_TABLE: [u32; 128] = [ 25 | 0x50505050, 0x40404040, 0x54545454, 0x54505040, 0x50404000, 0x55545450, 0x55545040, 26 | 0x54504000, 0x50400000, 0x55555450, 0x55544000, 0x54400000, 0x55555440, 0x55550000, 27 | 0x55555500, 0x55000000, 0x55150100, 0x00004054, 0x15010000, 0x00405054, 0x00004050, 28 | 0x15050100, 0x05010000, 0x40505054, 0x00404050, 0x05010100, 0x14141414, 0x05141450, 29 | 0x01155440, 0x00555500, 0x15014054, 0x05414150, 0x44444444, 0x55005500, 0x11441144, 30 | 0x05055050, 0x05500550, 0x11114444, 0x41144114, 0x44111144, 0x15055054, 0x01055040, 31 | 0x05041050, 0x05455150, 0x14414114, 0x50050550, 0x41411414, 0x00141400, 0x00041504, 32 | 0x00105410, 0x10541000, 0x04150400, 0x50410514, 0x41051450, 0x05415014, 0x14054150, 33 | 0x41050514, 0x41505014, 0x40011554, 0x54150140, 0x50505500, 0x00555050, 0x15151010, 34 | 0x54540404, 0xAA685050, 0x6A5A5040, 0x5A5A4200, 0x5450A0A8, 0xA5A50000, 0xA0A05050, 35 | 0x5555A0A0, 0x5A5A5050, 0xAA550000, 0xAA555500, 0xAAAA5500, 0x90909090, 0x94949494, 36 | 0xA4A4A4A4, 0xA9A59450, 0x2A0A4250, 0xA5945040, 0x0A425054, 0xA5A5A500, 
// NOTE(review): the first lines below are the tail of a packed partition
// lookup table belonging to a function whose signature lies before this
// chunk; reproduced unchanged.
        0x55A0A0A0, 0xA8A85454, 0x6A6A4040, 0xA4A45000, 0x1A1A0500, 0x0050A4A4, 0xAAA59090, 0x14696914,
        0x69691400, 0xA08585A0, 0xAA821414, 0x50A4A450, 0x6A5A0200, 0xA9A58000, 0x5090A0A8,
        0xA8A09050, 0x24242424, 0x00AA5500, 0x24924924, 0x24499224, 0x50A50A50, 0x500AA550,
        0xAAAA4444, 0x66660000, 0xA5A0A5A0, 0x50A050A0, 0x69286928, 0x44AAAA44, 0x66666600,
        0xAA444444, 0x54A854A8, 0x95809580, 0x96969600, 0xA85454A8, 0x80959580, 0xAA141414,
        0x96960000, 0xAAAA1414, 0xA05050A0, 0xA0A5A5A0, 0x96000000, 0x40804080, 0xA9A8A9A8,
        0xAAAAAA44, 0x2A4A5254,
    ];

    PATTERN_TABLE[part_id as usize]
}

/// Returns a 16-bit mask (one bit per pixel of the 4x4 block) selecting the
/// pixels that belong to subset `j` of BC7 partition `part_id`.
///
/// Each table entry packs two 16-bit masks: the low half is subset 0, the
/// high half is subset 1. Subset 2 (3-subset modes) is derived as the pixels
/// contained in neither stored mask.
pub(crate) fn get_pattern_mask(part_id: i32, j: u32) -> u32 {
    const PATTERN_MASK_TABLE: [u32; 128] = [
        0xCCCC3333, 0x88887777, 0xEEEE1111, 0xECC81337, 0xC880377F, 0xFEEC0113, 0xFEC80137,
        0xEC80137F, 0xC80037FF, 0xFFEC0013, 0xFE80017F, 0xE80017FF, 0xFFE80017, 0xFF0000FF,
        0xFFF0000F, 0xF0000FFF, 0xF71008EF, 0x008EFF71, 0x71008EFF, 0x08CEF731, 0x008CFF73,
        0x73108CEF, 0x3100CEFF, 0x8CCE7331, 0x088CF773, 0x3110CEEF, 0x66669999, 0x366CC993,
        0x17E8E817, 0x0FF0F00F, 0x718E8E71, 0x399CC663, 0xAAAA5555, 0xF0F00F0F, 0x5A5AA5A5,
        0x33CCCC33, 0x3C3CC3C3, 0x55AAAA55, 0x96966969, 0xA55A5AA5, 0x73CE8C31, 0x13C8EC37,
        0x324CCDB3, 0x3BDCC423, 0x69969669, 0xC33C3CC3, 0x99666699, 0x0660F99F, 0x0272FD8D,
        0x04E4FB1B, 0x4E40B1BF, 0x2720D8DF, 0xC93636C9, 0x936C6C93, 0x39C6C639, 0x639C9C63,
        0x93366CC9, 0x9CC66339, 0x817E7E81, 0xE71818E7, 0xCCF0330F, 0x0FCCF033, 0x774488BB,
        0xEE2211DD, 0x08CC0133, 0x8CC80037, 0xCC80006F, 0xEC001331, 0x330000FF, 0x00CC3333,
        0xFF000033, 0xCCCC0033, 0x0F0000FF, 0x0FF0000F, 0x00F0000F, 0x44443333, 0x66661111,
        0x22221111, 0x136C0013, 0x008C8C63, 0x36C80137, 0x08CEC631, 0x3330000F, 0xF0000333,
        0x00EE1111, 0x88880077, 0x22C0113F, 0x443088CF, 0x0C22F311, 0x03440033, 0x69969009,
        0x9960009F, 0x03303443, 0x00660699, 0xC22C3113, 0x8C0000EF, 0x1300007F, 0xC4003331,
        0x004C1333, 0x22229999, 0x00F0F00F, 0x24929249, 0x29429429, 0xC30C30C3, 0xC03C3C03,
        0x00AA0055, 0xAA0000FF, 0x30300303, 0xC0C03333, 0x90900909, 0xA00A5005, 0xAAA0000F,
        0x0AAA0555, 0xE0E01111, 0x70700707, 0x6660000F, 0x0EE01111, 0x07707007, 0x06660999,
        0x660000FF, 0x00660099, 0x0CC03333, 0x03303003, 0x60000FFF, 0x80807777, 0x10100101,
        0x000A0005, 0x08CE8421,
    ];

    let mask_packed = PATTERN_MASK_TABLE[part_id as usize];
    let mask0 = mask_packed & 0xFFFF;
    let mask1 = mask_packed >> 16;

    if j == 2 {
        // Subset 2: pixels that are in neither of the two stored masks.
        !mask0 & !mask1
    } else if j == 0 {
        mask0
    } else {
        mask1
    }
}

/// Returns the anchor ("skip") pixel indices for the subsets of partition
/// `part_id` as `[anchor0, anchor1, anchor2]`.
///
/// Anchor 0 is always pixel 0; anchors 1 and 2 are packed into one byte per
/// table entry (high nibble / low nibble).
pub(crate) fn get_skips(part_id: i32) -> [u32; 3] {
    const SKIP_TABLE: [u32; 128] = [
        0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
        0xF0, 0xF0, 0x20, 0x80, 0x20, 0x20, 0x80, 0x80, 0xF0, 0x20, 0x80, 0x20, 0x20, 0x80, 0x80,
        0x20, 0x20, 0xF0, 0xF0, 0x60, 0x80, 0x20, 0x80, 0xF0, 0xF0, 0x20, 0x80, 0x20, 0x20, 0x20,
        0xF0, 0xF0, 0x60, 0x60, 0x20, 0x60, 0x80, 0xF0, 0xF0, 0x20, 0x20, 0xF0, 0xF0, 0xF0, 0xF0,
        0xF0, 0x20, 0x20, 0xF0, 0x3F, 0x38, 0xF8, 0xF3, 0x8F, 0x3F, 0xF3, 0xF8, 0x8F, 0x8F, 0x6F,
        0x6F, 0x6F, 0x5F, 0x3F, 0x38, 0x3F, 0x38, 0x8F, 0xF3, 0x3F, 0x38, 0x6F, 0xA8, 0x53, 0x8F,
        0x86, 0x6A, 0x8F, 0x5F, 0xFA, 0xF8, 0x8F, 0xF3, 0x3F, 0x5A, 0x6A, 0xA8, 0x89, 0xFA, 0xF6,
        0x3F, 0xF8, 0x5F, 0xF3, 0xF6, 0xF6, 0xF8, 0x3F, 0xF3, 0x5F, 0x5F, 0x5F, 0x8F, 0x5F, 0xAF,
        0x5F, 0xAF, 0x8F, 0xDF, 0xF3, 0xCF, 0x3F, 0x38,
    ];

    let skip_packed = SKIP_TABLE[part_id as usize];

    [0, skip_packed >> 4, skip_packed & 15]
}

/// Appends the low `bits` bits of `v` to the 160-bit output `data` at bit
/// position `*pos`, advancing `*pos`.
///
/// Handles writes that straddle a 32-bit word boundary by spilling the high
/// part into the next word. Assumes `v` has no set bits above `bits`
/// (callers pass pre-masked values).
pub(crate) fn put_bits(data: &mut [u32; 5], pos: &mut u32, bits: u32, v: u32) {
    data[(*pos / 32) as usize] |= v << (*pos % 32);
    if *pos % 32 + bits > 32 {
        // Write spilled over into the next word.
        data[(*pos / 32 + 1) as usize] |= v >> (32 - *pos % 32);
    }
    *pos += bits;
}

/// Shifts the 160-bit value in `data` right by one bit, starting at bit
/// position `from_bits`; all bits below `from_bits` are left untouched.
///
/// Used to remove the implicit anchor-index MSB when encoding BC7 index
/// data. Only the `64..128` bit range is handled, which is where BC7 index
/// bits live.
pub(crate) fn data_shl_1bit_from(data: &mut [u32; 5], from_bits: usize) {
    if from_bits < 96 {
        let shifted = (data[2] >> 1) | (data[3] << 31);
        // Keep the bits below `from_bits` in word 2, take the shifted bits above.
        let mask = ((1 << (from_bits - 64)) - 1) >> 1;
        data[2] = (mask & data[2]) | (!mask & shifted);
        data[3] = (data[3] >> 1) | (data[4] << 31);
        data[4] >>= 1;
    } else if from_bits < 128 {
        let shifted = (data[3] >> 1) | (data[4] << 31);
        let mask = ((1 << (from_bits - 96)) - 1) >> 1;
        data[3] = (mask & data[3]) | (!mask & shifted);
        data[4] >>= 1;
    }
}

/// Moves the `partial_count` smallest of the first `length` elements of
/// `list` to the front, in ascending order (partial selection sort).
pub(crate) fn partial_sort_list(list: &mut [i32], length: usize, partial_count: u32) {
    for k in 0..partial_count as usize {
        let mut best_idx = k;
        let mut best_value = list[k];

        for i in k + 1..length {
            if best_value > list[i] {
                best_value = list[i];
                best_idx = i;
            }
        }

        list.swap(k, best_idx);
    }
}

/// Refines endpoint pair `ep` (layout: `ep[0..4]` = endpoint A,
/// `ep[4..8]` = endpoint B) by least-squares fitting against the pixels of
/// `block` selected by `mask`, given the quantized indices in `qblock`
/// (4 bits per pixel) at `bits` index precision.
///
/// `block` is stored planar: 16 values per channel. If the system is nearly
/// singular the endpoints are flattened to the masked average.
pub(crate) fn opt_endpoints(
    ep: &mut [f32],
    block: &[f32; 64],
    bits: u32,
    qblock: [u32; 2],
    mask: u32,
    channels: usize,
) {
    let levels = 1 << bits;

    let mut atb1 = [0.0; 4];
    let mut sum_q = 0.0;
    let mut sum_qq = 0.0;
    let mut sum = [0.0; 5]; // sum[0..4] per-channel sums, sum[4] = pixel count

    let mut mask_shifted = mask << 1;
    for k1 in 0..2 {
        let mut qbits_shifted = qblock[k1];
        for k2 in 0..8 {
            let k = k1 * 8 + k2;
            let q = (qbits_shifted & 15) as f32;
            qbits_shifted >>= 4;

            mask_shifted >>= 1;
            if (mask_shifted & 1) == 0 {
                continue; // pixel not in this subset
            }

            let x = (levels - 1) as f32 - q;

            sum_q += q;
            sum_qq += q * q;

            sum[4] += 1.0;
            for p in 0..channels {
                sum[p] += block[k + p * 16];
                atb1[p] += x * block[k + p * 16];
            }
        }
    }

    let mut atb2 = [0.0; 4];
    for p in 0..channels {
        atb2[p] = (levels - 1) as f32 * sum[p] - atb1[p];
    }

    // 2x2 normal-equation system for the two endpoints.
    let cxx = sum[4] * sq((levels - 1) as f32) - 2.0 * (levels - 1) as f32 * sum_q + sum_qq;
    let cyy = sum_qq;
    let cxy = (levels - 1) as f32 * sum_q - sum_qq;
    let scale = (levels - 1) as f32 / (cxx * cyy - cxy * cxy);

    for p in 0..channels {
        ep[p] = (atb1[p] * cyy - atb2[p] * cxy) * scale;
        ep[4 + p] = (atb2[p] * cxx - atb1[p] * cxy) * scale;
    }

    // Nearly singular determinant: fall back to the masked average.
    if f32::abs(cxx * cyy - cxy * cxy) < 0.001 {
        // flatten
        for p in 0..channels {
            ep[p] = sum[p] / sum[4];
            ep[4 + p] = ep[p];
        }
    }
}

// Principal Component Analysis (PCA) bound
/// Estimates an upper bound on the residual variance left after projecting
/// the covariance `covar` onto its dominant eigenvector (trace minus the
/// approximated largest eigenvalue). Used for cheap partition ranking.
pub(crate) fn get_pca_bound(covar: &[f32; 10], channels: usize) -> f32 {
    const POWER_ITERATIONS: u32 = 4; // Quite approximative, but enough for bounding

    let mut covar_scaled = *covar;
    let inv_var = 1.0 / (256.0 * 256.0);
    for covar_scaled in covar_scaled.iter_mut() {
        *covar_scaled *= inv_var;
    }

    // Regularize the diagonal so power iteration stays stable.
    const EPS: f32 = sq(0.001);
    covar_scaled[0] += EPS;
    covar_scaled[4] += EPS;
    covar_scaled[7] += EPS;

    let mut axis = [0.0; 4];
    compute_axis(&mut axis, &covar_scaled, POWER_ITERATIONS, channels);

    let mut a_vec = [0.0; 4];
    if channels == 3 {
        ssymv3(&mut a_vec, &covar_scaled, &axis);
    } else if channels == 4 {
        ssymv4(&mut a_vec, &covar_scaled, &axis);
    }

    // |C * axis| approximates the largest eigenvalue (axis is ~unit length).
    let mut sq_sum = 0.0;
    for &value in a_vec[..channels].iter() {
        sq_sum += sq(value);
    }
    let lambda = sq_sum.sqrt();

    // trace(C) - lambda = variance not explained by the principal axis.
    let mut bound = covar_scaled[0] + covar_scaled[4] + covar_scaled[7];
    if channels == 4 {
        bound += covar_scaled[9];
    }
    bound -= lambda;

    f32::max(bound, 0.0)
}

/// Symmetric 3x3 matrix-vector product: `a = covar * b`.
/// `covar` holds the packed upper triangle of a symmetric 4x4 matrix;
/// indices 0,1,2 / 4,5 / 7 form the 3x3 part.
pub(crate) fn ssymv3(a: &mut [f32; 4], covar: &[f32; 10], b: &[f32; 4]) {
    a[0] = covar[0] * b[0] + covar[1] * b[1] + covar[2] * b[2];
    a[1] = covar[1] * b[0] + covar[4] * b[1] + covar[5] * b[2];
    a[2] = covar[2] * b[0] + covar[5] * b[1] + covar[7] * b[2];
}

/// Symmetric 4x4 matrix-vector product: `a = covar * b`, with `covar` as
/// the packed upper triangle (10 coefficients).
pub(crate) fn ssymv4(a: &mut [f32; 4], covar: &[f32; 10], b: &[f32; 4]) {
    a[0] = covar[0] * b[0] + covar[1] * b[1] + covar[2] * b[2] + covar[3] * b[3];
    a[1] = covar[1] * b[0] + covar[4] * b[1] + covar[5] * b[2] + covar[6] * b[3];
    a[2] = covar[2] * b[0] + covar[5] * b[1] + covar[7] * b[2] + covar[8] * b[3];
    a[3] = covar[3] * b[0] + covar[6] * b[1] + covar[8] * b[2] + covar[9] * b[3];
}

/// Approximates the dominant eigenvector of the symmetric covariance matrix
/// `covar` via power iteration, writing it (unnormalized scale aside) into
/// `axis[..channels]`.
pub(crate) fn compute_axis(
    axis: &mut [f32; 4],
    covar: &[f32; 10],
    power_iterations: u32,
    channels: usize,
) {
    let mut a_vec = [1.0, 1.0, 1.0, 1.0];

    for i in 0..power_iterations {
        if channels == 3 {
            ssymv3(axis, covar, &a_vec);
        } else if channels == 4 {
            ssymv4(axis, covar, &a_vec);
        }

        a_vec[..channels].copy_from_slice(&axis[..channels]);

        // Renormalize every other iteration
        if i % 2 == 1 {
            let mut norm_sq = 0.0;
            for p in 0..channels {
                norm_sq += sq(axis[p]);
            }

            let rnorm = 1.0 / norm_sq.sqrt();
            for value in a_vec[..channels].iter_mut() {
                *value *= rnorm;
            }
        }
    }

    axis[..channels].copy_from_slice(&a_vec[..channels]);
}

/// Accumulates raw first/second-order statistics of the pixels of `block`
/// selected by `mask` into `stats`:
/// `stats[0..10]` = packed upper triangle of the sum-of-products matrix,
/// `stats[10..14]` = per-channel sums, `stats[14]` = pixel count.
/// `block` is planar (16 values per channel).
pub(crate) fn compute_stats_masked(
    stats: &mut [f32; 15],
    block: &[f32; 64],
    mask: u32,
    channels: usize,
) {
    let mut mask_shifted = mask << 1;
    for k in 0..16 {
        mask_shifted >>= 1;
        // Branch-free: excluded pixels contribute zero via the flag multiply.
        let flag = (mask_shifted & 1) as f32;

        let mut rgba = [0.0; 4];
        for p in 0..channels {
            rgba[p] = block[k + p * 16] * flag;
        }
        stats[14] += flag;

        stats[10] += rgba[0];
        stats[11] += rgba[1];
        stats[12] += rgba[2];

        stats[0] += rgba[0] * rgba[0];
        stats[1] += rgba[0] * rgba[1];
        stats[2] += rgba[0] * rgba[2];

        stats[4] += rgba[1] * rgba[1];
        stats[5] += rgba[1] * rgba[2];

        stats[7] += rgba[2] * rgba[2];

        if channels == 4 {
            stats[13] += rgba[3];
            stats[3] += rgba[0] * rgba[3];
            stats[6] += rgba[1] * rgba[3];
            stats[8] += rgba[2] * rgba[3];
            stats[9] += rgba[3] * rgba[3];
        }
    }
}

/// Converts raw statistics (see [`compute_stats_masked`]) into a covariance
/// matrix: `cov(x, y) = E[xy] * n - E[x]E[y] * n` (unnormalized).
pub(crate) fn covar_from_stats(covar: &mut [f32; 10], stats: [f32; 15], channels: usize) {
    covar[0] = stats[0] - stats[10] * stats[10] / stats[14];
    covar[1] = stats[1] - stats[10] * stats[11] / stats[14];
    covar[2] = stats[2] - stats[10] * stats[12] / stats[14];

    covar[4] = stats[4] - stats[11] * stats[11] / stats[14];
    covar[5] = stats[5] - stats[11] * stats[12] / stats[14];

    covar[7] = stats[7] - stats[12] * stats[12] / stats[14];

    if channels == 4 {
        covar[3] = stats[3] - stats[10] * stats[13] / stats[14];
        covar[6] = stats[6] - stats[11] * stats[13] / stats[14];
        covar[8] = stats[8] - stats[12] * stats[13] / stats[14];
        covar[9] = stats[9] - stats[13] * stats[13] / stats[14];
    }
}

/// Computes the covariance matrix and per-channel mean (`dc`) of the pixels
/// of `block` selected by `mask`.
pub(crate) fn compute_covar_dc_masked(
    covar: &mut [f32; 10],
    dc: &mut [f32; 4],
    block: &[f32; 64],
    mask: u32,
    channels: usize,
) {
    let mut stats = [0.0; 15];
    compute_stats_masked(&mut stats, block, mask, channels);

    // Calculate dc values from stats
    for p in 0..channels {
        dc[p] = stats[10 + p] / stats[14];
    }

    covar_from_stats(covar, stats, channels);
}

/// Computes the principal axis and mean (`dc`) of the masked pixels of
/// `block`, used to place endpoints along the direction of maximum variance.
pub(crate) fn block_pca_axis(
    axis: &mut [f32; 4],
    dc: &mut [f32; 4],
    block: &[f32; 64],
    mask: u32,
    channels: usize,
) {
    const POWER_ITERATIONS: u32 = 8; // 4 not enough for HQ

    let mut covar = [0.0; 10];
    compute_covar_dc_masked(&mut covar, dc, block, mask, channels);

    // Scale into ~unit range and regularize the diagonal before iterating.
    const INV_VAR: f32 = 1.0 / (256.0 * 256.0);
    for covar in covar.iter_mut() {
        *covar *= INV_VAR;
    }

    const EPS: f32 = sq(0.001);
    covar[0] += EPS;
    covar[4] += EPS;
    covar[7] += EPS;
    covar[9] += EPS;

    compute_axis(axis, &covar, POWER_ITERATIONS, channels);
}

/// Scores a 2-way split of the block: the sum of PCA bounds of the pixels
/// in `mask` and of the complementary pixels (derived by subtracting from
/// `full_stats`). Lower is a better partition candidate.
pub(crate) fn block_pca_bound_split(
    block: &[f32; 64],
    mask: u32,
    full_stats: [f32; 15],
    channels: usize,
) -> f32 {
    let mut stats = [0.0; 15];
    compute_stats_masked(&mut stats, block, mask, channels);

    let mut covar1 = [0.0; 10];
    covar_from_stats(&mut covar1, stats, channels);

    // Complement subset statistics: full block minus the masked subset.
    for i in 0..15 {
        stats[i] = full_stats[i] - stats[i];
    }

    let mut covar2 = [0.0; 10];
    covar_from_stats(&mut covar2, stats, channels);

    let mut bound = 0.0;
    bound += get_pca_bound(&covar1, channels);
    bound += get_pca_bound(&covar2, channels);

    bound.sqrt() * 256.0
}

/// Quantizes every pixel of `block` to a `bits`-bit index against the
/// per-subset endpoints in `ep` (8 floats per subset, selected by the 2-bit
/// fields of `pattern`), writing 4-bit indices into `qblock` and returning
/// the total squared error.
pub(crate) fn block_quant(
    qblock: &mut [u32; 2],
    block: &[f32; 64],
    bits: u32,
    ep: &[f32],
    pattern: u32,
    channels: usize,
) -> f32 {
    let mut total_err = 0.0;
    let levels = 1 << bits;

    qblock[0] = 0;
    qblock[1] = 0;

    let mut pattern_shifted = pattern;
    for k in 0..16 {
        let j = (pattern_shifted & 3) as usize; // subset index of this pixel
        pattern_shifted >>= 2;

        // Project the pixel onto the endpoint segment (0..1 parameter).
        let mut proj = 0.0;
        let mut div = 0.0;
        for p in 0..channels {
            let ep_a = ep[8 * j + p];
            let ep_b = ep[8 * j + 4 + p];
            proj += (block[k + p * 16] - ep_a) * (ep_b - ep_a);
            div += sq(ep_b - ep_a);
        }

        proj /= div;

        let q1 = (proj * levels as f32 + 0.5) as i32;
        let q1_clamped = i32::clamp(q1, 1, levels - 1);

        // Try the two candidate indices around the projection and keep the
        // one with lower decoded error.
        let mut err0 = 0.0;
        let mut err1 = 0.0;
        let w0 = get_unquant_value(bits, q1_clamped - 1);
        let w1 = get_unquant_value(bits, q1_clamped);

        for p in 0..channels {
            let ep_a = ep[8 * j + p];
            let ep_b = ep[8 * j + 4 + p];
            // Same integer interpolation the decoder uses.
            let dec_v0 = (((64 - w0) * ep_a as i32 + w0 * ep_b as i32 + 32) / 64) as f32;
            let dec_v1 = (((64 - w1) * ep_a as i32 + w1 * ep_b as i32 + 32) / 64) as f32;
            err0 += sq(dec_v0 - block[k + p * 16]);
            err1 += sq(dec_v1 - block[k + p * 16]);
        }

        let mut best_err = err1;
        let mut best_q = q1_clamped;
        if err0 < err1 {
            best_err = err0;
            best_q = q1_clamped - 1;
        }

        qblock[k / 8] |= (best_q as u32) << (4 * (k % 8));
        total_err += best_err;
    }

    total_err
}

/// Computes an initial endpoint pair for the masked pixels of `block` by
/// projecting them onto the principal axis and taking the extremes.
/// Writes endpoint A into `ep[0..4]` and endpoint B into `ep[4..8]`.
pub(crate) fn block_segment_core(ep: &mut [f32], block: &[f32; 64], mask: u32, channels: usize) {
    let mut axis = [0.0; 4];
    let mut dc = [0.0; 4];
    block_pca_axis(&mut axis, &mut dc, block, mask, channels);

    let mut ext = [f32::INFINITY, f32::NEG_INFINITY];

    // Find min/max
    let mut mask_shifted = mask << 1;
    for k in 0..16 {
        mask_shifted >>= 1;
        if (mask_shifted & 1) == 0 {
            continue;
        }

        let mut dot = 0.0;
        for p in 0..channels {
            dot += axis[p] * (block[16 * p + k] - dc[p]);
        }

        ext[0] = f32::min(ext[0], dot);
        ext[1] = f32::max(ext[1], dot);
    }

    // Create some distance if the endpoints collapse
    if ext[1] - ext[0] < 1.0 {
        ext[0] -= 0.5;
        ext[1] += 0.5;
    }

    for i in 0..2 {
        for p in 0..channels {
            ep[4 * i + p] = ext[i] * axis[p] + dc[p];
        }
    }
}

/// Writes the 16 per-pixel indices of `qblock` into the output bit stream.
/// Indices flagged in `flips` are mirrored (`levels-1 - q`); the very first
/// index is written with one less bit (its MSB is implicitly zero per the
/// BC7 anchor-index rule).
pub(crate) fn bc7_code_qblock(
    data: &mut [u32; 5],
    qpos: &mut u32,
    qblock: [u32; 2],
    bits: u32,
    flips: u32,
) {
    let levels = 1 << bits;
    let mut flips_shifted = flips;

    for k1 in 0..2 {
        let mut qbits_shifted = qblock[k1];
        for k2 in 0..8 {
            let mut q = qbits_shifted & 15;
            if (flips_shifted & 1) > 0 {
                q = (levels - 1) - q;
            }

            if k1 == 0 && k2 == 0 {
                // Anchor pixel 0: implicit zero MSB, one bit fewer.
                put_bits(data, qpos, bits - 1, q);
            } else {
                put_bits(data, qpos, bits, q);
            }
            qbits_shifted >>= 4;
            flips_shifted >>= 1;
        }
    }
}

/// Removes the implicit MSB of the non-zero anchor indices for BC7 modes
/// 0/1/2/3/7 by shifting the index bit stream down at each anchor position
/// (processed high-to-low so earlier shifts don't move later anchors).
pub(crate) fn bc7_code_adjust_skip_mode01237(data: &mut [u32; 5], mode: usize, part_id: i32) {
    let pairs = if mode == 0 || mode == 2 { 3 } else { 2 };
    let bits = if mode == 0 || mode == 1 { 3 } else { 2 };

    let mut skips = get_skips(part_id);

    if pairs > 2 && skips[1] < skips[2] {
        skips.swap(1, 2);
    }

    for &k in skips[1..pairs].iter() {
        data_shl_1bit_from(data, 128 + (pairs - 1) - (15 - k as usize) * bits);
    }
}

/// For single-subset modes 4/5/6: if the anchor index's MSB is set, swaps
/// the endpoint pair and mirrors all indices so the anchor MSB becomes zero
/// (required because that bit is not stored).
pub(crate) fn bc7_code_apply_swap_mode456(
    qep: &mut [i32],
    channels: usize,
    qblock: &mut [u32; 2],
    bits: u32,
) {
    let levels = 1 << bits;

    if (qblock[0] & 15) >= levels / 2 {
        for p in 0..channels {
            qep.swap(p, channels + p);
        }

        // 0x11111111 replicates (levels-1) into every 4-bit index lane.
        for value in qblock.iter_mut() {
            *value = (0x11111111 * (levels - 1)) - *value;
        }
    }
}

/// For multi-subset modes 0/1/2/3/7: swaps the endpoints of every subset
/// whose anchor index has its MSB set, and returns a per-pixel flip mask for
/// [`bc7_code_qblock`] to mirror those subsets' indices.
pub(crate) fn bc7_code_apply_swap_mode01237(
    qep: &mut [i32; 24],
    qblock: [u32; 2],
    mode: usize,
    part_id: i32,
) -> u32 {
    let bits = if mode == 0 || mode == 1 { 3 } else { 2 };
    let pairs = if mode == 0 || mode == 2 { 3 } else { 2 };

    let mut flips = 0;
    let levels = 1 << bits;

    let skips = get_skips(part_id);

    for j in 0..pairs {
        let k0 = skips[j] as usize;
        // Extract 4 bits from qblock at position k0
        let q = (qblock[k0 >> 3] << (28 - (k0 & 7) * 4)) >> 28;

        if q >= levels / 2 {
            for p in 0..4 {
                qep.swap(8 * j + p, 8 * j + 4 + p);
            }

            let pmask = get_pattern_mask(part_id, j as u32);
            flips |= pmask;
        }
    }

    flips
}
--------------------------------------------------------------------------------
/src/block_compressor.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, num::NonZeroU64}; 2 | 3 | use bytemuck::{cast_slice, Pod, Zeroable}; 4 | use wgpu::{ 5 | self, include_wgsl, BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayout, 6 | BindGroupLayoutDescriptor, BindGroupLayoutEntry, BindingResource, BindingType, Buffer, 7 | BufferBinding, BufferBindingType, BufferDescriptor, BufferUsages, ComputePass, ComputePipeline, 8 | ComputePipelineDescriptor, Device, PipelineCompilationOptions, PipelineLayoutDescriptor, Queue, 9 | ShaderModule, ShaderRuntimeChecks, ShaderStages, TextureSampleType, TextureView, 10 | TextureViewDimension, 11 | }; 12 | 13 | #[cfg(feature = "bc6h")] 14 | use crate::BC6HSettings; 15 | #[cfg(feature = "bc7")] 16 | use crate::BC7Settings; 17 | use crate::CompressionVariant; 18 | 19 | #[derive(Copy, Clone, Zeroable, Pod)] 20 | #[repr(C)] 21 | struct Uniforms { 22 | /// The width of the image data. 23 | width: u32, 24 | /// The height of the image data. 25 | height: u32, 26 | /// Start row of the texture data we want to convert. 27 | texture_y_offset: u32, 28 | /// Start of the blocks data in u32 elements. 29 | blocks_offset: u32, 30 | } 31 | 32 | struct Task { 33 | variant: CompressionVariant, 34 | width: u32, 35 | height: u32, 36 | uniform_offset: u32, 37 | #[cfg(any(feature = "bc6h", feature = "bc7"))] 38 | setting_offset: u32, 39 | texture_y_offset: u32, 40 | buffer_offset: u32, 41 | texture_view: TextureView, 42 | buffer: Buffer, 43 | } 44 | 45 | /// Compresses texture data with a block compression algorithm using WGPU compute shader. 
46 | pub struct GpuBlockCompressor { 47 | scratch_buffer: Vec, 48 | task: Vec, 49 | uniforms_buffer: Buffer, 50 | #[cfg(feature = "bc6h")] 51 | bc6h_settings_buffer: Buffer, 52 | #[cfg(feature = "bc7")] 53 | bc7_settings_buffer: Buffer, 54 | bind_group_layouts: HashMap, 55 | pipelines: HashMap, 56 | device: Device, 57 | queue: Queue, 58 | uniforms_aligned_size: usize, 59 | #[cfg(feature = "bc6h")] 60 | bc6h_aligned_size: usize, 61 | #[cfg(feature = "bc7")] 62 | bc7_aligned_size: usize, 63 | } 64 | 65 | impl GpuBlockCompressor { 66 | /// Creates a new block compressor instance. 67 | /// 68 | /// [`wgpu::Device`] and [`wgpu::Queue`] are internally reference counted and can cheaply and 69 | /// safely be cloned. 70 | pub fn new(device: Device, queue: Queue) -> Self { 71 | let limits = device.limits(); 72 | 73 | let alignment = limits.min_uniform_buffer_offset_alignment as usize; 74 | let size = size_of::(); 75 | let uniforms_aligned_size = size.div_ceil(alignment) * alignment; 76 | 77 | #[cfg(feature = "bc6h")] 78 | let bc6h_aligned_size = { 79 | let alignment = limits.min_storage_buffer_offset_alignment as usize; 80 | let size = size_of::(); 81 | size.div_ceil(alignment) * alignment 82 | }; 83 | 84 | #[cfg(feature = "bc7")] 85 | let bc7_aligned_size = { 86 | let alignment = limits.min_storage_buffer_offset_alignment as usize; 87 | let size = size_of::(); 88 | size.div_ceil(alignment) * alignment 89 | }; 90 | 91 | #[cfg(feature = "bc15")] 92 | let shader_module_bc1_to_5 = 93 | device.create_shader_module(include_wgsl!("shader/bc1_to_5.wgsl")); 94 | #[cfg(feature = "bc6h")] 95 | let shader_module_bc6h = device.create_shader_module(include_wgsl!("shader/bc6h.wgsl")); 96 | // The addition of the bounded loop in https://github.com/gfx-rs/wgpu/pull/7080 97 | // seems to cause the program to crash with AMD integrated GPU. 
98 | #[cfg(feature = "bc7")] 99 | let shader_module_bc7 = { 100 | unsafe { 101 | let checks = ShaderRuntimeChecks { 102 | bounds_checks: true, 103 | force_loop_bounding: false, 104 | }; 105 | device.create_shader_module_trusted(include_wgsl!("shader/bc7.wgsl"), checks) 106 | } 107 | }; 108 | 109 | let uniforms_buffer = device.create_buffer(&BufferDescriptor { 110 | label: Some("uniforms"), 111 | size: (uniforms_aligned_size * 16) as _, 112 | usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM, 113 | mapped_at_creation: false, 114 | }); 115 | 116 | #[cfg(feature = "bc6h")] 117 | let bc6h_settings_buffer = device.create_buffer(&BufferDescriptor { 118 | label: Some("bc6h settings"), 119 | size: (bc6h_aligned_size * 16) as _, 120 | usage: BufferUsages::COPY_DST | BufferUsages::STORAGE, 121 | mapped_at_creation: false, 122 | }); 123 | 124 | #[cfg(feature = "bc7")] 125 | let bc7_settings_buffer = device.create_buffer(&BufferDescriptor { 126 | label: Some("bc7 settings"), 127 | size: (bc7_aligned_size * 16) as _, 128 | usage: BufferUsages::COPY_DST | BufferUsages::STORAGE, 129 | mapped_at_creation: false, 130 | }); 131 | 132 | let mut bind_group_layouts = HashMap::new(); 133 | let mut pipelines = HashMap::new(); 134 | 135 | #[cfg(feature = "bc15")] 136 | Self::create_pipeline( 137 | &device, 138 | &shader_module_bc1_to_5, 139 | &mut bind_group_layouts, 140 | &mut pipelines, 141 | CompressionVariant::BC1, 142 | ); 143 | #[cfg(feature = "bc15")] 144 | Self::create_pipeline( 145 | &device, 146 | &shader_module_bc1_to_5, 147 | &mut bind_group_layouts, 148 | &mut pipelines, 149 | CompressionVariant::BC2, 150 | ); 151 | #[cfg(feature = "bc15")] 152 | Self::create_pipeline( 153 | &device, 154 | &shader_module_bc1_to_5, 155 | &mut bind_group_layouts, 156 | &mut pipelines, 157 | CompressionVariant::BC3, 158 | ); 159 | #[cfg(feature = "bc15")] 160 | Self::create_pipeline( 161 | &device, 162 | &shader_module_bc1_to_5, 163 | &mut bind_group_layouts, 164 | &mut pipelines, 165 | 
CompressionVariant::BC4, 166 | ); 167 | #[cfg(feature = "bc15")] 168 | Self::create_pipeline( 169 | &device, 170 | &shader_module_bc1_to_5, 171 | &mut bind_group_layouts, 172 | &mut pipelines, 173 | CompressionVariant::BC5, 174 | ); 175 | #[cfg(feature = "bc6h")] 176 | Self::create_pipeline( 177 | &device, 178 | &shader_module_bc6h, 179 | &mut bind_group_layouts, 180 | &mut pipelines, 181 | CompressionVariant::BC6H(BC6HSettings::basic()), 182 | ); 183 | #[cfg(feature = "bc7")] 184 | Self::create_pipeline( 185 | &device, 186 | &shader_module_bc7, 187 | &mut bind_group_layouts, 188 | &mut pipelines, 189 | CompressionVariant::BC7(BC7Settings::alpha_basic()), 190 | ); 191 | 192 | Self { 193 | scratch_buffer: Vec::default(), 194 | task: Vec::default(), 195 | uniforms_buffer, 196 | #[cfg(feature = "bc6h")] 197 | bc6h_settings_buffer, 198 | #[cfg(feature = "bc7")] 199 | bc7_settings_buffer, 200 | bind_group_layouts, 201 | pipelines, 202 | device, 203 | queue, 204 | uniforms_aligned_size, 205 | #[cfg(feature = "bc6h")] 206 | bc6h_aligned_size, 207 | #[cfg(feature = "bc7")] 208 | bc7_aligned_size, 209 | } 210 | } 211 | 212 | #[allow(unused_mut)] 213 | fn create_pipeline( 214 | device: &Device, 215 | shader_module: &ShaderModule, 216 | bind_group_layouts: &mut HashMap, 217 | pipelines: &mut HashMap, 218 | variant: CompressionVariant, 219 | ) { 220 | let mut layout_entries = vec![ 221 | BindGroupLayoutEntry { 222 | binding: 0, 223 | visibility: ShaderStages::COMPUTE, 224 | ty: BindingType::Texture { 225 | sample_type: TextureSampleType::Float { filterable: true }, 226 | view_dimension: TextureViewDimension::D2, 227 | multisampled: false, 228 | }, 229 | count: None, 230 | }, 231 | BindGroupLayoutEntry { 232 | binding: 1, 233 | visibility: ShaderStages::COMPUTE, 234 | ty: BindingType::Buffer { 235 | ty: BufferBindingType::Storage { read_only: false }, 236 | has_dynamic_offset: false, 237 | min_binding_size: None, 238 | }, 239 | count: None, 240 | }, 241 | BindGroupLayoutEntry { 
242 | binding: 2, 243 | visibility: ShaderStages::COMPUTE, 244 | ty: BindingType::Buffer { 245 | ty: BufferBindingType::Uniform, 246 | has_dynamic_offset: true, 247 | min_binding_size: None, 248 | }, 249 | count: None, 250 | }, 251 | ]; 252 | 253 | match variant { 254 | #[cfg(feature = "bc6h")] 255 | CompressionVariant::BC6H(..) => { 256 | layout_entries.push(BindGroupLayoutEntry { 257 | binding: 3, 258 | visibility: ShaderStages::COMPUTE, 259 | ty: BindingType::Buffer { 260 | ty: BufferBindingType::Storage { read_only: true }, 261 | has_dynamic_offset: true, 262 | min_binding_size: NonZeroU64::new(size_of::() as _), 263 | }, 264 | count: None, 265 | }); 266 | } 267 | #[cfg(feature = "bc7")] 268 | CompressionVariant::BC7(..) => { 269 | layout_entries.push(BindGroupLayoutEntry { 270 | binding: 3, 271 | visibility: ShaderStages::COMPUTE, 272 | ty: BindingType::Buffer { 273 | ty: BufferBindingType::Storage { read_only: true }, 274 | has_dynamic_offset: true, 275 | min_binding_size: NonZeroU64::new(size_of::() as _), 276 | }, 277 | count: None, 278 | }); 279 | } 280 | #[allow(unreachable_patterns)] 281 | _ => {} 282 | } 283 | 284 | let name = variant.name(); 285 | 286 | let bind_group_layout = device.create_bind_group_layout(&BindGroupLayoutDescriptor { 287 | label: Some(&format!("{name} bind group layout")), 288 | entries: &layout_entries, 289 | }); 290 | 291 | let pipeline_layout = device.create_pipeline_layout(&PipelineLayoutDescriptor { 292 | label: Some(&format!("{name} block compression pipeline layout")), 293 | bind_group_layouts: &[&bind_group_layout], 294 | push_constant_ranges: &[], 295 | }); 296 | 297 | let pipeline = device.create_compute_pipeline(&ComputePipelineDescriptor { 298 | label: Some(&format!("{name} block compression pipeline")), 299 | layout: Some(&pipeline_layout), 300 | module: shader_module, 301 | entry_point: Some(variant.entry_point()), 302 | compilation_options: PipelineCompilationOptions::default(), 303 | cache: None, 304 | }); 305 | 306 
| bind_group_layouts.insert(variant, bind_group_layout); 307 | pipelines.insert(variant, pipeline); 308 | } 309 | 310 | /// Adds a texture compression task to the queue. 311 | /// 312 | /// This API is designed to be very flexible. For example, it is possible to fill the mip map 313 | /// levels of a texture with multiple calls to this function. 314 | /// 315 | /// # Texture View Requirements 316 | /// The source texture should provide enough channels for the texture compression. If only a 317 | /// single red channel is provided and BC1 is used, only the red channel will be properly 318 | /// encoded. All texture compression need to work on the raw texture data. The texture can 319 | /// use a sRGB texture format, but it needs to provide a view with a non-sRGB texture format. 320 | /// For example for a texture with a `Rgba8UnormSrgb` texture format, you will need to provide 321 | /// a texture view with the `Rgba8Unorm` format. 322 | /// 323 | /// BC1, 2, 3, 4, 5 and 7 expect to work on an `unorm` format. `Rgba8Unorm` should be correct 324 | /// for 99.9% of cases. 325 | /// 326 | /// BC6H needs an `unorm` or `float` format. `Rgba16Float` is optimal for HDR textures. 327 | /// Colors should be in linear space and not in sRGBA space. 328 | /// 329 | /// # Buffer Requirements 330 | /// The destination buffer must have sufficient capacity to store the compressed blocks at the 331 | /// specified offset. The required size can be calculated using 332 | /// [`CompressionVariant::blocks_byte_size()`]. 
333 | /// 334 | /// For example: 335 | /// 336 | /// ```ignore 337 | /// let required_size = variant.blocks_byte_size(width, height); 338 | /// let total_size = offset + required_size; 339 | /// assert!(buffer.size() >= total_size); 340 | /// ``` 341 | /// 342 | /// # Arguments 343 | /// * `variant` - The block compression format to use 344 | /// * `texture_view` - View into the source texture to compress 345 | /// * `width` - Width of the texture view in pixels 346 | /// * `height` - Height of the texture view in pixels 347 | /// * `buffer` - Destination storage buffer for the compressed data 348 | /// * `texture_y_offset` - Optional offset in pixel rows into the source texture 349 | /// * `blocks_offset` - Optional offset in bytes into the destination buffer 350 | /// 351 | /// # Panics 352 | /// - If `width` or `height` or `texture_y_offset`, if set, is not a multiple of 4 353 | /// - If the destination `buffer` is not a storage buffer 354 | /// - If the destination `buffer` is too small to hold the compressed blocks at the specified offset 355 | #[allow(clippy::too_many_arguments)] 356 | pub fn add_compression_task( 357 | &mut self, 358 | variant: CompressionVariant, 359 | texture_view: &TextureView, 360 | width: u32, 361 | height: u32, 362 | buffer: &Buffer, 363 | texture_y_offset: Option, 364 | blocks_offset: Option, 365 | ) { 366 | assert_eq!(height % 4, 0); 367 | assert_eq!(width % 4, 0); 368 | 369 | if let Some(texture_y_offset) = texture_y_offset { 370 | assert_eq!(texture_y_offset % 4, 0); 371 | } 372 | 373 | assert!( 374 | buffer.usage().contains(BufferUsages::STORAGE), 375 | "buffer needs to be a storage buffer" 376 | ); 377 | 378 | let required_size = variant.blocks_byte_size(width, height); 379 | let total_size = blocks_offset.unwrap_or(0) as usize + required_size; 380 | 381 | assert!( 382 | buffer.size() as usize >= total_size, 383 | "buffer size ({}) is too small to hold compressed blocks at offset {}. 
Required size: {}", 384 | buffer.size(), 385 | blocks_offset.unwrap_or(0), 386 | total_size 387 | ); 388 | 389 | self.task.push(Task { 390 | variant, 391 | width, 392 | height, 393 | uniform_offset: 0, 394 | #[cfg(any(feature = "bc6h", feature = "bc7"))] 395 | setting_offset: 0, 396 | texture_y_offset: texture_y_offset.unwrap_or(0), 397 | buffer_offset: blocks_offset.unwrap_or(0), 398 | texture_view: texture_view.clone(), 399 | buffer: buffer.clone(), 400 | }); 401 | } 402 | 403 | fn update_buffer_sizes(&mut self) { 404 | let total_uniforms_size = self.uniforms_aligned_size * self.task.len(); 405 | if total_uniforms_size > self.uniforms_buffer.size() as usize { 406 | self.uniforms_buffer = self.device.create_buffer(&BufferDescriptor { 407 | label: Some("uniforms buffer"), 408 | size: total_uniforms_size as u64, 409 | usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM, 410 | mapped_at_creation: false, 411 | }); 412 | } 413 | 414 | #[cfg(feature = "bc6h")] 415 | { 416 | let bc6_setting_count = self 417 | .task 418 | .iter() 419 | .filter(|task| matches!(task.variant, CompressionVariant::BC6H(..))) 420 | .count(); 421 | 422 | let total_bc6h_size = self.bc6h_aligned_size * bc6_setting_count; 423 | if total_bc6h_size > self.bc6h_settings_buffer.size() as usize { 424 | self.bc6h_settings_buffer = self.device.create_buffer(&BufferDescriptor { 425 | label: Some("bc6h settings buffer"), 426 | size: total_bc6h_size as u64, 427 | usage: BufferUsages::COPY_DST | BufferUsages::STORAGE, 428 | mapped_at_creation: false, 429 | }); 430 | } 431 | } 432 | 433 | #[cfg(feature = "bc7")] 434 | { 435 | let bc7_setting_count = self 436 | .task 437 | .iter() 438 | .filter(|task| matches!(task.variant, CompressionVariant::BC7(..))) 439 | .count(); 440 | 441 | let total_bc7_size = self.bc7_aligned_size * bc7_setting_count; 442 | if total_bc7_size > self.bc7_settings_buffer.size() as usize { 443 | self.bc7_settings_buffer = self.device.create_buffer(&BufferDescriptor { 444 | label: 
Some("bc7 settings buffer"), 445 | size: total_bc7_size as u64, 446 | usage: BufferUsages::COPY_DST | BufferUsages::STORAGE, 447 | mapped_at_creation: false, 448 | }); 449 | } 450 | } 451 | } 452 | 453 | fn upload(&mut self) { 454 | self.scratch_buffer.clear(); 455 | for (index, task) in self.task.iter_mut().enumerate() { 456 | let offset = index * self.uniforms_aligned_size; 457 | task.uniform_offset = offset as u32; 458 | 459 | let uniforms = Uniforms { 460 | width: task.width, 461 | height: task.height, 462 | texture_y_offset: task.texture_y_offset, 463 | blocks_offset: task.buffer_offset / 4, 464 | }; 465 | 466 | self.scratch_buffer 467 | .resize(offset + self.uniforms_aligned_size, 0); 468 | self.scratch_buffer[offset..offset + size_of::()] 469 | .copy_from_slice(cast_slice(&[uniforms])); 470 | } 471 | if !self.scratch_buffer.is_empty() { 472 | if let Some(mut data) = self.queue.write_buffer_with( 473 | &self.uniforms_buffer, 474 | 0, 475 | NonZeroU64::new(self.scratch_buffer.len() as u64).unwrap(), 476 | ) { 477 | data.copy_from_slice(&self.scratch_buffer); 478 | } 479 | } 480 | 481 | #[cfg(feature = "bc6h")] 482 | { 483 | self.scratch_buffer.clear(); 484 | for (index, (settings, task)) in self 485 | .task 486 | .iter_mut() 487 | .filter_map(|task| { 488 | #[allow(irrefutable_let_patterns)] 489 | if let CompressionVariant::BC6H(settings) = task.variant { 490 | Some((settings, task)) 491 | } else { 492 | None 493 | } 494 | }) 495 | .enumerate() 496 | { 497 | let offset = index * self.bc6h_aligned_size; 498 | task.setting_offset = offset as u32; 499 | self.scratch_buffer 500 | .resize(offset + self.bc6h_aligned_size, 0); 501 | self.scratch_buffer[offset..offset + size_of::()] 502 | .copy_from_slice(cast_slice(&[settings])); 503 | } 504 | if !self.scratch_buffer.is_empty() { 505 | if let Some(mut data) = self.queue.write_buffer_with( 506 | &self.bc6h_settings_buffer, 507 | 0, 508 | NonZeroU64::new(self.scratch_buffer.len() as u64).unwrap(), 509 | ) { 510 | 
data.copy_from_slice(&self.scratch_buffer); 511 | } 512 | } 513 | } 514 | 515 | #[cfg(feature = "bc7")] 516 | { 517 | self.scratch_buffer.clear(); 518 | for (index, (settings, task)) in self 519 | .task 520 | .iter_mut() 521 | .filter_map(|task| { 522 | #[allow(irrefutable_let_patterns)] 523 | if let CompressionVariant::BC7(settings) = task.variant { 524 | Some((settings, task)) 525 | } else { 526 | None 527 | } 528 | }) 529 | .enumerate() 530 | { 531 | let offset = index * self.bc7_aligned_size; 532 | task.setting_offset = offset as u32; 533 | self.scratch_buffer 534 | .resize(offset + self.bc7_aligned_size, 0); 535 | self.scratch_buffer[offset..offset + size_of::()] 536 | .copy_from_slice(cast_slice(&[settings])); 537 | } 538 | if !self.scratch_buffer.is_empty() { 539 | if let Some(mut data) = self.queue.write_buffer_with( 540 | &self.bc7_settings_buffer, 541 | 0, 542 | NonZeroU64::new(self.scratch_buffer.len() as u64).unwrap(), 543 | ) { 544 | data.copy_from_slice(&self.scratch_buffer); 545 | } 546 | } 547 | } 548 | } 549 | 550 | /// Will upload all dispatch data and then dispatches all compression tasks to the GPU. 551 | /// 552 | /// # Arguments 553 | /// * `pass` - The compute pass to record commands into 554 | pub fn compress(&mut self, pass: &mut ComputePass) { 555 | self.update_buffer_sizes(); 556 | self.upload(); 557 | 558 | let mut bind_groups: Vec = self 559 | .task 560 | .iter() 561 | .map(|task| self.create_bind_group(task)) 562 | .collect(); 563 | 564 | for (task, bind_group) in self.task.drain(..).zip(bind_groups.drain(..)) { 565 | let pipeline = self 566 | .pipelines 567 | .get(&task.variant) 568 | .expect("can't find pipeline for variant"); 569 | 570 | pass.set_pipeline(pipeline); 571 | 572 | match task.variant { 573 | #[cfg(feature = "bc6h")] 574 | CompressionVariant::BC6H(..) 
=> { 575 | pass.set_bind_group( 576 | 0, 577 | &bind_group, 578 | &[task.uniform_offset, task.setting_offset], 579 | ); 580 | } 581 | #[cfg(feature = "bc7")] 582 | CompressionVariant::BC7(..) => { 583 | pass.set_bind_group( 584 | 0, 585 | &bind_group, 586 | &[task.uniform_offset, task.setting_offset], 587 | ); 588 | } 589 | #[allow(irrefutable_let_patterns)] 590 | #[allow(unreachable_patterns)] 591 | _ => { 592 | pass.set_bind_group(0, &bind_group, &[task.uniform_offset]); 593 | } 594 | } 595 | 596 | let block_width = task.width.div_ceil(4); 597 | let block_height = task.height.div_ceil(4); 598 | 599 | let workgroup_width = block_width.div_ceil(8); 600 | let workgroup_height = block_height.div_ceil(8); 601 | 602 | pass.dispatch_workgroups(workgroup_width, workgroup_height, 1); 603 | } 604 | } 605 | 606 | fn create_bind_group(&self, task: &Task) -> BindGroup { 607 | let bind_group_layout = self 608 | .bind_group_layouts 609 | .get(&task.variant) 610 | .expect("Can't find bind group layout for variant"); 611 | 612 | match task.variant { 613 | #[cfg(feature = "bc15")] 614 | CompressionVariant::BC1 615 | | CompressionVariant::BC2 616 | | CompressionVariant::BC3 617 | | CompressionVariant::BC4 618 | | CompressionVariant::BC5 => self.device.create_bind_group(&BindGroupDescriptor { 619 | label: Some("bind group"), 620 | layout: bind_group_layout, 621 | entries: &[ 622 | BindGroupEntry { 623 | binding: 0, 624 | resource: BindingResource::TextureView(&task.texture_view), 625 | }, 626 | BindGroupEntry { 627 | binding: 1, 628 | resource: task.buffer.as_entire_binding(), 629 | }, 630 | BindGroupEntry { 631 | binding: 2, 632 | resource: BindingResource::Buffer(BufferBinding { 633 | buffer: &self.uniforms_buffer, 634 | offset: 0, 635 | size: Some(NonZeroU64::new(self.uniforms_aligned_size as u64).unwrap()), 636 | }), 637 | }, 638 | ], 639 | }), 640 | #[cfg(feature = "bc6h")] 641 | CompressionVariant::BC6H(..) 
=> self.device.create_bind_group(&BindGroupDescriptor { 642 | label: Some("bind group"), 643 | layout: bind_group_layout, 644 | entries: &[ 645 | BindGroupEntry { 646 | binding: 0, 647 | resource: BindingResource::TextureView(&task.texture_view), 648 | }, 649 | BindGroupEntry { 650 | binding: 1, 651 | resource: task.buffer.as_entire_binding(), 652 | }, 653 | BindGroupEntry { 654 | binding: 2, 655 | resource: BindingResource::Buffer(BufferBinding { 656 | buffer: &self.uniforms_buffer, 657 | offset: 0, 658 | size: Some(NonZeroU64::new(self.uniforms_aligned_size as u64).unwrap()), 659 | }), 660 | }, 661 | BindGroupEntry { 662 | binding: 3, 663 | resource: BindingResource::Buffer(BufferBinding { 664 | buffer: &self.bc6h_settings_buffer, 665 | offset: 0, 666 | size: Some(NonZeroU64::new(self.bc6h_aligned_size as u64).unwrap()), 667 | }), 668 | }, 669 | ], 670 | }), 671 | #[cfg(feature = "bc7")] 672 | CompressionVariant::BC7(..) => self.device.create_bind_group(&BindGroupDescriptor { 673 | label: Some("bind group"), 674 | layout: bind_group_layout, 675 | entries: &[ 676 | BindGroupEntry { 677 | binding: 0, 678 | resource: BindingResource::TextureView(&task.texture_view), 679 | }, 680 | BindGroupEntry { 681 | binding: 1, 682 | resource: task.buffer.as_entire_binding(), 683 | }, 684 | BindGroupEntry { 685 | binding: 2, 686 | resource: BindingResource::Buffer(BufferBinding { 687 | buffer: &self.uniforms_buffer, 688 | offset: 0, 689 | size: Some(NonZeroU64::new(self.uniforms_aligned_size as u64).unwrap()), 690 | }), 691 | }, 692 | BindGroupEntry { 693 | binding: 3, 694 | resource: BindingResource::Buffer(BufferBinding { 695 | buffer: &self.bc7_settings_buffer, 696 | offset: 0, 697 | size: Some(NonZeroU64::new(self.bc7_aligned_size as u64).unwrap()), 698 | }), 699 | }, 700 | ], 701 | }), 702 | } 703 | } 704 | } 705 | -------------------------------------------------------------------------------- /src/encode/bc7.rs: 
-------------------------------------------------------------------------------- 1 | use super::common::*; 2 | use crate::BC7Settings; 3 | 4 | #[derive(Default)] 5 | struct Mode45Parameters { 6 | qep: [i32; 8], 7 | qblock: [u32; 2], 8 | aqep: [i32; 2], 9 | aqblock: [u32; 2], 10 | rotation: u32, 11 | swap: u32, 12 | } 13 | 14 | pub(crate) struct BlockCompressorBC7<'a> { 15 | block: [f32; 64], 16 | data: [u32; 5], 17 | best_err: f32, 18 | opaque_err: f32, 19 | settings: &'a BC7Settings, 20 | } 21 | 22 | #[inline(always)] 23 | const fn sq(x: f32) -> f32 { 24 | x * x 25 | } 26 | 27 | impl<'a> BlockCompressorBC7<'a> { 28 | pub(crate) fn new(settings: &'a BC7Settings) -> Self { 29 | Self { 30 | block: [0.0; 64], 31 | data: [0; 5], 32 | best_err: f32::INFINITY, 33 | opaque_err: 0.0, 34 | settings, 35 | } 36 | } 37 | 38 | pub(crate) fn load_block_interleaved_rgba( 39 | &mut self, 40 | rgba_data: &[u8], 41 | xx: usize, 42 | yy: usize, 43 | stride: usize, 44 | ) { 45 | for y in 0..4 { 46 | for x in 0..4 { 47 | let pixel_x = xx * 4 + x; 48 | let pixel_y = yy * 4 + y; 49 | 50 | let offset = pixel_y * stride + pixel_x * 4; 51 | 52 | let red = rgba_data[offset] as f32; 53 | let green = rgba_data[offset + 1] as f32; 54 | let blue = rgba_data[offset + 2] as f32; 55 | let alpha = rgba_data[offset + 3] as f32; 56 | 57 | self.block[y * 4 + x] = red; 58 | self.block[16 + y * 4 + x] = green; 59 | self.block[32 + y * 4 + x] = blue; 60 | self.block[48 + y * 4 + x] = alpha; 61 | } 62 | } 63 | } 64 | 65 | pub(crate) fn store_data( 66 | &self, 67 | blocks_buffer: &mut [u8], 68 | block_width: usize, 69 | xx: usize, 70 | yy: usize, 71 | ) { 72 | let offset = (yy * block_width + xx) * 16; 73 | 74 | for (index, &value) in self.data[..4].iter().enumerate() { 75 | let byte_offset = offset + index * 4; 76 | blocks_buffer[byte_offset] = value as u8; 77 | blocks_buffer[byte_offset + 1] = (value >> 8) as u8; 78 | blocks_buffer[byte_offset + 2] = (value >> 16) as u8; 79 | blocks_buffer[byte_offset + 
3] = (value >> 24) as u8; 80 | } 81 | } 82 | 83 | fn unpack_to_byte(v: i32, bits: u32) -> i32 { 84 | let vv = v << (8 - bits); 85 | vv + (vv >> bits) 86 | } 87 | 88 | fn ep_quant0367(qep: &mut [i32], ep: &[f32], mode: usize, channels: usize) { 89 | let bits = if mode == 0 { 90 | 4 91 | } else if mode == 7 { 92 | 5 93 | } else { 94 | 7 95 | }; 96 | let levels = 1 << bits; 97 | let levels2 = levels * 2 - 1; 98 | 99 | for i in 0..2 { 100 | let mut qep_b = [0; 8]; 101 | 102 | for b in 0..2 { 103 | for p in 0..4 { 104 | let v = ((ep[i * 4 + p] / 255.0 * levels2 as f32 - b as f32) / 2.0 + 0.5) 105 | as i32 106 | * 2 107 | + b as i32; 108 | qep_b[b * 4 + p] = i32::clamp(v, b as i32, levels2 - 1 + b as i32); 109 | } 110 | } 111 | 112 | let mut ep_b = [0.0; 8]; 113 | for j in 0..8 { 114 | ep_b[j] = qep_b[j] as f32; 115 | } 116 | 117 | if mode == 0 { 118 | for j in 0..8 { 119 | ep_b[j] = Self::unpack_to_byte(qep_b[j], 5) as f32; 120 | } 121 | } 122 | 123 | let mut err0 = 0.0; 124 | let mut err1 = 0.0; 125 | for p in 0..channels { 126 | err0 += sq(ep[i * 4 + p] - ep_b[p]); 127 | err1 += sq(ep[i * 4 + p] - ep_b[4 + p]); 128 | } 129 | 130 | for p in 0..4 { 131 | qep[i * 4 + p] = if err0 < err1 { qep_b[p] } else { qep_b[4 + p] }; 132 | } 133 | } 134 | } 135 | 136 | fn ep_quant1(qep: &mut [i32], ep: &mut [f32]) { 137 | let mut qep_b = [0; 16]; 138 | 139 | for b in 0..2 { 140 | for i in 0..8 { 141 | let v = ((ep[i] / 255.0 * 127.0 - b as f32) / 2.0 + 0.5) as i32 * 2 + b as i32; 142 | qep_b[b * 8 + i] = i32::clamp(v, b as i32, 126 + b as i32); 143 | } 144 | } 145 | 146 | // dequant 147 | let mut ep_b = [0.0; 16]; 148 | for k in 0..16 { 149 | ep_b[k] = Self::unpack_to_byte(qep_b[k], 7) as f32; 150 | } 151 | 152 | let mut err0 = 0.0; 153 | let mut err1 = 0.0; 154 | for j in 0..2 { 155 | for p in 0..3 { 156 | err0 += sq(ep[j * 4 + p] - ep_b[j * 4 + p]); 157 | err1 += sq(ep[j * 4 + p] - ep_b[8 + j * 4 + p]); 158 | } 159 | } 160 | 161 | for i in 0..8 { 162 | qep[i] = if err0 < err1 { 
qep_b[i] } else { qep_b[8 + i] };
        }
    }

    /// Quantizes endpoints for modes 2, 4 and 5 (no shared p-bits).
    fn ep_quant245(qep: &mut [i32], ep: &[f32], mode: usize) {
        // Mode 5 stores 7-bit color endpoints, modes 2 and 4 store 5-bit ones.
        let bits = if mode == 5 { 7 } else { 5 };

        let levels = 1 << bits;

        for i in 0..8 {
            let v = (ep[i] / 255.0 * (levels - 1) as f32 + 0.5) as i32;
            qep[i] = i32::clamp(v, 0, levels - 1);
        }
    }

    /// Quantizes endpoints with the mode-specific scheme, one endpoint pair
    /// (8 values) per subset.
    fn ep_quant(qep: &mut [i32], ep: &mut [f32], mode: usize, channels: usize) {
        // Number of endpoint pairs (subsets) for BC7 modes 0..=7.
        const PAIRS_TABLE: [usize; 8] = [3, 2, 3, 2, 1, 1, 1, 2];
        let pairs = PAIRS_TABLE[mode];

        if mode == 0 || mode == 3 || mode == 6 || mode == 7 {
            for i in 0..pairs {
                Self::ep_quant0367(&mut qep[i * 8..], &ep[i * 8..], mode, channels);
            }
        } else if mode == 1 {
            for i in 0..pairs {
                Self::ep_quant1(&mut qep[i * 8..], &mut ep[i * 8..]);
            }
        } else if mode == 2 || mode == 4 || mode == 5 {
            for i in 0..pairs {
                Self::ep_quant245(&mut qep[i * 8..], &ep[i * 8..], mode);
            }
        }
    }

    /// Expands quantized endpoints back to the 0..=255 float range using the
    /// endpoint precision of the given mode.
    fn ep_dequant(ep: &mut [f32], qep: &[i32], mode: usize) {
        const PAIRS_TABLE: [usize; 8] = [3, 2, 3, 2, 1, 1, 1, 2];
        let pairs = PAIRS_TABLE[mode];

        // mode 3, 6 are 8-bit
        if mode == 3 || mode == 6 {
            for i in 0..8 * pairs {
                ep[i] = qep[i] as f32;
            }
        } else if mode == 1 || mode == 5 {
            for i in 0..8 * pairs {
                ep[i] = Self::unpack_to_byte(qep[i], 7) as f32;
            }
        } else if mode == 0 || mode == 2 || mode == 4 {
            for i in 0..8 * pairs {
                ep[i] = Self::unpack_to_byte(qep[i], 5) as f32;
            }
        } else if mode == 7 {
            for i in 0..8 * pairs {
                ep[i] = Self::unpack_to_byte(qep[i], 6) as f32;
            }
        }
    }

    /// Round-trips the endpoints through quantization so that the encoder
    /// measures error against the values the decoder will reconstruct.
    fn ep_quant_dequant(qep: &mut [i32], ep: &mut [f32], mode: usize, channels: usize) {
        Self::ep_quant(qep, ep, mode, channels);
        Self::ep_dequant(ep, qep, mode);
    }

    /// Finds quantized endpoints and indices for a single scalar channel
    /// (alpha or a rotated color channel) and returns the squared error.
    fn opt_channel(
        &self,
        qblock: &mut [u32; 2],
qep: &mut [i32; 2],
        channel_block: &[f32; 16],
        bits: u32,
        epbits: u32,
    ) -> f32 {
        // Start inverted (min = 255, max = 0) so the scan always updates.
        let mut ep = [255.0, 0.0];

        for k in 0..16 {
            ep[0] = f32::min(ep[0], channel_block[k]);
            ep[1] = f32::max(ep[1], channel_block[k]);
        }

        Self::channel_quant_dequant(qep, &mut ep, epbits);
        let mut err = Self::channel_opt_quant(qblock, channel_block, bits, &ep);

        // Refine: alternate endpoint optimization and re-quantization.
        let refine_iterations = self.settings.refine_iterations_channel;
        for _ in 0..refine_iterations {
            Self::channel_opt_endpoints(&mut ep, channel_block, bits, *qblock);
            Self::channel_quant_dequant(qep, &mut ep, epbits);
            err = Self::channel_opt_quant(qblock, channel_block, bits, &ep);
        }

        err
    }

    /// Quantizes the two scalar endpoints to `epbits` bits and immediately
    /// dequantizes them back into the 0..=255 float range.
    fn channel_quant_dequant(qep: &mut [i32; 2], ep: &mut [f32; 2], epbits: u32) {
        let elevels = 1 << epbits;

        for i in 0..2 {
            let v = (ep[i] / 255.0 * (elevels - 1) as f32 + 0.5) as i32;
            qep[i] = i32::clamp(v, 0, elevels - 1);
            ep[i] = Self::unpack_to_byte(qep[i], epbits) as f32;
        }
    }

    /// Quantizes all 16 scalar values against the endpoint pair `ep`, packing
    /// the chosen indices into `qblock` and returning the total squared error.
    fn channel_opt_quant(
        qblock: &mut [u32; 2],
        channel_block: &[f32; 16],
        bits: u32,
        ep: &[f32; 2],
    ) -> f32 {
        let levels = 1 << bits;

        qblock[0] = 0;
        qblock[1] = 0;

        let mut total_err = 0.0;

        for k in 0..16 {
            // Project the value onto the endpoint axis; the small epsilon
            // avoids a division by zero for degenerate endpoints.
            let proj = (channel_block[k] - ep[0]) / (ep[1] - ep[0] + 0.001);

            let q1 = (proj * levels as f32 + 0.5) as i32;
            let q1_clamped = i32::clamp(q1, 1, levels - 1);

            let mut err0 = 0.0;
            let mut err1 = 0.0;
            let w0 = get_unquant_value(bits, q1_clamped - 1);
            let w1 = get_unquant_value(bits, q1_clamped);

            // Decode both candidate indices with the BC7 interpolation
            // weights and keep whichever reconstructs with less error.
            let dec_v0 = (((64 - w0) * ep[0] as i32 + w0 * ep[1] as i32 + 32) / 64) as f32;
            let dec_v1 = (((64 - w1) * ep[0] as i32 + w1 * ep[1] as i32 + 32) / 64) as f32;
            err0 += sq(dec_v0 - channel_block[k]);
err1 += sq(dec_v1 - channel_block[k]); 292 | 293 | let best_err = if err0 < err1 { err0 } else { err1 }; 294 | 295 | let best_q = if err0 < err1 { 296 | q1_clamped - 1 297 | } else { 298 | q1_clamped 299 | }; 300 | 301 | qblock[k / 8] |= (best_q as u32) << (4 * (k % 8)); 302 | total_err += best_err; 303 | } 304 | 305 | total_err 306 | } 307 | 308 | fn channel_opt_endpoints( 309 | ep: &mut [f32; 2], 310 | channel_block: &[f32; 16], 311 | bits: u32, 312 | qblock: [u32; 2], 313 | ) { 314 | let levels = 1 << bits; 315 | 316 | let mut atb1 = 0.0; 317 | let mut sum_q = 0.0; 318 | let mut sum_qq = 0.0; 319 | let mut sum = 0.0; 320 | 321 | for k1 in 0..2 { 322 | let mut qbits_shifted = qblock[k1]; 323 | for k2 in 0..8 { 324 | let k = k1 * 8 + k2; 325 | let q = (qbits_shifted & 15) as f32; 326 | qbits_shifted >>= 4; 327 | 328 | let x = (levels - 1) as f32 - q; 329 | 330 | sum_q += q; 331 | sum_qq += q * q; 332 | 333 | sum += channel_block[k]; 334 | atb1 += x * channel_block[k]; 335 | } 336 | } 337 | 338 | let atb2 = (levels - 1) as f32 * sum - atb1; 339 | 340 | let cxx = 16.0 * sq((levels - 1) as f32) - 2.0 * (levels - 1) as f32 * sum_q + sum_qq; 341 | let cyy = sum_qq; 342 | let cxy = (levels - 1) as f32 * sum_q - sum_qq; 343 | let scale = (levels - 1) as f32 / (cxx * cyy - cxy * cxy); 344 | 345 | ep[0] = (atb1 * cyy - atb2 * cxy) * scale; 346 | ep[1] = (atb2 * cxx - atb1 * cxy) * scale; 347 | 348 | ep[0] = f32::clamp(ep[0], 0.0, 255.0); 349 | ep[1] = f32::clamp(ep[1], 0.0, 255.0); 350 | 351 | if f32::abs(cxx * cyy - cxy * cxy) < 0.001 { 352 | ep[0] = sum / 16.0; 353 | ep[1] = ep[0]; 354 | } 355 | } 356 | 357 | pub(crate) fn block_segment(ep: &mut [f32], block: &[f32; 64], mask: u32, channels: usize) { 358 | block_segment_core(ep, block, mask, channels); 359 | 360 | for i in 0..2 { 361 | for p in 0..channels { 362 | ep[4 * i + p] = f32::clamp(ep[4 * i + p], 0.0, 255.0); 363 | } 364 | } 365 | } 366 | 367 | fn bc7_code_mode01237( 368 | &mut self, 369 | qep: &mut [i32; 24], 
370 | qblock: [u32; 2], 371 | part_id: i32, 372 | mode: usize, 373 | ) { 374 | let bits = if mode == 0 || mode == 1 { 3 } else { 2 }; 375 | let pairs = if mode == 0 || mode == 2 { 3 } else { 2 }; 376 | let channels = if mode == 7 { 4 } else { 3 }; 377 | 378 | let flips = bc7_code_apply_swap_mode01237(qep, qblock, mode, part_id); 379 | 380 | self.data = [0; 5]; 381 | let mut pos = 0; 382 | 383 | // Mode 0-3, 7 384 | put_bits(&mut self.data, &mut pos, (mode + 1) as u32, 1 << mode); 385 | 386 | // Partition 387 | if mode == 0 { 388 | put_bits(&mut self.data, &mut pos, 4, (part_id & 15) as u32); 389 | } else { 390 | put_bits(&mut self.data, &mut pos, 6, (part_id & 63) as u32); 391 | } 392 | 393 | // Endpoints 394 | for p in 0..channels { 395 | for j in 0..pairs * 2 { 396 | if mode == 0 { 397 | put_bits(&mut self.data, &mut pos, 4, (qep[j * 4 + p] as u32) >> 1); 398 | } else if mode == 1 { 399 | put_bits(&mut self.data, &mut pos, 6, (qep[j * 4 + p] as u32) >> 1); 400 | } else if mode == 2 { 401 | put_bits(&mut self.data, &mut pos, 5, qep[j * 4 + p] as u32); 402 | } else if mode == 3 { 403 | put_bits(&mut self.data, &mut pos, 7, (qep[j * 4 + p] as u32) >> 1); 404 | } else if mode == 7 { 405 | put_bits(&mut self.data, &mut pos, 5, (qep[j * 4 + p] as u32) >> 1); 406 | } 407 | } 408 | } 409 | 410 | // P bits 411 | if mode == 1 { 412 | for j in 0..2 { 413 | put_bits(&mut self.data, &mut pos, 1, (qep[j * 8] as u32) & 1); 414 | } 415 | } 416 | 417 | if mode == 0 || mode == 3 || mode == 7 { 418 | for j in 0..pairs * 2 { 419 | put_bits(&mut self.data, &mut pos, 1, (qep[j * 4] as u32) & 1); 420 | } 421 | } 422 | 423 | // Quantized values 424 | bc7_code_qblock(&mut self.data, &mut pos, qblock, bits, flips); 425 | bc7_code_adjust_skip_mode01237(&mut self.data, mode, part_id); 426 | } 427 | 428 | fn bc7_code_mode45(&mut self, params: &Mode45Parameters, mode: usize) { 429 | let mut qep = params.qep; 430 | let mut qblock = params.qblock; 431 | let mut aqep = params.aqep; 432 | let mut 
aqblock = params.aqblock; 433 | let rotation = params.rotation; 434 | let swap = params.swap; 435 | 436 | let bits = 2; 437 | let abits = if mode == 4 { 3 } else { 2 }; 438 | let epbits = if mode == 4 { 5 } else { 7 }; 439 | let aepbits = if mode == 4 { 6 } else { 8 }; 440 | 441 | if swap == 0 { 442 | bc7_code_apply_swap_mode456(&mut qep, 4, &mut qblock, bits); 443 | bc7_code_apply_swap_mode456(&mut aqep, 1, &mut aqblock, abits); 444 | } else { 445 | std::mem::swap(&mut qblock, &mut aqblock); 446 | 447 | bc7_code_apply_swap_mode456(&mut aqep, 1, &mut qblock, bits); 448 | bc7_code_apply_swap_mode456(&mut qep, 4, &mut aqblock, abits); 449 | } 450 | 451 | // Clear state data 452 | self.data = [0; 5]; 453 | let mut pos = 0; 454 | 455 | // Mode 4-5 456 | put_bits(&mut self.data, &mut pos, (mode + 1) as u32, 1 << mode); 457 | 458 | // Rotation 459 | put_bits(&mut self.data, &mut pos, 2, (rotation + 1) & 3); 460 | 461 | if mode == 4 { 462 | put_bits(&mut self.data, &mut pos, 1, swap); 463 | } 464 | 465 | // Endpoints 466 | for p in 0..3 { 467 | put_bits(&mut self.data, &mut pos, epbits, qep[p] as u32); 468 | put_bits(&mut self.data, &mut pos, epbits, qep[4 + p] as u32); 469 | } 470 | 471 | // Alpha endpoints 472 | put_bits(&mut self.data, &mut pos, aepbits, aqep[0] as u32); 473 | put_bits(&mut self.data, &mut pos, aepbits, aqep[1] as u32); 474 | 475 | // Quantized values 476 | bc7_code_qblock(&mut self.data, &mut pos, qblock, bits, 0); 477 | bc7_code_qblock(&mut self.data, &mut pos, aqblock, abits, 0); 478 | } 479 | 480 | fn bc7_code_mode6(&mut self, qep: &mut [i32], qblock: &mut [u32; 2]) { 481 | bc7_code_apply_swap_mode456(qep, 4, qblock, 4); 482 | 483 | self.data = [0; 5]; 484 | let mut pos = 0; 485 | 486 | // Mode 6 487 | put_bits(&mut self.data, &mut pos, 7, 64); 488 | 489 | // Endpoints 490 | for p in 0..4 { 491 | put_bits(&mut self.data, &mut pos, 7, (qep[p] as u32) >> 1); 492 | put_bits(&mut self.data, &mut pos, 7, (qep[4 + p] as u32) >> 1); 493 | } 494 | 495 | // 
P bits 496 | put_bits(&mut self.data, &mut pos, 1, (qep[0] as u32) & 1); 497 | put_bits(&mut self.data, &mut pos, 1, (qep[4] as u32) & 1); 498 | 499 | // Quantized values 500 | bc7_code_qblock(&mut self.data, &mut pos, *qblock, 4, 0); 501 | } 502 | 503 | fn bc7_enc_mode01237_part_fast( 504 | &self, 505 | qep: &mut [i32; 24], 506 | qblock: &mut [u32; 2], 507 | part_id: i32, 508 | mode: usize, 509 | ) -> f32 { 510 | let pattern = get_pattern(part_id); 511 | let bits = if mode == 0 || mode == 1 { 3 } else { 2 }; 512 | let pairs = if mode == 0 || mode == 2 { 3 } else { 2 }; 513 | let channels = if mode == 7 { 4 } else { 3 }; 514 | 515 | let mut ep = [0.0; 24]; 516 | for j in 0..pairs { 517 | let mask = get_pattern_mask(part_id, j as u32); 518 | Self::block_segment(&mut ep[j * 8..], &self.block, mask, channels); 519 | } 520 | 521 | Self::ep_quant_dequant(qep, &mut ep, mode, channels); 522 | 523 | block_quant(qblock, &self.block, bits, &ep, pattern, channels) 524 | } 525 | 526 | fn bc7_enc_mode01237(&mut self, mode: usize, part_list: &[i32; 64], part_count: usize) { 527 | if part_count == 0 { 528 | return; 529 | } 530 | 531 | let bits = if mode == 0 || mode == 1 { 3 } else { 2 }; 532 | let pairs = if mode == 0 || mode == 2 { 3 } else { 2 }; 533 | let channels = if mode == 7 { 4 } else { 3 }; 534 | 535 | let mut best_qep = [0; 24]; 536 | let mut best_qblock = [0; 2]; 537 | let mut best_part_id = -1; 538 | let mut best_err = f32::INFINITY; 539 | 540 | for &part in part_list[..part_count].iter() { 541 | let mut part_id = part & 63; 542 | part_id = if pairs == 3 { part_id + 64 } else { part_id }; 543 | 544 | let mut qep = [0; 24]; 545 | let mut qblock = [0; 2]; 546 | let err = self.bc7_enc_mode01237_part_fast(&mut qep, &mut qblock, part_id, mode); 547 | 548 | if err < best_err { 549 | best_qep[..(8 * pairs)].copy_from_slice(&qep[..(8 * pairs)]); 550 | best_qblock.copy_from_slice(&qblock); 551 | 552 | best_part_id = part_id; 553 | best_err = err; 554 | } 555 | } 556 | 557 | 
let refine_iterations = self.settings.refine_iterations[mode]; 558 | for _ in 0..refine_iterations { 559 | let mut ep = [0.0; 24]; 560 | for j in 0..pairs { 561 | let mask = get_pattern_mask(best_part_id, j as u32); 562 | opt_endpoints( 563 | &mut ep[j * 8..], 564 | &self.block, 565 | bits, 566 | best_qblock, 567 | mask, 568 | channels, 569 | ); 570 | } 571 | 572 | let mut qep = [0; 24]; 573 | let mut qblock = [0; 2]; 574 | 575 | Self::ep_quant_dequant(&mut qep, &mut ep, mode, channels); 576 | 577 | let pattern = get_pattern(best_part_id); 578 | let err = block_quant(&mut qblock, &self.block, bits, &ep, pattern, channels); 579 | 580 | if err < best_err { 581 | best_qep[..(8 * pairs)].copy_from_slice(&qep[..(8 * pairs)]); 582 | best_qblock.copy_from_slice(&qblock); 583 | 584 | best_err = err; 585 | } 586 | } 587 | 588 | if mode != 7 { 589 | best_err += self.opaque_err; 590 | } 591 | 592 | if best_err < self.best_err { 593 | self.best_err = best_err; 594 | self.bc7_code_mode01237(&mut best_qep, best_qblock, best_part_id, mode); 595 | } 596 | } 597 | 598 | fn bc7_enc_mode02(&mut self) { 599 | let part_list: [i32; 64] = std::array::from_fn(|part| part as i32); 600 | 601 | self.bc7_enc_mode01237(0, &part_list, 16); 602 | 603 | if self.settings.skip_mode2 == 0 { 604 | self.bc7_enc_mode01237(2, &part_list, 64); 605 | } 606 | } 607 | 608 | fn bc7_enc_mode13(&mut self) { 609 | if self.settings.fast_skip_threshold_mode1 == 0 610 | && self.settings.fast_skip_threshold_mode3 == 0 611 | { 612 | return; 613 | } 614 | 615 | let mut full_stats = [0.0; 15]; 616 | compute_stats_masked(&mut full_stats, &self.block, 0xFFFFFFFF, 3); 617 | 618 | let mut part_list = [0; 64]; 619 | for part in 0..64 { 620 | let mask = get_pattern_mask(part, 0); 621 | let bound12 = block_pca_bound_split(&self.block, mask, full_stats, 3); 622 | let bound = bound12 as i32; 623 | part_list[part as usize] = part + bound * 64; 624 | } 625 | 626 | let partial_count = u32::max( 627 | 
self.settings.fast_skip_threshold_mode1, 628 | self.settings.fast_skip_threshold_mode3, 629 | ); 630 | partial_sort_list(&mut part_list, 64, partial_count); 631 | self.bc7_enc_mode01237( 632 | 1, 633 | &part_list, 634 | self.settings.fast_skip_threshold_mode1 as usize, 635 | ); 636 | self.bc7_enc_mode01237( 637 | 3, 638 | &part_list, 639 | self.settings.fast_skip_threshold_mode3 as usize, 640 | ); 641 | } 642 | 643 | fn bc7_enc_mode45_candidate( 644 | &self, 645 | best_candidate: &mut Mode45Parameters, 646 | best_err: &mut f32, 647 | mode: usize, 648 | rotation: u32, 649 | swap: u32, 650 | ) { 651 | let mut bits = 2; 652 | let mut abits = 2; 653 | let mut aepbits = 8; 654 | 655 | if mode == 4 { 656 | abits = 3; 657 | aepbits = 6; 658 | } 659 | 660 | // (mode 4) 661 | if swap == 1 { 662 | bits = 3; 663 | abits = 2; 664 | } 665 | 666 | let mut candidate_block = [0.0; 64]; 667 | 668 | for k in 0..16 { 669 | for p in 0..3 { 670 | candidate_block[k + p * 16] = self.block[k + p * 16]; 671 | } 672 | 673 | if rotation < 3 { 674 | // Apply channel rotation 675 | if self.settings.channels == 4 { 676 | candidate_block[k + rotation as usize * 16] = self.block[k + 3 * 16]; 677 | } 678 | if self.settings.channels == 3 { 679 | candidate_block[k + rotation as usize * 16] = 255.0; 680 | } 681 | } 682 | } 683 | 684 | let mut ep = [0.0; 8]; 685 | Self::block_segment(&mut ep, &candidate_block, 0xFFFFFFFF, 3); 686 | 687 | let mut qep = [0; 8]; 688 | Self::ep_quant_dequant(&mut qep, &mut ep, mode, 3); 689 | 690 | let mut qblock = [0; 2]; 691 | let mut err = block_quant(&mut qblock, &candidate_block, bits, &ep, 0, 3); 692 | 693 | // Refine 694 | let refine_iterations = self.settings.refine_iterations[mode]; 695 | for _ in 0..refine_iterations { 696 | opt_endpoints(&mut ep, &candidate_block, bits, qblock, 0xFFFFFFFF, 3); 697 | Self::ep_quant_dequant(&mut qep, &mut ep, mode, 3); 698 | err = block_quant(&mut qblock, &candidate_block, bits, &ep, 0, 3); 699 | } 700 | 701 | let channel_data: 
[f32; 16] = 702 | std::array::from_fn(|k| self.block[k + rotation as usize * 16]); 703 | 704 | // Encoding selected channel 705 | let mut aqep = [0; 2]; 706 | let mut aqblock = [0; 2]; 707 | 708 | err += self.opt_channel(&mut aqblock, &mut aqep, &channel_data, abits, aepbits); 709 | 710 | if err < *best_err { 711 | best_candidate.qep.copy_from_slice(&qep[..8]); 712 | best_candidate.qblock.copy_from_slice(&qblock); 713 | best_candidate.aqblock.copy_from_slice(&aqblock); 714 | best_candidate.aqep.copy_from_slice(&aqep); 715 | best_candidate.rotation = rotation; 716 | best_candidate.swap = swap; 717 | *best_err = err; 718 | } 719 | } 720 | 721 | fn bc7_enc_mode45(&mut self) { 722 | let mut best_candidate = Mode45Parameters::default(); 723 | let mut best_err = self.best_err; 724 | 725 | let channel0 = self.settings.mode45_channel0; 726 | for p in channel0..self.settings.channels { 727 | self.bc7_enc_mode45_candidate(&mut best_candidate, &mut best_err, 4, p, 0); 728 | self.bc7_enc_mode45_candidate(&mut best_candidate, &mut best_err, 4, p, 1); 729 | } 730 | 731 | // Mode 4 732 | if best_err < self.best_err { 733 | self.best_err = best_err; 734 | self.bc7_code_mode45(&best_candidate, 4); 735 | } 736 | 737 | for p in channel0..self.settings.channels { 738 | self.bc7_enc_mode45_candidate(&mut best_candidate, &mut best_err, 5, p, 0); 739 | } 740 | 741 | // Mode 5 742 | if best_err < self.best_err { 743 | self.best_err = best_err; 744 | self.bc7_code_mode45(&best_candidate, 5); 745 | } 746 | } 747 | 748 | fn bc7_enc_mode6(&mut self) { 749 | const MODE: usize = 6; 750 | const BITS: u32 = 4; 751 | 752 | let mut ep = [0.0; 8]; 753 | Self::block_segment( 754 | &mut ep, 755 | &self.block, 756 | 0xFFFFFFFF, 757 | self.settings.channels as usize, 758 | ); 759 | 760 | if self.settings.channels == 3 { 761 | ep[3] = 255.0; 762 | ep[7] = 255.0; 763 | } 764 | 765 | let mut qep = [0; 8]; 766 | Self::ep_quant_dequant(&mut qep, &mut ep, MODE, self.settings.channels as usize); 767 | 768 | 
let mut qblock = [0; 2];
        let mut err = block_quant(
            &mut qblock,
            &self.block,
            BITS,
            &ep,
            0,
            self.settings.channels as usize,
        );

        // Refine: alternate between optimizing the endpoints for the current
        // indices and re-quantizing the indices for the refined endpoints.
        let refine_iterations = self.settings.refine_iterations[MODE];
        for _ in 0..refine_iterations {
            opt_endpoints(
                &mut ep,
                &self.block,
                BITS,
                qblock,
                0xFFFFFFFF,
                self.settings.channels as usize,
            );
            Self::ep_quant_dequant(&mut qep, &mut ep, MODE, self.settings.channels as usize);
            err = block_quant(
                &mut qblock,
                &self.block,
                BITS,
                &ep,
                0,
                self.settings.channels as usize,
            );
        }

        if err < self.best_err {
            self.best_err = err;
            self.bc7_code_mode6(&mut qep, &mut qblock);
        }
    }

    /// Tries mode 7 (two subsets, RGBA endpoints) on the most promising
    /// partitions, ranked by a PCA split bound.
    fn bc7_enc_mode7(&mut self) {
        // A threshold of zero disables mode 7 entirely.
        if self.settings.fast_skip_threshold_mode7 == 0 {
            return;
        }

        let mut full_stats = [0.0; 15];
        compute_stats_masked(
            &mut full_stats,
            &self.block,
            0xFFFFFFFF,
            self.settings.channels as usize,
        );

        // Combine each partition id with its PCA bound (bound * 64 + part) so
        // partial_sort_list can rank candidate partitions by the bound.
        let mut part_list = [0; 64];
        for part in 0..64 {
            let mask = get_pattern_mask(part, 0);
            let bound12 = block_pca_bound_split(
                &self.block,
                mask,
                full_stats,
                self.settings.channels as usize,
            );
            let bound = bound12 as i32;
            part_list[part as usize] = part + bound * 64;
        }

        partial_sort_list(&mut part_list, 64, self.settings.fast_skip_threshold_mode7);
        self.bc7_enc_mode01237(
            7,
            &part_list,
            self.settings.fast_skip_threshold_mode7 as usize,
        );
    }

    /// Runs every encoder enabled in the settings; the best encoding found so
    /// far is tracked in `self.best_err` and `self.data`.
    pub(crate) fn compress_block_bc7_core(&mut self) {
        if self.settings.mode_selection[0] != 0 {
            self.bc7_enc_mode02();
        }
        if self.settings.mode_selection[1] != 0 {
            self.bc7_enc_mode13();
            self.bc7_enc_mode7();
        }
        if self.settings.mode_selection[2] != 0 {
self.bc7_enc_mode45();
        }
        if self.settings.mode_selection[3] != 0 {
            self.bc7_enc_mode6();
        }
    }

    /// Computes the error incurred by treating the block as fully opaque
    /// (alpha forced to 255).
    ///
    /// For 3-channel input there is no alpha to lose, so the penalty is zero;
    /// otherwise it is the summed squared deviation of the 16 alpha values
    /// from 255. Opaque-only modes add this penalty to their color error.
    pub(crate) fn compute_opaque_err(&mut self) {
        self.opaque_err = if self.settings.channels == 3 {
            0.0
        } else {
            // Alpha occupies the last 16 floats of the deinterleaved block.
            self.block[48..64]
                .iter()
                .map(|&alpha| sq(alpha - 255.0))
                .sum()
        };
    }
}