├── .editorconfig
├── .gitignore
├── .travis.yml
├── Cargo.toml
├── LICENSE
├── README.md
├── data
│   ├── test
│   └── test.yaz0
└── src
    ├── bin
    │   └── yaztool.rs
    ├── deflate.rs
    ├── error.rs
    ├── header.rs
    ├── inflate.rs
    └── lib.rs

--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
root = true

[*]
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.{rs,toml}]
indent_style = space
indent_size = 4

[*.yml]
indent_style = space
indent_size = 2

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
target/
**/*.rs.bk
Cargo.lock

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: rust
sudo: required

rust:
  - stable
  - beta
  - nightly

cache:
  cargo: true

matrix:
  allow_failures:
    - rust: nightly

notifications:
  email:
    on_success: never
    on_failure: always

script:
  - cargo test

after_success: |
  if [[ "$TRAVIS_RUST_VERSION" == stable ]]; then
    bash <(curl https://raw.githubusercontent.com/xd009642/tarpaulin/master/travis-install.sh)
    cargo tarpaulin --out Xml
    bash <(curl -s https://codecov.io/bash)
  fi

--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "yaz0"
version = "0.3.0"
authors = ["Erin Moon "]
description = "A library for de/compressing Nintendo Yaz0 files"
license = "MIT"
repository = "https://github.com/gcnhax/yaz0-rs"
readme = "README.md"
documentation = "https://docs.rs/yaz0"
keywords = ["yaz0", "gamecube", "gcn", "romhacking"]
categories = ["compression"]
exclude = ["data/*"]
edition = "2018"

[dependencies]
byteorder = "1.3"
arrayvec = "0.5"
thiserror = "1.0"
indicatif = { version = "0.15", optional = true }
clap = { version = "2.33", optional = true }

[dev-dependencies]
rand = "0.7"
pretty_assertions = "0.6"
indicatif = "0.15"

[features]
yaztool = ["clap", "indicatif"]

[[bin]]
name = "yaztool"
path = "src/bin/yaztool.rs"
required-features = ["yaztool"]

[badges]
maintenance = { status = "actively-developed" }
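Note that the `[[bin]]` target above is gated behind the `yaztool` feature via `required-features`, so a bare `cargo build` compiles only the library and never pulls in the optional `clap`/`indicatif` dependencies. A sketch of exercising the tool from a checkout (file paths here are placeholders):

```console
$ cargo build --features yaztool
$ cargo run --features yaztool --bin yaztool -- decompress input.szs output.bin
$ cargo run --features yaztool --bin yaztool -- compress output.bin roundtrip.szs
```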
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Erin Moon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# yaz0 - a Rust crate for de/compressing Nintendo Yaz0 ("SZS") files

[![Build Status](https://travis-ci.com/gcnhax/yaz0-rs.svg?branch=master)](https://travis-ci.com/gcnhax/yaz0-rs)
[![codecov](https://codecov.io/gh/gcnhax/yaz0-rs/branch/master/graph/badge.svg)](https://codecov.io/gh/gcnhax/yaz0-rs)
[![Crates.io Version](https://img.shields.io/crates/v/yaz0.svg)](https://crates.io/crates/yaz0)

Yaz0 is Nintendo's version of Haruhiko Okumura's in/famous 1989 LZSS implementation. It has been used continually in first-party titles, wrapping various other formats, since the N64 era.

**2024 note**: you might want to look into [szs](https://crates.io/crates/szs), by riidefi, which has much higher performance and a wide selection of compression methods, including emulations of the encoders used by specific games. It's not pure Rust, but it's unlikely that matters in any application you're using an SZS decompressor in.

## tools
To install `yaztool`, a de/flating utility for yaz0 files, do
```
$ cargo install yaz0 --features=yaztool
```

## licensing
All code in this repository is licensed under the MIT license; see `LICENSE`.
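## example
A minimal round-trip through the library API, using the re-exported `Yaz0Writer`, `Yaz0Archive`, and `CompressionLevel` types (a sketch; any `Read + Seek` source works in place of the `Cursor`):

```rust
use std::io::Cursor;
use yaz0::{CompressionLevel, Yaz0Archive, Yaz0Writer};

fn main() {
    let data = b"hello hello hello hello";

    // compress into an in-memory buffer
    let mut compressed = Vec::new();
    Yaz0Writer::new(&mut compressed)
        .compress_and_write(data, CompressionLevel::Lookahead { quality: 10 })
        .expect("compression failed");

    // parse the header back and inflate
    let inflated = Yaz0Archive::new(Cursor::new(compressed))
        .expect("not a Yaz0 stream")
        .decompress()
        .expect("decompression failed");

    assert_eq!(&inflated[..], &data[..]);
}
```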
--------------------------------------------------------------------------------
/data/test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gcnhax/yaz0-rs/4efc18f3c531450e7f2ef37ca8f3f6e2ce94cd62/data/test

--------------------------------------------------------------------------------
/data/test.yaz0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gcnhax/yaz0-rs/4efc18f3c531450e7f2ef37ca8f3f6e2ce94cd62/data/test.yaz0

--------------------------------------------------------------------------------
/src/bin/yaztool.rs:
--------------------------------------------------------------------------------
extern crate clap;
extern crate indicatif;
extern crate yaz0;

use std::io::Write;
use clap::{App, AppSettings, Arg, SubCommand};
use indicatif::ProgressBar;
use std::error::Error;
use std::fs::File;
use std::io::{Read, BufReader};
use std::sync::mpsc;
use std::thread;
use std::path::Path;
use yaz0::{Yaz0Archive, Yaz0Writer, CompressionLevel};
use yaz0::deflate::ProgressMsg;

fn main() -> Result<(), Box<dyn Error>> {
    let matches = App::new("yaztool")
        .author("Erin Moon ")
        .about("(de)compresses Yaz0 files")
        .setting(AppSettings::ArgRequiredElseHelp)
        .subcommand(SubCommand::with_name("decompress")
            .arg(Arg::with_name("INPUT")
                .required(true))
            .arg(Arg::with_name("OUTPUT")
                .required(true)))
        .subcommand(SubCommand::with_name("compress")
            .arg(Arg::with_name("INPUT")
                .required(true))
            .arg(Arg::with_name("OUTPUT")
                .required(true)))
        .get_matches();

    match matches.subcommand() {
        ("decompress", Some(matches)) => {
            let in_path = Path::new(matches.value_of("INPUT").unwrap());
            let out_path = Path::new(matches.value_of("OUTPUT").unwrap());

            let reader = BufReader::new(File::open(in_path)?);

            let mut yazfile = Yaz0Archive::new(reader)?;
            let inflated = yazfile.decompress()?;

            let mut outfile = File::create(out_path)?;
            outfile.write_all(&inflated)?;
        },
        ("compress", Some(matches)) => {
            let in_path = Path::new(matches.value_of("INPUT").unwrap());
            let out_path = Path::new(matches.value_of("OUTPUT").unwrap());

            let data = {
                let mut d = Vec::new();
                File::open(in_path)?.read_to_end(&mut d)?;
                d
            };

            // stream progress messages from the compressor into a progress bar
            let pb = ProgressBar::new(data.len() as u64);
            let (tx, rx) = mpsc::channel::<ProgressMsg>();
            thread::spawn(move || {
                while let Ok(progress) = rx.recv() {
                    pb.set_position(progress.read_head as u64);
                }
            });

            let quality = CompressionLevel::Lookahead { quality: 10 };
            let deflated = {
                let mut d = Vec::new();
                Yaz0Writer::new(&mut d)
                    .compress_and_write_with_progress(&data, quality, tx)?;
                d
            };

            let mut outfile = File::create(out_path)?;
            outfile.write_all(&deflated)?;
        },
        _ => unreachable!(),
    }

    Ok(())
}
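The `decompress` subcommand above lets the library allocate the output `Vec`; a caller that wants to own the buffer can instead pair `expected_size()` with `decompress_into()` from `src/inflate.rs`. A sketch (the `inflate_to_buffer` helper is illustrative, not part of the crate):

```rust
use std::fs::File;
use std::io::BufReader;
use yaz0::Yaz0Archive;

fn inflate_to_buffer(path: &str) -> Result<Vec<u8>, yaz0::Error> {
    let mut archive = Yaz0Archive::new(BufReader::new(File::open(path)?))?;

    // preallocate using the decompressed size recorded in the Yaz0 header...
    let mut buf = vec![0u8; archive.expected_size()];

    // ...and inflate directly into it.
    archive.decompress_into(&mut buf)?;
    Ok(buf)
}
```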
--------------------------------------------------------------------------------
/src/deflate.rs:
--------------------------------------------------------------------------------
use arrayvec::{self, ArrayVec};
use crate::header::Yaz0Header;
use std::io::Write;
use std::sync::mpsc::{self, Sender};
use crate::Error;

pub struct Yaz0Writer<'a, W: 'a>
where
    W: Write,
{
    writer: &'a mut W,
}

/// Represents a compression run of length `length` starting at `cursor`.
#[derive(Debug, Clone, Copy)]
struct Run {
    pub cursor: usize,
    pub length: usize,
}

impl Run {
    /// Returns a run of zero length starting at position 0.
    pub fn zero() -> Run {
        Run {
            cursor: 0,
            length: 0,
        }
    }

    /// Returns `self` unless `other` is a longer run, in which case it returns `other`.
    pub fn swap_if_better(self, other: Run) -> Run {
        if self.length > other.length {
            self
        } else {
            other
        }
    }
}

/// Message sent by the compressor to inform other threads of the compression progress.
#[derive(Debug)]
pub struct ProgressMsg {
    pub read_head: usize,
}

/// Naively looks back in the input stream, trying to find the longest possible
/// substring that matches the data after the current read cursor.
fn find_naive_run(src: &[u8], cursor: usize, lookback: usize) -> Run {
    // the location at which we start searching: `lookback` bytes before
    // the current read cursor. saturating_sub prevents underflow.
    let search_start = cursor.saturating_sub(lookback);

    // the best runlength we've seen so far, and where that match occurred.
    let mut run = Run::zero();

    for search_head in search_start..cursor {
        // incremental check for every possible substring after the read head.
        let mut runlength = 0;
        while runlength < src.len() - cursor {
            if src[search_head + runlength] != src[cursor + runlength] {
                break;
            }
            runlength += 1;
        }

        // if this search position was better than any we've seen before, update our best run.
        run = run.swap_if_better(Run {
            cursor: search_head,
            length: runlength,
        })
    }

    run
}

/// Looks back in the input stream, finding a naive run; if one is found, it tries
/// advancing a single byte and finding a new run from there.
/// If the new run is at least two bytes longer than the initial one, it picks that
/// instead and signals that the caller must emit the skipped byte as a literal
/// before copying the run.
///
/// Returns a tuple of whether we need to copy an initial byte for a lookahead run,
/// and whatever run was found.
///
/// This is much better than plain naive search in most cases. It's also pretty much
/// what Nintendo does.
fn find_lookahead_run(src: &[u8], cursor: usize, lookback: usize) -> (bool, Run) {
    // get the best naive run.
    let run = find_naive_run(src, cursor, lookback);

    // was this run worthwhile at all?
    if run.length >= 3 {
        // if we look forward a single byte and reencode, how does that look?
        let lookahead_run = find_naive_run(src, cursor + 1, lookback);

        // if it's at least 2 bytes better than the original naive run, pick it.
        if lookahead_run.length >= run.length + 2 {
            return (true, lookahead_run);
        }
    }

    (false, run)
}
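To make the `+ 2` threshold concrete, here is a trace on the input used by the `deflate_with_lookahead` test at the bottom of this file:

```rust
// src = [0, 0, 0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa]
//
// at cursor = 4, the naive search finds only a 3-byte match (src[0..3] against
// src[4..7]); re-searching from cursor + 1 = 5 finds a 9-byte overlapping match
// rooted at search_head = 4. since 9 >= 3 + 2, find_lookahead_run returns
// (true, <the 9-byte run>): the caller emits src[4] as a literal, then encodes
// the cached 9-byte run as the 2-byte packet 0x70 0x00 (length 9, dist 0).
```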
/// Writes a [Run] to the `destination`, with the cursor at `read_head`.
fn write_run<A>(read_head: usize, run: &Run, destination: &mut ArrayVec<A>) -> usize
where
    A: arrayvec::Array<Item = u8>,
{
    // compute how far back the start of the run is from the read head, minus an offset of 1:
    // due to this offset, copying the byte just before the read head is encoded as dist = 0.
    let dist = read_head - run.cursor - 1;

    // if the run is 0x12 (18) bytes or longer, we must use a 3-byte packet instead of a 2-byte one.
    if run.length >= 0x12 {
        // 3-byte packet. this looks like the following:
        //
        //  1 byte                    2 bytes         3 bytes
        // ├────────┬───────────────┼───────────────┼─────────────┐
        // │ 0b0000 │ dist (4 msbs) │ dist (8 lsbs) │ length-0x12 │
        // └────────┴───────────────┴───────────────┴─────────────┘

        destination.push((dist as u32 >> 8) as u8);
        destination.push((dist as u32 & 0xff) as u8);
        let actual_runlength = run.length.min(0xff + 0x12); // clip to the maximum encodable runlength
        destination.push((actual_runlength - 0x12) as u8);

        actual_runlength
    } else {
        // 2-byte packet. this looks like the following:
        //
        //  1 byte                      2 bytes
        // ├──────────┬───────────────┼───────────────┐
        // │ length-2 │ dist (4 msbs) │ dist (8 lsbs) │
        // └──────────┴───────────────┴───────────────┘

        destination.push(((run.length as u8 - 2) << 4) | (dist as u32 >> 8) as u8);
        destination.push((dist as u32 & 0xff) as u8);

        run.length
    }
}
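Combining the two packet shapes with the codon byte built in `compress_lookaround` below, the whole encoding of 30 zero bytes (the `deflate_run` test case) looks like this:

```rust
// input: [0x00; 30]
//
//   0x80              codon: bit 7 set -> packet 1 is a literal; the rest are runs
//   0x00              packet 1: the literal src[0]
//   0x00 0x00 0x0b    packet 2: a 3-byte run packet: dist = 0 (start copying one
//                     byte behind the write head) and length = 0x0b + 0x12 = 29,
//                     an overlapping copy that produces the remaining 29 zeros
//
// full payload: [0x80, 0x00, 0x00, 0x00, 0x0b] -- 5 bytes for 30 input bytes
```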
/// Compresses the data in `src` at [CompressionLevel] `level`, using either naive or
/// lookahead compression, sending progress updates over `progress_tx`. Returns a [Vec] containing
/// the compressed payload.
fn compress_lookaround(
    src: &[u8],
    level: CompressionLevel,
    progress_tx: Sender<ProgressMsg>,
) -> Vec<u8> {
    let quality = match level {
        CompressionLevel::Naive { quality } => quality,
        CompressionLevel::Lookahead { quality } => quality,
    };
    const MAX_LOOKBACK: usize = 0x1000;
    let lookback = (MAX_LOOKBACK as f32 / (10. / quality as f32)).floor() as usize;

    // caches a lookahead run for the next packet, since the byte at the
    // read head must be emitted as a literal first
    let mut lookahead_cache: Option<Run> = None;
    let mut read_head = 0;
    let mut encoded = Vec::new();
    // -- encode a packet stream
    while read_head < src.len() {
        // the codon (flag byte) for this group of packets
        let mut codon: u8 = 0x0;

        // we use this as an arena for preparing packets.
        // justification for the size:
        //   8 codes * 3 bytes/code = 24 bytes of packet (abs. max.)
        let mut packets = ArrayVec::<[u8; 24]>::new();

        // -- encode the packets
        let mut packet_n = 0;
        while packet_n < 8 {
            // -- search back for existing data. if we already have data in the lookahead cache, use that instead.
            let (hit_lookahead, best_run) = if let Some(cache) = lookahead_cache.take() {
                (false, cache)
            } else {
                match level {
                    CompressionLevel::Lookahead { .. } => {
                        find_lookahead_run(src, read_head, lookback)
                    }
                    CompressionLevel::Naive { .. } => {
                        (false, find_naive_run(src, read_head, lookback))
                    }
                }
            };

            if hit_lookahead {
                lookahead_cache = Some(best_run);
            }

            // if we hit a lookahead run, we need to emit the head byte as a literal before the run.
            // otherwise, if the run is long enough to compress, encode it as a packet.
            if best_run.length >= 3 && !hit_lookahead {
                read_head += write_run(read_head, &best_run, &mut packets);
            } else {
                // bail out if we've hit the end of the input.
                if read_head >= src.len() {
                    break;
                }

                // push the literal packet data
                packets.push(src[read_head]);

                // mark the packet as a literal in the codon
                codon |= 0x80 >> packet_n;

                // push the read head forward
                read_head += 1;
            }

            // advance the packet counter
            packet_n += 1;
        }

        // -- write (codon :: packets) into the compressed stream
        encoded.push(codon);
        encoded.extend(&packets);

        if read_head % 10 == 0 || read_head == src.len() - 1 {
            // ignore errors if the rx is disconnected
            let _ = progress_tx.send(ProgressMsg { read_head });
        }
    }

    encoded
}

/// Compresses `data` with [CompressionLevel] `level`, sending progress updates over `progress_tx`.
/// Returns a [Vec] of the compressed payload.
fn compress_with_progress(
    data: &[u8],
    level: CompressionLevel,
    progress_tx: Sender<ProgressMsg>,
) -> Vec<u8> {
    match level {
        CompressionLevel::Naive { .. } | CompressionLevel::Lookahead { .. } => {
            compress_lookaround(data, level, progress_tx)
        }
    }
}

/// Compresses `data` with [CompressionLevel] `level`.
/// Returns a [Vec] of the compressed payload.
fn compress(data: &[u8], level: CompressionLevel) -> Vec<u8> {
    let (tx, _) = mpsc::channel();
    compress_with_progress(data, level, tx)
}

impl<'a, W> Yaz0Writer<'a, W>
where
    W: Write,
{
    pub fn new(writer: &'a mut W) -> Yaz0Writer<'a, W>
    where
        W: Write,
    {
        Yaz0Writer { writer }
    }

    /// Compress and write the passed `data`, at compression level `level`.
    pub fn compress_and_write(self, data: &[u8], level: CompressionLevel) -> Result<(), Error> {
        // -- construct and write the header
        let header = Yaz0Header::new(data.len());
        header.write(self.writer)?;

        // -- compress and write the data
        let compressed = compress(data, level);
        self.writer.write_all(&compressed)?;

        Ok(())
    }

    /// Compress and write the passed `data`, at compression level `level`.
    /// Progress updates are streamed out of `progress_tx`.
    pub fn compress_and_write_with_progress(
        self,
        data: &[u8],
        level: CompressionLevel,
        progress_tx: Sender<ProgressMsg>,
    ) -> Result<(), Error> {
        // -- construct and write the header
        let header = Yaz0Header::new(data.len());
        header.write(self.writer)?;

        // -- compress and write the data
        let compressed = compress_with_progress(data, level, progress_tx);
        self.writer.write_all(&compressed)?;

        Ok(())
    }
}
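For reference, the `quality` knob above maps linearly onto the search window: `lookback = floor(4096 / (10 / quality))`, roughly `409.6 * quality` bytes, capped by the format's 12-bit distance field. Some illustrative values:

```rust
// quality = 10 -> lookback = 4096 bytes (the maximum a packet can express)
// quality =  5 -> lookback = 2048 bytes
// quality =  1 -> lookback =  409 bytes
```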
/// Represents the aggressiveness of the lookback used by the compressor.
#[derive(Clone, Copy)]
pub enum CompressionLevel {
    Naive {
        /// Lookback distance. Set between 1 and 10; 10 corresponds to the greatest lookback distance.
        quality: usize,
    },
    Lookahead {
        /// Lookback distance. Set between 1 and 10; 10 corresponds to the greatest lookback distance.
        quality: usize,
    },
}

#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)] // don't mess up our arrays 😅
    fn deflate_naive() {
        const Q: CompressionLevel = CompressionLevel::Naive { quality: 10 };

        assert_eq!(compress(&[12, 34, 56], Q), [0xe0, 12, 34, 56]);

        assert_eq!(
            compress(&[0, 1, 2, 0xa, 0, 1, 2, 3, 0xb, 0, 1, 2, 3, 4, 5, 6, 7], Q),
            [
                0xf6, /* | id: */ 0, 1, 2, 0xa,
                      /* run: */ 0x10, 0x03,
                      /* id:  */ 3, 0xb,
                      /* run: */ 0x20, 0x04,
                0xf0, /* | id: */ 4, 5, 6, 7,
            ]
        );
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)] // don't mess up our arrays 😅
    fn deflate_with_lookahead() {
        const Q: CompressionLevel = CompressionLevel::Lookahead { quality: 10 };

        assert_eq!(
            compress(&[0, 0, 0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa], Q),
            [
                0xfa, /* | id: */ 0, 0, 0, 10, 0,
                      /* run: */ 0x70, 0x00,
                      /* id:  */ 0xa,
            ]
        );
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn deflate_run() {
        const Q: CompressionLevel = CompressionLevel::Lookahead { quality: 10 };

        assert_eq!(compress(&[0; 30], Q), [0x80, /* | id: */ 0, /* compr: */ 0, 0, 11]);
    }

    #[test]
    fn inverts() {
        use crate::inflate::Yaz0Archive;
        use rand::distributions::Standard;
        use rand::{self, Rng};
        use std::io::Cursor;

        for _ in 0..10 {
            let data: Vec<u8> = rand::thread_rng().sample_iter(&Standard).take(50).collect();

            let mut deflated = Vec::new();
            Yaz0Writer::new(&mut deflated)
                .compress_and_write(&data, CompressionLevel::Lookahead { quality: 10 })
                .expect("Could not deflate");

            let inflated = Yaz0Archive::new(Cursor::new(deflated))
                .expect("Error creating Yaz0Archive")
                .decompress()
                .expect("Error inflating Yaz0 archive");

            assert_eq!(inflated, data);
        }
    }

    #[test]
    // this takes way too long on CI. TODO: figure out how to still test this on CI;
    // maybe just build _this one test_ with --release.
    #[ignore]
    fn inverts_test_file() {
        use indicatif::{ProgressBar, ProgressDrawTarget};
        use crate::inflate::Yaz0Archive;
        use std::io::Cursor;
        use std::thread;

        let data: &[u8] = include_bytes!("../data/test");

        let (tx, rx) = mpsc::channel::<ProgressMsg>();
        let pb = ProgressBar::new(data.len() as u64);
        pb.set_draw_target(ProgressDrawTarget::stdout());
        thread::spawn(move || {
            while let Ok(progress) = rx.recv() {
                pb.set_position(progress.read_head as u64);
            }
        });

        let mut deflated = Vec::new();
        Yaz0Writer::new(&mut deflated)
            .compress_and_write_with_progress(
                &data,
                CompressionLevel::Lookahead { quality: 10 },
                tx,
            )
            .expect("Could not deflate");

        let reader = Cursor::new(&deflated);

        let inflated = Yaz0Archive::new(reader)
            .expect("Error creating Yaz0Archive")
            .decompress()
            .expect("Error inflating Yaz0 archive");

        println!(
            "original: {:#x} / compressed (w/ header): {:#x} ({:.3}%)",
            data.len(),
            deflated.len(),
            deflated.len() as f64 * 100. / data.len() as f64
        );

        assert_eq!(inflated, data);
    }
}
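Both the writer above and the inflater surface the crate-level `Error` defined in `src/error.rs`, next. A sketch of handling its variants at a call site (the `try_inflate` helper is illustrative, not part of the crate):

```rust
use std::io::Cursor;
use yaz0::{Error, Yaz0Archive};

fn try_inflate(bytes: Vec<u8>) -> Option<Vec<u8>> {
    match Yaz0Archive::new(Cursor::new(bytes)).and_then(|mut a| a.decompress()) {
        Ok(data) => Some(data),
        Err(Error::InvalidMagic) => {
            eprintln!("input is not a Yaz0 stream");
            None
        }
        Err(Error::Io(e)) => {
            eprintln!("i/o error while inflating: {}", e);
            None
        }
    }
}
```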
--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
use thiserror::Error;

#[derive(Error, Debug)]
pub enum Error {
    /// An error was encountered performing IO operations.
    #[error("backing i/o error")]
    Io(#[from] std::io::Error),
    /// The Yaz0 file header's magic was invalid.
    #[error("yaz0 header magic invalid")]
    InvalidMagic,
}

--------------------------------------------------------------------------------
/src/header.rs:
--------------------------------------------------------------------------------
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use crate::error::Error;
use std::io::{Read, Seek, SeekFrom, Write};

/// The header on a Yaz0 file.
#[derive(Debug)]
pub struct Yaz0Header {
    /// Expected size of the decompressed file
    pub expected_size: usize,
}

impl Yaz0Header {
    pub fn new(expected_size: usize) -> Yaz0Header {
        Yaz0Header { expected_size }
    }

    /// Parses the header of a Yaz0 file, provided via the passed reader.
    /// Leaves the read head at the start of the data block.
    pub fn parse<R>(reader: &mut R) -> Result<Yaz0Header, Error>
    where
        R: Read + Seek,
    {
        let mut magic = [0u8; 4];
        reader.read_exact(&mut magic)?;
        if &magic != b"Yaz0" {
            return Err(Error::InvalidMagic);
        }

        let expected_size = reader.read_u32::<BigEndian>()?;

        // skip the 8 reserved bytes
        reader.seek(SeekFrom::Current(8))?;

        Ok(Yaz0Header::new(expected_size as usize))
    }

    /// Writes the header of a Yaz0 file to the passed writer.
    /// Leaves the write head at the start of the data block.
    pub fn write<W>(&self, writer: &mut W) -> Result<(), Error>
    where
        W: Write,
    {
        writer.write_all(b"Yaz0")?;
        writer.write_u32::<BigEndian>(self.expected_size as u32)?;
        writer.write_all(&[0x0; 8])?;

        Ok(())
    }
}
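Concretely, `write` above always produces a fixed 16-byte header: the magic, the decompressed size as a big-endian `u32`, and eight reserved zero bytes. For an expected size of 30:

```rust
// offset  bytes                     meaning
// 0x00    59 61 7a 30               "Yaz0" magic
// 0x04    00 00 00 1e               decompressed size (30), big-endian u32
// 0x08    00 00 00 00 00 00 00 00   reserved; written as zeros
```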
--------------------------------------------------------------------------------
/src/inflate.rs:
--------------------------------------------------------------------------------
use byteorder::ReadBytesExt;
use std::io::{Read, Seek, SeekFrom};

use crate::header::Yaz0Header;
use crate::Error;

/// Wraps a reader of Yaz0 data, providing decompression methods.
#[derive(Debug)]
pub struct Yaz0Archive<R>
where
    R: Read + Seek,
{
    reader: R,

    data_start: usize,
    header: Yaz0Header,
}

impl<R> Yaz0Archive<R>
where
    R: Read + Seek,
{
    /// Creates a new `Yaz0Archive` from a reader.
    pub fn new(mut reader: R) -> Result<Yaz0Archive<R>, Error> {
        // Parses the header and advances the reader to the start of the data
        let header = Yaz0Header::parse(&mut reader)?;

        let data_start = reader.seek(SeekFrom::Current(0))?;

        Ok(Yaz0Archive {
            reader,
            header,
            data_start: data_start as usize,
        })
    }

    /// Get the expected size of the inflated data from the parsed `Yaz0Header`.
    pub fn expected_size(&self) -> usize {
        self.header.expected_size
    }

    /// Decompresses the Yaz0 file, producing a `Vec<u8>` of the decompressed data.
    pub fn decompress(&mut self) -> Result<Vec<u8>, Error> {
        let mut dest: Vec<u8> = Vec::with_capacity(self.header.expected_size);
        dest.resize(self.header.expected_size, 0x00);
        self.decompress_into(&mut dest)?;
        Ok(dest)
    }

    /// Decompresses the Yaz0 file into a destination buffer.
    ///
    /// # Invariants
    /// `dest` must have a length of at least the required size to decompress successfully
    /// (consider using [`Yaz0Archive::expected_size`] to determine this).
    pub fn decompress_into(&mut self, dest: &mut [u8]) -> Result<(), Error> {
        assert!(dest.len() >= self.expected_size());

        let mut dest_pos: usize = 0;

        let mut ops_left: u8 = 0;
        let mut code_byte: u8 = 0;

        while dest_pos < self.header.expected_size {
            if ops_left == 0 {
                code_byte = self.reader.read_u8()?;
                ops_left = 8;
            }

            if code_byte & 0x80 != 0 {
                dest[dest_pos] = self.reader.read_u8()?;
                dest_pos += 1;
            } else {
                let byte1: u8 = self.reader.read_u8()?;
                let byte2: u8 = self.reader.read_u8()?;

                // Calculate where the copy should start
                let dist = (((byte1 & 0xf) as usize) << 8) | (byte2 as usize);
                let run_base = dest_pos - (dist + 1);

                // Figure out how many bytes we have to copy
                let copy_len: usize = match byte1 >> 4 {
                    // a zero nybble marks a 3-byte packet: read the next input byte and add 0x12
                    0 => self.reader.read_u8()? as usize + 0x12,
                    // otherwise, the upper nybble of byte1 plus 2 is the copy length
                    n => n as usize + 2,
                };

                for i in 0..copy_len {
                    dest[dest_pos] = dest[run_base + i];
                    dest_pos += 1;
                }
            }

            // use the next operation bit from the code byte
            code_byte <<= 1;
            ops_left -= 1;
        }

        Ok(())
    }
}
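As a worked example of the decode loop above: the payload `[0x80, 0x00, 0x00, 0x00, 0x0b]` (what the `deflate_run` test in `src/deflate.rs` produces for 30 zero bytes) starts with codon `0x80`, so the first operation copies the literal `0x00`; the second operation reads `byte1 = 0x00` and `byte2 = 0x00`, giving `dist = 0` and `run_base = 1 - 1 = 0`, and since `byte1 >> 4 == 0` it reads one more byte for `copy_len = 0x0b + 0x12 = 29`, an overlapping copy that fills the remaining 29 zeros. A runnable sketch against the public API:

```rust
use std::io::Cursor;
use yaz0::Yaz0Archive;

fn main() {
    // hand-assemble a file: 16-byte header (expected size 30), then the payload.
    let mut file = Vec::new();
    file.extend_from_slice(b"Yaz0");
    file.extend_from_slice(&30u32.to_be_bytes());
    file.extend_from_slice(&[0u8; 8]);
    file.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x0b]);

    let inflated = Yaz0Archive::new(Cursor::new(file))
        .expect("header should parse")
        .decompress()
        .expect("payload should decode");

    assert_eq!(inflated, vec![0u8; 30]);
}
```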
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;
    use pretty_assertions::assert_eq;

    /// Inflate a test .szs file encoded by yaz0enc, and compare against the decompressed file produced by yaz0dec.
    #[test]
    fn test_deflate_bianco() {
        let data: &[u8] = include_bytes!("../data/test.yaz0");
        let reference_decompressed: &[u8] = include_bytes!("../data/test");

        let reader = Cursor::new(data);

        let mut f = Yaz0Archive::new(reader).unwrap();

        let inflated = f.decompress().unwrap();

        println!("{} :: {}", inflated.len(), reference_decompressed.len());

        assert!(inflated == reference_decompressed, "inflated bianco0 did not match reference inflation!");
    }

    /// Test loading a small constructed Yaz0 file containing random data.
    /// Note: this file will almost certainly error if decompression is attempted.
    #[test]
    fn test_load() {
        let data: &[u8] = &[
            // 'Yaz0'
            0x59, 0x61, 0x7a, 0x30,
            // 13371337 bytes, when inflated
            0x00, 0xcc, 0x07, 0xc9,
            // 8 bytes of zeros
            0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00,

            // 20 bytes of (random) data
            0x69, 0x95, 0xa4, 0xa3,
            0x5f, 0xfd, 0xf6, 0x8c,
            0x7d, 0xee, 0x93, 0xc5,
            0x4a, 0x1f, 0xd3, 0x19,
            0xdc, 0x78, 0xfd, 0x3f,
        ];

        let cursor = Cursor::new(&data);
        let f = Yaz0Archive::new(cursor).unwrap();

        assert_eq!(f.header.expected_size, 13371337);
    }

    /// Check that Yaz0 header parsing fails when provided with a file not starting with the Yaz0 magic.
    #[test]
    fn test_bad_magic() {
        let data: &[u8] = &[
            // 'Foo0'
            0x46, 0x6f, 0x6f, 0x30,
            // 13371337 bytes, when inflated
            0x00, 0xcc, 0x07, 0xc9,
            // 8 bytes of zeros
            0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00,
        ];

        let cursor = Cursor::new(&data);
        let result = Yaz0Archive::new(cursor);

        assert!(result.is_err());
    }
}

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
#[cfg(test)]
#[macro_use]
extern crate pretty_assertions;

mod error;
pub mod deflate;
pub mod header;
pub mod inflate;

pub use crate::deflate::{CompressionLevel, Yaz0Writer};
pub use crate::error::Error;
pub use crate::header::Yaz0Header;
pub use crate::inflate::Yaz0Archive;

--------------------------------------------------------------------------------