├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md └── src └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .vscode/ 3 | /target 4 | **/*.rs.bk 5 | Cargo.lock 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "kaitai" 3 | version = "0.2.0" 4 | authors = ["Bradlee Speice ", "Aon"] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | encoding-next = "0.3" 9 | cp437 = "*" 10 | unicode-segmentation = "1.9.0" 11 | flate2 = "1.0" 12 | 13 | [dev-dependencies] 14 | tempfile = "3.4.0" 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2024 Kaitai Project 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kaitai Struct: runtime library for Rust 2 | 3 | This library implements the Kaitai Struct API for Rust. 4 | 5 | Kaitai Struct is a declarative language used to describe various binary 6 | data structures, laid out in files or in memory: e.g. binary file 7 | formats, network stream packet formats, etc. 8 | 9 | Further reading: 10 | 11 | * [About Kaitai Struct](https://kaitai.io/) 12 | * [About API implemented in this library](https://doc.kaitai.io/stream_api.html) 13 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use encoding::{label::encoding_from_whatwg_label, DecoderTrap}; 2 | use flate2::read::ZlibDecoder; 3 | 4 | use std::{ 5 | any::{type_name, Any}, 6 | cell::{Ref, RefCell, RefMut}, 7 | convert::TryInto, 8 | fmt, 9 | io::{Read, Seek, SeekFrom}, 10 | ops::Deref, 11 | path::Path, 12 | rc::{Rc, Weak}, 13 | }; 14 | use unicode_segmentation::UnicodeSegmentation; 15 | 16 | #[derive(Debug, PartialEq, Eq, Clone)] 17 | #[non_exhaustive] 18 | pub enum KError { 19 | Eof { requested: usize, available: usize }, 20 | EmptyIterator, 21 | UnknownEncoding { name: String }, 22 | MissingRoot, 23 | MissingParent, 24 | ReadBitsTooLarge { requested: usize }, 25 | ValidationFailed(ValidationFailedError), 26 | NoTerminatorFound, 27 | IoError { msg: String }, 28 | BytesDecodingError { msg: String }, 29 | CastError, 30 | UndecidedEndianness { src_path: String }, 31 | } 32 | pub type KResult = Result; 
33 | 34 | /// Details of the failed validation. 35 | /// 36 | ///
37 | /// 38 | /// The content of this struct is likely to change in future Kaitai Struct versions. 39 | /// 40 | ///
41 | #[derive(Debug, PartialEq, Eq, Clone)] 42 | pub struct ValidationFailedError { 43 | pub kind: ValidationKind, 44 | pub src_path: String, 45 | } 46 | 47 | #[derive(Debug, PartialEq, Eq, Clone)] 48 | #[non_exhaustive] 49 | pub enum ValidationKind { 50 | NotEqual, 51 | LessThan, 52 | GreaterThan, 53 | NotAnyOf, 54 | NotInEnum, 55 | Expr, 56 | } 57 | 58 | pub trait CustomDecoder { 59 | fn decode(&self, bytes: &[u8]) -> Result, String>; 60 | } 61 | 62 | #[derive(Default)] 63 | pub struct SharedType(RefCell>); 64 | 65 | impl Clone for SharedType { 66 | fn clone(&self) -> Self { 67 | Self(RefCell::new(Weak::clone(&*self.0.borrow()))) 68 | } 69 | } 70 | 71 | // stop recursion while printing 72 | impl fmt::Debug for SharedType { 73 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 74 | let w = &*self.0.borrow(); 75 | match w.strong_count() { 76 | 0 => write!(f, "SharedType(Empty)"), 77 | _ => write!(f, "SharedType(Weak({:?}))", Weak::::as_ptr(w)), 78 | } 79 | } 80 | } 81 | 82 | impl SharedType { 83 | pub fn new(rc: Rc) -> Self { 84 | Self(RefCell::new(Rc::downgrade(&rc))) 85 | } 86 | 87 | pub fn empty() -> Self { 88 | Self(RefCell::new(Weak::new())) 89 | } 90 | 91 | pub fn is_empty(&self) -> bool { 92 | self.0.borrow().strong_count() == 0 93 | } 94 | 95 | pub fn get(&self) -> KResult> { 96 | match self.0.borrow().upgrade() { 97 | Some(rc) => Ok(OptRc::from(rc)), 98 | None => Err(KError::MissingParent), 99 | } 100 | } 101 | 102 | pub fn get_value(&self) -> &RefCell> { 103 | &self.0 104 | } 105 | 106 | pub fn set(&self, rc: KResult>) { 107 | *self.0.borrow_mut() = match rc.ok() { 108 | Some(v) => Rc::downgrade(&v.get()), 109 | None => Weak::new(), 110 | } 111 | } 112 | } 113 | 114 | // we use own type OptRc<> instead of Rc<> only for one reason: 115 | // by default to not create default value of type T (instead contain Option(None)) 116 | // (T could have cyclic-types inside, as a result we got stack overflow) 117 | #[derive(Debug)] 118 | pub struct 
OptRc(Option>); 119 | 120 | impl OptRc { 121 | pub fn new(orc: &Option>) -> Self { 122 | match orc { 123 | Some(rc) => OptRc::from(rc.clone()), 124 | None => OptRc::default(), 125 | } 126 | } 127 | 128 | pub fn get(&self) -> Rc { 129 | self.0.as_ref().unwrap().clone() 130 | } 131 | 132 | pub fn get_value(&self) -> &Option> { 133 | &self.0 134 | } 135 | 136 | pub fn is_none(&self) -> bool { 137 | self.0.is_none() 138 | } 139 | 140 | pub fn get_mut(&mut self) -> &mut Rc { 141 | self.0.as_mut().unwrap() 142 | } 143 | } 144 | 145 | impl Default for OptRc { 146 | #[inline] 147 | fn default() -> Self { 148 | OptRc(None) 149 | } 150 | } 151 | 152 | impl Clone for OptRc { 153 | fn clone(&self) -> Self { 154 | OptRc(self.0.clone()) 155 | } 156 | } 157 | 158 | impl From> for OptRc { 159 | fn from(v: Rc) -> Self { 160 | OptRc(Some(v)) 161 | } 162 | } 163 | 164 | impl From for OptRc { 165 | fn from(v: T) -> Self { 166 | OptRc(Some(v.into())) 167 | } 168 | } 169 | 170 | impl Deref for OptRc { 171 | type Target = T; 172 | 173 | #[inline(always)] 174 | fn deref(&self) -> &Self::Target { 175 | self.0.as_ref().unwrap() 176 | } 177 | } 178 | 179 | pub trait KStruct: Default { 180 | type Root: KStruct; 181 | type Parent: KStruct; 182 | 183 | /// Parse this struct (and any children) from the supplied stream 184 | fn read( 185 | self_rc: &OptRc, 186 | _io: &S, 187 | _root: SharedType, 188 | _parent: SharedType, 189 | ) -> KResult<()>; 190 | 191 | /// helper function to read struct 192 | fn read_into( 193 | _io: &S, 194 | _root: Option>, 195 | _parent: Option>, 196 | ) -> KResult> { 197 | let t = OptRc::from(T::default()); 198 | let root = Self::downcast(_root, t.clone(), true); 199 | let parent = Self::downcast(_parent, t.clone(), false); 200 | T::read(&t, _io, root, parent)?; 201 | Ok(t) 202 | } 203 | 204 | /// helper function to special initialize and read struct 205 | fn read_into_with_init( 206 | _io: &S, 207 | _root: Option>, 208 | _parent: Option>, 209 | init: &dyn Fn(&mut T) -> 
KResult<()>, 210 | ) -> KResult> { 211 | let mut t = OptRc::from(T::default()); 212 | init(Rc::get_mut(t.get_mut()).unwrap())?; 213 | 214 | let root = Self::downcast(_root, t.clone(), true); 215 | let parent = Self::downcast(_parent, t.clone(), false); 216 | T::read(&t, _io, root, parent)?; 217 | Ok(t) 218 | } 219 | 220 | fn downcast(opt_rc: Option>, t: OptRc, panic: bool) -> SharedType 221 | where 222 | T: KStruct + Default + Any, 223 | U: 'static, 224 | { 225 | if let Some(rc) = opt_rc { 226 | rc 227 | } else { 228 | let t_any = &t.get() as &dyn Any; 229 | //println!("`{}` is a '{}' type", type_name_of_val(&t), type_name::>()); 230 | match t_any.downcast_ref::>() { 231 | Some(as_result) => SharedType::::new(Rc::clone(as_result)), 232 | None => { 233 | if panic { 234 | #[cfg(feature = "type_name_of_val")] 235 | panic!( 236 | "`{}` is not a '{}' type", 237 | std::any::type_name_of_val(&t), 238 | type_name::>() 239 | ); 240 | #[cfg(not(feature = "type_name_of_val"))] 241 | panic!("`{:p}` is not a '{}' type", &t, type_name::>()); 242 | } 243 | SharedType::::empty() 244 | } 245 | } 246 | } 247 | } 248 | } 249 | 250 | /// Dummy struct used to indicate an absence of value; needed for 251 | /// root structs to satisfy the associated type bounds in the 252 | /// `KStruct` trait. 
253 | #[derive(Debug, Default, Copy, Clone, PartialEq)] 254 | pub struct KStructUnit; 255 | 256 | impl KStruct for KStructUnit { 257 | type Root = KStructUnit; 258 | type Parent = KStructUnit; 259 | 260 | fn read( 261 | _self_rc: &OptRc, 262 | _io: &S, 263 | _root: SharedType, 264 | _parent: SharedType, 265 | ) -> KResult<()> { 266 | Ok(()) 267 | } 268 | } 269 | 270 | impl From for KError { 271 | fn from(err: std::io::Error) -> Self { 272 | Self::IoError { 273 | msg: err.to_string(), 274 | } 275 | } 276 | } 277 | 278 | pub trait KStream { 279 | fn clone(&self) -> BytesReader; 280 | fn size(&self) -> usize; 281 | 282 | fn is_eof(&self) -> bool { 283 | if self.get_state().bits_left > 0 { 284 | return false; 285 | } 286 | self.pos() >= self.size() 287 | } 288 | 289 | fn seek(&self, position: usize) -> KResult<()> { 290 | self.get_state_mut().pos = position; 291 | Ok(()) 292 | } 293 | 294 | fn pos(&self) -> usize { 295 | self.get_state().pos 296 | } 297 | 298 | fn read_s1(&self) -> KResult { 299 | Ok(self.read_bytes(1)?[0] as i8) 300 | } 301 | fn read_s2be(&self) -> KResult { 302 | Ok(i16::from_be_bytes(self.read_bytes(2)?.try_into().unwrap())) 303 | } 304 | fn read_s4be(&self) -> KResult { 305 | Ok(i32::from_be_bytes(self.read_bytes(4)?.try_into().unwrap())) 306 | } 307 | fn read_s8be(&self) -> KResult { 308 | Ok(i64::from_be_bytes(self.read_bytes(8)?.try_into().unwrap())) 309 | } 310 | fn read_s2le(&self) -> KResult { 311 | Ok(i16::from_le_bytes(self.read_bytes(2)?.try_into().unwrap())) 312 | } 313 | fn read_s4le(&self) -> KResult { 314 | Ok(i32::from_le_bytes(self.read_bytes(4)?.try_into().unwrap())) 315 | } 316 | fn read_s8le(&self) -> KResult { 317 | Ok(i64::from_le_bytes(self.read_bytes(8)?.try_into().unwrap())) 318 | } 319 | 320 | fn read_u1(&self) -> KResult { 321 | Ok(self.read_bytes(1)?[0]) 322 | } 323 | fn read_u2be(&self) -> KResult { 324 | Ok(u16::from_be_bytes(self.read_bytes(2)?.try_into().unwrap())) 325 | } 326 | fn read_u4be(&self) -> KResult { 327 | 
Ok(u32::from_be_bytes(self.read_bytes(4)?.try_into().unwrap())) 328 | } 329 | fn read_u8be(&self) -> KResult { 330 | Ok(u64::from_be_bytes(self.read_bytes(8)?.try_into().unwrap())) 331 | } 332 | fn read_u2le(&self) -> KResult { 333 | Ok(u16::from_le_bytes(self.read_bytes(2)?.try_into().unwrap())) 334 | } 335 | fn read_u4le(&self) -> KResult { 336 | Ok(u32::from_le_bytes(self.read_bytes(4)?.try_into().unwrap())) 337 | } 338 | fn read_u8le(&self) -> KResult { 339 | Ok(u64::from_le_bytes(self.read_bytes(8)?.try_into().unwrap())) 340 | } 341 | 342 | fn read_f4be(&self) -> KResult { 343 | Ok(f32::from_be_bytes(self.read_bytes(4)?.try_into().unwrap())) 344 | } 345 | fn read_f8be(&self) -> KResult { 346 | Ok(f64::from_be_bytes(self.read_bytes(8)?.try_into().unwrap())) 347 | } 348 | fn read_f4le(&self) -> KResult { 349 | Ok(f32::from_le_bytes(self.read_bytes(4)?.try_into().unwrap())) 350 | } 351 | fn read_f8le(&self) -> KResult { 352 | Ok(f64::from_le_bytes(self.read_bytes(8)?.try_into().unwrap())) 353 | } 354 | 355 | fn get_state(&self) -> Ref; 356 | fn get_state_mut(&self) -> RefMut; 357 | 358 | fn align_to_byte(&self) -> KResult<()> { 359 | let mut inner = self.get_state_mut(); 360 | inner.bits = 0; 361 | inner.bits_left = 0; 362 | 363 | Ok(()) 364 | } 365 | 366 | fn read_bits_int_be(&self, n: usize) -> KResult { 367 | let mut res: u64 = 0; 368 | 369 | if n > 64 { 370 | return Err(KError::ReadBitsTooLarge { requested: n }); 371 | } 372 | 373 | let n: i32 = n.try_into().unwrap(); 374 | let bits_needed = n - self.get_state().bits_left; 375 | self.get_state_mut().bits_left = -bits_needed & 7; 376 | 377 | if bits_needed > 0 { 378 | let bytes_needed = ((bits_needed - 1) / 8) + 1; 379 | let buf = self.read_bytes(bytes_needed.try_into().unwrap())?; 380 | for b in buf { 381 | res = res << 8 | u64::from(b); 382 | } 383 | let mut inner = self.get_state_mut(); 384 | let new_bits = res; 385 | res >>= inner.bits_left; 386 | if bits_needed < 64 { 387 | res |= inner.bits << 
bits_needed; 388 | } 389 | inner.bits = new_bits; 390 | } else { 391 | res = self.get_state().bits >> -bits_needed; 392 | } 393 | 394 | let mut inner = self.get_state_mut(); 395 | let mask = (1u64 << inner.bits_left) - 1; 396 | inner.bits &= mask; 397 | 398 | Ok(res) 399 | } 400 | 401 | fn read_bits_int_le(&self, n: usize) -> KResult { 402 | let mut res: u64 = 0; 403 | 404 | if n > 64 { 405 | return Err(KError::ReadBitsTooLarge { requested: n }); 406 | } 407 | 408 | let n: i32 = n.try_into().unwrap(); 409 | let bits_needed = n - self.get_state().bits_left; 410 | 411 | if bits_needed > 0 { 412 | let bytes_needed = ((bits_needed - 1) / 8) + 1; 413 | let buf = self.read_bytes(bytes_needed.try_into().unwrap())?; 414 | for (i, &b) in buf.iter().enumerate() { 415 | res |= u64::from(b) << (i * 8); 416 | } 417 | let mut inner = self.get_state_mut(); 418 | let new_bits = if bits_needed < 64 { 419 | res >> bits_needed 420 | } else { 421 | 0 422 | }; 423 | res = res << inner.bits_left | inner.bits; 424 | inner.bits = new_bits; 425 | } else { 426 | let mut inner = self.get_state_mut(); 427 | res = inner.bits; 428 | inner.bits >>= n; 429 | } 430 | 431 | self.get_state_mut().bits_left = -bits_needed & 7; 432 | 433 | if n < 64 { 434 | let mask = (1u64 << n) - 1; 435 | res &= mask; 436 | } 437 | 438 | Ok(res) 439 | } 440 | 441 | fn read_bytes(&self, len: usize) -> KResult>; 442 | fn read_bytes_full(&self) -> KResult>; 443 | 444 | fn read_bytes_term( 445 | &self, 446 | term: u8, 447 | include: bool, 448 | consume: bool, 449 | eos_error: bool, 450 | ) -> KResult> { 451 | let mut buf = vec![]; 452 | loop { 453 | let c = match self.read_u1() { 454 | Ok(c) => c, 455 | Err(KError::Eof { .. 
}) => { 456 | if eos_error { 457 | return Err(KError::NoTerminatorFound); 458 | } 459 | return Ok(buf); 460 | } 461 | Err(e) => return Err(e), 462 | }; 463 | if c == term { 464 | if include { 465 | buf.push(c); 466 | } 467 | if !consume { 468 | self.get_state_mut().pos -= 1; 469 | } 470 | return Ok(buf); 471 | } 472 | buf.push(c); 473 | } 474 | } 475 | } 476 | 477 | #[derive(Default, Debug, Clone)] 478 | pub struct ReaderState { 479 | pos: usize, 480 | bits: u64, 481 | bits_left: i32, 482 | } 483 | 484 | trait ReadSeek: Read + Seek {} 485 | 486 | impl ReadSeek for T where T: Read + Seek {} 487 | 488 | impl fmt::Display for dyn ReadSeek { 489 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 490 | write!(f, "ReadSeek") 491 | } 492 | } 493 | 494 | impl fmt::Debug for dyn ReadSeek { 495 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> fmt::Result { 496 | write!(f, "ReadSeek") 497 | } 498 | } 499 | 500 | #[derive(Debug, Default, Clone)] 501 | pub struct BytesReader { 502 | state: RefCell, 503 | // share same "instance" of data beetween all clones 504 | // reposition before each read call 505 | buf: OptRc>>, 506 | file_size: u64, 507 | } 508 | 509 | impl From> for BytesReader { 510 | fn from(bytes: Vec) -> BytesReader { 511 | BytesReader::from_buffer(bytes) 512 | } 513 | } 514 | 515 | impl From<&[u8]> for BytesReader { 516 | fn from(slice: &[u8]) -> BytesReader { 517 | BytesReader::from_buffer(slice.to_vec()) 518 | } 519 | } 520 | 521 | impl BytesReader { 522 | pub fn open>(filename: T) -> KResult { 523 | let f = std::fs::File::open(filename)?; 524 | let file_size = f.metadata().unwrap().len(); 525 | let r: Box = Box::new(f); 526 | Ok(BytesReader { 527 | state: RefCell::new(ReaderState::default()), 528 | file_size, 529 | buf: OptRc::from(RefCell::new(r)), 530 | }) 531 | } 532 | 533 | fn from_buffer(bytes: Vec) -> Self { 534 | let file_size = bytes.len() as u64; 535 | let r: Box = Box::new(std::io::Cursor::new(bytes)); 536 | BytesReader { 537 | state: 
RefCell::new(ReaderState::default()), 538 | file_size, 539 | buf: OptRc::from(RefCell::new(r)), 540 | } 541 | } 542 | 543 | // sync stream pos with state.pos 544 | fn sync_pos(&self) -> KResult<()> { 545 | let cur_pos = self 546 | .buf 547 | .borrow_mut() 548 | .stream_position()?; 549 | if self.pos() != cur_pos as usize { 550 | self.buf 551 | .borrow_mut() 552 | .seek(SeekFrom::Start(self.pos() as u64))?; 553 | } 554 | Ok(()) 555 | } 556 | } 557 | 558 | impl KStream for BytesReader { 559 | fn clone(&self) -> Self { 560 | Clone::clone(self) 561 | } 562 | 563 | fn get_state(&self) -> Ref { 564 | self.state.borrow() 565 | } 566 | 567 | fn get_state_mut(&self) -> RefMut { 568 | self.state.borrow_mut() 569 | } 570 | 571 | fn size(&self) -> usize { 572 | self.file_size as usize 573 | } 574 | 575 | fn read_bytes(&self, len: usize) -> KResult> { 576 | // handle read beyond end of file 577 | let num_bytes_available = self.size().saturating_sub(self.pos()); 578 | if len > num_bytes_available { 579 | return Err(KError::Eof { 580 | requested: len, 581 | available: num_bytes_available, 582 | }); 583 | } 584 | self.sync_pos()?; 585 | // let state = self.state.borrow_mut(); 586 | // state.buf.resize(len, 0); 587 | let mut buf = vec![0; len]; 588 | self 589 | .buf 590 | .borrow_mut() 591 | .read_exact(&mut buf[..])?; 592 | self.get_state_mut().pos += len; 593 | Ok(buf) 594 | } 595 | 596 | fn read_bytes_full(&self) -> KResult> { 597 | self.sync_pos()?; 598 | //let state = self.state.borrow_mut(); 599 | let mut buf = Vec::new(); 600 | let readed = self 601 | .buf 602 | .borrow_mut() 603 | .read_to_end(&mut buf)?; 604 | self.get_state_mut().pos += readed; 605 | Ok(buf) 606 | } 607 | } 608 | 609 | /// Return a byte array that is sized to exclude all trailing instances of the 610 | /// padding character. 
611 | pub fn bytes_strip_right(bytes: &Vec, pad: u8) -> Vec { 612 | if let Some(last_non_pad_index) = bytes.iter().rposition(|&c| c != pad) { 613 | bytes[..=last_non_pad_index].to_vec() 614 | } else { 615 | vec![] 616 | } 617 | } 618 | 619 | /// Return a byte array that contains all bytes up until the 620 | /// termination byte. Can optionally include the termination byte as well. 621 | pub fn bytes_terminate(bytes: &Vec, term: u8, include_term: bool) -> Vec { 622 | if let Some(term_index) = bytes.iter().position(|&c| c == term) { 623 | &bytes[..term_index + if include_term { 1 } else { 0 }] 624 | } else { 625 | bytes 626 | }.to_vec() 627 | } 628 | 629 | pub fn bytes_to_str(bytes: &Vec, label: &str) -> KResult { 630 | if let Some(enc) = encoding_from_whatwg_label(label) { 631 | return Ok(enc 632 | .decode(bytes.as_slice(), DecoderTrap::Replace) 633 | .expect("this should never fail because we use DecoderTrap::Replace")); 634 | } 635 | 636 | if label.eq_ignore_ascii_case("cp437") || label.eq_ignore_ascii_case("ibm437") { 637 | use std::io::BufReader; 638 | let reader = BufReader::new(bytes.as_slice()); 639 | let mut buffer = reader.bytes(); 640 | let mut r = cp437::Reader::new(&mut buffer); 641 | return Ok(r.consume(bytes.len())); 642 | } 643 | 644 | Err(KError::UnknownEncoding { 645 | name: label.to_string(), 646 | }) 647 | } 648 | 649 | pub fn process_xor_one(bytes: &Vec, key: u8) -> Vec { 650 | let mut res = bytes.to_vec(); 651 | for i in &mut res { 652 | *i ^= key; 653 | } 654 | res 655 | } 656 | 657 | pub fn process_xor_many(bytes: &Vec, key: &[u8]) -> Vec { 658 | let mut res = bytes.to_vec(); 659 | let mut ki = 0; 660 | for i in &mut res { 661 | *i ^= key[ki]; 662 | ki += 1; 663 | if ki >= key.len() { 664 | ki = 0; 665 | } 666 | } 667 | res 668 | } 669 | 670 | pub fn process_rotate_left(bytes: &Vec, amount: u8) -> Vec { 671 | let mut res = bytes.to_vec(); 672 | for i in &mut res { 673 | *i = i.rotate_left(amount.into()); 674 | } 675 | res 676 | } 677 | 678 | 
pub fn process_zlib(bytes: &Vec) -> Result, String> { 679 | let mut dec = ZlibDecoder::new(bytes.as_slice()); 680 | let mut dec_bytes = Vec::new(); 681 | dec.read_to_end(&mut dec_bytes).map_err(|e| e.to_string())?; 682 | Ok(dec_bytes) 683 | } 684 | 685 | pub fn reverse_string>(s: S) -> KResult { 686 | Ok(s.as_ref().graphemes(true).rev().collect()) 687 | } 688 | 689 | pub fn modulo(a: i64, b: i64) -> i64 { 690 | a.rem_euclid(b) 691 | } 692 | 693 | #[cfg(test)] 694 | mod tests { 695 | use super::*; 696 | use std::io::Write; 697 | use tempfile::tempdir; 698 | 699 | #[test] 700 | fn basic_strip_right() { 701 | let b = vec![1, 2, 3, 4, 5, 5, 5, 5]; 702 | let c = bytes_strip_right(&b, 5); 703 | 704 | assert_eq!([1, 2, 3, 4], c[..]); 705 | } 706 | 707 | #[test] 708 | fn basic_read_bytes() { 709 | let b = vec![1, 2, 3, 4, 5, 6, 7, 8]; 710 | let reader = BytesReader::from(b); 711 | 712 | assert_eq!(reader.read_bytes(4).unwrap()[..], [1, 2, 3, 4]); 713 | assert_eq!(reader.read_bytes(3).unwrap()[..], [5, 6, 7]); 714 | assert_eq!( 715 | reader.read_bytes(4).unwrap_err(), 716 | KError::Eof { 717 | requested: 4, 718 | available: 1 719 | } 720 | ); 721 | assert_eq!(reader.read_bytes(1).unwrap()[..], [8]); 722 | } 723 | 724 | #[test] 725 | fn read_bits_single() { 726 | let b = vec![0x80]; 727 | let reader = BytesReader::from(b); 728 | 729 | assert_eq!(reader.read_bits_int_be(1).unwrap(), 1); 730 | } 731 | 732 | #[test] 733 | fn read_bits_multiple() { 734 | // 0xA0 735 | let b = vec![0b10100000]; 736 | let reader = BytesReader::from(b); 737 | 738 | assert_eq!(reader.read_bits_int_be(1).unwrap(), 1); 739 | assert_eq!(reader.read_bits_int_be(1).unwrap(), 0); 740 | assert_eq!(reader.read_bits_int_be(1).unwrap(), 1); 741 | } 742 | 743 | #[test] 744 | fn read_bits_large() { 745 | let b = vec![0b10100000]; 746 | let reader = BytesReader::from(b); 747 | 748 | assert_eq!(reader.read_bits_int_be(3).unwrap(), 5); 749 | } 750 | 751 | #[test] 752 | fn read_bits_span() { 753 | let b = 
vec![0x01, 0x80]; 754 | let reader = BytesReader::from(b); 755 | 756 | assert_eq!(reader.read_bits_int_be(9).unwrap(), 3); 757 | } 758 | 759 | #[test] 760 | fn read_bits_too_large() { 761 | let b: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; 762 | let reader = BytesReader::from(b); 763 | 764 | assert_eq!( 765 | reader.read_bits_int_be(65).unwrap_err(), 766 | KError::ReadBitsTooLarge { requested: 65 } 767 | ) 768 | } 769 | 770 | #[test] 771 | fn read_bytes_term() { 772 | let b = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 773 | let reader = BytesReader::from(b); 774 | 775 | assert_eq!( 776 | reader.read_bytes_term(3, false, false, false).unwrap()[..], 777 | [1, 2] 778 | ); 779 | assert_eq!( 780 | reader.read_bytes_term(3, true, false, true).unwrap()[..], 781 | [3] 782 | ); 783 | assert_eq!( 784 | reader.read_bytes_term(3, false, true, true).unwrap()[..], 785 | [] 786 | ); 787 | assert_eq!( 788 | reader.read_bytes_term(5, true, true, true).unwrap()[..], 789 | [4, 5] 790 | ); 791 | assert_eq!( 792 | reader.read_bytes_term(8, false, false, true).unwrap()[..], 793 | [6, 7] 794 | ); 795 | assert_eq!( 796 | reader.read_bytes_term(11, false, true, true).unwrap_err(), 797 | KError::NoTerminatorFound 798 | ); 799 | // restore position 800 | reader.seek(7).unwrap(); 801 | assert_eq!( 802 | reader.read_bytes_term(9, true, true, false).unwrap()[..], 803 | [8, 9] 804 | ); 805 | assert_eq!( 806 | reader.read_bytes_term(10, true, false, false).unwrap()[..], 807 | [10] 808 | ); 809 | } 810 | 811 | #[test] 812 | fn process_xor_one_test() { 813 | let b = vec![0x66]; 814 | let reader = BytesReader::from(b); 815 | let res = process_xor_one(&reader.read_bytes(1).unwrap(), 3); 816 | assert_eq!(0x65, res[0]); 817 | } 818 | 819 | #[test] 820 | fn process_xor_many_test() { 821 | let b = vec![0x66, 0x6F]; 822 | let reader = BytesReader::from(b); 823 | let key: Vec = vec![3, 3]; 824 | let res = process_xor_many(&reader.read_bytes(2).unwrap(), &key); 825 | assert_eq!(vec![0x65, 0x6C], res); 826 | } 827 | 
828 | #[test] 829 | fn process_rotate_left_test() { 830 | let b = vec![0x09, 0xAC]; 831 | let reader = BytesReader::from(b); 832 | let res = process_rotate_left(&reader.read_bytes(2).unwrap(), 3); 833 | let expected: Vec = vec![0x48, 0x65]; 834 | assert_eq!(expected, res); 835 | } 836 | 837 | #[test] 838 | fn basic_seek() { 839 | let b = vec![1, 2, 3, 4, 5, 6, 7, 8]; 840 | let reader = BytesReader::from(b); 841 | 842 | assert_eq!(reader.read_bytes(4).unwrap()[..], [1, 2, 3, 4]); 843 | let pos = reader.pos(); 844 | reader.seek(1).unwrap(); 845 | assert_eq!(reader.read_bytes(4).unwrap()[..], [2, 3, 4, 5]); 846 | reader.seek(pos).unwrap(); 847 | assert_eq!(reader.read_bytes(4).unwrap()[..], [5, 6, 7, 8]); 848 | reader.seek(9).unwrap(); 849 | } 850 | 851 | fn dump_and_open(bytes: &[u8]) -> BytesReader { 852 | let tmp_dir = tempdir().unwrap(); 853 | let file_path = tmp_dir.path().join("test.txt"); 854 | { 855 | let mut tmp_file = std::fs::File::create(file_path.clone()).unwrap(); 856 | tmp_file.write_all(bytes).unwrap(); 857 | } 858 | BytesReader::open(file_path).unwrap() 859 | } 860 | 861 | #[test] 862 | fn basic_read_bytes_file() { 863 | let reader = dump_and_open(&[1, 2, 3, 4, 5, 6, 7, 8]); 864 | 865 | assert_eq!(reader.read_bytes(4).unwrap()[..], [1, 2, 3, 4]); 866 | assert_eq!(reader.read_bytes(3).unwrap()[..], [5, 6, 7]); 867 | assert_eq!( 868 | reader.read_bytes(4).unwrap_err(), 869 | KError::Eof { 870 | requested: 4, 871 | available: 1 872 | } 873 | ); 874 | assert_eq!(reader.read_bytes(1).unwrap()[..], [8]); 875 | } 876 | 877 | #[test] 878 | fn basic_seek_file() { 879 | let reader = dump_and_open(&[1, 2, 3, 4, 5, 6, 7, 8]); 880 | 881 | assert_eq!(reader.read_bytes(4).unwrap()[..], [1, 2, 3, 4]); 882 | let pos = reader.pos(); 883 | reader.seek(1).unwrap(); 884 | assert_eq!(reader.read_bytes(4).unwrap()[..], [2, 3, 4, 5]); 885 | reader.seek(pos).unwrap(); 886 | assert_eq!(reader.read_bytes(4).unwrap()[..], [5, 6, 7, 8]); 887 | reader.seek(9).unwrap(); 888 | } 889 
| } 890 | --------------------------------------------------------------------------------