├── .github └── workflows │ └── rust.yml ├── .gitignore ├── .rustfmt.toml ├── Cargo.toml ├── LICENSE ├── README.md ├── gif.hxt ├── hext ├── Cargo.toml ├── src │ ├── error.rs │ └── lib.rs └── tests │ ├── everything.correct │ └── everything.hxt ├── hxt ├── Cargo.toml ├── README.md └── src │ └── main.rs └── test.gif /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - name: Install minimal stable 21 | uses: actions-rs/toolchain@v1 22 | with: 23 | profile: minimal 24 | toolchain: stable 25 | 26 | - uses: Swatinem/rust-cache@v1 27 | 28 | - name: Build 29 | run: cargo build 30 | - name: Run tests 31 | run: cargo test 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/target 2 | **/*.rs.bk 3 | .vscode 4 | **/Cargo.lock 5 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | hard_tabs = true 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "hext", 4 | "hxt" 5 | ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Genevieve 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright 
notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 8 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 9 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 10 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 11 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 12 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 13 | PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hext 2 | Do you find yourself exploring file formats, such as GIF, and finding yourself 3 | annoyed with hex editors? Do you find yourself wanting to craft 802.11 packets, 4 | but you know you'll never be able to keep all the fields straight in your head? 5 | Write those packets in hext! hext (`*.hxt`) is a file format for writing small 6 | (or large, if you dare) binary files with comments. You can switch between hex, 7 | binary, and decimal and even stick some string literals in there. 8 | 9 | The CLI tool is called `hxt` and it can be found [here](https://crates.io/crates/hxt). 10 | To install it, run 11 | ``` 12 | cargo install hxt 13 | ``` 14 | 15 | This is what a small GIF looks like: 16 | ``` 17 | ~little-endian msb0 18 | "GIF89a" # File header and version string 19 | 20 | # Logical Screen Descriptor 21 | u16=4 u16=4 # Canvas width/height 22 | .1 # Global Color Table Flag 23 | .000 # Color Resolution 24 | .0 # Reserved in 87a 25 | .000 # Number of colors. 
2^(this_value + 1) colors 26 | 00 # Background Color Index 27 | 00 # Pixel Aspect Ratio 28 | 29 | # Global Color Table 30 | =0 =0 =0 # Black 31 | =255 =255 =255 # White 32 | 33 | # Image Descriptor 34 | "," # Image Separator 35 | u16=0 u16=0 # Image offset left/top 36 | u16=4 u16=4 # Image width/height 37 | .1 # Local Color Table Flag 38 | .0 # Interlaced Flag 39 | .000 # Reserved in 87a 40 | .000 # Size of local color table 41 | 42 | # Local Color Table 43 | =0 =0 =0 # Black 44 | =128 =0 =255 # Purple 45 | 46 | # Image Data 47 | 02 # LZW Minimum Code Size 48 | 05 841D817A50 00 # Data sub-block 49 | 50 | # Graphic Control Extension 51 | "!" F9 # Extension Introducer and Graphic Control Label 52 | =4 # Extension data block size 53 | .000 # Reserved 54 | .010 # Disposal Method (010 is restore to background color) 55 | .0 # User Input Flag 56 | .0 # Transparent color flag 57 | 58 | u16=50 # Delay (1/100ths of a second) 59 | 00 # Transparent Color Index 60 | 00 # Block terminator 61 | 62 | # 2nd Image Descriptor 63 | "," u16=0 u16=0 u16=4 u16=4 .00000000 64 | 65 | # 2nd Image's Data (same as the first) 66 | 02 05 841D817A50 00 67 | 68 | ";" # GIF Terminator 69 | ``` 70 | 71 | I really do mean *small*. Here it is: 72 | ![a very small gif](test.gif) -------------------------------------------------------------------------------- /gif.hxt: -------------------------------------------------------------------------------- 1 | ~little-endian msb0 2 | "GIF89a" # File header and version string 3 | 4 | # Logical Screen Descriptor 5 | u16=4 u16=4 # Canvas width/height 6 | .1 # Global Color Table Flag 7 | .000 # Color Resolution 8 | .0 # Reserved in 87a 9 | .000 # Number of colors. 
2^(this_value + 1) colors 10 | 00 # Background Color Index 11 | 00 # Pixel Aspect Ratio 12 | 13 | # Global Color Table 14 | =0 =0 =0 # Black 15 | =255 =255 =255 # White 16 | 17 | # Image Descriptor 18 | "," # Image Separator 19 | u16=0 u16=0 # Image offset left/top 20 | u16=4 u16=4 # Image width/height 21 | .1 # Local Color Table Flag 22 | .0 # Interlaced Flag 23 | .000 # Reserved in 87a 24 | .000 # Size of local color table 25 | 26 | # Local Color Table 27 | =0 =0 =0 # Black 28 | =128 =0 =255 # Purple 29 | 30 | # Image Data 31 | 02 # LZW Minimum Code Size 32 | 05 841D817A50 00 # Data sub-block 33 | 34 | # Graphic Control Extension 35 | "!" F9 # Extension Introducer and Graphic Control Label 36 | =4 # Extension data block size 37 | .000 # Reserved 38 | .010 # Disposal Method (010 is restore to background color) 39 | .0 # User Input Flag 40 | .0 # Transparent color flag 41 | 42 | u16=50 # Delay (1/100ths of a second) 43 | 00 # Transparent Color Index 44 | 00 # Block terminator 45 | 46 | # 2nd Image Descriptor 47 | "," u16=0 u16=0 u16=4 u16=4 .00000000 48 | 49 | # 2nd Image's Data (same as the first) 50 | 02 05 841D817A50 00 51 | 52 | ";" # GIF Terminator -------------------------------------------------------------------------------- /hext/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hext" 3 | version = "0.4.2" 4 | authors = ["Genevieve "] 5 | edition = "2021" 6 | description = "A binary file markup language" 7 | repository = "https://github.com/gennyble/hext" 8 | license = "ISC" 9 | keywords = ["binary", "hexadecimal"] 10 | categories = ["encoding", "parsing"] 11 | 12 | [dependencies] 13 | bitvec = "1.0" 14 | -------------------------------------------------------------------------------- /hext/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error as ErrorTrait; 2 | use std::fmt; 3 | 4 | #[derive(Debug, PartialEq)] 5 | pub 
enum Error { 6 | NoHeader, 7 | InvalidHeader(InvalidHeaderKind), 8 | 9 | IncompleteOctet, 10 | 11 | InvalidDecimal(String), 12 | InvalidSignedDecimal(String), 13 | InvalidUnsignedDecimal(String), 14 | InvalidBitness(String), 15 | 16 | InvalidCharacter(char), 17 | 18 | InvalidEscape(char), 19 | UnclosedStringLiteral, 20 | 21 | GarbageCharacterInBitstream, 22 | 23 | UnalignedBits, 24 | } 25 | 26 | impl ErrorTrait for Error { 27 | fn source(&self) -> Option<&(dyn ErrorTrait + 'static)> { 28 | None 29 | } 30 | } 31 | 32 | impl fmt::Display for Error { 33 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 34 | match self { 35 | Error::NoHeader => write!(f, "The file must start with a header"), 36 | Error::InvalidHeader(kind) => write!(f, "{}", kind), 37 | Error::InvalidCharacter(c) => write!(f, "'{}' is not valid base16", c), 38 | Error::InvalidEscape(c) => write!(f, "\\{} is not a valid escape code", c), 39 | Error::UnclosedStringLiteral => { 40 | write!( 41 | f, 42 | "The line or file ended in an unterminated string literal" 43 | ) 44 | } 45 | Error::IncompleteOctet => write!(f, "Octet was not complete"), 46 | Error::GarbageCharacterInBitstream => write!( 47 | f, 48 | "Periods to indicate binary data must be directly followed by that data" 49 | ), 50 | Error::UnalignedBits => write!(f, "Not enough bits to form an octet"), 51 | Error::InvalidDecimal(string) => write!(f, "'{}' is not valid decimal", string), 52 | Error::InvalidSignedDecimal(value) => { 53 | write!(f, "'{}' is not valid signed decimal", value) 54 | } 55 | Error::InvalidUnsignedDecimal(value) => { 56 | write!(f, "'{}' is not valid unsigned decimal", value) 57 | } 58 | Error::InvalidBitness(bitness) => write!( 59 | f, 60 | "'{}' is not a valid width. 
Valid widths are 8, 16, 32, and 64", 61 | bitness 62 | ), 63 | } 64 | } 65 | } 66 | 67 | #[derive(Debug, PartialEq)] 68 | pub enum InvalidHeaderKind { 69 | TwoBitOrder, 70 | TwoByteOrder, 71 | NoBitOrder, 72 | NoByteOrder, 73 | 74 | TwoNegativeKind, 75 | InvalidProperty(String), 76 | } 77 | 78 | impl fmt::Display for InvalidHeaderKind { 79 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 80 | match self { 81 | InvalidHeaderKind::TwoBitOrder => write!(f, "You may only specify the bit order once"), 82 | InvalidHeaderKind::TwoByteOrder => { 83 | write!(f, "You may only specify the byte order once") 84 | } 85 | InvalidHeaderKind::NoBitOrder => write!(f, "You must specify a bit order"), 86 | InvalidHeaderKind::NoByteOrder => write!(f, "You must specify a byte order"), 87 | 88 | InvalidHeaderKind::TwoNegativeKind => write!(f, "You may only specify one negative"), 89 | InvalidHeaderKind::InvalidProperty(property) => { 90 | write!(f, "'{}' is not a valid file property", property) 91 | } 92 | } 93 | } 94 | } 95 | 96 | impl Into for InvalidHeaderKind { 97 | fn into(self) -> Error { 98 | Error::InvalidHeader(self) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /hext/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod error; 2 | 3 | pub use crate::error::Error; 4 | use bitvec::prelude::*; 5 | use error::InvalidHeaderKind; 6 | use std::iter::Peekable; 7 | use std::num::ParseIntError; 8 | use std::str::Chars; 9 | 10 | #[derive(Debug, PartialEq)] 11 | struct Header { 12 | bitorder: BitOrder, 13 | byteorder: ByteOrder, 14 | negativekind: NegativeKind, 15 | pad_bits: bool, 16 | } 17 | 18 | #[derive(Debug, PartialEq)] 19 | enum BitOrder { 20 | Msb0, 21 | Lsb0, 22 | } 23 | 24 | #[derive(Debug, PartialEq)] 25 | enum ByteOrder { 26 | LittleEndian, 27 | BigEndian, 28 | } 29 | 30 | #[derive(Debug, PartialEq)] 31 | enum NegativeKind { 32 | TwosCompliment, 33 | 
OnesCompliment, 34 | SignMagnitude, 35 | } 36 | 37 | pub struct Hext { 38 | parsed: Vec, 39 | } 40 | 41 | impl Hext { 42 | pub fn new() -> Self { 43 | Self { parsed: vec![] } 44 | } 45 | 46 | pub fn parse>(mut self, raw: S) -> Result, Error> { 47 | let mut chars = raw.as_ref().chars().peekable(); 48 | 49 | // Clear through any leading comments or blank lines 50 | Self::skip_nondata(&mut chars); 51 | 52 | let header: Header; 53 | loop { 54 | match chars.next() { 55 | Some('~') => { 56 | header = Self::parse_header(Self::consume_line(&mut chars))?; 57 | break; 58 | } 59 | Some(_) => return Err(Error::NoHeader), 60 | None => return Ok(self.parsed), //todo: is this an error? 61 | } 62 | } 63 | 64 | let mut bits: BitVec = BitVec::new(); 65 | let mut state = State::ReadingHex; 66 | 67 | loop { 68 | match state { 69 | State::ReadingHex => match chars.next_if(|&c| c != '.') { 70 | Some('#') => Self::skip_line(&mut chars), 71 | Some(c) if c.is_whitespace() => continue, 72 | 73 | Some(high) if high.is_ascii_hexdigit() => { 74 | match chars.next_if(|&c| c.is_ascii_hexdigit()) { 75 | Some(low) => self.parsed.push( 76 | ((high.to_digit(16).unwrap() * 16) + low.to_digit(16).unwrap()) 77 | as u8, 78 | ), 79 | None => return Err(Error::IncompleteOctet), 80 | } 81 | } 82 | 83 | Some('=') => state = State::ReadingUnsizedDecimal, 84 | Some('i') => state = State::ReadingSignedDecimal, 85 | Some('u') => state = State::ReadingUnsignedDecimal, 86 | Some('\"') => state = State::ReadingLiteral, 87 | Some(c) => return Err(Error::InvalidCharacter(c)), 88 | 89 | None => match chars.peek() { 90 | Some('.') => state = State::ReadingBinary, 91 | Some(_) => unreachable!(), 92 | None => return Ok(self.parsed), 93 | }, 94 | }, 95 | 96 | State::ReadingUnsizedDecimal => { 97 | let decimal = Self::consume_until_whitespace(&mut chars); 98 | state = State::ReadingHex; 99 | 100 | let is_signed = if let Some(sign) = decimal.chars().next() { 101 | sign == '-' || sign == '+' 102 | } else { 103 | // it was a 
lone =. Send the maybe-decimal string even 104 | // though we know it's empty 105 | return Err(Error::InvalidDecimal(decimal)); 106 | }; 107 | 108 | let mut bytes = if is_signed { 109 | Self::signed_smallest_le_bytes(&decimal) 110 | } else { 111 | Self::unsigned_smallest_le_bytes(&decimal) 112 | } 113 | .map_err(|_e| Error::InvalidDecimal(decimal))?; 114 | // The helpers return little-endian bytes; flip them for big-endian files 115 | if header.byteorder == ByteOrder::BigEndian { 116 | bytes.reverse(); 117 | } 118 | 119 | self.parsed.extend_from_slice(&bytes); 120 | } 121 | // Sized signed decimal, written iWIDTH=VALUE (the `i` is already consumed) 122 | State::ReadingSignedDecimal => { 123 | let signed_decimal_string = Self::consume_until_whitespace(&mut chars); 124 | state = State::ReadingHex; 125 | 126 | let splits = signed_decimal_string.split_once('='); 127 | match splits { 128 | Some((bitness, value)) => { 129 | let mut bytes = Self::signed_le_bytes(bitness, value)?; 130 | 131 | if header.byteorder == ByteOrder::BigEndian { 132 | bytes.reverse(); 133 | } 134 | 135 | self.parsed.extend_from_slice(&bytes); 136 | } 137 | None => return Err(Error::InvalidSignedDecimal(signed_decimal_string)), 138 | } 139 | } 140 | // Sized unsigned decimal, written uWIDTH=VALUE (the `u` is already consumed) 141 | State::ReadingUnsignedDecimal => { 142 | let unsigned_decimal_string = Self::consume_until_whitespace(&mut chars); 143 | state = State::ReadingHex; 144 | 145 | let splits = unsigned_decimal_string.split_once('='); 146 | match splits { 147 | Some((bitness, value)) => { 148 | let mut bytes = Self::unsigned_le_bytes(bitness, value)?; 149 | 150 | if header.byteorder == ByteOrder::BigEndian { 151 | bytes.reverse(); 152 | } 153 | 154 | self.parsed.extend_from_slice(&bytes); 155 | } 156 | None => return Err(Error::InvalidUnsignedDecimal(unsigned_decimal_string)), // no '=' between width and value 157 | } 158 | } 159 | 160 | State::ReadingLiteral => match chars.next() { 161 | Some('\"') => state = State::ReadingHex, 162 | Some('\\') => match chars.next() { 163 | Some(c) => match Self::escape(c) { 164 | Some(c) => self.parsed.push(c as u8), 165 | None => return Err(Error::InvalidEscape(c)), 166 | }, 167 | None => return
Err(Error::UnclosedStringLiteral), 168 | }, 169 | Some('\n') => return Err(Error::UnclosedStringLiteral), 170 | Some(c) => { 171 | let mut encode = vec![0; c.len_utf8()]; 172 | c.encode_utf8(&mut encode); 173 | self.parsed.extend_from_slice(&encode) 174 | } 175 | None => return Err(Error::UnclosedStringLiteral), 176 | }, 177 | 178 | State::ReadingBinary => match chars.next_if(|&c| c == '.') { 179 | Some('.') => loop { 180 | match chars 181 | .next_if(|&c| c == '1' || c == '0' || c == '#' || c.is_whitespace()) 182 | { 183 | Some('0') => bits.push(false), 184 | Some('1') => bits.push(true), 185 | Some('#') => Self::skip_line(&mut chars), 186 | Some(c) if c.is_whitespace() => { 187 | Self::skip_nondata(&mut chars); 188 | break; 189 | } 190 | Some(_) => return Err(Error::GarbageCharacterInBitstream), 191 | None => break, 192 | } 193 | }, 194 | Some(_) => unreachable!(), 195 | None => { 196 | if bits.len() % 8 != 0 { 197 | if !header.pad_bits { 198 | // Without `padbits` in the header a partial octet is a hard error 199 | return Err(Error::UnalignedBits); 200 | } else { 201 | while bits.len() % 8 != 0 { 202 | bits.insert(0, false); // zero-fill from the front of the run 203 | } 204 | } 205 | } 206 | 207 | self.parsed.extend_from_slice(bits.as_raw_slice()); 208 | bits = BitVec::new(); 209 | 210 | state = State::ReadingHex; 211 | } 212 | }, 213 | } 214 | } 215 | } 216 | // Parses the header text that follows the leading `~`; properties may be separated by any run of whitespace 217 | fn parse_header>(string: S) -> Result { 218 | let splits: Vec<&str> = string.as_ref().split_whitespace().collect(); 219 | 220 | let mut bitorder = None; 221 | let mut byteorder = None; 222 | let mut negativekind = None; 223 | let mut pad_bits = false; 224 | 225 | for split in splits { 226 | match split { 227 | "msb0" => { 228 | if bitorder.replace(BitOrder::Msb0).is_some() { 229 | return Err(InvalidHeaderKind::TwoBitOrder.into()); 230 | } 231 | } 232 | "lsb0" => { 233 | if bitorder.replace(BitOrder::Lsb0).is_some() { 234 | return Err(InvalidHeaderKind::TwoBitOrder.into()); 235 | } 236 | } 237 | "big-endian" => { 238 | if byteorder.replace(ByteOrder::BigEndian).is_some() {
239 | return Err(InvalidHeaderKind::TwoByteOrder.into()); 240 | } 241 | } 242 | "little-endian" => { 243 | if byteorder.replace(ByteOrder::LittleEndian).is_some() { 244 | return Err(InvalidHeaderKind::TwoByteOrder.into()); 245 | } 246 | } 247 | "twos-compliment" => { 248 | if negativekind.replace(NegativeKind::TwosCompliment).is_some() { // replace() returns the previous value: Some means a second negative kind 249 | return Err(InvalidHeaderKind::TwoNegativeKind.into()); 250 | } 251 | } 252 | "ones-compliment" => { 253 | if negativekind.replace(NegativeKind::OnesCompliment).is_some() { 254 | return Err(InvalidHeaderKind::TwoNegativeKind.into()); 255 | } 256 | } 257 | "sign-magnitude" => { 258 | if negativekind.replace(NegativeKind::SignMagnitude).is_some() { 259 | return Err(InvalidHeaderKind::TwoNegativeKind.into()); 260 | } 261 | } 262 | "padbits" => pad_bits = true, 263 | _ => return Err(InvalidHeaderKind::InvalidProperty(split.into()).into()), 264 | } 265 | } 266 | 267 | if bitorder.is_none() { 268 | return Err(InvalidHeaderKind::NoBitOrder.into()); 269 | } else if byteorder.is_none() { 270 | return Err(InvalidHeaderKind::NoByteOrder.into()); 271 | } else { 272 | Ok(Header { 273 | bitorder: bitorder.unwrap(), 274 | byteorder: byteorder.unwrap(), 275 | negativekind: negativekind.unwrap_or(NegativeKind::TwosCompliment), 276 | pad_bits, 277 | }) 278 | } 279 | } 280 | // Maps the character after a backslash in a string literal to the character it denotes; None for unknown escapes 281 | fn escape(c: char) -> Option { 282 | match c { 283 | '\"' => Some('\"'), 284 | '\\' => Some('\\'), 285 | 'n' => Some('\n'), 286 | 'r' => Some('\r'), 287 | 't' => Some('\t'), 288 | _ => None, 289 | } 290 | } 291 | // Skips whitespace and whole `#` comment lines until the next data character (or end of input) 292 | fn skip_nondata(mut chars: &mut Peekable) { 293 | loop { 294 | match chars.peek() { 295 | Some('#') => Self::skip_line(&mut chars), 296 | Some(c) if c.is_whitespace() => { 297 | chars.next(); 298 | } 299 | _ => return, 300 | }; 301 | } 302 | } 303 | // Discards characters up to and including the next newline 304 | fn skip_line(chars: &mut Peekable) { 305 | chars.find(|&c| c == '\n'); 306 | } 307 | // Collects characters up to the next newline; the newline itself is consumed but not returned 308 | fn consume_line(chars: &mut Peekable) -> String { 309 | chars.take_while(|&c| c != '\n').collect() 310 | } 311
| 312 | fn consume_until_whitespace(chars: &mut Peekable) -> String { 313 | chars.take_while(|&c| !c.is_whitespace()).collect() // take_while also consumes the whitespace character that ends the token 314 | } 315 | // Parses `value` as a signed integer of `bitness` bits (8/16/32/64) and returns its little-endian bytes 316 | fn signed_le_bytes>(bitness: S, value: S) -> Result, Error> { 317 | match bitness.as_ref() { 318 | "8" => Ok(i8::from_str_radix(value.as_ref(), 10) 319 | .map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))? 320 | .to_le_bytes() 321 | .to_vec()), 322 | "16" => Ok(i16::from_str_radix(value.as_ref(), 10) 323 | .map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))? 324 | .to_le_bytes() 325 | .to_vec()), 326 | "32" => Ok(i32::from_str_radix(value.as_ref(), 10) 327 | .map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))? 328 | .to_le_bytes() 329 | .to_vec()), 330 | "64" => Ok(i64::from_str_radix(value.as_ref(), 10) 331 | .map_err(|_| Error::InvalidSignedDecimal(value.as_ref().to_string()))? 332 | .to_le_bytes() 333 | .to_vec()), 334 | _ => return Err(Error::InvalidBitness(bitness.as_ref().to_string())), 335 | } 336 | } 337 | // Parses a signed decimal and returns it little-endian in the narrowest of i8/i16/i32/i64 that holds the value 338 | fn signed_smallest_le_bytes>(string: S) -> Result, ParseIntError> { 339 | let large: i64 = i64::from_str_radix(string.as_ref(), 10)?; 340 | 341 | Ok(if large > i32::MAX as i64 || large < i32::MIN as i64 { 342 | large.to_le_bytes().to_vec() 343 | } else if large > i16::MAX as i64 || large < i16::MIN as i64 { 344 | (large as i32).to_le_bytes().to_vec() 345 | } else if large > i8::MAX as i64 || large < i8::MIN as i64 { 346 | (large as i16).to_le_bytes().to_vec() 347 | } else { 348 | (large as i8).to_le_bytes().to_vec() 349 | }) 350 | } 351 | // Parses `value` as an unsigned integer of `bitness` bits (8/16/32/64) and returns its little-endian bytes 352 | fn unsigned_le_bytes>(bitness: S, value: S) -> Result, Error> { 353 | match bitness.as_ref() { 354 | "8" => Ok(u8::from_str_radix(value.as_ref(), 10) 355 | .map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))? 356 | .to_le_bytes() 357 | .to_vec()), 358 | "16" => Ok(u16::from_str_radix(value.as_ref(), 10) 359 | .map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))?
360 | .to_le_bytes() 361 | .to_vec()), 362 | "32" => Ok(u32::from_str_radix(value.as_ref(), 10) 363 | .map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))? 364 | .to_le_bytes() 365 | .to_vec()), 366 | "64" => Ok(u64::from_str_radix(value.as_ref(), 10) 367 | .map_err(|_| Error::InvalidUnsignedDecimal(value.as_ref().to_string()))? 368 | .to_le_bytes() 369 | .to_vec()), 370 | _ => return Err(Error::InvalidBitness(bitness.as_ref().to_string())), 371 | } 372 | } 373 | // Parses an unsigned decimal and returns it little-endian in the narrowest of u8/u16/u32/u64 that holds the value 374 | fn unsigned_smallest_le_bytes>(string: S) -> Result, ParseIntError> { 375 | let large: u64 = u64::from_str_radix(string.as_ref(), 10)?; 376 | 377 | Ok(if large > u32::MAX as u64 { 378 | large.to_le_bytes().to_vec() 379 | } else if large > u16::MAX as u64 { 380 | (large as u32).to_le_bytes().to_vec() 381 | } else if large > u8::MAX as u64 { 382 | (large as u16).to_le_bytes().to_vec() 383 | } else { 384 | (large as u8).to_le_bytes().to_vec() 385 | }) 386 | } 387 | } 388 | // The kind of token `Hext::parse` is currently reading 389 | enum State { 390 | ReadingHex, 391 | ReadingUnsizedDecimal, 392 | ReadingSignedDecimal, 393 | ReadingUnsignedDecimal, 394 | ReadingBinary, 395 | ReadingLiteral, 396 | } 397 | 398 | #[cfg(test)] 399 | mod test { 400 | use super::*; 401 | 402 | #[test] 403 | fn pares_header_success() { 404 | // Recognizes the keywords...
405 | assert_eq!( 406 | Hext::parse_header("msb0 big-endian").unwrap(), 407 | Header { 408 | byteorder: ByteOrder::BigEndian, 409 | bitorder: crate::BitOrder::Msb0, 410 | negativekind: NegativeKind::TwosCompliment, 411 | pad_bits: false 412 | } 413 | ); 414 | 415 | assert_eq!( 416 | Hext::parse_header("lsb0 little-endian").unwrap(), 417 | Header { 418 | byteorder: ByteOrder::LittleEndian, 419 | bitorder: crate::BitOrder::Lsb0, 420 | negativekind: NegativeKind::TwosCompliment, 421 | pad_bits: false 422 | } 423 | ); 424 | 425 | // ...In either order 426 | assert_eq!( 427 | Hext::parse_header("big-endian lsb0").unwrap(), 428 | Header { 429 | byteorder: ByteOrder::BigEndian, 430 | bitorder: crate::BitOrder::Lsb0, 431 | negativekind: NegativeKind::TwosCompliment, 432 | pad_bits: false 433 | } 434 | ); 435 | } 436 | 437 | #[test] 438 | fn parse_header_fail_twobits() { 439 | assert_eq!( 440 | Hext::parse_header("lsb0 msb0"), 441 | Err(InvalidHeaderKind::TwoBitOrder.into()) 442 | ) 443 | } 444 | 445 | #[test] 446 | fn parse_header_fail_twobytes() { 447 | assert_eq!( 448 | Hext::parse_header("little-endian big-endian"), 449 | Err(InvalidHeaderKind::TwoByteOrder.into()) 450 | ) 451 | } 452 | 453 | #[test] 454 | fn parse_header_fail_nobits() { 455 | assert_eq!( 456 | Hext::parse_header("big-endian"), 457 | Err(InvalidHeaderKind::NoBitOrder.into()) 458 | ) 459 | } 460 | 461 | #[test] 462 | fn parse_header_fail_nobytes() { 463 | assert_eq!( 464 | Hext::parse_header("msb0"), 465 | Err(InvalidHeaderKind::NoByteOrder.into()) 466 | ) 467 | } 468 | 469 | #[test] 470 | fn parse_header_fail_invalidproperty() { 471 | assert_eq!( 472 | Hext::parse_header("lsb0 big-endian invalidproperty"), 473 | Err(InvalidHeaderKind::InvalidProperty("invalidproperty".into()).into()) 474 | ) 475 | } 476 | 477 | //## Bytes tests ## 478 | #[test] 479 | fn test_onebyte() { 480 | let test = "~little-endian msb0\n41"; 481 | let cmp = vec![0x41]; 482 | 483 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 
484 | } 485 | 486 | #[test] 487 | fn test_only_comment() { 488 | let test = "~little-endian msb0\n# Comment"; 489 | 490 | assert_eq!(Hext::new().parse(&test).unwrap(), vec![]); 491 | } 492 | 493 | #[test] 494 | fn test_1byte_comment() { 495 | let test = "~little-endian msb0\n41 #A"; 496 | let cmp = vec![0x41]; 497 | 498 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 499 | } 500 | 501 | #[test] 502 | fn test_byte_nospace_comment() { 503 | let test = "~little-endian msb0\n41#A"; 504 | let cmp = vec![0x41]; 505 | 506 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 507 | } 508 | 509 | #[test] 510 | fn test_2byte_multiline() { 511 | let test = "~little-endian msb0\n41\n42"; 512 | let cmp = vec![0x41, 0x42]; 513 | 514 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 515 | } 516 | 517 | #[test] 518 | fn test_2bytes_nospace() { 519 | let test = "~little-endian msb0\n4142"; 520 | let cmp = vec![0x41, 0x42]; 521 | 522 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 523 | } 524 | 525 | //## Bit Tests ## 526 | #[test] 527 | fn test_8bits() { 528 | let test = "~little-endian msb0\n.01000001"; 529 | let cmp = vec![0x41]; 530 | 531 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 532 | } 533 | 534 | #[test] 535 | fn test_8bits_hex10() { 536 | let test = "~little-endian msb0\n.01000001 10"; 537 | let cmp = vec![0x41, 0x10]; 538 | 539 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 540 | } 541 | 542 | #[test] 543 | fn test_8bit_comment() { 544 | let test = "~little-endian msb0\n.01000001 # A"; 545 | let cmp = vec![0x41]; 546 | 547 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp) 548 | } 549 | 550 | #[test] 551 | fn test_8bit_nospace_comment() { 552 | let test = "~little-endian msb0\n.01000001#A"; 553 | let cmp = vec![0x41]; 554 | 555 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 556 | } 557 | 558 | #[test] 559 | fn test_1bit() { 560 | let test = "~little-endian msb0 padbits\n.1"; 561 | let cmp = vec![0x01]; 562 | 563 | 
assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 564 | } 565 | 566 | #[test] 567 | fn test_8bits_halved_space() { 568 | let test_space = "~little-endian msb0\n.0100 .0010"; 569 | let cmp = vec![0x42]; 570 | 571 | assert_eq!(Hext::new().parse(&test_space).unwrap(), cmp); 572 | } 573 | 574 | #[test] 575 | fn test_8bits_halved_line() { 576 | let test_line = "~little-endian msb0\n.0100\n.0010"; 577 | let cmp = vec![0x42]; 578 | 579 | assert_eq!(Hext::new().parse(&test_line).unwrap(), cmp); 580 | } 581 | 582 | #[test] 583 | fn test_8bits_halved_line_comments() { 584 | let test_line_comments = "~little-endian msb0\n.0100#Half of capital letter\n.0010 # B"; 585 | let cmp = vec![0x42]; 586 | 587 | assert_eq!(Hext::new().parse(&test_line_comments).unwrap(), cmp); 588 | } 589 | 590 | #[test] 591 | fn test_1bit_then_byte() { 592 | let test = "~little-endian msb0 padbits\n.1 41"; 593 | let cmp = vec![0x01, 0x41]; 594 | 595 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 596 | } 597 | 598 | //## Literal Tests ## 599 | #[test] 600 | fn literal_multibyte() { 601 | let test = "~big-endian lsb0\n\"🥺\""; 602 | let cmp = vec![0xf0, 0x9f, 0xa5, 0xba]; 603 | 604 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 605 | } 606 | 607 | //## Decimal Tests ## 608 | #[test] 609 | fn decimal_unsized_u8() { 610 | let test = "~big-endian lsb0\n=200"; 611 | let cmp = vec![200]; 612 | 613 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 614 | } 615 | 616 | #[test] 617 | fn decimal_unsized_i8() { 618 | let test = "~big-endian lsb0\n=-127"; 619 | let cmp = (-127i8).to_be_bytes().to_vec(); 620 | 621 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 622 | } 623 | 624 | #[test] 625 | fn decimal_unsized_u32() { 626 | let test = "~little-endian lsb0\n=65536"; 627 | let cmp = 65536u32.to_le_bytes().to_vec(); 628 | 629 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 630 | } 631 | 632 | #[test] 633 | fn decimal_unsized_i32() { 634 | let test = "~little-endian lsb0\n=-40000"; 635 | 
let cmp = (-40000i32).to_le_bytes().to_vec(); 636 | 637 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 638 | } 639 | 640 | #[test] 641 | fn decimal_sized_u16() { 642 | let test = "~little-endian lsb0\nu16=65534"; 643 | let cmp = 65534u16.to_le_bytes().to_vec(); 644 | 645 | assert_eq!(Hext::new().parse(&test).unwrap(), cmp); 646 | } 647 | 648 | #[test] 649 | fn decimal_overflow_sized_u16() { 650 | let test = "~little-endian lsb0\nu16=65536"; 651 | 652 | assert_eq!( 653 | Hext::new().parse(&test).unwrap_err(), 654 | Error::InvalidUnsignedDecimal("65536".into()) 655 | ); 656 | } 657 | 658 | //## Everything ## 659 | #[test] 660 | fn everything() { 661 | let to_parse = std::fs::read_to_string("tests/everything.hxt").unwrap(); 662 | let cmp = std::fs::read_to_string("tests/everything.correct") 663 | .unwrap() 664 | .into_bytes(); 665 | 666 | assert_eq!(Hext::new().parse(&to_parse).unwrap(), cmp) 667 | } 668 | 669 | //## Failing Tests ## 670 | #[test] 671 | fn ftest_incompleteoctet() { 672 | let test = "~little-endian msb0\n4"; 673 | 674 | assert_eq!( 675 | Hext::new().parse(&test).unwrap_err(), 676 | Error::IncompleteOctet 677 | ); 678 | } 679 | 680 | #[test] 681 | fn ftest_invalidcharacter() { 682 | let test = "~little-endian msb0\nG"; 683 | 684 | assert_eq!( 685 | Hext::new().parse(&test).unwrap_err(), 686 | Error::InvalidCharacter('G') 687 | ); 688 | } 689 | 690 | #[test] 691 | fn ftest_unaligned_bit() { 692 | let test = "~little-endian msb0\n.1"; 693 | let cmp = Error::UnalignedBits; 694 | 695 | assert_eq!(Hext::new().parse(&test).unwrap_err(), cmp); 696 | } 697 | 698 | #[test] 699 | fn ftest_unaligned_bit_then_byte() { 700 | let test = "~little-endian msb0\n.1 41"; 701 | let cmp = Error::UnalignedBits; 702 | 703 | assert_eq!(Hext::new().parse(&test).unwrap_err(), cmp); 704 | } 705 | } 706 | -------------------------------------------------------------------------------- /hext/tests/everything.correct: 
-------------------------------------------------------------------------------- 1 | QWERTY 2 | AB 3 | ASDFQWERTY 4 | ASDF -------------------------------------------------------------------------------- /hext/tests/everything.hxt: -------------------------------------------------------------------------------- 1 | ~big-endian lsb0 2 | 3 | 515745525459 # Packed 'QWERTY' as hex 4 | 0D 0A # Separate CR/LF 5 | .01000001 # Binary A 6 | .0100 .0010 # Non-octet Binary B 7 | 0A # Lone byte LF 8 | 9 | # Literals 10 | "ASDF" # Quoted string 11 | "QWERTY\nASDF" # Quoted string with escapes 12 | -------------------------------------------------------------------------------- /hxt/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hxt" 3 | version = "0.1.2" 4 | authors = ["Genevieve "] 5 | edition = "2021" 6 | description = "A binary file markup language" 7 | repository = "https://github.com/gennyble/hext" 8 | license = "ISC" 9 | keywords = ["binary", "hexadecimal", "cli"] 10 | categories = ["command-line-utilities"] 11 | 12 | [dependencies] 13 | hext = { path = "../hext", version = "0.4.2" } 14 | getopts = "0.2" 15 | -------------------------------------------------------------------------------- /hxt/README.md: -------------------------------------------------------------------------------- 1 | # hxt 2 | `hxt` is the command-line tool that utilizes the `hext` crate. It acts like 3 | `cat`, meaning if you pass no file name, it reads input from stdin. If you pass 4 | multiple file names, it will process them in order. 5 | 6 | For a more complete readme, you should read the one for 7 | [`hext`](https://crates.io/crates/hext), the library 8 | that does all the work. 9 | 10 | You may specify an output file with the `-o` option. If no output file is 11 | specified, hxt will output to stdout.
12 | 13 | ``` 14 | Usage: hxt [options] FILES 15 | 16 | Options: 17 | -o, --output FILE output to a file 18 | -h, --help print this message and exit 19 | ``` 20 | -------------------------------------------------------------------------------- /hxt/src/main.rs: -------------------------------------------------------------------------------- 1 | use getopts::Options; 2 | use hext; 3 | use hext::Hext; 4 | use std::env; 5 | use std::fs; 6 | use std::fs::File; 7 | use std::io::{self, Read, Write}; 8 | 9 | fn print_usage(program: &str, opts: Options) { 10 | let brief = format!("Usage: {} [options] FILES", program); 11 | println!("{}", opts.usage(&brief)); 12 | } 13 | 14 | fn main() { 15 | let args: Vec = env::args().collect(); 16 | 17 | let mut opts = Options::new(); 18 | opts.optopt("o", "output", "output to a file", "FILE"); 19 | opts.optflag("h", "help", "print this message and exit"); 20 | 21 | // Get matches for all arguments passed, excluing the program name which is args[0] 22 | let matches = match opts.parse(&args[1..]) { 23 | Ok(m) => m, 24 | Err(f) => panic!("{}", f.to_string()), 25 | }; 26 | 27 | if matches.opt_present("h") { 28 | print_usage(&args[0], opts); 29 | return; 30 | } 31 | 32 | let mut outfile = if let Some(filename) = matches.opt_str("o") { 33 | match File::create(filename) { 34 | Ok(f) => Some(f), 35 | Err(e) => { 36 | eprintln!("hext: {}", e); 37 | std::process::exit(1); 38 | } 39 | } 40 | } else { 41 | None 42 | }; 43 | 44 | let files = matches.free.as_slice(); 45 | if files.len() == 0 { 46 | let mut raw = String::new(); 47 | io::stdin().read_to_string(&mut raw).unwrap(); 48 | 49 | do_hext(&raw, &mut outfile); 50 | } else { 51 | for file in files { 52 | let raw = match fs::read_to_string(file) { 53 | Ok(raw) => raw, 54 | Err(e) => { 55 | eprintln!("hext: {}", e); 56 | continue; 57 | } 58 | }; 59 | 60 | do_hext(&raw, &mut outfile); 61 | } 62 | } 63 | } 64 | 65 | fn do_hext(raw: &str, outfile: &mut Option) { 66 | match Hext::new().parse(raw) { 67 
| Ok(bytes) => match outfile.as_mut() { 68 | Some(f) => f.write_all(&bytes).unwrap(), 69 | None => io::stdout().write_all(&bytes).unwrap(), 70 | }, 71 | Err(e) => eprintln!("hext: {}", e), 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /test.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gennyble/hext/f607221fd25873f42c638c9419aabd75072b9c40/test.gif --------------------------------------------------------------------------------