├── LICENSE ├── README.md ├── archive.go ├── archive15.go ├── archive50.go ├── bit_reader.go ├── decode20.go ├── decode20_audio.go ├── decode20_lz.go ├── decode29.go ├── decode29_lz.go ├── decode29_ppm.go ├── decode50.go ├── decode_reader.go ├── decrypt_reader.go ├── filters.go ├── go.mod ├── huffman.go ├── ppm_model.go ├── reader.go ├── vm.go └── volume.go /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Nicholas Waples 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rardecode 2 | [![GoDoc](https://godoc.org/github.com/nwaples/rardecode?status.svg)](https://godoc.org/github.com/nwaples/rardecode) 3 | [![Go Report Card](https://goreportcard.com/badge/github.com/nwaples/rardecode/v2)](https://goreportcard.com/report/github.com/nwaples/rardecode/v2) 4 | 5 | A go package for reading RAR archives. 6 | -------------------------------------------------------------------------------- /archive.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "errors" 5 | "hash" 6 | ) 7 | 8 | const ( 9 | _ = iota 10 | decode20Ver 11 | decode29Ver 12 | decode50Ver 13 | decode70Ver 14 | 15 | archiveVersion15 = 0 16 | archiveVersion50 = 1 17 | ) 18 | 19 | var ( 20 | ErrCorruptBlockHeader = errors.New("rardecode: corrupt block header") 21 | ErrCorruptFileHeader = errors.New("rardecode: corrupt file header") 22 | ErrBadHeaderCRC = errors.New("rardecode: bad header crc") 23 | ErrUnknownDecoder = errors.New("rardecode: unknown decoder version") 24 | ErrDecoderOutOfData = errors.New("rardecode: decoder expected more data than is in packed file") 25 | ErrArchiveEncrypted = errors.New("rardecode: archive encrypted, password required") 26 | ErrArchivedFileEncrypted = errors.New("rardecode: archived files encrypted, password required") 27 | ) 28 | 29 | type readBuf []byte 30 | 31 | func (b *readBuf) byte() byte { 32 | v := (*b)[0] 33 | *b = (*b)[1:] 34 | return v 35 | } 36 | 37 | func (b *readBuf) uint16() uint16 { 38 | v := uint16((*b)[0]) | uint16((*b)[1])<<8 39 | *b = (*b)[2:] 40 | return v 41 | } 42 | 43 | func (b *readBuf) uint32() uint32 { 44 | v := uint32((*b)[0]) | uint32((*b)[1])<<8 | uint32((*b)[2])<<16 | uint32((*b)[3])<<24 45 | *b = (*b)[4:] 46 | return v 47 | } 48 | 49 | func (b *readBuf) 
uint64() uint64 { 50 | v := uint64((*b)[0]) | uint64((*b)[1])<<8 | uint64((*b)[2])<<16 | uint64((*b)[3])<<24 | 51 | uint64((*b)[4])<<32 | uint64((*b)[5])<<40 | uint64((*b)[6])<<48 | uint64((*b)[7])<<56 52 | *b = (*b)[8:] 53 | return v 54 | } 55 | 56 | func (b *readBuf) bytes(n int) []byte { 57 | v := (*b)[:n] 58 | *b = (*b)[n:] 59 | return v 60 | } 61 | 62 | func (b *readBuf) uvarint() uint64 { 63 | var x uint64 64 | var s uint 65 | for i, n := range *b { 66 | if n < 0x80 { 67 | *b = (*b)[i+1:] 68 | return x | uint64(n)<>8)) 89 | } 90 | p = append(p, salt...) 91 | 92 | hash := sha1.New() 93 | iv = make([]byte, 16) 94 | s := make([]byte, hash.Size()) 95 | b := s[:3] 96 | for i := 0; i < hashRounds; i++ { 97 | // ignore hash Write errors, should always succeed 98 | _, _ = hash.Write(p) 99 | b[0], b[1], b[2] = byte(i), byte(i>>8), byte(i>>16) 100 | _, _ = hash.Write(b) 101 | if i%(hashRounds/16) == 0 { 102 | s = hash.Sum(s[:0]) 103 | iv[i/(hashRounds/16)] = s[4*4+3] 104 | } 105 | } 106 | key = hash.Sum(s[:0]) 107 | key = key[:16] 108 | 109 | for k := key; len(k) >= 4; k = k[4:] { 110 | k[0], k[1], k[2], k[3] = k[3], k[2], k[1], k[0] 111 | } 112 | return key, iv 113 | } 114 | 115 | // parseDosTime converts a 32bit DOS time value to time.Time 116 | func parseDosTime(t uint32) time.Time { 117 | n := int(t) 118 | sec := n & 0x1f << 1 119 | min := n >> 5 & 0x3f 120 | hr := n >> 11 & 0x1f 121 | day := n >> 16 & 0x1f 122 | mon := time.Month(n >> 21 & 0x0f) 123 | yr := n>>25&0x7f + 1980 124 | return time.Date(yr, mon, day, hr, min, sec, 0, time.Local) 125 | } 126 | 127 | // decodeName decodes a non-unicode filename from a file header. 
// A buffer without a NUL byte is returned unchanged. Otherwise the bytes
// before the NUL are the single byte form of the name and the bytes after it
// are a compact description of the UTF-16 form: one byte giving the default
// high byte, then flag bytes carrying four 2 bit opcodes each (consumed from
// the most significant bits down) interleaved with their operands:
//
//	0: one operand byte, used as the code unit as is
//	1: one operand byte, combined with the default high byte
//	2: two operand bytes, a little endian code unit
//	3: a length byte; copies int(length&0x7f)+2 bytes of the single byte name
//	   starting at the current output position. If bit 0x80 of the length is
//	   set, a further operand byte is added to each copied byte (wrapping at
//	   8 bits) and the default high byte is applied.
//
// Decoding stops when as many code units as there are single byte name bytes
// have been produced, or when the encoded data runs out.
func decodeName(buf []byte) string {
	sep := bytes.IndexByte(buf, 0)
	if sep < 0 {
		return string(buf) // filename is UTF-8
	}

	narrow := buf[:sep]
	enc := buf[sep+1:]
	if len(enc) < 2 {
		return "" // invalid encoding
	}
	high := uint16(enc[0]) << 8
	flags := enc[1]
	enc = enc[2:]
	pending := 8 // opcode bits still unused in flags

	var units []uint16 // decoded characters are UTF-16
	for len(units) < len(narrow) && len(enc) > 0 {
		if pending == 0 {
			flags, enc = enc[0], enc[1:]
			pending = 8
			if len(enc) == 0 {
				break
			}
		}
		switch op := flags >> 6; op {
		case 0:
			units = append(units, uint16(enc[0]))
			enc = enc[1:]
		case 1:
			units = append(units, uint16(enc[0])|high)
			enc = enc[1:]
		case 2:
			// A truncated operand is left in place; the opcode is simply skipped.
			if len(enc) >= 2 {
				units = append(units, uint16(enc[0])|uint16(enc[1])<<8)
				enc = enc[2:]
			}
		case 3:
			n := enc[0]
			enc = enc[1:]
			run := narrow[len(units):]
			if l := int(n&0x7f) + 2; l < len(run) {
				run = run[:l]
			}
			if n&0x80 == 0 {
				for _, c := range run {
					units = append(units, uint16(c))
				}
			} else if len(enc) >= 1 {
				delta := enc[0]
				enc = enc[1:]
				for _, c := range run {
					units = append(units, uint16(c+delta)|high)
				}
			}
		}
		flags <<= 2
		pending -= 2
	}
	return string(utf16.Decode(units))
}

// readExtTimes reads and parses the optional extra time field from the file header.
188 | func readExtTimes(f *fileBlockHeader, b *readBuf) { 189 | if len(*b) < 2 { 190 | return // invalid, not enough data 191 | } 192 | flags := b.uint16() 193 | 194 | ts := []*time.Time{&f.ModificationTime, &f.CreationTime, &f.AccessTime} 195 | 196 | for i, t := range ts { 197 | n := flags >> uint((3-i)*4) 198 | if n&0x8 == 0 { 199 | continue 200 | } 201 | if i != 0 { // ModificationTime already read so skip 202 | if len(*b) < 4 { 203 | return // invalid, not enough data 204 | } 205 | *t = parseDosTime(b.uint32()) 206 | } 207 | if n&0x4 > 0 { 208 | *t = t.Add(time.Second) 209 | } 210 | n &= 0x3 211 | if n == 0 { 212 | continue 213 | } 214 | if len(*b) < int(n) { 215 | return // invalid, not enough data 216 | } 217 | // add extra time data in 100's of nanoseconds 218 | d := time.Duration(0) 219 | for j := 3 - n; j < n; j++ { 220 | d |= time.Duration(b.byte()) << (j * 8) 221 | } 222 | d *= 100 223 | *t = t.Add(d) 224 | } 225 | } 226 | 227 | func (a *archive15) getKeys(salt []byte) (key, iv []byte) { 228 | // check cache of keys 229 | for _, v := range a.keyCache { 230 | if bytes.Equal(v.salt[:], salt) { 231 | return v.key, v.iv 232 | } 233 | } 234 | key, iv = calcAes30Params(a.pass, salt) 235 | 236 | // save a copy in the cache 237 | copy(a.keyCache[1:], a.keyCache[:]) 238 | a.keyCache[0].salt = append([]byte(nil), salt...) 
// copy so byte slice can be reused 239 | a.keyCache[0].key = key 240 | a.keyCache[0].iv = iv 241 | 242 | return key, iv 243 | } 244 | 245 | func (a *archive15) parseFileHeader(h *blockHeader15) (*fileBlockHeader, error) { 246 | f := new(fileBlockHeader) 247 | 248 | f.first = h.flags&fileSplitBefore == 0 249 | f.last = h.flags&fileSplitAfter == 0 250 | 251 | f.Solid = h.flags&fileSolid > 0 252 | f.arcSolid = a.solid 253 | f.Encrypted = h.flags&fileEncrypted > 0 254 | f.HeaderEncrypted = a.encrypted 255 | f.IsDir = h.flags&fileWindowMask == fileWindowMask 256 | if !f.IsDir { 257 | f.winSize = 0x10000 << ((h.flags & fileWindowMask) >> 5) 258 | } 259 | 260 | b := h.data 261 | if len(b) < 21 { 262 | return nil, ErrCorruptFileHeader 263 | } 264 | 265 | f.PackedSize = h.dataSize 266 | f.UnPackedSize = int64(b.uint32()) 267 | f.HostOS = b.byte() + 1 268 | if f.HostOS > HostOSBeOS { 269 | f.HostOS = HostOSUnknown 270 | } 271 | f.sum = append([]byte(nil), b.bytes(4)...) 272 | 273 | f.ModificationTime = parseDosTime(b.uint32()) 274 | unpackver := b.byte() // decoder version 275 | method := b.byte() - 0x30 // decryption method 276 | namesize := int(b.uint16()) 277 | f.Attributes = int64(b.uint32()) 278 | if h.flags&fileLargeData > 0 { 279 | if len(b) < 8 { 280 | return nil, ErrCorruptFileHeader 281 | } 282 | _ = b.uint32() // already read large PackedSize in readBlockHeader 283 | f.UnPackedSize |= int64(b.uint32()) << 32 284 | f.UnKnownSize = f.UnPackedSize == -1 285 | } else if int32(f.UnPackedSize) == -1 { 286 | f.UnKnownSize = true 287 | f.UnPackedSize = -1 288 | } 289 | if len(b) < namesize { 290 | return nil, ErrCorruptFileHeader 291 | } 292 | name := b.bytes(namesize) 293 | if h.flags&fileUnicode == 0 { 294 | f.Name = string(name) 295 | } else { 296 | f.Name = decodeName(name) 297 | } 298 | // Rar 4.x uses '\' as file separator 299 | f.Name = strings.Replace(f.Name, "\\", "/", -1) 300 | 301 | if h.flags&fileVersion > 0 { 302 | // file version is stored as ';n' appended 
to file name 303 | i := strings.LastIndex(f.Name, ";") 304 | if i > 0 { 305 | j, err := strconv.Atoi(f.Name[i+1:]) 306 | if err == nil && j >= 0 { 307 | f.Version = j 308 | f.Name = f.Name[:i] 309 | } 310 | } 311 | } 312 | 313 | var salt []byte 314 | if h.flags&fileSalt > 0 { 315 | if len(b) < saltSize { 316 | return nil, ErrCorruptFileHeader 317 | } 318 | salt = append([]byte(nil), b.bytes(saltSize)...) 319 | } 320 | if h.flags&fileExtTime > 0 { 321 | readExtTimes(f, &b) 322 | } 323 | 324 | if !f.first { 325 | return f, nil 326 | } 327 | // fields only needed for first block in a file 328 | if h.flags&fileEncrypted > 0 && len(salt) == saltSize { 329 | f.genKeys = func() error { 330 | if a.pass == nil { 331 | return ErrArchivedFileEncrypted 332 | } 333 | f.key, f.iv = a.getKeys(salt) 334 | return nil 335 | } 336 | } 337 | f.hash = newLittleEndianCRC32 338 | if method != 0 { 339 | switch unpackver { 340 | case 15: 341 | return nil, ErrUnsupportedDecoder 342 | case 20, 26: 343 | f.decVer = decode20Ver 344 | case 29: 345 | f.decVer = decode29Ver 346 | default: 347 | return nil, ErrUnknownDecoder 348 | } 349 | } 350 | return f, nil 351 | } 352 | 353 | func (a *archive15) parseArcBlock(v *volume, h *blockHeader15) error { 354 | a.encrypted = h.flags&arcEncrypted > 0 355 | a.multi = h.flags&arcVolume > 0 356 | if v.num == 0 { 357 | v.old = h.flags&arcNewNaming == 0 358 | } 359 | a.solid = h.flags&arcSolid > 0 360 | if a.encrypted && a.pass == nil { 361 | return ErrArchiveEncrypted 362 | } 363 | return nil 364 | } 365 | 366 | // readBlockHeader returns the next block header in the archive. 367 | // It will return io.EOF if there were no bytes read. 
368 | func (a *archive15) readBlockHeader(r sliceReader) (*blockHeader15, error) { 369 | if a.encrypted { 370 | salt, err := r.readSlice(saltSize) 371 | if err != nil { 372 | return nil, err 373 | } 374 | key, iv := a.getKeys(salt) 375 | r = newAesSliceReader(r, key, iv) 376 | } 377 | var b readBuf 378 | var err error 379 | // peek to find the header size 380 | b, err = r.peek(7) 381 | if err != nil { 382 | if err == io.EOF && a.encrypted { 383 | err = io.ErrUnexpectedEOF 384 | } 385 | return nil, err 386 | } 387 | crc := b.uint16() 388 | h := new(blockHeader15) 389 | h.htype = b.byte() 390 | h.flags = b.uint16() 391 | size := int(b.uint16()) 392 | if h.htype == blockArc && h.flags&arcComment > 0 { 393 | // comment block embedded into archive block 394 | if size < 13 { 395 | return nil, ErrCorruptBlockHeader 396 | } 397 | size = 13 398 | } else if size < 7 { 399 | return nil, ErrCorruptBlockHeader 400 | } 401 | h.data, err = r.readSlice(size) 402 | if err != nil { 403 | if err == io.EOF { 404 | err = io.ErrUnexpectedEOF 405 | } 406 | return nil, err 407 | } 408 | hash := crc32.NewIEEE() 409 | if h.htype == blockComment { 410 | if size < 13 { 411 | return nil, ErrCorruptBlockHeader 412 | } 413 | _, _ = hash.Write(h.data[2:13]) 414 | } else { 415 | _, _ = hash.Write(h.data[2:]) 416 | } 417 | if crc != uint16(hash.Sum32()) { 418 | return nil, ErrBadHeaderCRC 419 | } 420 | h.data = h.data[7:] 421 | if h.flags&blockHasData > 0 { 422 | if len(h.data) < 4 { 423 | return nil, ErrCorruptBlockHeader 424 | } 425 | h.dataSize = int64(h.data.uint32()) 426 | } 427 | if (h.htype == blockService || h.htype == blockFile) && h.flags&fileLargeData > 0 { 428 | if len(h.data) < 25 { 429 | return nil, ErrCorruptBlockHeader 430 | } 431 | b := h.data[21:25] 432 | h.dataSize |= int64(b.uint32()) << 32 433 | } 434 | return h, nil 435 | } 436 | 437 | func (a *archive15) readArcHeader(v *volume) error { 438 | h, err := a.readBlockHeader(v) 439 | if err != nil { 440 | if err == io.EOF { 441 | 
err = io.ErrUnexpectedEOF 442 | } 443 | return err 444 | } 445 | if h.htype != blockArc { 446 | return ErrNoArchiveBlock 447 | } 448 | return a.parseArcBlock(v, h) 449 | } 450 | 451 | // next advances to the next file block in the archive 452 | func (a *archive15) next(v *volume) (*fileBlockHeader, error) { 453 | for { 454 | // could return an io.EOF here as 1.5 archives may not have an end block. 455 | h, err := a.readBlockHeader(v) 456 | if err != nil { 457 | if err != io.EOF { 458 | return nil, err 459 | } 460 | // if reached end of file without an end block try to open next volume 461 | a.encrypted = false // reset encryption when opening new volume file 462 | err = v.next() 463 | if err != nil { 464 | // new volume doesnt exist, assume end of archive 465 | if os.IsNotExist(err) { 466 | return nil, io.EOF 467 | } 468 | return nil, err 469 | } 470 | err = a.readArcHeader(v) 471 | if err != nil { 472 | return nil, err 473 | } 474 | continue 475 | } 476 | switch h.htype { 477 | case blockFile: 478 | return a.parseFileHeader(h) 479 | case blockEnd: 480 | if h.flags&endArcNotLast == 0 || !a.multi { 481 | return nil, io.EOF 482 | } 483 | a.encrypted = false // reset encryption when opening new volume file 484 | err = v.next() 485 | if err != nil { 486 | return nil, err 487 | } 488 | err = a.readArcHeader(v) 489 | if err != nil { 490 | return nil, err 491 | } 492 | default: 493 | if h.dataSize > 0 { 494 | err = v.discard(h.dataSize) // skip over block data 495 | if err != nil { 496 | return nil, err 497 | } 498 | } 499 | } 500 | } 501 | } 502 | 503 | // newArchive15 creates a new fileBlockReader for a Version 1.5 archive 504 | func newArchive15(v *volume, password *string) (*archive15, error) { 505 | a := new(archive15) 506 | if password != nil { 507 | a.pass = utf16.Encode([]rune(*password)) // convert to UTF-16 508 | } 509 | return a, a.readArcHeader(v) 510 | } 511 | -------------------------------------------------------------------------------- /archive50.go: 
-------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "bytes" 5 | "crypto/hmac" 6 | "crypto/sha256" 7 | "errors" 8 | "hash" 9 | "hash/crc32" 10 | "io" 11 | "math" 12 | "math/bits" 13 | "time" 14 | ) 15 | 16 | const ( 17 | // block types 18 | block5Arc = 1 19 | block5File = 2 20 | // block5Service = 3 21 | block5Encrypt = 4 22 | block5End = 5 23 | 24 | // block flags 25 | block5HasExtra = 0x0001 26 | block5HasData = 0x0002 27 | block5DataNotFirst = 0x0008 28 | block5DataNotLast = 0x0010 29 | 30 | // end block flags 31 | endArc5NotLast = 0x0001 32 | 33 | // archive encryption block flags 34 | enc5CheckPresent = 0x0001 // password check data is present 35 | 36 | // main archive block flags 37 | arc5MultiVol = 0x0001 38 | arc5VolNum = 0x0002 39 | arc5Solid = 0x0004 40 | 41 | // file block flags 42 | file5IsDir = 0x0001 43 | file5HasUnixMtime = 0x0002 44 | file5HasCRC32 = 0x0004 45 | file5UnpSizeUnknown = 0x0008 46 | 47 | // file compression flags 48 | file5CompAlgorithm = 0x0000003F 49 | file5CompSolid = 0x00000040 50 | file5CompMethod = 0x00000380 51 | file5CompDictSize = 0x00007C00 52 | file5CompDictFract = 0x000F8000 53 | file5CompV5Compat = 0x00100000 54 | 55 | // file encryption record flags 56 | file5EncCheckPresent = 0x0001 // password check data is present 57 | file5EncUseMac = 0x0002 // use MAC instead of plain checksum 58 | 59 | // precision time flags 60 | file5ExtraTimeIsUnixTime = 0x01 // is unix time_t 61 | file5ExtraTimeHasMTime = 0x02 // has modification time 62 | file5ExtraTimeHasCTime = 0x04 // has creation time 63 | file5ExtraTimeHasATime = 0x08 // has access time 64 | file5ExtraTimeHasUnixNS = 0x10 // unix nanosecond time format 65 | 66 | cacheSize50 = 4 67 | maxPbkdf2Salt = 64 68 | pwCheckSize = 8 69 | maxKdfCount = 24 70 | 71 | maxDictSize = 0x1000000000 // maximum dictionary size 64GB 72 | ) 73 | 74 | var ( 75 | ErrBadPassword = errors.New("rardecode: incorrect password") 76 | 
ErrCorruptEncryptData = errors.New("rardecode: corrupt encryption data") 77 | ErrUnknownEncryptMethod = errors.New("rardecode: unknown encryption method") 78 | ErrPlatformIntSize = errors.New("rardecode: platform integer size too small") 79 | ErrDictionaryTooLarge = errors.New("rardecode: decode dictionary too large") 80 | ErrBadVolumeNumber = errors.New("rardecode: bad volume number") 81 | ErrNoArchiveBlock = errors.New("rardecode: missing archive block") 82 | ) 83 | 84 | type extra struct { 85 | ftype uint64 // field type 86 | data readBuf // field data 87 | } 88 | 89 | type blockHeader50 struct { 90 | htype uint64 // block type 91 | flags uint64 92 | data readBuf // block header data 93 | extra []extra // extra fields 94 | dataSize int64 // size of block data 95 | } 96 | 97 | // leHash32 wraps a hash.Hash32 to return the result of Sum in little 98 | // endian format. 99 | type leHash32 struct { 100 | hash.Hash32 101 | } 102 | 103 | func (h leHash32) Sum(b []byte) []byte { 104 | s := h.Sum32() 105 | return append(b, byte(s), byte(s>>8), byte(s>>16), byte(s>>24)) 106 | } 107 | 108 | func newLittleEndianCRC32() hash.Hash { 109 | return leHash32{crc32.NewIEEE()} 110 | } 111 | 112 | // archive50 implements fileBlockReader for RAR 5 file format archives 113 | type archive50 struct { 114 | pass []byte 115 | blockKey []byte // key used to encrypt blocks 116 | multi bool // archive is multi-volume 117 | solid bool // is a solid archive 118 | keyCache [cacheSize50]struct { // encryption key cache 119 | kdfCount int 120 | salt []byte 121 | keys [][]byte 122 | } 123 | } 124 | 125 | func (a *archive50) clone() fileBlockReader { 126 | na := new(archive50) 127 | *na = *a 128 | return na 129 | } 130 | 131 | // calcKeys50 calculates the keys used in RAR 5 archive processing. 132 | // The returned slice of byte slices contains 3 keys. 133 | // Key 0 is used for block or file decryption. 134 | // Key 1 is optionally used for file checksum calculation. 
135 | // Key 2 is optionally used for password checking. 136 | func calcKeys50(pass, salt []byte, kdfCount int) [][]byte { 137 | if len(salt) > maxPbkdf2Salt { 138 | salt = salt[:maxPbkdf2Salt] 139 | } 140 | keys := make([][]byte, 3) 141 | if len(keys) == 0 { 142 | return keys 143 | } 144 | 145 | prf := hmac.New(sha256.New, pass) 146 | _, _ = prf.Write(salt) 147 | _, _ = prf.Write([]byte{0, 0, 0, 1}) 148 | 149 | t := prf.Sum(nil) 150 | u := append([]byte(nil), t...) 151 | 152 | kdfCount-- 153 | 154 | for i, iter := range []int{kdfCount, 16, 16} { 155 | for iter > 0 { 156 | prf.Reset() 157 | _, _ = prf.Write(u) 158 | u = prf.Sum(u[:0]) 159 | for j := range u { 160 | t[j] ^= u[j] 161 | } 162 | iter-- 163 | } 164 | keys[i] = append([]byte(nil), t...) 165 | } 166 | 167 | pwcheck := keys[2] 168 | for i, v := range pwcheck[pwCheckSize:] { 169 | pwcheck[i&(pwCheckSize-1)] ^= v 170 | } 171 | pwcheck = pwcheck[:pwCheckSize] 172 | // add checksum to end of pwcheck 173 | sum := sha256.Sum256(pwcheck) 174 | pwcheck = append(pwcheck, sum[:4]...) 175 | keys[2] = pwcheck 176 | 177 | return keys 178 | } 179 | 180 | // getKeys returns the corresponding encryption keys for the given kdfcount and salt. 181 | // It will check the password if check is provided. 182 | func (a *archive50) getKeys(kdfCount int, salt, check []byte) ([][]byte, error) { 183 | var keys [][]byte 184 | 185 | if kdfCount > maxKdfCount { 186 | return nil, ErrCorruptEncryptData 187 | } 188 | kdfCount = 1 << uint(kdfCount) 189 | 190 | // check cache of keys for match 191 | for _, v := range a.keyCache { 192 | if kdfCount == v.kdfCount && bytes.Equal(salt, v.salt) { 193 | keys = v.keys 194 | break 195 | } 196 | } 197 | if keys == nil { 198 | // not found, calculate keys 199 | keys = calcKeys50(a.pass, salt, kdfCount) 200 | 201 | // store in cache 202 | copy(a.keyCache[1:], a.keyCache[:]) 203 | a.keyCache[0].kdfCount = kdfCount 204 | a.keyCache[0].salt = append([]byte(nil), salt...) 
205 | a.keyCache[0].keys = keys 206 | } 207 | 208 | // check password 209 | if check != nil && !bytes.Equal(check, keys[2]) { 210 | return nil, ErrBadPassword 211 | } 212 | return keys, nil 213 | } 214 | 215 | // parseFileEncryptionRecord processes the optional file encryption record from a file header. 216 | func (a *archive50) parseFileEncryptionRecord(b readBuf, f *fileBlockHeader) error { 217 | f.Encrypted = true 218 | if ver := b.uvarint(); ver != 0 { 219 | return ErrUnknownEncryptMethod 220 | } 221 | flags := b.uvarint() 222 | if len(b) < 33 { 223 | return ErrCorruptEncryptData 224 | } 225 | kdfCount := int(b.byte()) 226 | salt := append([]byte(nil), b.bytes(16)...) 227 | f.iv = append([]byte(nil), b.bytes(16)...) 228 | 229 | var check []byte 230 | if flags&file5EncCheckPresent > 0 { 231 | if len(b) < 12 { 232 | return ErrCorruptEncryptData 233 | } 234 | check = append([]byte(nil), b.bytes(12)...) 235 | } 236 | useMac := flags&file5EncUseMac > 0 237 | // only need to generate keys for first block or 238 | // last block if it has an optional hash key 239 | if !(f.first || (f.last && useMac)) { 240 | return nil 241 | } 242 | f.genKeys = func() error { 243 | if a.pass == nil { 244 | return ErrArchivedFileEncrypted 245 | } 246 | keys, err := a.getKeys(kdfCount, salt, check) 247 | if err != nil { 248 | return err 249 | } 250 | 251 | f.key = keys[0] 252 | if useMac { 253 | f.hashKey = keys[1] 254 | } 255 | return nil 256 | } 257 | return nil 258 | } 259 | 260 | func readWinFiletime(b *readBuf) (time.Time, error) { 261 | if len(*b) < 8 { 262 | return time.Time{}, ErrCorruptFileHeader 263 | } 264 | // 100-nanosecond intervals since January 1, 1601 265 | t := b.uint64() - 116444736000000000 266 | t *= 100 267 | sec, nsec := bits.Div64(0, t, uint64(time.Second)) 268 | return time.Unix(int64(sec), int64(nsec)), nil 269 | } 270 | 271 | func readUnixTime(b *readBuf) (time.Time, error) { 272 | if len(*b) < 4 { 273 | return time.Time{}, ErrCorruptFileHeader 274 | } 275 | 
return time.Unix(int64(b.uint32()), 0), nil 276 | } 277 | 278 | func readUnixNanoseconds(b *readBuf) (time.Duration, error) { 279 | if len(*b) < 4 { 280 | return 0, ErrCorruptFileHeader 281 | } 282 | d := time.Duration(b.uint32() & 0x3fffffff) 283 | if d >= time.Second { 284 | return 0, ErrCorruptFileHeader 285 | } 286 | return d, nil 287 | } 288 | 289 | // parseFilePrecisionTimeRecord processes the optional high precision time record from a file header. 290 | func (a *archive50) parseFilePrecisionTimeRecord(b *readBuf, f *fileBlockHeader) error { 291 | var err error 292 | flags := b.uvarint() 293 | isUnixTime := flags&file5ExtraTimeIsUnixTime > 0 294 | if flags&file5ExtraTimeHasMTime > 0 { 295 | if isUnixTime { 296 | f.ModificationTime, err = readUnixTime(b) 297 | } else { 298 | f.ModificationTime, err = readWinFiletime(b) 299 | } 300 | if err != nil { 301 | return err 302 | } 303 | } 304 | if flags&file5ExtraTimeHasCTime > 0 { 305 | if isUnixTime { 306 | f.CreationTime, err = readUnixTime(b) 307 | } else { 308 | f.CreationTime, err = readWinFiletime(b) 309 | } 310 | if err != nil { 311 | return err 312 | } 313 | } 314 | if flags&file5ExtraTimeHasATime > 0 { 315 | if isUnixTime { 316 | f.AccessTime, err = readUnixTime(b) 317 | } else { 318 | f.AccessTime, err = readWinFiletime(b) 319 | } 320 | if err != nil { 321 | return err 322 | } 323 | } 324 | if isUnixTime && flags&file5ExtraTimeHasUnixNS > 0 { 325 | if flags&file5ExtraTimeHasMTime > 0 { 326 | ns, err := readUnixNanoseconds(b) 327 | if err != nil { 328 | return err 329 | } 330 | f.ModificationTime = f.ModificationTime.Add(ns) 331 | } 332 | if flags&file5ExtraTimeHasCTime > 0 { 333 | ns, err := readUnixNanoseconds(b) 334 | if err != nil { 335 | return err 336 | } 337 | f.CreationTime = f.CreationTime.Add(ns) 338 | } 339 | if flags&file5ExtraTimeHasATime > 0 { 340 | ns, err := readUnixNanoseconds(b) 341 | if err != nil { 342 | return err 343 | } 344 | f.AccessTime = f.AccessTime.Add(ns) 345 | } 346 | } 347 | 
return nil 348 | } 349 | 350 | func (a *archive50) parseFileHeader(h *blockHeader50) (*fileBlockHeader, error) { 351 | f := new(fileBlockHeader) 352 | 353 | f.HeaderEncrypted = a.blockKey != nil 354 | f.first = h.flags&block5DataNotFirst == 0 355 | f.last = h.flags&block5DataNotLast == 0 356 | 357 | flags := h.data.uvarint() // file flags 358 | f.IsDir = flags&file5IsDir > 0 359 | f.UnKnownSize = flags&file5UnpSizeUnknown > 0 360 | f.UnPackedSize = int64(h.data.uvarint()) 361 | f.PackedSize = h.dataSize 362 | f.Attributes = int64(h.data.uvarint()) 363 | if flags&file5HasUnixMtime > 0 { 364 | if len(h.data) < 4 { 365 | return nil, ErrCorruptFileHeader 366 | } 367 | f.ModificationTime = time.Unix(int64(h.data.uint32()), 0) 368 | } 369 | if flags&file5HasCRC32 > 0 { 370 | if len(h.data) < 4 { 371 | return nil, ErrCorruptFileHeader 372 | } 373 | f.sum = append([]byte(nil), h.data.bytes(4)...) 374 | if f.first { 375 | f.hash = newLittleEndianCRC32 376 | } 377 | } 378 | 379 | flags = h.data.uvarint() // compression flags 380 | f.Solid = flags&file5CompSolid > 0 381 | f.arcSolid = a.solid 382 | method := (flags >> 7) & 7 // compression method (0 == none) 383 | if f.first && method != 0 { 384 | unpackver := flags & file5CompAlgorithm 385 | var winSize int64 386 | if unpackver == 0 { 387 | f.decVer = decode50Ver 388 | winSize = 0x20000 << ((flags >> 10) & 0x0F) 389 | } else if unpackver == 1 { 390 | if flags&file5CompV5Compat > 0 { 391 | f.decVer = decode50Ver 392 | } else { 393 | f.decVer = decode70Ver 394 | } 395 | winSize = 0x20000 << ((flags >> 10) & 0x1F) 396 | winSize += winSize / 32 * int64((flags>>15)&0x1F) 397 | if winSize > maxDictSize { 398 | return nil, ErrDictionaryTooLarge 399 | } 400 | } else { 401 | return nil, ErrUnknownDecoder 402 | } 403 | if winSize > math.MaxInt { 404 | return nil, ErrPlatformIntSize 405 | } 406 | f.winSize = int(winSize) 407 | } 408 | switch h.data.uvarint() { 409 | case 0: 410 | f.HostOS = HostOSWindows 411 | case 1: 412 | f.HostOS = 
HostOSUnix 413 | default: 414 | f.HostOS = HostOSUnknown 415 | } 416 | nlen := int(h.data.uvarint()) 417 | if len(h.data) < nlen { 418 | return nil, ErrCorruptFileHeader 419 | } 420 | f.Name = string(h.data.bytes(nlen)) 421 | 422 | // parse optional extra records 423 | for _, e := range h.extra { 424 | var err error 425 | switch e.ftype { 426 | case 1: // encryption 427 | err = a.parseFileEncryptionRecord(e.data, f) 428 | case 2: 429 | // TODO: hash 430 | case 3: 431 | err = a.parseFilePrecisionTimeRecord(&e.data, f) 432 | case 4: // version 433 | _ = e.data.uvarint() // ignore flags field 434 | f.Version = int(e.data.uvarint()) 435 | case 5: 436 | // TODO: redirection 437 | case 6: 438 | // TODO: owner 439 | } 440 | if err != nil { 441 | return nil, err 442 | } 443 | } 444 | return f, nil 445 | } 446 | 447 | // parseEncryptionBlock calculates the key for block encryption. 448 | func (a *archive50) parseEncryptionBlock(b readBuf) error { 449 | if a.pass == nil { 450 | return ErrArchiveEncrypted 451 | } 452 | if ver := b.uvarint(); ver != 0 { 453 | return ErrUnknownEncryptMethod 454 | } 455 | flags := b.uvarint() 456 | if len(b) < 17 { 457 | return ErrCorruptEncryptData 458 | } 459 | kdfCount := int(b.byte()) 460 | salt := b.bytes(16) 461 | 462 | var check []byte 463 | if flags&enc5CheckPresent > 0 { 464 | if len(b) < 12 { 465 | return ErrCorruptEncryptData 466 | } 467 | check = b.bytes(12) 468 | } 469 | 470 | keys, err := a.getKeys(kdfCount, salt, check) 471 | if err != nil { 472 | return err 473 | } 474 | a.blockKey = keys[0] 475 | return nil 476 | } 477 | 478 | func (a *archive50) parseArcBlock(v *volume, h *blockHeader50) error { 479 | flags := h.data.uvarint() 480 | a.multi = flags&arc5MultiVol > 0 481 | a.solid = flags&arc5Solid > 0 482 | if flags&arc5VolNum > 0 && h.data.uvarint() != uint64(v.num) { 483 | return ErrBadVolumeNumber 484 | } 485 | return nil 486 | } 487 | 488 | func (a *archive50) readBlockHeader(r sliceReader) (*blockHeader50, error) { 489 | if 
a.blockKey != nil { 490 | // block is encrypted 491 | iv, err := r.readSlice(16) 492 | if err != nil { 493 | return nil, err 494 | } 495 | r = newAesSliceReader(r, a.blockKey, iv) 496 | } 497 | var b readBuf 498 | var err error 499 | // peek to find the header size 500 | b, err = r.peek(7) 501 | if err != nil { 502 | return nil, err 503 | } 504 | crc := b.uint32() 505 | 506 | hash := crc32.NewIEEE() 507 | 508 | size := int(b.uvarint()) // header size 509 | b, err = r.readSlice(7 - len(b) + size) 510 | if err != nil { 511 | return nil, err 512 | } 513 | 514 | // check header crc 515 | _, _ = hash.Write(b[4:]) 516 | if crc != hash.Sum32() { 517 | return nil, ErrBadHeaderCRC 518 | } 519 | 520 | b = b[len(b)-size:] 521 | h := new(blockHeader50) 522 | h.htype = b.uvarint() 523 | h.flags = b.uvarint() 524 | 525 | var extraSize int 526 | if h.flags&block5HasExtra > 0 { 527 | extraSize = int(b.uvarint()) 528 | } 529 | if h.flags&block5HasData > 0 { 530 | h.dataSize = int64(b.uvarint()) 531 | } 532 | if len(b) < extraSize { 533 | return nil, ErrCorruptBlockHeader 534 | } 535 | h.data = b.bytes(len(b) - extraSize) 536 | 537 | // read header extra records 538 | for len(b) > 0 { 539 | size = int(b.uvarint()) 540 | if len(b) < size { 541 | return nil, ErrCorruptBlockHeader 542 | } 543 | data := readBuf(b.bytes(size)) 544 | ftype := data.uvarint() 545 | h.extra = append(h.extra, extra{ftype, data}) 546 | } 547 | 548 | return h, nil 549 | } 550 | 551 | func (a *archive50) mustReadBlockHeader(r sliceReader) (*blockHeader50, error) { 552 | h, err := a.readBlockHeader(r) 553 | if err != nil { 554 | if err == io.EOF { 555 | err = io.ErrUnexpectedEOF 556 | } 557 | return nil, err 558 | } 559 | return h, nil 560 | } 561 | 562 | func (a *archive50) readArcHeaders(v *volume) error { 563 | h, err := a.mustReadBlockHeader(v) 564 | if err != nil { 565 | return err 566 | } 567 | if h.htype == block5Encrypt { 568 | err = a.parseEncryptionBlock(h.data) 569 | if err != nil { 570 | return err 
571 | } 572 | h, err = a.mustReadBlockHeader(v) 573 | if err != nil { 574 | return err 575 | } 576 | } 577 | if h.htype != block5Arc { 578 | return ErrNoArchiveBlock 579 | } 580 | return a.parseArcBlock(v, h) 581 | } 582 | 583 | // next advances to the next file block in the archive 584 | func (a *archive50) next(v *volume) (*fileBlockHeader, error) { 585 | for { 586 | // get next block header 587 | h, err := a.mustReadBlockHeader(v) 588 | if err != nil { 589 | return nil, err 590 | } 591 | switch h.htype { 592 | case block5File: 593 | return a.parseFileHeader(h) 594 | case block5End: 595 | flags := h.data.uvarint() 596 | if flags&endArc5NotLast == 0 || !a.multi { 597 | return nil, io.EOF 598 | } 599 | a.blockKey = nil // reset encryption when opening new volume file 600 | err = v.next() 601 | if err != nil { 602 | return nil, err 603 | } 604 | err = a.readArcHeaders(v) 605 | if err != nil { 606 | return nil, err 607 | } 608 | default: 609 | if h.dataSize > 0 { 610 | err = v.discard(h.dataSize) // skip over block data 611 | if err != nil { 612 | return nil, err 613 | } 614 | } 615 | } 616 | } 617 | } 618 | 619 | // newArchive50 creates a new fileBlockReader for a Version 5 archive. 620 | func newArchive50(v *volume, password *string) (*archive50, error) { 621 | a := new(archive50) 622 | if password != nil { 623 | a.pass = []byte(*password) 624 | } 625 | return a, a.readArcHeaders(v) 626 | } 627 | -------------------------------------------------------------------------------- /bit_reader.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "io" 5 | "math/bits" 6 | ) 7 | 8 | type bitReader interface { 9 | readBits(n uint8) (int, error) // read n bits of data 10 | unreadBits(n uint8) // revert the reading of the last n bits read 11 | } 12 | 13 | // rar5BitReader is a bitReader that reads bytes from a byteReader and stops with io.EOF after l bits. 
14 | type rar5BitReader struct { 15 | r byteReader 16 | v int // cache of bits read from r 17 | l int // number of bits (not cached) that can be read from r 18 | n uint8 // number of unread bits in v 19 | b []byte // bytes() output cache from r 20 | } 21 | 22 | func (r *rar5BitReader) unreadBits(n uint8) { r.n += n } 23 | 24 | // ReadByte reads a byte from rar5BitReader's byteReader ignoring the bit cache v. 25 | func (r *rar5BitReader) ReadByte() (byte, error) { 26 | if len(r.b) == 0 { 27 | var err error 28 | r.b, err = r.r.bytes() 29 | if err != nil { 30 | if err == io.EOF { 31 | err = ErrDecoderOutOfData 32 | } 33 | return 0, err 34 | } 35 | } 36 | c := r.b[0] 37 | r.b = r.b[1:] 38 | return c, nil 39 | } 40 | 41 | func (r *rar5BitReader) reset(br byteReader) { 42 | r.r = br 43 | r.b = nil 44 | } 45 | 46 | // setLimit sets the maximum bit count that can be read. 47 | func (r *rar5BitReader) setLimit(n int) { 48 | r.l = n 49 | r.n = 0 50 | } 51 | 52 | // readBits returns n bits from the underlying byteReader. 53 | // n must be less than integer size - 8. 
54 | func (r *rar5BitReader) readBits(n uint8) (int, error) { 55 | for n > r.n { 56 | if r.l == 0 { 57 | // reached bits limit 58 | return 0, io.EOF 59 | } 60 | if len(r.b) == 0 { 61 | var err error 62 | r.b, err = r.r.bytes() 63 | if err != nil { 64 | if err == io.EOF { 65 | // io.EOF before we reached bit limit 66 | err = ErrDecoderOutOfData 67 | } 68 | return 0, err 69 | } 70 | } 71 | // try to fit as many bits into r.v as possible 72 | for len(r.b) > 0 && r.n <= bits.UintSize-8 { 73 | r.v = r.v<<8 | int(r.b[0]) 74 | r.b = r.b[1:] 75 | r.n += 8 76 | r.l -= 8 77 | if r.l <= 0 { 78 | if r.l < 0 { 79 | // overshot, discard the extra bits 80 | bits := uint8(-r.l) 81 | r.l = 0 82 | r.v >>= bits 83 | r.n -= bits 84 | } 85 | break 86 | } 87 | } 88 | } 89 | r.n -= n 90 | return (r.v >> r.n) & ((1 << n) - 1), nil 91 | } 92 | 93 | // replaceByteReader is a byteReader that returns b on the first call to bytes() 94 | // and then replaces the byteReader at rp with r. 95 | type replaceByteReader struct { 96 | rp *byteReader 97 | r byteReader 98 | b []byte 99 | } 100 | 101 | func (r *replaceByteReader) Read(p []byte) (int, error) { return 0, io.EOF } 102 | 103 | func (r *replaceByteReader) bytes() ([]byte, error) { 104 | *r.rp = r.r 105 | return r.b, nil 106 | } 107 | 108 | // rarBitReader wraps an io.ByteReader to perform various bit and byte 109 | // reading utility functions used in RAR file processing. 110 | type rarBitReader struct { 111 | r byteReader 112 | v int 113 | n uint8 114 | b []byte 115 | } 116 | 117 | func (r *rarBitReader) reset(br byteReader) { 118 | r.r = br 119 | r.n = 0 120 | r.v = 0 121 | r.b = nil 122 | } 123 | 124 | // unshiftBytes moves any bytes in rarBitReader bit cache back into a byte slice 125 | // and sets up byteReader's so that all bytes can now be read by ReadByte() without 126 | // going through the bit cache. 
127 | func (r *rarBitReader) unshiftBytes() { 128 | // no cached bits 129 | if r.n == 0 { 130 | return 131 | } 132 | // create and read byte slice for cached bits 133 | b := make([]byte, r.n/8) 134 | for i := len(b) - 1; i >= 0; i-- { 135 | b[i] = byte(r.v) 136 | r.v >>= 8 137 | } 138 | r.n = 0 139 | // current bytes buffer empty, so store b and return 140 | if len(r.b) == 0 { 141 | r.b = b 142 | return 143 | } 144 | // Put current bytes buffer and byteReader in a replaceByteReader and 145 | // the unshifted bytes in the rarBitReader bytes buffer. 146 | // When the bytes buffer is consumed, rarBitReader will call bytes() 147 | // on replaceByteReader which will return the old bytes buffer and 148 | // replace itself with the old byteReader in rarBitReader. 149 | r.r = &replaceByteReader{rp: &r.r, r: r.r, b: r.b} 150 | r.b = b 151 | } 152 | 153 | // readBits returns n bits from the underlying byteReader. 154 | // n must be less than integer size - 8. 155 | func (r *rarBitReader) readBits(n uint8) (int, error) { 156 | for n > r.n { 157 | if len(r.b) == 0 { 158 | var err error 159 | r.b, err = r.r.bytes() 160 | if err != nil { 161 | return 0, err 162 | } 163 | } 164 | // try to fit as many bits into r.v as possible 165 | for len(r.b) > 0 && r.n <= bits.UintSize-8 { 166 | r.v = r.v<<8 | int(r.b[0]) 167 | r.b = r.b[1:] 168 | r.n += 8 169 | } 170 | } 171 | r.n -= n 172 | return (r.v >> r.n) & ((1 << n) - 1), nil 173 | } 174 | 175 | func (r *rarBitReader) unreadBits(n uint8) { 176 | r.n += n 177 | } 178 | 179 | // alignByte aligns the current bit reading input to the next byte boundary. 
180 | func (r *rarBitReader) alignByte() { 181 | r.n -= r.n % 8 182 | } 183 | 184 | // readUint32 reads a RAR V3 encoded uint32 185 | func (r *rarBitReader) readUint32() (uint32, error) { 186 | n, err := r.readBits(2) 187 | if err != nil { 188 | return 0, err 189 | } 190 | if n != 1 { 191 | if bits.UintSize == 32 { 192 | if n == 3 { 193 | // 32bit platforms may not be able to read 32 bits as r.v 194 | // will need up to 7 extra bits for overflow from reading a byte. 195 | // Split it into two reads. 196 | n, err = r.readBits(16) 197 | if err != nil { 198 | return 0, err 199 | } 200 | m := uint32(n) << 16 201 | n, err = r.readBits(16) 202 | return m | uint32(n), err 203 | } 204 | } 205 | n, err = r.readBits(4 << uint(n)) 206 | return uint32(n), err 207 | } 208 | n, err = r.readBits(4) 209 | if err != nil { 210 | return 0, err 211 | } 212 | if n == 0 { 213 | n, err = r.readBits(8) 214 | n |= -1 << 8 215 | return uint32(n), err 216 | } 217 | nlow, err := r.readBits(4) 218 | n = n<<4 | nlow 219 | return uint32(n), err 220 | } 221 | 222 | // ReadByte() returns a byte directly from buf b or the io.ByteReader r. 223 | // Current bit offsets are ignored. 224 | func (r *rarBitReader) ReadByte() (byte, error) { 225 | if len(r.b) == 0 { 226 | if r.r == nil { 227 | return 0, io.EOF 228 | } 229 | var err error 230 | r.b, err = r.r.bytes() 231 | if err != nil { 232 | return 0, err 233 | } 234 | } 235 | c := r.b[0] 236 | r.b = r.b[1:] 237 | return c, nil 238 | } 239 | 240 | // readFull reads len(p) bytes into p. If fewer bytes are read an error is returned. 
241 | func (r *rarBitReader) readFull(p []byte) error { 242 | if r.n == 0 && len(r.b) > 0 { 243 | n := copy(p, r.b) 244 | p = p[n:] 245 | r.b = r.b[n:] 246 | } 247 | for i := range p { 248 | n, err := r.readBits(8) 249 | if err != nil { 250 | return err 251 | } 252 | p[i] = byte(n) 253 | } 254 | return nil 255 | } 256 | 257 | func newRarBitReader(r byteReader) *rarBitReader { 258 | return &rarBitReader{r: r} 259 | } 260 | -------------------------------------------------------------------------------- /decode20.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "io" 5 | ) 6 | 7 | const audioSize = 257 8 | 9 | type decoder20 struct { 10 | br *rarBitReader 11 | size int64 // unpacked bytes left to be decompressed 12 | hdrRead bool // block header has been read 13 | isAudio bool // current block is Audio 14 | 15 | codeLength [audioSize * 4]byte 16 | 17 | lz *lz20Decoder 18 | audio *audio20Decoder 19 | } 20 | 21 | func (d *decoder20) version() int { return decode20Ver } 22 | 23 | // init intializes the decoder for decoding a new file. 
24 | func (d *decoder20) init(r byteReader, reset bool, size int64, ver int) { 25 | if d.br == nil { 26 | d.br = newRarBitReader(r) 27 | } else { 28 | d.br.reset(r) 29 | } 30 | d.size = size 31 | if reset { 32 | d.hdrRead = false 33 | d.isAudio = false 34 | if d.audio != nil { 35 | d.audio.reset() 36 | } 37 | clear(d.codeLength[:]) 38 | } 39 | } 40 | 41 | func readCodeLengthTable20(br *rarBitReader, table []byte) error { 42 | var bitlength [19]byte 43 | for i := 0; i < len(bitlength); i++ { 44 | n, err := br.readBits(4) 45 | if err != nil { 46 | return err 47 | } 48 | bitlength[i] = byte(n) 49 | } 50 | 51 | var bl huffmanDecoder 52 | bl.init(bitlength[:]) 53 | 54 | for i := 0; i < len(table); { 55 | l, err := bl.readSym(br) 56 | if err != nil { 57 | return err 58 | } 59 | if l < 16 { 60 | table[i] = (table[i] + byte(l)) & 0xf 61 | i++ 62 | continue 63 | } 64 | if l == 16 { 65 | if i == 0 { 66 | return ErrInvalidLengthTable 67 | } 68 | var n int 69 | n, err = br.readBits(2) 70 | if err != nil { 71 | return err 72 | } 73 | n += 3 74 | n = min(i+n, len(table)) 75 | v := table[i-1] 76 | for i < n { 77 | table[i] = v 78 | i++ 79 | } 80 | continue 81 | } 82 | var n int 83 | if l == 17 { 84 | n, err = br.readBits(3) 85 | if err != nil { 86 | return err 87 | } 88 | n += 3 89 | } else { 90 | n, err = br.readBits(7) 91 | if err != nil { 92 | return err 93 | } 94 | n += 11 95 | } 96 | n = min(i+n, len(table)) 97 | clear(table[i:n]) 98 | i = n 99 | } 100 | return nil 101 | } 102 | 103 | func (d *decoder20) readBlockHeader() error { 104 | n, err := d.br.readBits(1) 105 | if err != nil { 106 | return err 107 | } 108 | d.isAudio = n > 0 109 | n, err = d.br.readBits(1) 110 | if err != nil { 111 | return err 112 | } 113 | if n == 0 { 114 | clear(d.codeLength[:]) 115 | } 116 | if d.isAudio { 117 | if d.audio == nil { 118 | d.audio = new(audio20Decoder) 119 | } 120 | err = d.audio.init(d.br, d.codeLength[:]) 121 | } else { 122 | if d.lz == nil { 123 | d.lz = new(lz20Decoder) 124 | } 
125 | err = d.lz.init(d.br, d.codeLength[:]) 126 | } 127 | d.hdrRead = true 128 | return err 129 | } 130 | 131 | func (d *decoder20) fill(dr *decodeReader) error { 132 | for d.size > 0 && dr.notFull() { 133 | if !d.hdrRead { 134 | if err := d.readBlockHeader(); err != nil { 135 | return err 136 | } 137 | } 138 | var n int64 139 | var err error 140 | if d.isAudio { 141 | n, err = d.audio.fill(dr, d.size) 142 | } else { 143 | n, err = d.lz.fill(dr, d.size) 144 | } 145 | d.size -= n 146 | switch err { 147 | case nil: 148 | continue 149 | case errEndOfBlock: 150 | d.hdrRead = false 151 | continue 152 | case io.EOF: 153 | err = ErrDecoderOutOfData 154 | } 155 | return err 156 | } 157 | if d.size == 0 { 158 | return io.EOF 159 | } 160 | return nil 161 | } 162 | -------------------------------------------------------------------------------- /decode20_audio.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | type audioVar struct { 4 | k [5]int 5 | d [4]int 6 | lastDelta int 7 | dif [11]int 8 | byteCount int 9 | lastChar int 10 | } 11 | 12 | type audio20Decoder struct { 13 | chans int // number of audio channels 14 | curChan int // current audio channel 15 | chanDelta int 16 | 17 | decoders [4]huffmanDecoder 18 | vars [4]audioVar 19 | 20 | br *rarBitReader 21 | } 22 | 23 | func (d *audio20Decoder) reset() { 24 | d.chans = 1 25 | d.curChan = 0 26 | d.chanDelta = 0 27 | 28 | for i := range d.vars { 29 | d.vars[i] = audioVar{} 30 | } 31 | } 32 | 33 | func (d *audio20Decoder) init(br *rarBitReader, table []byte) error { 34 | d.br = br 35 | n, err := br.readBits(2) 36 | if err != nil { 37 | return err 38 | } 39 | d.chans = n + 1 40 | if d.curChan >= d.chans { 41 | d.curChan = 0 42 | } 43 | table = table[:audioSize*d.chans] 44 | if err = readCodeLengthTable20(br, table); err != nil { 45 | return err 46 | } 47 | for i := 0; i < d.chans; i++ { 48 | d.decoders[i].init(table[:audioSize]) 49 | table = table[audioSize:] 50 
| } 51 | return nil 52 | } 53 | 54 | func (d *audio20Decoder) decode(delta int) byte { 55 | v := &d.vars[d.curChan] 56 | v.byteCount++ 57 | v.d[3] = v.d[2] 58 | v.d[2] = v.d[1] 59 | v.d[1] = v.lastDelta - v.d[0] 60 | v.d[0] = v.lastDelta 61 | pch := 8*v.lastChar + v.k[0]*v.d[0] + v.k[1]*v.d[1] + v.k[2]*v.d[2] + v.k[3]*v.d[3] + v.k[4]*d.chanDelta 62 | pch = (pch >> 3) & 0xFF 63 | ch := pch - delta 64 | delta <<= 3 65 | 66 | v.dif[0] += abs(delta) 67 | v.dif[1] += abs(delta - v.d[0]) 68 | v.dif[2] += abs(delta + v.d[0]) 69 | v.dif[3] += abs(delta - v.d[1]) 70 | v.dif[4] += abs(delta + v.d[1]) 71 | v.dif[5] += abs(delta - v.d[2]) 72 | v.dif[6] += abs(delta + v.d[2]) 73 | v.dif[7] += abs(delta - v.d[3]) 74 | v.dif[8] += abs(delta + v.d[3]) 75 | v.dif[9] += abs(delta - d.chanDelta) 76 | v.dif[10] += abs(delta + d.chanDelta) 77 | 78 | d.chanDelta = ch - v.lastChar 79 | v.lastDelta = d.chanDelta 80 | v.lastChar = ch 81 | 82 | if v.byteCount&0x1F != 0 { 83 | return byte(ch) 84 | } 85 | 86 | var numMinDif int 87 | minDif := v.dif[0] 88 | v.dif[0] = 0 89 | for i := 1; i < len(v.dif); i++ { 90 | if v.dif[i] < minDif { 91 | minDif = v.dif[i] 92 | numMinDif = i 93 | } 94 | v.dif[i] = 0 95 | } 96 | if numMinDif > 0 { 97 | numMinDif-- 98 | i := numMinDif / 2 99 | if numMinDif%2 == 0 { 100 | if v.k[i] >= -16 { 101 | v.k[i]-- 102 | } 103 | } else if v.k[i] < 16 { 104 | v.k[i]++ 105 | } 106 | } 107 | return byte(ch) 108 | } 109 | 110 | func (d *audio20Decoder) fill(dr *decodeReader, size int64) (int64, error) { 111 | var n int64 112 | for n < size && dr.notFull() { 113 | sym, err := d.decoders[d.curChan].readSym(d.br) 114 | if err != nil { 115 | return n, err 116 | } 117 | if sym == 256 { 118 | return n, errEndOfBlock 119 | } 120 | dr.writeByte(d.decode(sym)) 121 | n++ 122 | d.curChan++ 123 | if d.curChan >= d.chans { 124 | d.curChan = 0 125 | } 126 | } 127 | return n, nil 128 | } 129 | -------------------------------------------------------------------------------- /decode20_lz.go: 
-------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | const ( 4 | main20Size = 298 5 | offset20Size = 48 6 | length20Size = 28 7 | ) 8 | 9 | type lz20Decoder struct { 10 | length int // previous length 11 | offset [4]int // history of previous offsets 12 | 13 | mainDecoder huffmanDecoder 14 | offsetDecoder huffmanDecoder 15 | lengthDecoder huffmanDecoder 16 | 17 | br *rarBitReader 18 | } 19 | 20 | func (d *lz20Decoder) init(br *rarBitReader, table []byte) error { 21 | d.br = br 22 | 23 | table = table[:main20Size+offset20Size+length20Size] 24 | if err := readCodeLengthTable20(br, table); err != nil { 25 | return err 26 | } 27 | d.mainDecoder.init(table[:main20Size]) 28 | table = table[main20Size:] 29 | d.offsetDecoder.init(table[:offset20Size]) 30 | table = table[offset20Size:] 31 | d.lengthDecoder.init(table) 32 | return nil 33 | } 34 | 35 | func (d *lz20Decoder) decodeOffset(i int) error { 36 | d.length = lengthBase[i] + 3 37 | bits := lengthExtraBits[i] 38 | if bits > 0 { 39 | n, err := d.br.readBits(bits) 40 | if err != nil { 41 | return err 42 | } 43 | d.length += n 44 | } 45 | 46 | var err error 47 | i, err = d.offsetDecoder.readSym(d.br) 48 | if err != nil { 49 | return err 50 | } 51 | offset := offsetBase[i] + 1 52 | bits = offsetExtraBits[i] 53 | if bits > 0 { 54 | n, err := d.br.readBits(bits) 55 | if err != nil { 56 | return err 57 | } 58 | offset += n 59 | } 60 | 61 | if offset >= 0x2000 { 62 | d.length++ 63 | if offset >= 0x40000 { 64 | d.length++ 65 | } 66 | } 67 | copy(d.offset[1:], d.offset[:]) 68 | d.offset[0] = offset 69 | return nil 70 | } 71 | 72 | func (d *lz20Decoder) decodeLength(i int) error { 73 | offset := d.offset[i] 74 | copy(d.offset[1:], d.offset[:]) 75 | d.offset[0] = offset 76 | 77 | i, err := d.lengthDecoder.readSym(d.br) 78 | if err != nil { 79 | return err 80 | } 81 | d.length = lengthBase[i] + 2 82 | bits := lengthExtraBits[i] 83 | if bits > 0 { 84 | var n int 85 | n, err = 
d.br.readBits(bits) 86 | if err != nil { 87 | return err 88 | } 89 | d.length += n 90 | } 91 | if offset >= 0x101 { 92 | d.length++ 93 | if offset >= 0x2000 { 94 | d.length++ 95 | if offset >= 0x40000 { 96 | d.length++ 97 | } 98 | } 99 | } 100 | return nil 101 | } 102 | 103 | func (d *lz20Decoder) decodeShortOffset(i int) error { 104 | copy(d.offset[1:], d.offset[:]) 105 | offset := shortOffsetBase[i] + 1 106 | bits := shortOffsetExtraBits[i] 107 | if bits > 0 { 108 | n, err := d.br.readBits(bits) 109 | if err != nil { 110 | return err 111 | } 112 | offset += n 113 | } 114 | d.offset[0] = offset 115 | d.length = 2 116 | return nil 117 | } 118 | 119 | func (d *lz20Decoder) fill(dr *decodeReader, size int64) (int64, error) { 120 | var n int64 121 | for n < size && dr.notFull() { 122 | sym, err := d.mainDecoder.readSym(d.br) 123 | if err != nil { 124 | return n, err 125 | } 126 | 127 | switch { 128 | case sym < 256: // literal 129 | dr.writeByte(byte(sym)) 130 | n++ 131 | continue 132 | case sym > 269: 133 | err = d.decodeOffset(sym - 270) 134 | case sym == 269: 135 | return n, errEndOfBlock 136 | case sym == 256: // use previous offset and length 137 | copy(d.offset[1:], d.offset[:]) 138 | case sym < 261: 139 | err = d.decodeLength(sym - 257) 140 | default: 141 | err = d.decodeShortOffset(sym - 261) 142 | } 143 | if err != nil { 144 | return n, err 145 | } 146 | dr.copyBytes(d.length, d.offset[0]) 147 | n += int64(d.length) 148 | } 149 | return n, nil 150 | } 151 | -------------------------------------------------------------------------------- /decode29.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | ) 7 | 8 | const ( 9 | maxCodeSize = 0x10000 10 | maxUniqueFilters = 1024 11 | ) 12 | 13 | var ( 14 | // Errors marking the end of the decoding block and/or file 15 | errEndOfFile = errors.New("rardecode: end of file") 16 | errEndOfBlock = errors.New("rardecode: end of 
block") 17 | errEndOfBlockAndFile = errors.New("rardecode: end of block and file") 18 | ) 19 | 20 | // decoder29 implements the decoder interface for RAR 3.0 compression (unpack version 29) 21 | // Decode input is broken up into 1 or more blocks. The start of each block specifies 22 | // the decoding algorithm (ppm or lz) and optional data to initialize with. 23 | // Block length is not stored, it is determined only after decoding an end of file and/or 24 | // block marker in the data. 25 | type decoder29 struct { 26 | br *rarBitReader 27 | hdrRead bool // block header has been read 28 | isPPM bool // current block is PPM 29 | eof bool // at file eof 30 | fnum int // current filter number (index into filters) 31 | flen []int // filter block length history 32 | filters []v3Filter // list of current filters used by archive encoding 33 | 34 | lz *lz29Decoder // lz decoder 35 | ppm *ppm29Decoder // ppm decoder 36 | } 37 | 38 | func (d *decoder29) version() int { return decode29Ver } 39 | 40 | // init intializes the decoder for decoding a new file. 
41 | func (d *decoder29) init(r byteReader, reset bool, size int64, ver int) { 42 | if d.br == nil { 43 | d.br = newRarBitReader(r) 44 | } else { 45 | d.br.reset(r) 46 | } 47 | d.eof = false 48 | if reset { 49 | d.initFilters() 50 | if d.lz != nil { 51 | d.lz.reset() 52 | } 53 | if d.ppm != nil { 54 | d.ppm.reset() 55 | } 56 | d.hdrRead = false 57 | } 58 | } 59 | 60 | func (d *decoder29) initFilters() { 61 | d.fnum = 0 62 | d.flen = nil 63 | d.filters = nil 64 | } 65 | 66 | // readVMCode reads the raw bytes for the code/commands used in a vm filter 67 | func readVMCode(br *rarBitReader) ([]byte, error) { 68 | n, err := br.readUint32() 69 | if err != nil { 70 | return nil, err 71 | } 72 | if n > maxCodeSize || n == 0 { 73 | return nil, ErrInvalidFilter 74 | } 75 | buf := make([]byte, n) 76 | err = br.readFull(buf) 77 | if err != nil { 78 | return nil, err 79 | } 80 | var x byte 81 | for _, c := range buf[1:] { 82 | x ^= c 83 | } 84 | // simple xor checksum on data 85 | if x != buf[0] { 86 | return nil, ErrInvalidFilter 87 | } 88 | return buf, nil 89 | } 90 | 91 | func (d *decoder29) parseVMFilter(buf []byte) (*filterBlock, error) { 92 | flags := buf[0] 93 | br := newRarBitReader(newBufByteReader(buf[1:])) 94 | fb := new(filterBlock) 95 | 96 | // Find the filter number which is an index into d.filters. 97 | // If filter number == len(d.filters) it is a new filter to be added. 
98 | if flags&0x80 > 0 { 99 | n, err := br.readUint32() 100 | if err != nil { 101 | return nil, err 102 | } 103 | if n == 0 { 104 | d.initFilters() 105 | } else { 106 | n-- 107 | if n > maxUniqueFilters { 108 | return nil, ErrInvalidFilter 109 | } 110 | if int(n) > len(d.filters) { 111 | return nil, ErrInvalidFilter 112 | } 113 | } 114 | d.fnum = int(n) 115 | } 116 | 117 | // filter offset 118 | n, err := br.readUint32() 119 | if err != nil { 120 | return nil, err 121 | } 122 | if flags&0x40 > 0 { 123 | n += 258 124 | } 125 | fb.offset = int(n) 126 | 127 | // filter length 128 | if d.fnum == len(d.flen) { 129 | d.flen = append(d.flen, 0) 130 | } 131 | if flags&0x20 > 0 { 132 | n, err = br.readUint32() 133 | if err != nil { 134 | return nil, err 135 | } 136 | //fb.length = int(n) 137 | d.flen[d.fnum] = int(n) 138 | } 139 | fb.length = d.flen[d.fnum] 140 | 141 | // initial register values 142 | r := make(map[int]uint32) 143 | if flags&0x10 > 0 { 144 | bits, err := br.readBits(vmRegs - 1) 145 | if err != nil { 146 | return nil, err 147 | } 148 | for i := 0; i < vmRegs-1; i++ { 149 | if bits&1 > 0 { 150 | r[i], err = br.readUint32() 151 | if err != nil { 152 | return nil, err 153 | } 154 | } 155 | bits >>= 1 156 | } 157 | } 158 | 159 | // filter is new so read the code for it 160 | if d.fnum == len(d.filters) { 161 | code, err := readVMCode(br) 162 | if err != nil { 163 | return nil, err 164 | } 165 | f, err := getV3Filter(code) 166 | if err != nil { 167 | return nil, err 168 | } 169 | d.filters = append(d.filters, f) 170 | d.flen = append(d.flen, fb.length) 171 | } 172 | 173 | // read global data 174 | var g []byte 175 | if flags&0x08 > 0 { 176 | n, err := br.readUint32() 177 | if err != nil { 178 | return nil, err 179 | } 180 | if n > vmGlobalSize-vmFixedGlobalSize { 181 | return nil, ErrInvalidFilter 182 | } 183 | g = make([]byte, n) 184 | err = br.readFull(g) 185 | if err != nil { 186 | return nil, err 187 | } 188 | } 189 | 190 | // create filter function 191 | f 
:= d.filters[d.fnum] 192 | fb.filter = func(buf []byte, offset int64) ([]byte, error) { 193 | return f(r, g, buf, offset) 194 | } 195 | 196 | return fb, nil 197 | } 198 | 199 | // readBlockHeader determines and initializes the current decoder for a new decode block. 200 | func (d *decoder29) readBlockHeader() error { 201 | d.br.alignByte() 202 | n, err := d.br.readBits(1) 203 | if err == nil { 204 | if n > 0 { 205 | d.isPPM = true 206 | if d.ppm == nil { 207 | d.ppm = newPPM29Decoder() 208 | } 209 | err = d.ppm.init(d.br) 210 | } else { 211 | d.isPPM = false 212 | if d.lz == nil { 213 | d.lz = new(lz29Decoder) 214 | } 215 | err = d.lz.init(d.br) 216 | } 217 | } 218 | if err == io.EOF { 219 | err = ErrDecoderOutOfData 220 | } 221 | d.hdrRead = true 222 | return err 223 | } 224 | 225 | func (d *decoder29) fill(dr *decodeReader) error { 226 | if d.eof { 227 | return io.EOF 228 | } 229 | 230 | for dr.notFull() { 231 | var err error 232 | if !d.hdrRead { 233 | if err = d.readBlockHeader(); err != nil { 234 | return err 235 | } 236 | } 237 | var b []byte 238 | if d.isPPM { 239 | b, err = d.ppm.fill(dr) 240 | } else { 241 | b, err = d.lz.fill(dr) 242 | } 243 | if len(b) > 0 && err == nil { 244 | // parse raw data for filter and add to list of filters 245 | var f *filterBlock 246 | f, err = d.parseVMFilter(b) 247 | if f != nil { 248 | err = dr.queueFilter(f) 249 | } 250 | } 251 | 252 | switch err { 253 | case nil: 254 | continue 255 | case errEndOfBlock: 256 | d.hdrRead = false 257 | continue 258 | case errEndOfFile: 259 | d.eof = true 260 | err = io.EOF 261 | case errEndOfBlockAndFile: 262 | d.eof = true 263 | d.hdrRead = false 264 | err = io.EOF 265 | case io.EOF: 266 | err = ErrDecoderOutOfData 267 | } 268 | return err 269 | } 270 | return nil 271 | } 272 | -------------------------------------------------------------------------------- /decode29_lz.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | const ( 
4 | mainSize = 299 5 | offsetSize = 60 6 | lowOffsetSize = 17 7 | lengthSize = 28 8 | tableSize = mainSize + offsetSize + lowOffsetSize + lengthSize 9 | ) 10 | 11 | var ( 12 | lengthBase = [28]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 13 | 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224} 14 | lengthExtraBits = [28]uint8{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 15 | 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5} 16 | 17 | offsetBase = [60]int{0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 18 | 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 19 | 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 20 | 131072, 196608, 262144, 327680, 393216, 458752, 524288, 21 | 589824, 655360, 720896, 786432, 851968, 917504, 983040, 22 | 1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 23 | 2883584, 3145728, 3407872, 3670016, 3932160} 24 | offsetExtraBits = [60]uint8{0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 25 | 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 26 | 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 27 | 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18} 28 | 29 | shortOffsetBase = [8]int{0, 4, 8, 16, 32, 64, 128, 192} 30 | shortOffsetExtraBits = [8]uint8{2, 2, 3, 4, 5, 6, 6, 6} 31 | ) 32 | 33 | type lz29Decoder struct { 34 | codeLength [tableSize]byte 35 | 36 | mainDecoder huffmanDecoder 37 | offsetDecoder huffmanDecoder 38 | lowOffsetDecoder huffmanDecoder 39 | lengthDecoder huffmanDecoder 40 | 41 | offset [4]int // history of previous offsets 42 | length int // previous length 43 | lowOffset int 44 | lowOffsetRepeats int 45 | 46 | br *rarBitReader 47 | } 48 | 49 | func (d *lz29Decoder) reset() { 50 | clear(d.offset[:]) 51 | d.length = 0 52 | clear(d.codeLength[:]) 53 | } 54 | 55 | func (d *lz29Decoder) init(br *rarBitReader) error { 56 | d.br = br 57 | d.lowOffset = 0 58 | d.lowOffsetRepeats = 0 59 | 60 | n, err := d.br.readBits(1) 61 | if err != nil { 62 | return err 63 | } 64 | 
addOld := n > 0 65 | 66 | cl := d.codeLength[:] 67 | if err = readCodeLengthTable(d.br, cl, addOld); err != nil { 68 | return err 69 | } 70 | 71 | d.mainDecoder.init(cl[:mainSize]) 72 | cl = cl[mainSize:] 73 | d.offsetDecoder.init(cl[:offsetSize]) 74 | cl = cl[offsetSize:] 75 | d.lowOffsetDecoder.init(cl[:lowOffsetSize]) 76 | cl = cl[lowOffsetSize:] 77 | d.lengthDecoder.init(cl) 78 | 79 | return nil 80 | } 81 | 82 | func (d *lz29Decoder) readFilterData() (b []byte, err error) { 83 | flags, err := d.br.readBits(8) 84 | if err != nil { 85 | return nil, err 86 | } 87 | 88 | n := flags&7 + 1 89 | switch n { 90 | case 7: 91 | n, err = d.br.readBits(8) 92 | n += 7 93 | if err != nil { 94 | return nil, err 95 | } 96 | case 8: 97 | n, err = d.br.readBits(16) 98 | if err != nil { 99 | return nil, err 100 | } 101 | } 102 | 103 | buf := make([]byte, n+1) 104 | buf[0] = byte(flags) 105 | err = d.br.readFull(buf[1:]) 106 | 107 | return buf, err 108 | } 109 | 110 | func (d *lz29Decoder) readEndOfBlock() error { 111 | n, err := d.br.readBits(1) 112 | if err != nil { 113 | return err 114 | } 115 | if n > 0 { 116 | return errEndOfBlock 117 | } 118 | n, err = d.br.readBits(1) 119 | if err != nil { 120 | return err 121 | } 122 | if n > 0 { 123 | return errEndOfBlockAndFile 124 | } 125 | return errEndOfFile 126 | } 127 | 128 | func (d *lz29Decoder) decodeLength(i int) error { 129 | offset := d.offset[i] 130 | copy(d.offset[1:i+1], d.offset[:i]) 131 | d.offset[0] = offset 132 | 133 | i, err := d.lengthDecoder.readSym(d.br) 134 | if err != nil { 135 | return err 136 | } 137 | d.length = lengthBase[i] + 2 138 | bits := lengthExtraBits[i] 139 | if bits > 0 { 140 | var n int 141 | n, err = d.br.readBits(bits) 142 | if err != nil { 143 | return err 144 | } 145 | d.length += n 146 | } 147 | return nil 148 | } 149 | 150 | func (d *lz29Decoder) decodeShortOffset(i int) error { 151 | copy(d.offset[1:], d.offset[:]) 152 | offset := shortOffsetBase[i] + 1 153 | bits := shortOffsetExtraBits[i] 154 
| if bits > 0 { 155 | n, err := d.br.readBits(bits) 156 | if err != nil { 157 | return err 158 | } 159 | offset += n 160 | } 161 | d.offset[0] = offset 162 | d.length = 2 163 | return nil 164 | } 165 | 166 | func (d *lz29Decoder) decodeOffset(i int) error { 167 | d.length = lengthBase[i] + 3 168 | bits := lengthExtraBits[i] 169 | if bits > 0 { 170 | n, err := d.br.readBits(bits) 171 | if err != nil { 172 | return err 173 | } 174 | d.length += n 175 | } 176 | 177 | var err error 178 | i, err = d.offsetDecoder.readSym(d.br) 179 | if err != nil { 180 | return err 181 | } 182 | offset := offsetBase[i] + 1 183 | bits = offsetExtraBits[i] 184 | 185 | switch { 186 | case bits >= 4: 187 | if bits > 4 { 188 | n, err := d.br.readBits(bits - 4) 189 | if err != nil { 190 | return err 191 | } 192 | offset += n << 4 193 | } 194 | 195 | if d.lowOffsetRepeats > 0 { 196 | d.lowOffsetRepeats-- 197 | offset += d.lowOffset 198 | } else { 199 | n, err := d.lowOffsetDecoder.readSym(d.br) 200 | if err != nil { 201 | return err 202 | } 203 | if n == 16 { 204 | d.lowOffsetRepeats = 15 205 | offset += d.lowOffset 206 | } else { 207 | offset += n 208 | d.lowOffset = n 209 | } 210 | } 211 | case bits > 0: 212 | n, err := d.br.readBits(bits) 213 | if err != nil { 214 | return err 215 | } 216 | offset += n 217 | } 218 | 219 | if offset >= 0x2000 { 220 | d.length++ 221 | if offset >= 0x40000 { 222 | d.length++ 223 | } 224 | } 225 | copy(d.offset[1:], d.offset[:]) 226 | d.offset[0] = offset 227 | return nil 228 | } 229 | 230 | // fill window until full, error, filter found or end of block. 
231 | func (d *lz29Decoder) fill(dr *decodeReader) ([]byte, error) { 232 | for dr.notFull() { 233 | sym, err := d.mainDecoder.readSym(d.br) 234 | if err != nil { 235 | return nil, err 236 | } 237 | 238 | switch { 239 | case sym < 256: // literal 240 | dr.writeByte(byte(sym)) 241 | continue 242 | case sym == 258: // use previous offset and length 243 | dr.copyBytes(d.length, d.offset[0]) 244 | continue 245 | case sym >= 271: 246 | err = d.decodeOffset(sym - 271) 247 | case sym >= 263: 248 | err = d.decodeShortOffset(sym - 263) 249 | case sym >= 259: 250 | err = d.decodeLength(sym - 259) 251 | case sym == 256: 252 | return nil, d.readEndOfBlock() 253 | default: // sym == 257 254 | return d.readFilterData() 255 | } 256 | if err != nil { 257 | return nil, err 258 | } 259 | dr.copyBytes(d.length, d.offset[0]) 260 | } 261 | return nil, nil 262 | } 263 | -------------------------------------------------------------------------------- /decode29_ppm.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | type ppm29Decoder struct { 4 | m model // ppm model 5 | esc byte // escape character 6 | br *rarBitReader 7 | } 8 | 9 | func (d *ppm29Decoder) init(br *rarBitReader) error { 10 | maxOrder, err := br.readBits(7) 11 | if err != nil { 12 | return err 13 | } 14 | reset := maxOrder&0x20 > 0 15 | 16 | // Move any bytes in rarBitReader bit cache back into a byte slice. 17 | // PPM only reads bytes so it is more efficient to read those bytes 18 | // directly from byte slices, bypassing the extra bit shifts. 
19 | br.unshiftBytes() 20 | d.br = br 21 | 22 | var maxMB int 23 | if reset { 24 | var c byte 25 | c, err = d.br.ReadByte() 26 | if err != nil { 27 | return err 28 | } 29 | maxMB = int(c) + 1 30 | } 31 | 32 | if maxOrder&0x40 > 0 { 33 | d.esc, err = d.br.ReadByte() 34 | if err != nil { 35 | return err 36 | } 37 | } 38 | 39 | maxOrder = (maxOrder & 0x1f) + 1 40 | if maxOrder > 16 { 41 | maxOrder = 16 + (maxOrder-16)*3 42 | } 43 | 44 | return d.m.init(d.br, reset, maxOrder, maxMB) 45 | } 46 | 47 | func (d *ppm29Decoder) reset() { 48 | d.esc = 2 49 | } 50 | 51 | func (d *ppm29Decoder) readFilterData() ([]byte, error) { 52 | c, err := d.m.ReadByte() 53 | if err != nil { 54 | return nil, err 55 | } 56 | n := int(c&7) + 1 57 | if n == 7 { 58 | var b byte 59 | b, err = d.m.ReadByte() 60 | if err != nil { 61 | return nil, err 62 | } 63 | n += int(b) 64 | } else if n == 8 { 65 | var b byte 66 | b, err = d.m.ReadByte() 67 | if err != nil { 68 | return nil, err 69 | } 70 | n = int(b) << 8 71 | b, err = d.m.ReadByte() 72 | if err != nil { 73 | return nil, err 74 | } 75 | n |= int(b) 76 | } 77 | 78 | n++ 79 | buf := make([]byte, n) 80 | buf[0] = c 81 | for i := 1; i < n; i++ { 82 | buf[i], err = d.m.ReadByte() 83 | if err != nil { 84 | return nil, err 85 | } 86 | } 87 | return buf, nil 88 | } 89 | 90 | // fill window until full, error, filter found or end of block. 
91 | func (d *ppm29Decoder) fill(dr *decodeReader) ([]byte, error) { 92 | for dr.notFull() { 93 | c, err := d.m.ReadByte() 94 | if err != nil { 95 | return nil, err 96 | } 97 | if c != d.esc { 98 | dr.writeByte(c) 99 | continue 100 | } 101 | c, err = d.m.ReadByte() 102 | if err != nil { 103 | return nil, err 104 | } 105 | 106 | switch c { 107 | case 0: 108 | return nil, errEndOfBlock 109 | case 2: 110 | return nil, errEndOfBlockAndFile 111 | case 3: 112 | return d.readFilterData() 113 | case 4: 114 | offset := 0 115 | for i := 0; i < 3; i++ { 116 | c, err = d.m.ReadByte() 117 | if err != nil { 118 | return nil, err 119 | } 120 | offset = offset<<8 | int(c) 121 | } 122 | len, err := d.m.ReadByte() 123 | if err != nil { 124 | return nil, err 125 | } 126 | dr.copyBytes(int(len)+32, offset+2) 127 | case 5: 128 | len, err := d.m.ReadByte() 129 | if err != nil { 130 | return nil, err 131 | } 132 | dr.copyBytes(int(len)+4, 1) 133 | default: 134 | dr.writeByte(d.esc) 135 | } 136 | } 137 | return nil, nil 138 | } 139 | 140 | func newPPM29Decoder() *ppm29Decoder { 141 | ppm := new(ppm29Decoder) 142 | ppm.reset() 143 | ppm.m.maxOrder = 2 144 | ppm.m.a.init(1) 145 | 146 | return ppm 147 | } 148 | -------------------------------------------------------------------------------- /decode50.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "math/bits" 7 | ) 8 | 9 | const ( 10 | mainSize5 = 306 11 | offsetSize5 = 64 12 | lowoffsetSize5 = 16 13 | lengthSize5 = 44 14 | tableSize5 = mainSize5 + offsetSize5 + lowoffsetSize5 + lengthSize5 15 | 16 | offsetSize7 = 80 17 | tableSize7 = mainSize5 + offsetSize7 + lowoffsetSize5 + lengthSize5 18 | ) 19 | 20 | var ( 21 | ErrUnknownFilter = errors.New("rardecode: unknown V5 filter") 22 | ErrCorruptDecodeHeader = errors.New("rardecode: corrupt decode header") 23 | ) 24 | 25 | // decoder50 implements the decoder interface for RAR 5 compression. 
26 | // Decode input it broken up into 1 or more blocks. Each block starts with 27 | // a header containing block length and optional code length tables to initialize 28 | // the huffman decoders with. 29 | type decoder50 struct { 30 | br rar5BitReader // bit reader for current data block 31 | buf [tableSize7]byte 32 | codeLength []byte 33 | offsetSize int 34 | 35 | lastBlock bool // current block is last block in compressed file 36 | 37 | mainDecoder huffmanDecoder 38 | offsetDecoder huffmanDecoder 39 | lowoffsetDecoder huffmanDecoder 40 | lengthDecoder huffmanDecoder 41 | 42 | offset [4]int 43 | length int 44 | } 45 | 46 | func (d *decoder50) version() int { return decode50Ver } 47 | 48 | func (d *decoder50) init(r byteReader, reset bool, size int64, ver int) { 49 | d.br.reset(r) 50 | d.lastBlock = false 51 | if ver == decode70Ver { 52 | d.codeLength = d.buf[:] 53 | d.offsetSize = offsetSize7 54 | } else { 55 | d.codeLength = d.buf[:tableSize5] 56 | d.offsetSize = offsetSize5 57 | } 58 | 59 | if reset { 60 | clear(d.offset[:]) 61 | d.length = 0 62 | clear(d.codeLength[:]) 63 | } 64 | } 65 | 66 | func (d *decoder50) readBlockHeader() error { 67 | flags, err := d.br.ReadByte() 68 | if err != nil { 69 | return err 70 | } 71 | 72 | bytecount := (flags>>3)&3 + 1 73 | if bytecount == 4 { 74 | return ErrCorruptDecodeHeader 75 | } 76 | 77 | hsum, err := d.br.ReadByte() 78 | if err != nil { 79 | return err 80 | } 81 | 82 | blockBits := int(flags)&0x07 + 1 83 | blockBytes := 0 84 | sum := 0x5a ^ flags 85 | for i := byte(0); i < bytecount; i++ { 86 | var n byte 87 | n, err = d.br.ReadByte() 88 | if err != nil { 89 | return err 90 | } 91 | sum ^= n 92 | blockBytes |= int(n) << (i * 8) 93 | } 94 | if sum != hsum { // bad header checksum 95 | return ErrCorruptDecodeHeader 96 | } 97 | blockBits += (blockBytes - 1) * 8 98 | 99 | // reset the bits limit 100 | d.br.setLimit(blockBits) 101 | d.lastBlock = flags&0x40 > 0 102 | 103 | if flags&0x80 > 0 { 104 | // read new code length 
tables and reinitialize huffman decoders 105 | cl := d.codeLength[:] 106 | err = readCodeLengthTable(&d.br, cl, false) 107 | if err != nil { 108 | return err 109 | } 110 | d.mainDecoder.init(cl[:mainSize5]) 111 | cl = cl[mainSize5:] 112 | d.offsetDecoder.init(cl[:d.offsetSize]) 113 | cl = cl[d.offsetSize:] 114 | d.lowoffsetDecoder.init(cl[:lowoffsetSize5]) 115 | cl = cl[lowoffsetSize5:] 116 | d.lengthDecoder.init(cl) 117 | } 118 | return nil 119 | } 120 | 121 | func slotToLength(br bitReader, n int) (int, error) { 122 | if n >= 8 { 123 | bits := uint8(n/4 - 1) 124 | n = (4 | (n & 3)) << bits 125 | if bits > 0 { 126 | b, err := br.readBits(bits) 127 | if err != nil { 128 | return 0, err 129 | } 130 | n |= b 131 | } 132 | } 133 | n += 2 134 | return n, nil 135 | } 136 | 137 | // readFilter5Data reads an encoded integer used in V5 filters. 138 | func readFilter5Data(br bitReader) (int, error) { 139 | // TODO: should data really be uint? (for 32bit ints). 140 | // It will be masked later anyway by decode window mask. 
141 | bytes, err := br.readBits(2) 142 | if err != nil { 143 | return 0, err 144 | } 145 | bytes++ 146 | 147 | var data int 148 | for i := 0; i < bytes; i++ { 149 | n, err := br.readBits(8) 150 | if err != nil { 151 | return 0, err 152 | } 153 | data |= n << (uint(i) * 8) 154 | } 155 | return data, nil 156 | } 157 | 158 | func (d *decoder50) readFilter(dr *decodeReader) error { 159 | fb := new(filterBlock) 160 | var err error 161 | 162 | fb.offset, err = readFilter5Data(&d.br) 163 | if err != nil { 164 | return err 165 | } 166 | fb.length, err = readFilter5Data(&d.br) 167 | if err != nil { 168 | return err 169 | } 170 | ftype, err := d.br.readBits(3) 171 | if err != nil { 172 | return err 173 | } 174 | switch ftype { 175 | case 0: 176 | n, err := d.br.readBits(5) 177 | if err != nil { 178 | return err 179 | } 180 | fb.filter = func(buf []byte, offset int64) ([]byte, error) { return filterDelta(n+1, buf) } 181 | case 1: 182 | fb.filter = func(buf []byte, offset int64) ([]byte, error) { return filterE8(0xe8, true, buf, offset) } 183 | case 2: 184 | fb.filter = func(buf []byte, offset int64) ([]byte, error) { return filterE8(0xe9, true, buf, offset) } 185 | case 3: 186 | fb.filter = filterArm 187 | default: 188 | return ErrUnknownFilter 189 | } 190 | return dr.queueFilter(fb) 191 | } 192 | 193 | func (d *decoder50) decodeLength(dr *decodeReader, i int) error { 194 | offset := d.offset[i] 195 | copy(d.offset[1:i+1], d.offset[:i]) 196 | d.offset[0] = offset 197 | 198 | sl, err := d.lengthDecoder.readSym(&d.br) 199 | if err != nil { 200 | return err 201 | } 202 | d.length, err = slotToLength(&d.br, sl) 203 | if err == nil { 204 | dr.copyBytes(d.length, d.offset[0]) 205 | } 206 | return err 207 | } 208 | 209 | func (d *decoder50) decodeOffset(dr *decodeReader, i int) error { 210 | length, err := slotToLength(&d.br, i) 211 | if err != nil { 212 | return err 213 | } 214 | 215 | offset := 1 216 | slot, err := d.offsetDecoder.readSym(&d.br) 217 | if err != nil { 218 | return 
err 219 | } 220 | if slot < 4 { 221 | offset += slot 222 | } else { 223 | bitCount := uint8(slot/2 - 1) 224 | offset += (2 | (slot & 1)) << bitCount 225 | 226 | if bitCount >= 4 { 227 | bitCount -= 4 228 | if bitCount > 0 { 229 | if bits.UintSize == 32 { 230 | // bitReader can only read at most intSize-8 bits. 231 | // Split read into two parts. 232 | if bitCount > 24 { 233 | n, err := d.br.readBits(24) 234 | if err != nil { 235 | return err 236 | } 237 | bitCount -= 24 238 | offset += n << (4 + bitCount) 239 | } 240 | } 241 | n, err := d.br.readBits(bitCount) 242 | if err != nil { 243 | return err 244 | } 245 | offset += n << 4 246 | } 247 | n, err := d.lowoffsetDecoder.readSym(&d.br) 248 | if err != nil { 249 | return err 250 | } 251 | offset += n 252 | } else { 253 | n, err := d.br.readBits(bitCount) 254 | if err != nil { 255 | return err 256 | } 257 | offset += n 258 | } 259 | } 260 | if offset > 0x100 { 261 | length++ 262 | if offset > 0x2000 { 263 | length++ 264 | if offset > 0x40000 { 265 | length++ 266 | } 267 | } 268 | } 269 | copy(d.offset[1:], d.offset[:]) 270 | d.offset[0] = offset 271 | d.length = length 272 | dr.copyBytes(d.length, d.offset[0]) 273 | return nil 274 | } 275 | 276 | func (d *decoder50) fill(dr *decodeReader) error { 277 | for dr.notFull() { 278 | sym, err := d.mainDecoder.readSym(&d.br) 279 | if err == nil { 280 | switch { 281 | case sym < 256: 282 | // literal 283 | dr.writeByte(byte(sym)) 284 | continue 285 | case sym >= 262: 286 | err = d.decodeOffset(dr, sym-262) 287 | case sym >= 258: 288 | err = d.decodeLength(dr, sym-258) 289 | case sym == 257: 290 | // use previous offset and length 291 | dr.copyBytes(d.length, d.offset[0]) 292 | continue 293 | default: // sym == 256: 294 | err = d.readFilter(dr) 295 | } 296 | } else if err == io.EOF { 297 | // reached end of the block 298 | if d.lastBlock { 299 | return io.EOF 300 | } 301 | err = d.readBlockHeader() 302 | } 303 | if err != nil { 304 | if err == io.EOF { 305 | return 
ErrDecoderOutOfData 306 | } 307 | return err 308 | } 309 | } 310 | return nil 311 | } 312 | -------------------------------------------------------------------------------- /decode_reader.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import "errors" 4 | 5 | const ( 6 | minWindowSize = 0x40000 7 | maxQueuedFilters = 8192 8 | ) 9 | 10 | var ( 11 | ErrTooManyFilters = errors.New("rardecode: too many filters") 12 | ErrInvalidFilter = errors.New("rardecode: invalid filter") 13 | ErrMultipleDecoders = errors.New("rardecode: multiple decoders in a single archive not supported") 14 | ) 15 | 16 | // filter functions take a byte slice, the current output offset and 17 | // returns transformed data. 18 | type filter func(b []byte, offset int64) ([]byte, error) 19 | 20 | // filterBlock is a block of data to be processed by a filter. 21 | type filterBlock struct { 22 | length int // length of block 23 | offset int // bytes to be read before start of block 24 | filter filter // filter function 25 | } 26 | 27 | // decoder is the interface for decoding compressed data 28 | type decoder interface { 29 | init(r byteReader, reset bool, size int64, ver int) // initialize decoder for current file 30 | fill(dr *decodeReader) error // fill window with decoded data 31 | version() int // decoder version 32 | } 33 | 34 | // decodeReader implements io.Reader for decoding compressed data in RAR archives. 
35 | type decodeReader struct { 36 | tot int64 // total bytes read from window 37 | outbuf []byte // buffered output 38 | buf []byte // filter buffer 39 | fl []*filterBlock // list of filters each with offset relative to previous in list 40 | dec decoder // decoder being used to unpack file 41 | err error // current decoder error output 42 | br byteReader 43 | 44 | win []byte // sliding window buffer 45 | size int // win length 46 | r int // index in win for reads (beginning) 47 | w int // index in win for writes (end) 48 | } 49 | 50 | func (d *decodeReader) init(r byteReader, ver int, size int, reset bool, unPackedSize int64) error { 51 | d.outbuf = nil 52 | d.tot = 0 53 | d.err = nil 54 | if reset { 55 | d.fl = nil 56 | } 57 | d.br = r 58 | 59 | // initialize window 60 | size = max(size, minWindowSize) 61 | if size > len(d.win) { 62 | b := make([]byte, size) 63 | if reset { 64 | d.w = 0 65 | } else if len(d.win) > 0 { 66 | n := copy(b, d.win[d.w:]) 67 | n += copy(b[n:], d.win[:d.w]) 68 | d.w = n 69 | } 70 | d.win = b 71 | d.size = size 72 | } else if reset { 73 | clear(d.win[:]) 74 | d.w = 0 75 | } 76 | d.r = d.w 77 | 78 | // initialize decoder 79 | if d.dec == nil { 80 | switch ver { 81 | case decode29Ver: 82 | d.dec = new(decoder29) 83 | case decode50Ver, decode70Ver: 84 | d.dec = new(decoder50) 85 | case decode20Ver: 86 | d.dec = new(decoder20) 87 | default: 88 | return ErrUnknownDecoder 89 | } 90 | } else if d.dec.version() != ver { 91 | return ErrMultipleDecoders 92 | } 93 | d.dec.init(r, reset, unPackedSize, ver) 94 | return nil 95 | } 96 | 97 | // notFull returns if the window is not full 98 | func (d *decodeReader) notFull() bool { return d.w < d.size } 99 | 100 | // writeByte writes c to the end of the window 101 | func (d *decodeReader) writeByte(c byte) { 102 | d.win[d.w] = c 103 | d.w++ 104 | } 105 | 106 | // copyBytes copies len bytes at off distance from the end 107 | // to the end of the window. 
108 | func (d *decodeReader) copyBytes(length, offset int) { 109 | length %= d.size 110 | if length < 0 { 111 | length += d.size 112 | } 113 | 114 | i := (d.w - offset) % d.size 115 | if i < 0 { 116 | i += d.size 117 | } 118 | iend := i + length 119 | if i > d.w { 120 | if iend > d.size { 121 | iend = d.size 122 | } 123 | n := copy(d.win[d.w:], d.win[i:iend]) 124 | d.w += n 125 | length -= n 126 | if length == 0 { 127 | return 128 | } 129 | iend = length 130 | i = 0 131 | } 132 | if iend <= d.w { 133 | n := copy(d.win[d.w:], d.win[i:iend]) 134 | d.w += n 135 | return 136 | } 137 | for length > 0 && d.w < d.size { 138 | d.win[d.w] = d.win[i] 139 | d.w++ 140 | i++ 141 | length-- 142 | } 143 | } 144 | 145 | // queueFilter adds a filterBlock to the end decodeReader's filters. 146 | func (d *decodeReader) queueFilter(f *filterBlock) error { 147 | if len(d.fl) >= maxQueuedFilters { 148 | return ErrTooManyFilters 149 | } 150 | // make offset relative to read index (from write index) 151 | f.offset += d.w - d.r 152 | // make offset relative to previous filter in list 153 | for _, fb := range d.fl { 154 | if f.offset < fb.offset { 155 | // filter block must not start before previous filter 156 | return ErrInvalidFilter 157 | } 158 | f.offset -= fb.offset 159 | } 160 | // offset & length must be < window size 161 | f.offset %= d.size 162 | if f.offset < 0 { 163 | f.offset += d.size 164 | } 165 | f.length %= d.size 166 | if f.length < 0 { 167 | f.length += d.size 168 | } 169 | d.fl = append(d.fl, f) 170 | return nil 171 | } 172 | 173 | func (d *decodeReader) readErr() error { 174 | err := d.err 175 | d.err = nil 176 | return err 177 | } 178 | 179 | // fill the decodeReader window 180 | func (d *decodeReader) fill() error { 181 | if d.err != nil { 182 | return d.readErr() 183 | } 184 | if d.w == d.size { 185 | // wrap to beginning of buffer 186 | d.r = 0 187 | d.w = 0 188 | } 189 | d.err = d.dec.fill(d) // fill window using decoder 190 | if d.w == d.r { 191 | return 
d.readErr() 192 | } 193 | return nil 194 | } 195 | 196 | // bufBytes returns n bytes from the window in a new buffer. 197 | func (d *decodeReader) bufBytes(n int) ([]byte, error) { 198 | if cap(d.buf) < n { 199 | d.buf = make([]byte, n) 200 | } 201 | // copy into buffer 202 | ns := 0 203 | for { 204 | nn := copy(d.buf[ns:n], d.win[d.r:d.w]) 205 | d.r += nn 206 | ns += nn 207 | if ns == n { 208 | break 209 | } 210 | if err := d.fill(); err != nil { 211 | return nil, err 212 | } 213 | } 214 | return d.buf[:n], nil 215 | } 216 | 217 | // processFilters processes any filters valid at the current read index 218 | // and returns the output in outbuf. 219 | func (d *decodeReader) processFilters() ([]byte, error) { 220 | f := d.fl[0] 221 | flen := f.length 222 | 223 | // get filter input 224 | b, err := d.bufBytes(flen) 225 | if err != nil { 226 | return nil, err 227 | } 228 | for { 229 | d.fl = d.fl[1:] 230 | // run filter passing buffer and total bytes read so far 231 | b, err = f.filter(b, d.tot) 232 | if err != nil { 233 | return nil, err 234 | } 235 | if len(d.fl) == 0 { 236 | d.fl = nil 237 | return b, nil 238 | } 239 | // get next filter 240 | f = d.fl[0] 241 | if f.offset != 0 { 242 | // next filter not at current offset 243 | f.offset -= flen 244 | return b, nil 245 | } 246 | if f.length != len(b) { 247 | return nil, ErrInvalidFilter 248 | } 249 | } 250 | } 251 | 252 | // bytes returns a decoded byte slice or an error. 
253 | func (d *decodeReader) bytes() ([]byte, error) { 254 | // fill window if needed 255 | if d.w == d.r { 256 | if err := d.fill(); err != nil { 257 | return nil, err 258 | } 259 | } 260 | n := d.w - d.r 261 | 262 | // return current unread bytes if there are no filters 263 | if len(d.fl) == 0 { 264 | b := d.win[d.r:d.w] 265 | d.r = d.w 266 | d.tot += int64(n) 267 | return b, nil 268 | } 269 | 270 | // check filters 271 | f := d.fl[0] 272 | if f.offset < 0 { 273 | return nil, ErrInvalidFilter 274 | } 275 | if f.offset > 0 { 276 | // filter not at current read index, output bytes before it 277 | n = min(f.offset, n) 278 | b := d.win[d.r : d.r+n] 279 | d.r += n 280 | f.offset -= n 281 | d.tot += int64(n) 282 | return b, nil 283 | } 284 | 285 | // process filters at current index 286 | b, err := d.processFilters() 287 | if cap(b) > cap(d.buf) { 288 | // filter returned a larger buffer, cache it 289 | d.buf = b 290 | } 291 | 292 | d.tot += int64(len(b)) 293 | return b, err 294 | } 295 | 296 | // Read decodes data and stores it in p. 297 | func (d *decodeReader) Read(p []byte) (int, error) { 298 | var err error 299 | if len(d.outbuf) == 0 { 300 | d.outbuf, err = d.bytes() 301 | if err != nil { 302 | return 0, err 303 | } 304 | } 305 | n := copy(p, d.outbuf) 306 | d.outbuf = d.outbuf[n:] 307 | return n, err 308 | } 309 | -------------------------------------------------------------------------------- /decrypt_reader.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "crypto/aes" 5 | "crypto/cipher" 6 | "io" 7 | ) 8 | 9 | // cipherBlockSliceReader is a sliceReader that users a cipher.BlockMode to decrypt the input. 
10 | type cipherBlockSliceReader struct { 11 | r sliceReader 12 | mode cipher.BlockMode 13 | n int // bytes encrypted but not read 14 | } 15 | 16 | func (c *cipherBlockSliceReader) sizeInBlocks(n int) int { 17 | bs := c.mode.BlockSize() 18 | if rem := n % bs; rem > 0 { 19 | n += bs - rem 20 | } 21 | return n 22 | } 23 | 24 | func (c *cipherBlockSliceReader) peek(n int) ([]byte, error) { 25 | bn := c.sizeInBlocks(n) 26 | b, err := c.r.peek(bn) 27 | if err != nil { 28 | if err == io.EOF && len(b) > 0 { 29 | err = io.ErrUnexpectedEOF 30 | } 31 | return nil, err 32 | } 33 | if c.n < bn { 34 | c.mode.CryptBlocks(b[c.n:], b[c.n:]) 35 | c.n = bn 36 | } 37 | return b[:n], nil 38 | } 39 | 40 | // readSlice returns the next n bytes of decrypted input. 41 | // If n is not a multiple of the block size, the trailing bytes 42 | // of the last decrypted block will be discarded. 43 | func (c *cipherBlockSliceReader) readSlice(n int) ([]byte, error) { 44 | bn := c.sizeInBlocks(n) 45 | b, err := c.r.readSlice(bn) 46 | if err != nil { 47 | return nil, err 48 | } 49 | if c.n < bn { 50 | c.mode.CryptBlocks(b[c.n:], b[c.n:]) 51 | c.n = 0 52 | } else { 53 | c.n -= bn 54 | } 55 | // ignore padding at end of last block 56 | b = b[:n] 57 | return b, nil 58 | } 59 | 60 | // newAesSliceReader creates a sliceReader that uses AES to decrypt the input 61 | func newAesSliceReader(r sliceReader, key, iv []byte) *cipherBlockSliceReader { 62 | block, err := aes.NewCipher(key) 63 | if err != nil { 64 | panic(err) 65 | } 66 | mode := cipher.NewCBCDecrypter(block, iv) 67 | return &cipherBlockSliceReader{r: r, mode: mode} 68 | } 69 | 70 | // cipherBlockReader implements Block Mode decryption of an io.Reader object. 
71 | type cipherBlockReader struct { 72 | r byteReader 73 | mode cipher.BlockMode 74 | getMode func() (cipher.BlockMode, error) 75 | inbuf []byte // raw input blocks not yet decrypted 76 | outbuf []byte // output buffer used when output slice < block size 77 | block []byte // output buffer for a single block 78 | } 79 | 80 | // readBlock returns a single decrypted block. 81 | func (cr *cipherBlockReader) readBlock() ([]byte, error) { 82 | bs := len(cr.block) 83 | if len(cr.inbuf) >= bs { 84 | cr.mode.CryptBlocks(cr.block, cr.inbuf[:bs]) 85 | cr.inbuf = cr.inbuf[bs:] 86 | } else { 87 | n := copy(cr.block, cr.inbuf) 88 | cr.inbuf = nil 89 | _, err := io.ReadFull(cr.r, cr.block[n:]) 90 | if err != nil { 91 | return nil, err 92 | } 93 | cr.mode.CryptBlocks(cr.block, cr.block) 94 | } 95 | return cr.block, nil 96 | } 97 | 98 | // Read reads and decrypts data into p. 99 | // If the input is not a multiple of the cipher block size, 100 | // the trailing bytes will be ignored. 101 | func (cr *cipherBlockReader) Read(p []byte) (int, error) { 102 | if len(cr.outbuf) > 0 { 103 | n := copy(p, cr.outbuf) 104 | cr.outbuf = cr.outbuf[n:] 105 | return n, nil 106 | } 107 | // get input blocks 108 | for len(cr.inbuf) == 0 { 109 | var err error 110 | cr.inbuf, err = cr.r.bytes() 111 | if err != nil { 112 | return 0, err 113 | } 114 | } 115 | if cr.mode == nil { 116 | var err error 117 | cr.mode, err = cr.getMode() 118 | if err != nil { 119 | return 0, err 120 | } 121 | cr.block = make([]byte, cr.mode.BlockSize()) 122 | } 123 | bs := cr.mode.BlockSize() 124 | n := len(cr.inbuf) 125 | l := len(p) 126 | if n < bs || l < bs { 127 | // Next encrypted block spans volumes or Read buffer is too small 128 | // to fit a single block. Decrypt a single block and store the 129 | // leftover in outbuf. 
130 | b, err := cr.readBlock() 131 | if err != nil { 132 | return 0, err 133 | } 134 | n = copy(p, b) 135 | cr.outbuf = b[n:] 136 | return n, nil 137 | } 138 | // output buffer smaller than input 139 | n = min(l, n) 140 | // round down to block size 141 | n -= n % bs 142 | cr.mode.CryptBlocks(p[:n], cr.inbuf[:n]) 143 | cr.inbuf = cr.inbuf[n:] 144 | return n, nil 145 | } 146 | 147 | // bytes returns a byte slice of decrypted data. 148 | func (cr *cipherBlockReader) bytes() ([]byte, error) { 149 | if len(cr.outbuf) > 0 { 150 | b := cr.outbuf 151 | cr.outbuf = nil 152 | return b, nil 153 | } 154 | // get more input 155 | for len(cr.inbuf) == 0 { 156 | var err error 157 | cr.inbuf, err = cr.r.bytes() 158 | if err != nil { 159 | return nil, err 160 | } 161 | } 162 | if cr.mode == nil { 163 | var err error 164 | cr.mode, err = cr.getMode() 165 | if err != nil { 166 | return nil, err 167 | } 168 | cr.block = make([]byte, cr.mode.BlockSize()) 169 | } 170 | bs := cr.mode.BlockSize() 171 | if len(cr.inbuf) < bs { 172 | // next encrypted block spans volumes 173 | return cr.readBlock() 174 | } 175 | n := len(cr.inbuf) 176 | n -= n % bs 177 | // get input buffer and round down to nearest block boundary 178 | b := cr.inbuf[:n] 179 | cr.inbuf = cr.inbuf[n:] 180 | cr.mode.CryptBlocks(b, b) 181 | return b, nil 182 | } 183 | 184 | func newCipherBlockReader(r byteReader, getMode func() (cipher.BlockMode, error)) *cipherBlockReader { 185 | c := &cipherBlockReader{r: r, getMode: getMode} 186 | return c 187 | } 188 | 189 | // newAesDecryptReader returns a cipherBlockReader that decrypts input from a given io.Reader using AES. 
190 | func newAesDecryptReader(r byteReader, h *fileBlockHeader) *cipherBlockReader { 191 | getMode := func() (cipher.BlockMode, error) { 192 | if h.key == nil { 193 | err := h.genKeys() 194 | if err != nil { 195 | return nil, err 196 | } 197 | } 198 | block, err := aes.NewCipher(h.key) 199 | if err != nil { 200 | return nil, err 201 | } 202 | return cipher.NewCBCDecrypter(block, h.iv), nil 203 | } 204 | return newCipherBlockReader(r, getMode) 205 | } 206 | -------------------------------------------------------------------------------- /filters.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "encoding/binary" 5 | "hash/crc32" 6 | "io" 7 | "math" 8 | ) 9 | 10 | const ( 11 | fileSize = 0x1000000 12 | 13 | vmGlobalAddr = 0x3C000 14 | vmGlobalSize = 0x02000 15 | vmFixedGlobalSize = 0x40 16 | ) 17 | 18 | // v3Filter is the interface type for RAR V3 filters. 19 | // v3Filter performs the same function as the filter type, except that it also takes 20 | // the initial register values r, and global data as input for the RAR V3 VM. 21 | type v3Filter func(r map[int]uint32, global, buf []byte, offset int64) ([]byte, error) 22 | 23 | var ( 24 | // standardV3Filters is a list of known filters. We can replace the use of a vm 25 | // filter with a custom filter function. 
26 | standardV3Filters = []struct { 27 | crc uint32 // crc of code byte slice for filter 28 | len int // length of code byte slice for filter 29 | f v3Filter // replacement filter function 30 | }{ 31 | {0xad576887, 53, e8FilterV3}, 32 | {0x3cd7e57e, 57, e8e9FilterV3}, 33 | {0x3769893f, 120, itaniumFilterV3}, 34 | {0x0e06077d, 29, deltaFilterV3}, 35 | {0x1c2c5dc8, 149, filterRGBV3}, 36 | {0xbc85e701, 216, filterAudioV3}, 37 | } 38 | 39 | // itanium filter byte masks 40 | byteMask = []int{4, 4, 6, 6, 0, 0, 7, 7, 4, 4, 0, 0, 4, 4, 0, 0} 41 | ) 42 | 43 | func filterE8(c byte, v5 bool, buf []byte, offset int64) ([]byte, error) { 44 | off := int32(offset) 45 | for b := buf; len(b) >= 5; { 46 | ch := b[0] 47 | b = b[1:] 48 | off++ 49 | if ch != 0xe8 && ch != c { 50 | continue 51 | } 52 | if v5 { 53 | off %= fileSize 54 | } 55 | addr := int32(binary.LittleEndian.Uint32(b)) 56 | if addr < 0 { 57 | if addr+off >= 0 { 58 | binary.LittleEndian.PutUint32(b, uint32(addr+fileSize)) 59 | } 60 | } else if addr < fileSize { 61 | binary.LittleEndian.PutUint32(b, uint32(addr-off)) 62 | } 63 | off += 4 64 | b = b[4:] 65 | } 66 | return buf, nil 67 | } 68 | 69 | func e8FilterV3(r map[int]uint32, global, buf []byte, offset int64) ([]byte, error) { 70 | return filterE8(0xe8, false, buf, offset) 71 | } 72 | 73 | func e8e9FilterV3(r map[int]uint32, global, buf []byte, offset int64) ([]byte, error) { 74 | return filterE8(0xe9, false, buf, offset) 75 | } 76 | 77 | func getBits(buf []byte, pos, count uint) uint32 { 78 | n := binary.LittleEndian.Uint32(buf[pos/8:]) 79 | n >>= pos & 7 80 | mask := uint32(math.MaxUint32) >> (32 - count) 81 | return n & mask 82 | } 83 | 84 | func setBits(buf []byte, pos, count uint, bits uint32) { 85 | mask := uint32(math.MaxUint32) >> (32 - count) 86 | mask <<= pos & 7 87 | bits <<= pos & 7 88 | n := binary.LittleEndian.Uint32(buf[pos/8:]) 89 | n = (n & ^mask) | (bits & mask) 90 | binary.LittleEndian.PutUint32(buf[pos/8:], n) 91 | } 92 | 93 | func 
itaniumFilterV3(r map[int]uint32, global, buf []byte, offset int64) ([]byte, error) { 94 | fileOffset := uint32(offset) >> 4 95 | 96 | for b := buf; len(b) > 21; b = b[16:] { 97 | c := int(b[0]&0x1f) - 0x10 98 | if c >= 0 { 99 | mask := byteMask[c] 100 | if mask != 0 { 101 | for i := uint(0); i <= 2; i++ { 102 | if mask&(1<= 2*l { 123 | res = buf[l : 2*l] // use unused capacity 124 | } else { 125 | res = make([]byte, l, 2*l) 126 | } 127 | 128 | i := 0 129 | for j := 0; j < n; j++ { 130 | var c byte 131 | for k := j; k < len(res); k += n { 132 | c -= buf[i] 133 | i++ 134 | res[k] = c 135 | } 136 | } 137 | return res, nil 138 | } 139 | 140 | func deltaFilterV3(r map[int]uint32, global, buf []byte, offset int64) ([]byte, error) { 141 | return filterDelta(int(r[0]), buf) 142 | } 143 | 144 | func abs(n int) int { 145 | if n < 0 { 146 | n = -n 147 | } 148 | return n 149 | } 150 | 151 | func filterRGBV3(r map[int]uint32, global, buf []byte, offset int64) ([]byte, error) { 152 | width := int(r[0] - 3) 153 | posR := int(r[1]) 154 | if posR < 0 || width < 0 { 155 | return buf, nil 156 | } 157 | 158 | var res []byte 159 | l := len(buf) 160 | if cap(buf) >= 2*l { 161 | res = buf[l : 2*l] // use unused capacity 162 | } else { 163 | res = make([]byte, l, 2*l) 164 | } 165 | 166 | for c := 0; c < 3; c++ { 167 | var prevByte int 168 | for i := c; i < len(res); i += 3 { 169 | var predicted int 170 | upperPos := i - width 171 | if upperPos >= 3 { 172 | upperByte := int(res[upperPos]) 173 | upperLeftByte := int(res[upperPos-3]) 174 | predicted = prevByte + upperByte - upperLeftByte 175 | pa := abs(predicted - prevByte) 176 | pb := abs(predicted - upperByte) 177 | pc := abs(predicted - upperLeftByte) 178 | if pa <= pb && pa <= pc { 179 | predicted = prevByte 180 | } else if pb <= pc { 181 | predicted = upperByte 182 | } else { 183 | predicted = upperLeftByte 184 | } 185 | } else { 186 | predicted = prevByte 187 | } 188 | prevByte = (predicted - int(buf[0])) & 0xFF 189 | res[i] = 
uint8(prevByte) 190 | buf = buf[1:] 191 | } 192 | 193 | } 194 | for i := posR; i < len(res)-2; i += 3 { 195 | c := res[i+1] 196 | res[i] += c 197 | res[i+2] += c 198 | } 199 | return res, nil 200 | } 201 | 202 | func filterAudioV3(r map[int]uint32, global, buf []byte, offset int64) ([]byte, error) { 203 | var res []byte 204 | l := len(buf) 205 | if cap(buf) >= 2*l { 206 | res = buf[l : 2*l] // use unused capacity 207 | } else { 208 | res = make([]byte, l, 2*l) 209 | } 210 | 211 | chans := int(r[0]) 212 | for c := 0; c < chans; c++ { 213 | var prevByte, byteCount int 214 | var diff [7]int 215 | var d, k [3]int 216 | 217 | for i := c; i < len(res); i += chans { 218 | predicted := prevByte<<3 + k[0]*d[0] + k[1]*d[1] + k[2]*d[2] 219 | predicted = int(int8(predicted >> 3)) 220 | 221 | curByte := int(int8(buf[0])) 222 | buf = buf[1:] 223 | predicted -= curByte 224 | res[i] = uint8(predicted) 225 | 226 | dd := curByte << 3 227 | diff[0] += abs(dd) 228 | diff[1] += abs(dd - d[0]) 229 | diff[2] += abs(dd + d[0]) 230 | diff[3] += abs(dd - d[1]) 231 | diff[4] += abs(dd + d[1]) 232 | diff[5] += abs(dd - d[2]) 233 | diff[6] += abs(dd + d[2]) 234 | 235 | prevDelta := int(int8(predicted - prevByte)) 236 | prevByte = predicted 237 | d[2] = d[1] 238 | d[1] = prevDelta - d[0] 239 | d[0] = prevDelta 240 | 241 | if byteCount&0x1f == 0 { 242 | min := diff[0] 243 | diff[0] = 0 244 | n := 0 245 | for j := 1; j < len(diff); j++ { 246 | if diff[j] < min { 247 | min = diff[j] 248 | n = j 249 | } 250 | diff[j] = 0 251 | } 252 | n-- 253 | if n >= 0 { 254 | m := n / 2 255 | if n%2 == 0 { 256 | if k[m] >= -16 { 257 | k[m]-- 258 | } 259 | } else { 260 | if k[m] < 16 { 261 | k[m]++ 262 | } 263 | } 264 | } 265 | } 266 | byteCount++ 267 | } 268 | 269 | } 270 | return res, nil 271 | } 272 | 273 | func filterArm(buf []byte, offset int64) ([]byte, error) { 274 | for i := 0; len(buf)-i > 3; i += 4 { 275 | if buf[i+3] == 0xeb { 276 | n := uint(buf[i]) 277 | n += uint(buf[i+1]) * 0x100 278 | n += 
uint(buf[i+2]) * 0x10000 279 | n -= (uint(offset) + uint(i)) / 4 280 | buf[i] = byte(n) 281 | buf[i+1] = byte(n >> 8) 282 | buf[i+2] = byte(n >> 16) 283 | } 284 | } 285 | return buf, nil 286 | } 287 | 288 | type vmFilter struct { 289 | execCount uint32 290 | global []byte 291 | static []byte 292 | code []command 293 | } 294 | 295 | // execute implements v3filter type for VM based RAR 3 filters. 296 | func (f *vmFilter) execute(r map[int]uint32, global, buf []byte, offset int64) ([]byte, error) { 297 | if len(buf) > vmGlobalAddr { 298 | return buf, ErrInvalidFilter 299 | } 300 | v := newVM(buf) 301 | 302 | // register setup 303 | v.r[3] = vmGlobalAddr 304 | v.r[4] = uint32(len(buf)) 305 | v.r[5] = f.execCount 306 | for i, n := range r { 307 | v.r[i] = n 308 | } 309 | 310 | // vm global data memory block 311 | vg := v.m[vmGlobalAddr : vmGlobalAddr+vmGlobalSize] 312 | 313 | // initialize fixed global memory 314 | for i, n := range v.r[:vmRegs-1] { 315 | binary.LittleEndian.PutUint32(vg[i*4:], n) 316 | } 317 | binary.LittleEndian.PutUint32(vg[0x1c:], uint32(len(buf))) 318 | binary.LittleEndian.PutUint64(vg[0x24:], uint64(offset)) 319 | binary.LittleEndian.PutUint32(vg[0x2c:], f.execCount) 320 | 321 | // registers 322 | v.r[6] = uint32(offset) 323 | 324 | // copy program global memory 325 | var n int 326 | if len(f.global) > 0 { 327 | n = copy(vg[vmFixedGlobalSize:], f.global) // use saved global instead 328 | } else { 329 | n = copy(vg[vmFixedGlobalSize:], global) 330 | } 331 | copy(vg[vmFixedGlobalSize+n:], f.static) 332 | 333 | v.execute(f.code) 334 | 335 | f.execCount++ 336 | 337 | // keep largest global buffer 338 | if cap(global) > cap(f.global) { 339 | f.global = global[:0] 340 | } else if len(f.global) > 0 { 341 | f.global = f.global[:0] 342 | } 343 | 344 | // check for global data to be saved for next program execution 345 | globalSize := binary.LittleEndian.Uint32(vg[0x30:]) 346 | if globalSize > 0 { 347 | if globalSize > vmGlobalSize-vmFixedGlobalSize { 348 | 
globalSize = vmGlobalSize - vmFixedGlobalSize 349 | } 350 | if cap(f.global) < int(globalSize) { 351 | f.global = make([]byte, globalSize) 352 | } else { 353 | f.global = f.global[:globalSize] 354 | } 355 | copy(f.global, vg[vmFixedGlobalSize:]) 356 | } 357 | 358 | // find program output 359 | length := binary.LittleEndian.Uint32(vg[0x1c:]) & vmMask 360 | start := binary.LittleEndian.Uint32(vg[0x20:]) & vmMask 361 | if start+length > vmSize { 362 | // TODO: error 363 | start = 0 364 | length = 0 365 | } 366 | if start != 0 && cap(v.m) > cap(buf) { 367 | // Initial buffer was to small for vm. 368 | // Copy output to beginning of vm memory so that decodeReader 369 | // will re-use the newly allocated vm memory and we will not 370 | // have to reallocate again next time. 371 | copy(v.m, v.m[start:start+length]) 372 | start = 0 373 | } 374 | return v.m[start : start+length], nil 375 | } 376 | 377 | // getV3Filter returns a V3 filter function from a code byte slice. 378 | func getV3Filter(code []byte) (v3Filter, error) { 379 | // check if filter is a known standard filter 380 | c := crc32.ChecksumIEEE(code) 381 | for _, f := range standardV3Filters { 382 | if f.crc == c && f.len == len(code) { 383 | return f.f, nil 384 | } 385 | } 386 | 387 | // create new vm filter 388 | f := new(vmFilter) 389 | r := newRarBitReader(newBufByteReader(code[1:])) // skip first xor byte check 390 | 391 | // read static data 392 | n, err := r.readBits(1) 393 | if err != nil { 394 | return nil, err 395 | } 396 | if n > 0 { 397 | var m uint32 398 | m, err = r.readUint32() 399 | if err != nil { 400 | return nil, err 401 | } 402 | f.static = make([]byte, m+1) 403 | err = r.readFull(f.static) 404 | if err != nil { 405 | return nil, err 406 | } 407 | } 408 | 409 | f.code, err = readCommands(r) 410 | if err == io.EOF { 411 | err = nil 412 | } 413 | 414 | return f.execute, err 415 | } 416 | -------------------------------------------------------------------------------- /go.mod: 
-------------------------------------------------------------------------------- 1 | module github.com/nwaples/rardecode/v2 2 | 3 | go 1.21 4 | -------------------------------------------------------------------------------- /huffman.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | ) 7 | 8 | const ( 9 | maxCodeLength = 15 // maximum code length in bits 10 | maxQuickBits = 10 11 | maxQuickSize = 1 << maxQuickBits 12 | ) 13 | 14 | var ( 15 | ErrHuffDecodeFailed = errors.New("rardecode: huffman decode failed") 16 | ErrInvalidLengthTable = errors.New("rardecode: invalid huffman code length table") 17 | ) 18 | 19 | type huffmanDecoder struct { 20 | limit [maxCodeLength + 1]uint16 21 | pos [maxCodeLength + 1]uint16 22 | symbol []uint16 23 | min uint8 24 | quickbits uint8 25 | quicklen [maxQuickSize]uint8 26 | quicksym [maxQuickSize]uint16 27 | } 28 | 29 | func (h *huffmanDecoder) init(codeLengths []byte) { 30 | count := make([]uint16, maxCodeLength+1) 31 | 32 | for _, n := range codeLengths { 33 | if n == 0 { 34 | continue 35 | } 36 | count[n]++ 37 | } 38 | 39 | h.pos[0] = 0 40 | h.limit[0] = 0 41 | h.min = 0 42 | for i := uint8(1); i <= maxCodeLength; i++ { 43 | h.limit[i] = h.limit[i-1] + count[i]<<(maxCodeLength-i) 44 | h.pos[i] = h.pos[i-1] + count[i-1] 45 | if h.min == 0 && h.limit[i] > 0 { 46 | h.min = i 47 | } 48 | } 49 | 50 | if cap(h.symbol) >= len(codeLengths) { 51 | h.symbol = h.symbol[:len(codeLengths)] 52 | clear(h.symbol) 53 | } else { 54 | h.symbol = make([]uint16, len(codeLengths)) 55 | } 56 | 57 | copy(count, h.pos[:]) 58 | for i, n := range codeLengths { 59 | if n != 0 { 60 | h.symbol[count[n]] = uint16(i) 61 | count[n]++ 62 | } 63 | } 64 | 65 | if len(codeLengths) >= 298 { 66 | h.quickbits = maxQuickBits 67 | } else { 68 | h.quickbits = maxQuickBits - 3 69 | } 70 | 71 | bits := uint8(1) 72 | for i := uint16(0); i < 1<= h.limit[bits] && bits < 
maxCodeLength { 76 | bits++ 77 | } 78 | h.quicklen[i] = bits 79 | 80 | dist := v - h.limit[bits-1] 81 | dist >>= (maxCodeLength - bits) 82 | 83 | pos := int(h.pos[bits]) + int(dist) 84 | if pos < len(h.symbol) { 85 | h.quicksym[i] = h.symbol[pos] 86 | } else { 87 | h.quicksym[i] = 0 88 | } 89 | } 90 | } 91 | 92 | func (h *huffmanDecoder) readSym(r bitReader) (int, error) { 93 | var bits uint8 94 | var v uint16 95 | n, err := r.readBits(maxCodeLength) 96 | if err != nil { 97 | if err != io.EOF { 98 | return 0, err 99 | } 100 | // fall back to 1 bit at a time if we read past EOF 101 | for bits = 1; bits <= maxCodeLength; bits++ { 102 | b, err := r.readBits(1) 103 | if err != nil { 104 | return 0, err // not enough bits return error 105 | } 106 | v |= uint16(b) << (maxCodeLength - bits) 107 | if v < h.limit[bits] { 108 | break 109 | } 110 | } 111 | } else { 112 | v = uint16(n) 113 | if v < h.limit[h.quickbits] { 114 | i := v >> (maxCodeLength - h.quickbits) 115 | r.unreadBits(maxCodeLength - h.quicklen[i]) 116 | return int(h.quicksym[i]), nil 117 | } 118 | 119 | for bits = h.min; bits < maxCodeLength; bits++ { 120 | if v < h.limit[bits] { 121 | r.unreadBits(maxCodeLength - bits) 122 | break 123 | } 124 | } 125 | } 126 | 127 | dist := v - h.limit[bits-1] 128 | dist >>= maxCodeLength - bits 129 | 130 | pos := int(h.pos[bits]) + int(dist) 131 | if pos >= len(h.symbol) { 132 | return 0, ErrHuffDecodeFailed 133 | } 134 | 135 | return int(h.symbol[pos]), nil 136 | } 137 | 138 | // readCodeLengthTable reads a new code length table into codeLength from br. 139 | // If addOld is set the old table is added to the new one. 
140 | func readCodeLengthTable(br bitReader, codeLength []byte, addOld bool) error { 141 | var bitlength [20]byte 142 | for i := 0; i < len(bitlength); i++ { 143 | n, err := br.readBits(4) 144 | if err != nil { 145 | return err 146 | } 147 | if n == 0xf { 148 | cnt, err := br.readBits(4) 149 | if err != nil { 150 | return err 151 | } 152 | if cnt > 0 { 153 | // array already zero'd dont need to explicitly set 154 | i += cnt + 1 155 | continue 156 | } 157 | } 158 | bitlength[i] = byte(n) 159 | } 160 | 161 | var bl huffmanDecoder 162 | bl.init(bitlength[:]) 163 | 164 | for i := 0; i < len(codeLength); i++ { 165 | l, err := bl.readSym(br) 166 | if err != nil { 167 | return err 168 | } 169 | 170 | if l < 16 { 171 | if addOld { 172 | codeLength[i] = (codeLength[i] + byte(l)) & 0xf 173 | } else { 174 | codeLength[i] = byte(l) 175 | } 176 | continue 177 | } 178 | 179 | var count int 180 | var value byte 181 | 182 | switch l { 183 | case 16, 18: 184 | count, err = br.readBits(3) 185 | count += 3 186 | default: 187 | count, err = br.readBits(7) 188 | count += 11 189 | } 190 | if err != nil { 191 | return err 192 | } 193 | if l < 18 { 194 | if i == 0 { 195 | return ErrInvalidLengthTable 196 | } 197 | value = codeLength[i-1] 198 | } 199 | for ; count > 0 && i < len(codeLength); i++ { 200 | codeLength[i] = value 201 | count-- 202 | } 203 | i-- 204 | } 205 | return nil 206 | } 207 | -------------------------------------------------------------------------------- /ppm_model.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "math" 7 | ) 8 | 9 | const ( 10 | rangeBottom = 1 << 15 11 | rangeTop = 1 << 24 12 | 13 | maxFreq = 124 14 | 15 | intBits = 7 16 | periodBits = 7 17 | binScale = 1 << (intBits + periodBits) 18 | 19 | n0 = 1 20 | n1 = 4 21 | n2 = 4 22 | n3 = 4 23 | n4 = (128 + 3 - 1*n1 - 2*n2 - 3*n3) / 4 24 | nIndexes = n0 + n1 + n2 + n3 + n4 25 | 26 | // memory is allocated in 
units. A unit contains unitSize number of bytes. 27 | // A unit can store one context or two states. 28 | unitSize = 12 29 | 30 | freeMark = -1 31 | ) 32 | 33 | var ( 34 | ErrCorruptPPM = errors.New("rardecode: corrupt ppm data") 35 | 36 | expEscape = []byte{25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2} 37 | initBinEsc = []uint16{0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051} 38 | 39 | ns2Index [256]byte 40 | ns2BSIndex [256]byte 41 | 42 | // units2Index maps the number of units in a block to a freelist index 43 | units2Index [128 + 1]byte 44 | // index2Units maps a freelist index to the size of the block in units 45 | index2Units [nIndexes]int32 46 | ) 47 | 48 | func init() { 49 | ns2BSIndex[0] = 2 * 0 50 | ns2BSIndex[1] = 2 * 1 51 | for i := 2; i < 11; i++ { 52 | ns2BSIndex[i] = 2 * 2 53 | } 54 | for i := 11; i < 256; i++ { 55 | ns2BSIndex[i] = 2 * 3 56 | } 57 | 58 | var j, n byte 59 | for i := range ns2Index { 60 | ns2Index[i] = n 61 | if j <= 3 { 62 | n++ 63 | j = n 64 | } else { 65 | j-- 66 | } 67 | } 68 | 69 | var ii byte 70 | var iu, units int32 71 | for i, n := range []int{n0, n1, n2, n3, n4} { 72 | for j := 0; j < n; j++ { 73 | units += int32(i) 74 | index2Units[ii] = units 75 | for iu <= units { 76 | units2Index[iu] = ii 77 | iu++ 78 | } 79 | ii++ 80 | } 81 | } 82 | } 83 | 84 | type rangeCoder struct { 85 | br io.ByteReader 86 | code uint32 87 | low uint32 88 | rnge uint32 89 | } 90 | 91 | func (r *rangeCoder) init(br io.ByteReader) error { 92 | r.br = br 93 | r.low = 0 94 | r.rnge = ^uint32(0) 95 | for i := 0; i < 4; i++ { 96 | c, err := r.br.ReadByte() 97 | if err != nil { 98 | return err 99 | } 100 | r.code = r.code<<8 | uint32(c) 101 | } 102 | return nil 103 | } 104 | 105 | func (r *rangeCoder) currentCount(scale uint32) uint32 { 106 | r.rnge /= scale 107 | return (r.code - r.low) / r.rnge 108 | } 109 | 110 | func (r *rangeCoder) normalize() error { 111 | for { 112 | if r.low^(r.low+r.rnge) >= rangeTop { 113 | if r.rnge >= 
rangeBottom { 114 | return nil 115 | } 116 | r.rnge = -r.low & (rangeBottom - 1) 117 | } 118 | c, err := r.br.ReadByte() 119 | if err != nil { 120 | return err 121 | } 122 | r.code = r.code<<8 | uint32(c) 123 | r.rnge <<= 8 124 | r.low <<= 8 125 | } 126 | } 127 | 128 | func (r *rangeCoder) decode(lowCount, highCount uint32) error { 129 | r.low += r.rnge * lowCount 130 | r.rnge *= highCount - lowCount 131 | 132 | return r.normalize() 133 | } 134 | 135 | type see2Context struct { 136 | summ uint16 137 | shift byte 138 | count byte 139 | } 140 | 141 | func newSee2Context(i uint16) see2Context { 142 | return see2Context{i << (periodBits - 4), (periodBits - 4), 4} 143 | } 144 | 145 | func (s *see2Context) mean() uint32 { 146 | if s == nil { 147 | return 1 148 | } 149 | n := s.summ >> s.shift 150 | if n == 0 { 151 | return 1 152 | } 153 | s.summ -= n 154 | return uint32(n) 155 | } 156 | 157 | func (s *see2Context) update() { 158 | if s == nil || s.shift >= periodBits { 159 | return 160 | } 161 | s.count-- 162 | if s.count == 0 { 163 | s.summ += s.summ 164 | s.count = 3 << s.shift 165 | s.shift++ 166 | } 167 | } 168 | 169 | type state struct { 170 | sym byte 171 | freq byte 172 | 173 | // succ can point to a context or byte in memory. 174 | // A context pointer is a positive integer. It is an index into the states 175 | // array that points to the first of two states which the context is 176 | // marshalled into. 177 | // A byte pointer is a negative integer. The magnitude represents the position 178 | // in bytes from the bottom of the memory. As memory is modelled as an array of 179 | // states, this is used to calculate which state, and where in the state the 180 | // byte is stored. 181 | // A zero value represents a nil pointer. 
182 | succ int32 183 | } 184 | 185 | // uint16 return a uint16 stored in the sym and freq fields of a state 186 | func (s state) uint16() uint16 { return uint16(s.sym) | uint16(s.freq)<<8 } 187 | 188 | // setUint16 stores a uint16 in the sym and freq fields of a state 189 | func (s *state) setUint16(n uint16) { s.sym = byte(n); s.freq = byte(n >> 8) } 190 | 191 | // A context is marshalled into a slice of two states. 192 | // The first state contains the number of states, and the suffix pointer. 193 | // If there is only one state, the second state contains that state. 194 | // If there is more than one state, the second state contains the summFreq 195 | // and the index to the slice of states. 196 | // The context is represented by the index into the states array for these two states. 197 | type context int32 198 | 199 | // succContext returns a context given a state.succ index 200 | func succContext(i int32) context { 201 | if i <= 0 { 202 | return 0 203 | } 204 | return context(i) 205 | } 206 | 207 | type subAllocator struct { 208 | // memory for allocation is split into two heaps 209 | 210 | glueCount int 211 | heap1MaxBytes int32 // maximum bytes available in heap1 212 | heap1Lo int32 // heap1 bottom in number of bytes 213 | heap1Hi int32 // heap1 top in number of bytes 214 | heap2Lo int32 // heap2 bottom index in states 215 | heap2Hi int32 // heap2 top index in states 216 | 217 | // Each freeList entry contains an index into states for the beginning 218 | // of a free block. The first state in that block may contain an index 219 | // to another free block and so on. The size of the free block in units 220 | // (2 states) for that freeList index can be determined from the 221 | // index2Units array. 222 | freeList [nIndexes]int32 223 | 224 | // Instead of bytes, memory is represented by a slice of states. 225 | // context's are marshalled to and from a pair of states. 226 | // multiple bytes are stored in a state. 
227 | states []state 228 | } 229 | 230 | func (a *subAllocator) init(maxMB int) { 231 | bytes := int32(maxMB) << 20 232 | heap2Units := bytes / 8 / unitSize * 7 233 | a.heap1MaxBytes = bytes - heap2Units*unitSize 234 | // Add one for the case when bytes are not a multiple of unitSize 235 | heap1Units := a.heap1MaxBytes/unitSize + 1 236 | // Calculate total size in state's. Add 1 unit so we can reserve the first unit. 237 | // This will allow us to use the zero index as a nil pointer. 238 | n := int(1+heap1Units+heap2Units) * 2 239 | if cap(a.states) > n { 240 | a.states = a.states[:n] 241 | } else { 242 | a.states = make([]state, n) 243 | } 244 | } 245 | 246 | func (a *subAllocator) restart() { 247 | // Pad heap1 start by 1 unit and enough bytes so that there is no 248 | // gap between heap1 end and heap2 start. 249 | a.heap1Lo = unitSize + (unitSize - a.heap1MaxBytes%unitSize) 250 | a.heap1Hi = unitSize + (a.heap1MaxBytes/unitSize+1)*unitSize 251 | a.heap2Lo = a.heap1Hi / unitSize * 2 252 | a.heap2Hi = int32(len(a.states)) 253 | a.glueCount = 0 254 | clear(a.freeList[:]) 255 | } 256 | 257 | // pushByte puts a byte on the heap and returns a state.succ index that 258 | // can be used to retrieve it. 
259 | func (a *subAllocator) pushByte(c byte) int32 { 260 | si := a.heap1Lo / 6 // state index 261 | oi := a.heap1Lo % 6 // byte position in state 262 | switch oi { 263 | case 0: 264 | a.states[si].sym = c 265 | case 1: 266 | a.states[si].freq = c 267 | default: 268 | n := (uint(oi) - 2) * 8 269 | mask := ^(uint32(0xFF) << n) 270 | succ := uint32(a.states[si].succ) & mask 271 | succ |= uint32(c) << n 272 | a.states[si].succ = int32(succ) 273 | } 274 | a.heap1Lo++ 275 | if a.heap1Lo >= a.heap1Hi { 276 | return 0 277 | } 278 | return -a.heap1Lo 279 | } 280 | 281 | // popByte reverses the previous pushByte 282 | func (a *subAllocator) popByte() { a.heap1Lo-- } 283 | 284 | // succByte returns a byte from the heap given a state.succ index 285 | func (a *subAllocator) succByte(i int32) byte { 286 | i = -i 287 | si := i / 6 288 | oi := i % 6 289 | switch oi { 290 | case 0: 291 | return a.states[si].sym 292 | case 1: 293 | return a.states[si].freq 294 | default: 295 | n := (uint(oi) - 2) * 8 296 | succ := uint32(a.states[si].succ) >> n 297 | return byte(succ & 0xff) 298 | } 299 | } 300 | 301 | // nextByteAddr takes a state.succ value representing a pointer 302 | // to a byte, and returns the next bytes address 303 | func (a *subAllocator) nextByteAddr(n int32) int32 { return n - 1 } 304 | 305 | func (a *subAllocator) removeFreeBlock(i byte) int32 { 306 | n := a.freeList[i] 307 | if n != 0 { 308 | a.freeList[i] = a.states[n].succ 309 | a.states[n] = state{} 310 | } 311 | return n 312 | } 313 | 314 | func (a *subAllocator) addFreeBlock(n int32, i byte) { 315 | a.states[n].succ = a.freeList[i] 316 | a.freeList[i] = n 317 | } 318 | 319 | func (a *subAllocator) freeUnits(n, u int32) { 320 | i := units2Index[u] 321 | if u != index2Units[i] { 322 | i-- 323 | a.addFreeBlock(n, i) 324 | u -= index2Units[i] 325 | n += index2Units[i] << 1 326 | i = units2Index[u] 327 | } 328 | a.addFreeBlock(n, i) 329 | } 330 | 331 | func (a *subAllocator) glueFreeBlocks() { 332 | var freeIndex int32 
333 | 334 | for i, n := range a.freeList { 335 | s := state{succ: freeMark} 336 | s.setUint16(uint16(index2Units[i])) 337 | for n != 0 { 338 | states := a.states[n:] 339 | states[1].succ = freeIndex 340 | freeIndex = n 341 | n = states[0].succ 342 | states[0] = s 343 | } 344 | a.freeList[i] = 0 345 | } 346 | 347 | for i := freeIndex; i != 0; i = a.states[i+1].succ { 348 | if a.states[i].succ != freeMark { 349 | continue 350 | } 351 | u := int32(a.states[i].uint16()) 352 | states := a.states[i+u<<1:] 353 | for len(states) > 0 && states[0].succ == freeMark { 354 | u += int32(states[0].uint16()) 355 | if u > math.MaxUint16 { 356 | break 357 | } 358 | states[0].succ = 0 359 | a.states[i].setUint16(uint16(u)) 360 | states = a.states[i+u<<1:] 361 | } 362 | } 363 | 364 | for n := freeIndex; n != 0; n = a.states[n+1].succ { 365 | if a.states[n].succ != freeMark { 366 | continue 367 | } 368 | a.states[n].succ = 0 369 | u := int32(a.states[n].uint16()) 370 | m := n 371 | for u > 128 { 372 | a.addFreeBlock(m, nIndexes-1) 373 | u -= 128 374 | m += 256 375 | } 376 | a.freeUnits(m, u) 377 | } 378 | } 379 | 380 | func (a *subAllocator) allocUnitsRare(index byte) int32 { 381 | if a.glueCount == 0 { 382 | a.glueCount = 255 383 | a.glueFreeBlocks() 384 | if n := a.removeFreeBlock(index); n > 0 { 385 | return n 386 | } 387 | } 388 | // try to find a larger free block and split it 389 | for i := index + 1; i < nIndexes; i++ { 390 | if n := a.removeFreeBlock(i); n > 0 { 391 | u := index2Units[i] - index2Units[index] 392 | a.freeUnits(n+index2Units[index]<<1, u) 393 | return n 394 | } 395 | } 396 | a.glueCount-- 397 | 398 | // try to allocate units from the top of heap1 399 | n := a.heap1Hi - index2Units[index]*unitSize 400 | if n > a.heap1Lo { 401 | a.heap1Hi = n 402 | return a.heap1Hi / unitSize * 2 403 | } 404 | return 0 405 | } 406 | 407 | func (a *subAllocator) allocUnits(i byte) int32 { 408 | // try to allocate a free block 409 | if n := a.removeFreeBlock(i); n > 0 { 410 | return 
n 411 | } 412 | // try to allocate from the bottom of heap2 413 | n := index2Units[i] << 1 414 | if a.heap2Lo+n <= a.heap2Hi { 415 | lo := a.heap2Lo 416 | a.heap2Lo += n 417 | return lo 418 | } 419 | return a.allocUnitsRare(i) 420 | } 421 | 422 | func (a *subAllocator) newContext(s state, suffix context) context { 423 | var n int32 424 | if a.heap2Lo < a.heap2Hi { 425 | // allocate from top of heap2 426 | a.heap2Hi -= 2 427 | n = a.heap2Hi 428 | } else if n = a.removeFreeBlock(1); n == 0 { 429 | if n = a.allocUnitsRare(1); n == 0 { 430 | return 0 431 | } 432 | } 433 | // we don't need to set numStates to 1 as the default value of 0 in the sym 434 | // field is always incremented by 1 to get numStates. 435 | a.states[n] = state{succ: int32(suffix)} 436 | a.states[n+1] = s 437 | return context(n) 438 | } 439 | 440 | func (a *subAllocator) newContextSize(ns int) context { 441 | c := a.newContext(state{}, context(0)) 442 | a.contextSetNumStates(c, ns) 443 | i := units2Index[(ns+1)>>1] 444 | n := a.allocUnits(i) 445 | a.contextSetStatesIndex(c, n) 446 | return c 447 | } 448 | 449 | // since number of states is always > 0 && <= 256, we can fit it in a single byte 450 | func (a *subAllocator) contextNumStates(c context) int { return int(a.states[c].sym) + 1 } 451 | func (a *subAllocator) contextSetNumStates(c context, n int) { a.states[c].sym = byte(n - 1) } 452 | 453 | func (a *subAllocator) contextSummFreq(c context) uint16 { return a.states[c+1].uint16() } 454 | func (a *subAllocator) contextSetSummFreq(c context, n uint16) { a.states[c+1].setUint16(n) } 455 | func (a *subAllocator) contextIncSummFreq(c context, n uint16) { 456 | a.states[c+1].setUint16(a.states[c+1].uint16() + n) 457 | } 458 | 459 | func (a *subAllocator) contextSuffix(c context) context { return succContext(a.states[c].succ) } 460 | 461 | func (a *subAllocator) contextStatesIndex(c context) int32 { return a.states[c+1].succ } 462 | func (a *subAllocator) contextSetStatesIndex(c context, n int32) { 
a.states[c+1].succ = n } 463 | 464 | func (a *subAllocator) contextStates(c context) []state { 465 | if ns := int32(a.states[c].sym) + 1; ns != 1 { 466 | i := a.states[c+1].succ 467 | return a.states[i : i+ns] 468 | } 469 | return a.states[c+1 : c+2] 470 | } 471 | 472 | // shrinkStates shrinks the state list down to size states 473 | func (a *subAllocator) shrinkStates(c context, states []state, size int) []state { 474 | i1 := units2Index[(len(states)+1)>>1] 475 | i2 := units2Index[(size+1)>>1] 476 | 477 | if size == 1 { 478 | // store state in context, and free states block 479 | n := a.contextStatesIndex(c) 480 | a.states[c+1] = states[0] 481 | states = a.states[c+1:] 482 | a.addFreeBlock(n, i1) 483 | } else if i1 != i2 { 484 | if n := a.removeFreeBlock(i2); n > 0 { 485 | // allocate new block and copy 486 | copy(a.states[n:], states[:size]) 487 | states = a.states[n:] 488 | // free old block 489 | a.addFreeBlock(a.contextStatesIndex(c), i1) 490 | a.contextSetStatesIndex(c, n) 491 | } else { 492 | // split current block, and free units not needed 493 | n = a.contextStatesIndex(c) + index2Units[i2]<<1 494 | u := index2Units[i1] - index2Units[i2] 495 | a.freeUnits(n, u) 496 | } 497 | } 498 | a.contextSetNumStates(c, size) 499 | return states[:size] 500 | } 501 | 502 | // expandStates expands the states list by one 503 | func (a *subAllocator) expandStates(c context) []state { 504 | states := a.contextStates(c) 505 | ns := len(states) 506 | if ns == 1 { 507 | s := states[0] 508 | n := a.allocUnits(1) 509 | if n == 0 { 510 | return nil 511 | } 512 | a.contextSetStatesIndex(c, n) 513 | states = a.states[n:] 514 | states[0] = s 515 | } else if ns&0x1 == 0 { 516 | u := ns >> 1 517 | i1 := units2Index[u] 518 | i2 := units2Index[u+1] 519 | if i1 != i2 { 520 | n := a.allocUnits(i2) 521 | if n == 0 { 522 | return nil 523 | } 524 | copy(a.states[n:], states) 525 | a.addFreeBlock(a.contextStatesIndex(c), i1) 526 | a.contextSetStatesIndex(c, n) 527 | states = a.states[n:] 528 
| } 529 | } 530 | a.contextSetNumStates(c, ns+1) 531 | return states[:ns+1] 532 | } 533 | 534 | func (a *subAllocator) findState(c context, sym byte) *state { 535 | var i int 536 | states := a.contextStates(c) 537 | for i = range states { 538 | if states[i].sym == sym { 539 | break 540 | } 541 | } 542 | return &states[i] 543 | } 544 | 545 | type model struct { 546 | maxOrder int 547 | orderFall int 548 | initRL int 549 | runLength int 550 | prevSuccess byte 551 | escCount byte 552 | prevSym byte 553 | initEsc byte 554 | c context 555 | rc rangeCoder 556 | a subAllocator 557 | charMask [256]byte 558 | binSumm [128][64]uint16 559 | see2Cont [25][16]see2Context 560 | ibuf [256]int 561 | sbuf [256]*state 562 | } 563 | 564 | func (m *model) restart() { 565 | clear(m.charMask[:]) 566 | m.escCount = 1 567 | 568 | if m.maxOrder < 12 { 569 | m.initRL = -m.maxOrder - 1 570 | } else { 571 | m.initRL = -12 - 1 572 | } 573 | m.orderFall = m.maxOrder 574 | m.runLength = m.initRL 575 | m.prevSuccess = 0 576 | 577 | m.a.restart() 578 | 579 | m.c = m.a.newContextSize(256) 580 | m.a.contextSetSummFreq(m.c, 257) 581 | states := m.a.contextStates(m.c) 582 | for i := range states { 583 | states[i] = state{sym: byte(i), freq: 1} 584 | } 585 | 586 | for i := range m.binSumm { 587 | for j, esc := range initBinEsc { 588 | n := binScale - esc/(uint16(i)+2) 589 | for k := j; k < len(m.binSumm[i]); k += len(initBinEsc) { 590 | m.binSumm[i][k] = n 591 | } 592 | } 593 | } 594 | 595 | for i := range m.see2Cont { 596 | see := newSee2Context(5*uint16(i) + 10) 597 | for j := range m.see2Cont[i] { 598 | m.see2Cont[i][j] = see 599 | } 600 | } 601 | } 602 | 603 | func (m *model) init(br io.ByteReader, reset bool, maxOrder, maxMB int) error { 604 | err := m.rc.init(br) 605 | if err != nil { 606 | return err 607 | } 608 | if !reset { 609 | return nil 610 | } 611 | 612 | m.a.init(maxMB) 613 | 614 | if maxOrder == 1 { 615 | return ErrCorruptPPM 616 | } 617 | m.maxOrder = maxOrder 618 | m.prevSym = 0 619 | 
m.c = 0 620 | return nil 621 | } 622 | 623 | func (m *model) rescale(c context, s *state) *state { 624 | if s.freq <= maxFreq { 625 | return s 626 | } 627 | 628 | var summFreq uint16 629 | 630 | s.freq += 4 631 | states := m.a.contextStates(c) 632 | escFreq := m.a.contextSummFreq(c) + 4 633 | 634 | for i := range states { 635 | f := states[i].freq 636 | escFreq -= uint16(f) 637 | if m.orderFall != 0 { 638 | f++ 639 | } 640 | f >>= 1 641 | summFreq += uint16(f) 642 | states[i].freq = f 643 | 644 | if i == 0 || f <= states[i-1].freq { 645 | continue 646 | } 647 | j := i - 1 648 | for j > 0 && f > states[j-1].freq { 649 | j-- 650 | } 651 | t := states[i] 652 | copy(states[j+1:i+1], states[j:i]) 653 | states[j] = t 654 | } 655 | 656 | i := len(states) - 1 657 | for states[i].freq == 0 { 658 | i-- 659 | escFreq++ 660 | } 661 | if i != len(states)-1 { 662 | states = m.a.shrinkStates(c, states, i+1) 663 | } 664 | s = &states[0] 665 | if i == 0 { 666 | for { 667 | s.freq -= s.freq >> 1 668 | escFreq >>= 1 669 | if escFreq <= 1 { 670 | return s 671 | } 672 | } 673 | } 674 | summFreq += escFreq - (escFreq >> 1) 675 | m.a.contextSetSummFreq(c, summFreq) 676 | return s 677 | } 678 | 679 | func (m *model) decodeBinSymbol(c context) (*state, error) { 680 | s := &m.a.contextStates(c)[0] 681 | 682 | ns := m.a.contextNumStates(m.a.contextSuffix(c)) 683 | i := m.prevSuccess + ns2BSIndex[ns-1] + byte(m.runLength>>26)&0x20 684 | if m.prevSym >= 64 { 685 | i += 8 686 | } 687 | if s.sym >= 64 { 688 | i += 2 * 8 689 | } 690 | bs := &m.binSumm[s.freq-1][i] 691 | mean := (*bs + 1<<(periodBits-2)) >> periodBits 692 | 693 | if m.rc.currentCount(binScale) < uint32(*bs) { 694 | err := m.rc.decode(0, uint32(*bs)) 695 | if s.freq < 128 { 696 | s.freq++ 697 | } 698 | *bs += 1<>10] 706 | m.charMask[s.sym] = m.escCount 707 | m.prevSuccess = 0 708 | return nil, err 709 | } 710 | 711 | func (m *model) decodeSymbol1(c context) (*state, error) { 712 | states := m.a.contextStates(c) 713 | scale := 
uint32(m.a.contextSummFreq(c)) 714 | // protect against divide by zero 715 | // TODO: look at why this happens, may be problem elsewhere 716 | if scale == 0 { 717 | return nil, ErrCorruptPPM 718 | } 719 | count := m.rc.currentCount(scale) 720 | m.prevSuccess = 0 721 | 722 | var n uint32 723 | for i := range states { 724 | s := &states[i] 725 | n += uint32(s.freq) 726 | if n <= count { 727 | continue 728 | } 729 | err := m.rc.decode(n-uint32(s.freq), n) 730 | s.freq += 4 731 | m.a.contextSetSummFreq(c, uint16(scale+4)) 732 | if i == 0 { 733 | if 2*n > scale { 734 | m.prevSuccess = 1 735 | m.runLength++ 736 | } 737 | } else { 738 | if s.freq <= states[i-1].freq { 739 | return s, err 740 | } 741 | states[i-1], states[i] = states[i], states[i-1] 742 | s = &states[i-1] 743 | } 744 | return m.rescale(c, s), err 745 | } 746 | 747 | for _, s := range states { 748 | m.charMask[s.sym] = m.escCount 749 | } 750 | return nil, m.rc.decode(n, scale) 751 | } 752 | 753 | func (m *model) makeEscFreq(c context, numMasked int) *see2Context { 754 | ns := m.a.contextNumStates(c) 755 | if ns == 256 { 756 | return nil 757 | } 758 | diff := ns - numMasked 759 | 760 | var i int 761 | if m.prevSym >= 64 { 762 | i = 8 763 | } 764 | if diff < m.a.contextNumStates(m.a.contextSuffix(c))-ns { 765 | i++ 766 | } 767 | if int(m.a.contextSummFreq(c)) < 11*ns { 768 | i += 2 769 | } 770 | if numMasked > diff { 771 | i += 4 772 | } 773 | return &m.see2Cont[ns2Index[diff-1]][i] 774 | } 775 | 776 | func (m *model) decodeSymbol2(c context, numMasked int) (*state, error) { 777 | see := m.makeEscFreq(c, numMasked) 778 | scale := see.mean() 779 | 780 | var i int 781 | var hi uint32 782 | states := m.a.contextStates(c) 783 | n := len(states) - numMasked 784 | sl := m.ibuf[:n] 785 | for j := range sl { 786 | for m.charMask[states[i].sym] == m.escCount { 787 | i++ 788 | } 789 | hi += uint32(states[i].freq) 790 | sl[j] = i 791 | i++ 792 | } 793 | 794 | scale += hi 795 | count := m.rc.currentCount(scale) 796 | 797 
| if count >= scale { 798 | return nil, ErrCorruptPPM 799 | } 800 | if count >= hi { 801 | err := m.rc.decode(hi, scale) 802 | if see != nil { 803 | see.summ += uint16(scale) 804 | } 805 | for _, i := range sl { 806 | m.charMask[states[i].sym] = m.escCount 807 | } 808 | return nil, err 809 | } 810 | 811 | hi = uint32(states[sl[0]].freq) 812 | n = 0 813 | for hi <= count { 814 | n++ 815 | hi += uint32(states[sl[n]].freq) 816 | } 817 | s := &states[sl[n]] 818 | 819 | err := m.rc.decode(hi-uint32(s.freq), hi) 820 | 821 | see.update() 822 | 823 | m.escCount++ 824 | m.runLength = m.initRL 825 | 826 | s.freq += 4 827 | m.a.contextIncSummFreq(c, 4) 828 | return m.rescale(c, s), err 829 | } 830 | 831 | func (m *model) createSuccessors(c context, s, ss *state) context { 832 | sl := m.sbuf[:0] 833 | 834 | if m.orderFall != 0 { 835 | sl = append(sl, s) 836 | } 837 | 838 | for suff := m.a.contextSuffix(c); suff > 0; suff = m.a.contextSuffix(c) { 839 | c = suff 840 | 841 | if ss == nil { 842 | ss = m.a.findState(c, s.sym) 843 | } 844 | if ss.succ != s.succ { 845 | c = succContext(ss.succ) 846 | break 847 | } 848 | sl = append(sl, ss) 849 | ss = nil 850 | } 851 | 852 | if len(sl) == 0 { 853 | return c 854 | } 855 | 856 | var up state 857 | up.sym = m.a.succByte(s.succ) 858 | up.succ = m.a.nextByteAddr(s.succ) 859 | 860 | states := m.a.contextStates(c) 861 | if len(states) > 1 { 862 | s = m.a.findState(c, up.sym) 863 | 864 | cf := uint16(s.freq) - 1 865 | s0 := m.a.contextSummFreq(c) - uint16(len(states)) - cf 866 | 867 | if 2*cf <= s0 { 868 | if 5*cf > s0 { 869 | up.freq = 2 870 | } else { 871 | up.freq = 1 872 | } 873 | } else { 874 | up.freq = byte(1 + (2*cf+3*s0-1)/(2*s0)) 875 | } 876 | } else { 877 | up.freq = states[0].freq 878 | } 879 | 880 | for i := len(sl) - 1; i >= 0; i-- { 881 | c = m.a.newContext(up, c) 882 | if c == 0 { 883 | return c 884 | } 885 | sl[i].succ = int32(c) 886 | } 887 | return c 888 | } 889 | 890 | func (m *model) update(minC, maxC context, s *state) 
context { 891 | if m.orderFall == 0 { 892 | if s.succ > 0 { 893 | return context(s.succ) 894 | } 895 | } 896 | 897 | if m.escCount == 0 { 898 | m.escCount = 1 899 | clear(m.charMask[:]) 900 | } 901 | 902 | var ss *state // matching minC.suffix state 903 | 904 | if s.freq < maxFreq/4 && m.a.contextSuffix(minC) > 0 { 905 | c := m.a.contextSuffix(minC) 906 | states := m.a.contextStates(c) 907 | 908 | var i int 909 | if len(states) > 1 { 910 | for states[i].sym != s.sym { 911 | i++ 912 | } 913 | if i > 0 && states[i].freq >= states[i-1].freq { 914 | states[i-1], states[i] = states[i], states[i-1] 915 | i-- 916 | } 917 | if states[i].freq < maxFreq-9 { 918 | states[i].freq += 2 919 | m.a.contextIncSummFreq(c, 2) 920 | } 921 | } else if states[0].freq < 32 { 922 | states[0].freq++ 923 | } 924 | ss = &states[i] // save later for createSuccessors 925 | } 926 | 927 | if m.orderFall == 0 { 928 | minC = m.createSuccessors(minC, s, ss) 929 | s.succ = int32(minC) 930 | return minC 931 | } 932 | 933 | succ := m.a.pushByte(s.sym) 934 | if succ == 0 { 935 | return context(0) 936 | } 937 | 938 | var newC context 939 | if s.succ == 0 { 940 | s.succ = succ 941 | newC = minC 942 | } else { 943 | if s.succ > 0 { 944 | newC = context(s.succ) 945 | } else { 946 | newC = m.createSuccessors(minC, s, ss) 947 | if newC == 0 { 948 | return context(0) 949 | } 950 | } 951 | m.orderFall-- 952 | if m.orderFall == 0 { 953 | succ = int32(newC) 954 | if maxC != minC { 955 | m.a.popByte() 956 | } 957 | } 958 | } 959 | 960 | n := m.a.contextNumStates(minC) 961 | s0 := int(m.a.contextSummFreq(minC)) - n - int(s.freq-1) 962 | for c := maxC; c != minC; c = m.a.contextSuffix(c) { 963 | var summFreq uint16 964 | 965 | states := m.a.expandStates(c) 966 | if states == nil { 967 | return context(0) 968 | } 969 | if ns := len(states) - 1; ns != 1 { 970 | summFreq = m.a.contextSummFreq(c) 971 | if 4*ns <= n && int(summFreq) <= 8*ns { 972 | summFreq += 2 973 | } 974 | if 2*ns < n { 975 | summFreq++ 976 | } 977 | 
} else { 978 | p := &states[0] 979 | if p.freq < maxFreq/4-1 { 980 | p.freq += p.freq 981 | } else { 982 | p.freq = maxFreq - 4 983 | } 984 | summFreq = uint16(p.freq) + uint16(m.initEsc) 985 | if n > 3 { 986 | summFreq++ 987 | } 988 | } 989 | 990 | cf := 2 * int(s.freq) * int(summFreq+6) 991 | sf := s0 + int(summFreq) 992 | var freq byte 993 | if cf >= 6*sf { 994 | switch { 995 | case cf >= 15*sf: 996 | freq = 7 997 | case cf >= 12*sf: 998 | freq = 6 999 | case cf >= 9*sf: 1000 | freq = 5 1001 | default: 1002 | freq = 4 1003 | } 1004 | summFreq += uint16(freq) 1005 | } else { 1006 | switch { 1007 | case cf >= 4*sf: 1008 | freq = 3 1009 | case cf > sf: 1010 | freq = 2 1011 | default: 1012 | freq = 1 1013 | } 1014 | summFreq += 3 1015 | } 1016 | states[len(states)-1] = state{sym: s.sym, freq: freq, succ: succ} 1017 | m.a.contextSetSummFreq(c, summFreq) 1018 | } 1019 | return newC 1020 | } 1021 | 1022 | func (m *model) ReadByte() (byte, error) { 1023 | if m.c == 0 { 1024 | m.restart() 1025 | } 1026 | minC := m.c 1027 | maxC := minC 1028 | var s *state 1029 | var err error 1030 | if m.a.contextNumStates(minC) == 1 { 1031 | s, err = m.decodeBinSymbol(minC) 1032 | } else { 1033 | s, err = m.decodeSymbol1(minC) 1034 | } 1035 | for s == nil && err == nil { 1036 | n := m.a.contextNumStates(minC) 1037 | for m.a.contextNumStates(minC) == n { 1038 | m.orderFall++ 1039 | minC = m.a.contextSuffix(minC) 1040 | if minC <= 0 { 1041 | return 0, ErrCorruptPPM 1042 | } 1043 | } 1044 | s, err = m.decodeSymbol2(minC, n) 1045 | } 1046 | if err != nil { 1047 | return 0, err 1048 | } 1049 | 1050 | m.c = m.update(minC, maxC, s) 1051 | m.prevSym = s.sym 1052 | return s.sym, nil 1053 | } 1054 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "bytes" 5 | "crypto/hmac" 6 | "crypto/sha256" 7 | "errors" 8 | "hash" 9 | "io" 10 | 
"os" 11 | "time" 12 | ) 13 | 14 | // FileHeader HostOS types 15 | const ( 16 | HostOSUnknown = 0 17 | HostOSMSDOS = 1 18 | HostOSOS2 = 2 19 | HostOSWindows = 3 20 | HostOSUnix = 4 21 | HostOSMacOS = 5 22 | HostOSBeOS = 6 23 | ) 24 | 25 | const ( 26 | maxPassword = int(128) 27 | ) 28 | 29 | var ( 30 | ErrShortFile = errors.New("rardecode: decoded file too short") 31 | ErrInvalidFileBlock = errors.New("rardecode: invalid file block") 32 | ErrUnexpectedArcEnd = errors.New("rardecode: unexpected end of archive") 33 | ErrBadFileChecksum = errors.New("rardecode: bad file checksum") 34 | ErrSolidOpen = errors.New("rardecode: solid files don't support Open") 35 | ErrUnknownVersion = errors.New("rardecode: unknown archive version") 36 | ) 37 | 38 | // FileHeader represents a single file in a RAR archive. 39 | type FileHeader struct { 40 | Name string // file name using '/' as the directory separator 41 | IsDir bool // is a directory 42 | Solid bool // is a solid file 43 | Encrypted bool // file contents are encrypted 44 | HeaderEncrypted bool // file header is encrypted 45 | HostOS byte // Host OS the archive was created on 46 | Attributes int64 // Host OS specific file attributes 47 | PackedSize int64 // packed file size (or first block if the file spans volumes) 48 | UnPackedSize int64 // unpacked file size 49 | UnKnownSize bool // unpacked file size is not known 50 | ModificationTime time.Time // modification time (non-zero if set) 51 | CreationTime time.Time // creation time (non-zero if set) 52 | AccessTime time.Time // access time (non-zero if set) 53 | Version int // file version 54 | } 55 | 56 | // Mode returns an os.FileMode for the file, calculated from the Attributes field. 
57 | func (f *FileHeader) Mode() os.FileMode { 58 | var m os.FileMode 59 | 60 | if f.IsDir { 61 | m = os.ModeDir 62 | } 63 | if f.HostOS == HostOSWindows { 64 | if f.IsDir { 65 | m |= 0777 66 | } else if f.Attributes&1 > 0 { 67 | m |= 0444 // readonly 68 | } else { 69 | m |= 0666 70 | } 71 | return m 72 | } 73 | // assume unix perms for all remaining os types 74 | m |= os.FileMode(f.Attributes) & os.ModePerm 75 | 76 | // only check other bits on unix host created archives 77 | if f.HostOS != HostOSUnix { 78 | return m 79 | } 80 | 81 | if f.Attributes&0x200 != 0 { 82 | m |= os.ModeSticky 83 | } 84 | if f.Attributes&0x400 != 0 { 85 | m |= os.ModeSetgid 86 | } 87 | if f.Attributes&0x800 != 0 { 88 | m |= os.ModeSetuid 89 | } 90 | 91 | // Check for additional file types. 92 | if f.Attributes&0xF000 == 0xA000 { 93 | m |= os.ModeSymlink 94 | } 95 | return m 96 | } 97 | 98 | type byteReader interface { 99 | io.Reader 100 | bytes() ([]byte, error) 101 | } 102 | 103 | type bufByteReader struct { 104 | buf []byte 105 | } 106 | 107 | func (b *bufByteReader) Read(p []byte) (int, error) { 108 | if len(b.buf) == 0 { 109 | return 0, io.EOF 110 | } 111 | n := copy(p, b.buf) 112 | b.buf = b.buf[n:] 113 | return n, nil 114 | } 115 | 116 | func (b *bufByteReader) bytes() ([]byte, error) { 117 | if len(b.buf) == 0 { 118 | return nil, io.EOF 119 | } 120 | buf := b.buf 121 | b.buf = nil 122 | return buf, nil 123 | } 124 | 125 | func newBufByteReader(buf []byte) *bufByteReader { 126 | return &bufByteReader{buf: buf} 127 | } 128 | 129 | // packedFileReader provides sequential access to packed files in a RAR archive. 
130 | type packedFileReader struct { 131 | n int64 // bytes left in current data block 132 | v *volume 133 | r fileBlockReader 134 | h *fileBlockHeader // current file header 135 | } 136 | 137 | // init initializes a cloned packedFileReader 138 | func (f *packedFileReader) init() error { return f.v.init() } 139 | 140 | func (f *packedFileReader) clone() *packedFileReader { 141 | nr := &packedFileReader{n: f.n, h: f.h} 142 | nr.r = f.r.clone() 143 | nr.v = f.v.clone() 144 | return nr 145 | } 146 | 147 | func (f *packedFileReader) Close() error { return f.v.Close() } 148 | 149 | // nextBlock reads the next file block in the current file at the current 150 | // archive file position, or returns an error if there is a problem. 151 | // It is invalid to call this when already at the last block in the current file. 152 | func (f *packedFileReader) nextBlock() error { 153 | if f.h == nil { 154 | return io.EOF 155 | } 156 | // discard current block data 157 | if f.n > 0 { 158 | if err := f.v.discard(f.n); err != nil { 159 | return err 160 | } 161 | f.n = 0 162 | } 163 | if f.h.last { 164 | return io.EOF 165 | } 166 | h, err := f.r.next(f.v) 167 | if err != nil { 168 | if err == io.EOF { 169 | // archive ended, but file hasn't 170 | return ErrUnexpectedArcEnd 171 | } 172 | return err 173 | } 174 | if h.first || h.Name != f.h.Name { 175 | return ErrInvalidFileBlock 176 | } 177 | f.n = h.PackedSize 178 | f.h = h 179 | return nil 180 | } 181 | 182 | // next advances to the next packed file in the RAR archive. 
183 | func (f *packedFileReader) next() (*fileBlockHeader, error) { 184 | // skip to last block in current file 185 | var err error 186 | for err == nil { 187 | err = f.nextBlock() 188 | } 189 | if err != io.EOF { 190 | return nil, err 191 | } 192 | f.h, err = f.r.next(f.v) // get next file block 193 | if err != nil { 194 | return nil, err 195 | } 196 | if !f.h.first { 197 | return nil, ErrInvalidFileBlock 198 | } 199 | f.n = f.h.PackedSize 200 | return f.h, nil 201 | } 202 | 203 | // Read reads the packed data for the current file into p. 204 | func (f *packedFileReader) Read(p []byte) (int, error) { 205 | for f.n == 0 { 206 | if err := f.nextBlock(); err != nil { 207 | return 0, err 208 | } 209 | } 210 | if int64(len(p)) > f.n { 211 | p = p[0:f.n] 212 | } 213 | n, err := f.v.Read(p) 214 | f.n -= int64(n) 215 | if err == io.EOF && f.n > 0 { 216 | return n, io.ErrUnexpectedEOF 217 | } 218 | if n > 0 { 219 | return n, nil 220 | } 221 | return n, err 222 | } 223 | 224 | func (f *packedFileReader) bytes() ([]byte, error) { 225 | for f.n == 0 { 226 | if err := f.nextBlock(); err != nil { 227 | return nil, err 228 | } 229 | } 230 | n := int(min(f.n, int64(f.v.br.Size()))) 231 | if k := f.v.br.Buffered(); k > 0 { 232 | n = min(k, n) 233 | } else { 234 | b, err := f.v.peek(n) 235 | if err != nil { 236 | return nil, err 237 | } 238 | n = len(b) 239 | } 240 | b, err := f.v.readSlice(n) 241 | f.n -= int64(len(b)) 242 | return b, err 243 | } 244 | 245 | func newPackedFileReader(v *volume, pass *string) (*packedFileReader, error) { 246 | var err error 247 | var fbr fileBlockReader 248 | switch v.ver { 249 | case archiveVersion15: 250 | fbr, err = newArchive15(v, pass) 251 | case archiveVersion50: 252 | fbr, err = newArchive50(v, pass) 253 | default: 254 | err = ErrUnknownVersion 255 | } 256 | if err != nil { 257 | return nil, err 258 | } 259 | return &packedFileReader{r: fbr, v: v}, nil 260 | } 261 | 262 | type limitedReader struct { 263 | r byteReader 264 | n int64 // bytes 
remaining 265 | shortErr error // error returned when r returns io.EOF with n > 0 266 | } 267 | 268 | func (l *limitedReader) Read(p []byte) (int, error) { 269 | if l.n <= 0 { 270 | return 0, io.EOF 271 | } 272 | if int64(len(p)) > l.n { 273 | p = p[0:l.n] 274 | } 275 | n, err := l.r.Read(p) 276 | l.n -= int64(n) 277 | if err == io.EOF && l.n > 0 { 278 | return n, l.shortErr 279 | } 280 | return n, err 281 | } 282 | 283 | func (l *limitedReader) bytes() ([]byte, error) { 284 | b, err := l.r.bytes() 285 | if n := len(b); int64(n) > l.n { 286 | b = b[:int(l.n)] 287 | } 288 | l.n -= int64(len(b)) 289 | return b, err 290 | } 291 | 292 | type checksumReader struct { 293 | r byteReader 294 | hash hash.Hash 295 | pr *packedFileReader 296 | } 297 | 298 | func (cr *checksumReader) eofError() error { 299 | // calculate file checksum 300 | h := cr.pr.h 301 | sum := cr.hash.Sum(nil) 302 | if !h.first && h.genKeys != nil { 303 | if err := h.genKeys(); err != nil { 304 | return err 305 | } 306 | } 307 | if len(h.hashKey) > 0 { 308 | mac := hmac.New(sha256.New, h.hashKey) 309 | _, _ = mac.Write(sum) // ignore error, should always succeed 310 | sum = mac.Sum(sum[:0]) 311 | if len(h.sum) == 4 { 312 | // CRC32 313 | for i, v := range sum[4:] { 314 | sum[i&3] ^= v 315 | } 316 | sum = sum[:4] 317 | } 318 | } 319 | if !bytes.Equal(sum, h.sum) { 320 | return ErrBadFileChecksum 321 | } 322 | return io.EOF 323 | } 324 | 325 | func (cr *checksumReader) Read(p []byte) (int, error) { 326 | n, err := cr.r.Read(p) 327 | if n > 0 { 328 | if n, err = cr.hash.Write(p[:n]); err != nil { 329 | return n, err 330 | } 331 | } 332 | if err != io.EOF { 333 | return n, err 334 | } 335 | return n, cr.eofError() 336 | } 337 | 338 | func (cr *checksumReader) bytes() ([]byte, error) { 339 | b, err := cr.r.bytes() 340 | if len(b) > 0 { 341 | if _, err = cr.hash.Write(b); err != nil { 342 | return b, err 343 | } 344 | } 345 | if err != io.EOF { 346 | return b, err 347 | } 348 | return b, cr.eofError() 349 | } 
350 | 351 | // Reader provides sequential access to files in a RAR archive. 352 | type Reader struct { 353 | r byteReader // reader for current unpacked file 354 | dr *decodeReader // reader for decoding and filters if file is compressed 355 | pr *packedFileReader // reader for current raw file bytes 356 | } 357 | 358 | // Read reads from the current file in the RAR archive. 359 | func (r *Reader) Read(p []byte) (int, error) { 360 | if r.r == nil { 361 | err := r.nextFile() 362 | if err != nil { 363 | return 0, err 364 | } 365 | } 366 | return r.r.Read(p) 367 | } 368 | 369 | // WriteTo implements io.WriterTo. 370 | func (r *Reader) WriteTo(w io.Writer) (int64, error) { 371 | if r.r == nil { 372 | err := r.nextFile() 373 | if err != nil { 374 | return 0, err 375 | } 376 | } 377 | var n int64 378 | b, err := r.r.bytes() 379 | for err == nil { 380 | var nn int 381 | nn, err = w.Write(b) 382 | n += int64(nn) 383 | if err == nil { 384 | b, err = r.r.bytes() 385 | } 386 | } 387 | if err == io.EOF { 388 | err = nil 389 | } 390 | return n, err 391 | } 392 | 393 | // Next advances to the next file in the archive. 394 | func (r *Reader) Next() (*FileHeader, error) { 395 | // check if file is a compressed file in a solid archive 396 | if h := r.pr.h; h != nil && h.decVer > 0 && h.arcSolid { 397 | var err error 398 | if r.r == nil { 399 | // setup full file reader 400 | err = r.nextFile() 401 | } 402 | // decode and discard bytes 403 | for err == nil { 404 | _, err = r.dr.bytes() 405 | } 406 | if err != io.EOF { 407 | return nil, err 408 | } 409 | } 410 | // get next packed file 411 | h, err := r.pr.next() 412 | if err != nil { 413 | return nil, err 414 | } 415 | // Clear the reader as it will be setup on the next Read() or WriteTo(). 
416 | r.r = nil 417 | return &h.FileHeader, nil 418 | } 419 | 420 | func (r *Reader) nextFile() error { 421 | h := r.pr.h 422 | if h == nil { 423 | return io.EOF 424 | } 425 | // start with packed file reader 426 | r.r = r.pr 427 | // check for encryption 428 | if h.genKeys != nil { 429 | r.r = newAesDecryptReader(r.pr, h) // decrypt 430 | } 431 | // check for compression 432 | if h.decVer > 0 { 433 | if r.dr == nil { 434 | r.dr = new(decodeReader) 435 | } 436 | err := r.dr.init(r.r, h.decVer, h.winSize, !h.Solid, h.UnPackedSize) 437 | if err != nil { 438 | return err 439 | } 440 | r.r = r.dr 441 | } 442 | if h.UnPackedSize >= 0 && !h.UnKnownSize { 443 | // Limit reading to UnPackedSize as there may be padding 444 | r.r = &limitedReader{r.r, h.UnPackedSize, ErrShortFile} 445 | } 446 | if h.hash != nil { 447 | r.r = &checksumReader{r.r, h.hash(), r.pr} 448 | } 449 | return nil 450 | } 451 | 452 | // NewReader creates a Reader reading from r. 453 | // NewReader only supports single volume archives. 454 | // Multi-volume archives must use OpenReader. 455 | func NewReader(r io.Reader, opts ...Option) (*Reader, error) { 456 | options := getOptions(opts) 457 | v, err := newVolume(r, options) 458 | if err != nil { 459 | return nil, err 460 | } 461 | pr, err := newPackedFileReader(v, options.pass) 462 | if err != nil { 463 | return nil, err 464 | } 465 | return &Reader{pr: pr}, nil 466 | } 467 | 468 | // ReadCloser is a Reader that allows closing of the rar archive. 469 | type ReadCloser struct { 470 | Reader 471 | v *volume 472 | } 473 | 474 | // Close closes the rar file. 475 | func (rc *ReadCloser) Close() error { 476 | return rc.pr.Close() 477 | } 478 | 479 | // Volumes returns the volume filenames that have been used in decoding the archive 480 | // up to this point. This will include the current open volume if the archive is still 481 | // being processed. 
482 | func (rc *ReadCloser) Volumes() []string { 483 | return rc.v.files 484 | } 485 | 486 | // OpenReader opens a RAR archive specified by the name and returns a ReadCloser. 487 | func OpenReader(name string, opts ...Option) (*ReadCloser, error) { 488 | options := getOptions(opts) 489 | options.file = name 490 | if options.fs == nil { 491 | options.fs = defaultFS 492 | } 493 | f, err := options.fs.Open(name) 494 | if err != nil { 495 | return nil, err 496 | } 497 | v, err := newVolume(f, options) 498 | if err != nil { 499 | _ = f.Close() 500 | return nil, err 501 | } 502 | pr, err := newPackedFileReader(v, options.pass) 503 | if err != nil { 504 | _ = v.Close() 505 | return nil, err 506 | } 507 | 508 | return &ReadCloser{Reader: Reader{pr: pr}, v: v}, nil 509 | } 510 | 511 | // File represents a file in a RAR archive 512 | type File struct { 513 | FileHeader 514 | pr *packedFileReader 515 | } 516 | 517 | // Open returns an io.ReadCloser that provides access to the File's contents. 518 | // Open is not supported on Solid File's as their contents depend on the decoding 519 | // of the preceding files in the archive. Use OpenReader and Next to access Solid file 520 | // contents instead. 521 | func (f *File) Open() (io.ReadCloser, error) { 522 | if f.Solid { 523 | return nil, ErrSolidOpen 524 | } 525 | r := new(ReadCloser) 526 | r.pr = f.pr.clone() 527 | err := r.pr.init() 528 | if err != nil { 529 | r.Close() 530 | return nil, err 531 | } 532 | return r, nil 533 | } 534 | 535 | // List returns a list of File's in the RAR archive specified by name. 536 | func List(name string, opts ...Option) ([]*File, error) { 537 | r, err := OpenReader(name, opts...) 
538 | if err != nil { 539 | return nil, err 540 | } 541 | pr := r.pr 542 | defer pr.Close() 543 | 544 | var fl []*File 545 | for { 546 | // get next file 547 | h, err := pr.next() 548 | if err != nil { 549 | if err == io.EOF { 550 | return fl, nil 551 | } 552 | return nil, err 553 | } 554 | 555 | // save information for File 556 | f := new(File) 557 | f.FileHeader = h.FileHeader 558 | f.pr = pr.clone() 559 | fl = append(fl, f) 560 | } 561 | } 562 | -------------------------------------------------------------------------------- /vm.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | ) 7 | 8 | const ( 9 | // vm flag bits 10 | flagC = 1 // Carry 11 | flagZ = 2 // Zero 12 | flagS = 0x80000000 // Sign 13 | 14 | maxCommands = 25000000 // maximum number of commands that can be run in a program 15 | 16 | vmRegs = 8 // number if registers 17 | vmSize = 0x40000 // memory size 18 | vmMask = vmSize - 1 19 | ) 20 | 21 | var ( 22 | ErrInvalidVMInstruction = errors.New("rardecode: invalid vm instruction") 23 | ) 24 | 25 | type vm struct { 26 | ip uint32 // instruction pointer 27 | ipMod bool // ip was modified 28 | fl uint32 // flag bits 29 | r [vmRegs]uint32 // registers 30 | m []byte // memory 31 | } 32 | 33 | func (v *vm) setIP(ip uint32) { 34 | v.ip = ip 35 | v.ipMod = true 36 | } 37 | 38 | // execute runs a list of commands on the vm. 39 | func (v *vm) execute(cmd []command) { 40 | v.ip = 0 // reset instruction pointer 41 | for n := 0; n < maxCommands; n++ { 42 | ip := v.ip 43 | if ip >= uint32(len(cmd)) { 44 | return 45 | } 46 | ins := cmd[ip] 47 | ins.f(v, ins.bm, ins.op) // run cpu instruction 48 | if v.ipMod { 49 | // command modified ip, don't increment 50 | v.ipMod = false 51 | } else { 52 | v.ip++ // increment ip for next command 53 | } 54 | } 55 | } 56 | 57 | // newVM creates a new RAR virtual machine using the byte slice as memory. 
58 | func newVM(mem []byte) *vm { 59 | v := new(vm) 60 | 61 | if cap(mem) < vmSize+4 { 62 | v.m = make([]byte, vmSize+4) 63 | copy(v.m, mem) 64 | } else { 65 | v.m = mem[:vmSize+4] 66 | if l := len(mem); l < len(v.m) { 67 | clear(v.m[l:]) 68 | } 69 | } 70 | v.r[7] = vmSize 71 | return v 72 | } 73 | 74 | type operand interface { 75 | get(v *vm, byteMode bool) uint32 76 | set(v *vm, byteMode bool, n uint32) 77 | } 78 | 79 | // Immediate Operand 80 | type opI uint32 81 | 82 | func (op opI) get(v *vm, bm bool) uint32 { return uint32(op) } 83 | func (op opI) set(v *vm, bm bool, n uint32) {} 84 | 85 | // Direct Operand 86 | type opD uint32 87 | 88 | func (op opD) get(v *vm, byteMode bool) uint32 { 89 | if byteMode { 90 | return uint32(v.m[op]) 91 | } 92 | return binary.LittleEndian.Uint32(v.m[op:]) 93 | } 94 | 95 | func (op opD) set(v *vm, byteMode bool, n uint32) { 96 | if byteMode { 97 | v.m[op] = byte(n) 98 | } else { 99 | binary.LittleEndian.PutUint32(v.m[op:], n) 100 | } 101 | } 102 | 103 | // Register Operand 104 | type opR uint32 105 | 106 | func (op opR) get(v *vm, byteMode bool) uint32 { 107 | if byteMode { 108 | return v.r[op] & 0xFF 109 | } 110 | return v.r[op] 111 | } 112 | 113 | func (op opR) set(v *vm, byteMode bool, n uint32) { 114 | if byteMode { 115 | v.r[op] = (v.r[op] & 0xFFFFFF00) | (n & 0xFF) 116 | } else { 117 | v.r[op] = n 118 | } 119 | } 120 | 121 | // Register Indirect Operand 122 | type opRI uint32 123 | 124 | func (op opRI) get(v *vm, byteMode bool) uint32 { 125 | i := v.r[op] & vmMask 126 | if byteMode { 127 | return uint32(v.m[i]) 128 | } 129 | return binary.LittleEndian.Uint32(v.m[i:]) 130 | } 131 | func (op opRI) set(v *vm, byteMode bool, n uint32) { 132 | i := v.r[op] & vmMask 133 | if byteMode { 134 | v.m[i] = byte(n) 135 | } else { 136 | binary.LittleEndian.PutUint32(v.m[i:], n) 137 | } 138 | } 139 | 140 | // Base Plus Index Indirect Operand 141 | type opBI struct { 142 | r uint32 143 | i uint32 144 | } 145 | 146 | func (op opBI) get(v 
*vm, byteMode bool) uint32 { 147 | i := (v.r[op.r] + op.i) & vmMask 148 | if byteMode { 149 | return uint32(v.m[i]) 150 | } 151 | return binary.LittleEndian.Uint32(v.m[i:]) 152 | } 153 | func (op opBI) set(v *vm, byteMode bool, n uint32) { 154 | i := (v.r[op.r] + op.i) & vmMask 155 | if byteMode { 156 | v.m[i] = byte(n) 157 | } else { 158 | binary.LittleEndian.PutUint32(v.m[i:], n) 159 | } 160 | } 161 | 162 | type commandFunc func(v *vm, byteMode bool, op []operand) 163 | 164 | type command struct { 165 | f commandFunc 166 | bm bool // is byte mode 167 | op []operand 168 | } 169 | 170 | var ( 171 | ops = []struct { 172 | f commandFunc 173 | byteMode bool // supports byte mode 174 | nops int // number of operands 175 | jop bool // is a jump op 176 | }{ 177 | {mov, true, 2, false}, 178 | {cmp, true, 2, false}, 179 | {add, true, 2, false}, 180 | {sub, true, 2, false}, 181 | {jz, false, 1, true}, 182 | {jnz, false, 1, true}, 183 | {inc, true, 1, false}, 184 | {dec, true, 1, false}, 185 | {jmp, false, 1, true}, 186 | {xor, true, 2, false}, 187 | {and, true, 2, false}, 188 | {or, true, 2, false}, 189 | {test, true, 2, false}, 190 | {js, false, 1, true}, 191 | {jns, false, 1, true}, 192 | {jb, false, 1, true}, 193 | {jbe, false, 1, true}, 194 | {ja, false, 1, true}, 195 | {jae, false, 1, true}, 196 | {push, false, 1, false}, 197 | {pop, false, 1, false}, 198 | {call, false, 1, true}, 199 | {ret, false, 0, false}, 200 | {not, true, 1, false}, 201 | {shl, true, 2, false}, 202 | {shr, true, 2, false}, 203 | {sar, true, 2, false}, 204 | {neg, true, 1, false}, 205 | {pusha, false, 0, false}, 206 | {popa, false, 0, false}, 207 | {pushf, false, 0, false}, 208 | {popf, false, 0, false}, 209 | {movzx, false, 2, false}, 210 | {movsx, false, 2, false}, 211 | {xchg, true, 2, false}, 212 | {mul, true, 2, false}, 213 | {div, true, 2, false}, 214 | {adc, true, 2, false}, 215 | {sbb, true, 2, false}, 216 | {print, false, 0, false}, 217 | } 218 | ) 219 | 220 | func mov(v *vm, bm bool, op 
[]operand) { 221 | op[0].set(v, bm, op[1].get(v, bm)) 222 | } 223 | 224 | func cmp(v *vm, bm bool, op []operand) { 225 | v1 := op[0].get(v, bm) 226 | r := v1 - op[1].get(v, bm) 227 | if r == 0 { 228 | v.fl = flagZ 229 | } else { 230 | v.fl = 0 231 | if r > v1 { 232 | v.fl = flagC 233 | } 234 | v.fl |= r & flagS 235 | } 236 | } 237 | 238 | func add(v *vm, bm bool, op []operand) { 239 | v1 := op[0].get(v, bm) 240 | r := v1 + op[1].get(v, bm) 241 | v.fl = 0 242 | signBit := uint32(flagS) 243 | if bm { 244 | r &= 0xFF 245 | signBit = 0x80 246 | } 247 | if r < v1 { 248 | v.fl |= flagC 249 | } 250 | if r == 0 { 251 | v.fl |= flagZ 252 | } else if r&signBit > 0 { 253 | v.fl |= flagS 254 | } 255 | op[0].set(v, bm, r) 256 | } 257 | 258 | func sub(v *vm, bm bool, op []operand) { 259 | v1 := op[0].get(v, bm) 260 | r := v1 - op[1].get(v, bm) 261 | v.fl = 0 262 | 263 | if r == 0 { 264 | v.fl = flagZ 265 | } else { 266 | v.fl = 0 267 | if r > v1 { 268 | v.fl = flagC 269 | } 270 | v.fl |= r & flagS 271 | } 272 | op[0].set(v, bm, r) 273 | } 274 | 275 | func jz(v *vm, bm bool, op []operand) { 276 | if v.fl&flagZ > 0 { 277 | v.setIP(op[0].get(v, false)) 278 | } 279 | } 280 | 281 | func jnz(v *vm, bm bool, op []operand) { 282 | if v.fl&flagZ == 0 { 283 | v.setIP(op[0].get(v, false)) 284 | } 285 | } 286 | 287 | func inc(v *vm, bm bool, op []operand) { 288 | r := op[0].get(v, bm) + 1 289 | if bm { 290 | r &= 0xFF 291 | } 292 | op[0].set(v, bm, r) 293 | if r == 0 { 294 | v.fl = flagZ 295 | } else { 296 | v.fl = r & flagS 297 | } 298 | } 299 | 300 | func dec(v *vm, bm bool, op []operand) { 301 | r := op[0].get(v, bm) - 1 302 | op[0].set(v, bm, r) 303 | if r == 0 { 304 | v.fl = flagZ 305 | } else { 306 | v.fl = r & flagS 307 | } 308 | } 309 | 310 | func jmp(v *vm, bm bool, op []operand) { 311 | v.setIP(op[0].get(v, false)) 312 | } 313 | 314 | func xor(v *vm, bm bool, op []operand) { 315 | r := op[0].get(v, bm) ^ op[1].get(v, bm) 316 | op[0].set(v, bm, r) 317 | if r == 0 { 318 | v.fl = 
flagZ 319 | } else { 320 | v.fl = r & flagS 321 | } 322 | } 323 | 324 | func and(v *vm, bm bool, op []operand) { 325 | r := op[0].get(v, bm) & op[1].get(v, bm) 326 | op[0].set(v, bm, r) 327 | if r == 0 { 328 | v.fl = flagZ 329 | } else { 330 | v.fl = r & flagS 331 | } 332 | } 333 | 334 | func or(v *vm, bm bool, op []operand) { 335 | r := op[0].get(v, bm) | op[1].get(v, bm) 336 | op[0].set(v, bm, r) 337 | if r == 0 { 338 | v.fl = flagZ 339 | } else { 340 | v.fl = r & flagS 341 | } 342 | } 343 | 344 | func test(v *vm, bm bool, op []operand) { 345 | r := op[0].get(v, bm) & op[1].get(v, bm) 346 | if r == 0 { 347 | v.fl = flagZ 348 | } else { 349 | v.fl = r & flagS 350 | } 351 | } 352 | 353 | func js(v *vm, bm bool, op []operand) { 354 | if v.fl&flagS > 0 { 355 | v.setIP(op[0].get(v, false)) 356 | } 357 | } 358 | 359 | func jns(v *vm, bm bool, op []operand) { 360 | if v.fl&flagS == 0 { 361 | v.setIP(op[0].get(v, false)) 362 | } 363 | } 364 | 365 | func jb(v *vm, bm bool, op []operand) { 366 | if v.fl&flagC > 0 { 367 | v.setIP(op[0].get(v, false)) 368 | } 369 | } 370 | 371 | func jbe(v *vm, bm bool, op []operand) { 372 | if v.fl&(flagC|flagZ) > 0 { 373 | v.setIP(op[0].get(v, false)) 374 | } 375 | } 376 | 377 | func ja(v *vm, bm bool, op []operand) { 378 | if v.fl&(flagC|flagZ) == 0 { 379 | v.setIP(op[0].get(v, false)) 380 | } 381 | } 382 | 383 | func jae(v *vm, bm bool, op []operand) { 384 | if v.fl&flagC == 0 { 385 | v.setIP(op[0].get(v, false)) 386 | } 387 | } 388 | 389 | func push(v *vm, bm bool, op []operand) { 390 | v.r[7] -= 4 391 | opRI(7).set(v, false, op[0].get(v, false)) 392 | 393 | } 394 | 395 | func pop(v *vm, bm bool, op []operand) { 396 | op[0].set(v, false, opRI(7).get(v, false)) 397 | v.r[7] += 4 398 | } 399 | 400 | func call(v *vm, bm bool, op []operand) { 401 | v.r[7] -= 4 402 | opRI(7).set(v, false, v.ip+1) 403 | v.setIP(op[0].get(v, false)) 404 | } 405 | 406 | func ret(v *vm, bm bool, op []operand) { 407 | r7 := v.r[7] 408 | if r7 >= vmSize { 409 | 
v.setIP(0xFFFFFFFF) // trigger end of program 410 | } else { 411 | v.setIP(binary.LittleEndian.Uint32(v.m[r7:])) 412 | v.r[7] += 4 413 | } 414 | } 415 | 416 | func not(v *vm, bm bool, op []operand) { 417 | op[0].set(v, bm, ^op[0].get(v, bm)) 418 | } 419 | 420 | func shl(v *vm, bm bool, op []operand) { 421 | v1 := op[0].get(v, bm) 422 | v2 := op[1].get(v, bm) 423 | r := v1 << v2 424 | op[0].set(v, bm, r) 425 | if r == 0 { 426 | v.fl = flagZ 427 | } else { 428 | v.fl = r & flagS 429 | } 430 | if (v1<<(v2-1))&0x80000000 > 0 { 431 | v.fl |= flagC 432 | } 433 | } 434 | 435 | func shr(v *vm, bm bool, op []operand) { 436 | v1 := op[0].get(v, bm) 437 | v2 := op[1].get(v, bm) 438 | r := v1 >> v2 439 | op[0].set(v, bm, r) 440 | if r == 0 { 441 | v.fl = flagZ 442 | } else { 443 | v.fl = r & flagS 444 | } 445 | if (v1>>(v2-1))&0x1 > 0 { 446 | v.fl |= flagC 447 | } 448 | } 449 | 450 | func sar(v *vm, bm bool, op []operand) { 451 | v1 := op[0].get(v, bm) 452 | v2 := op[1].get(v, bm) 453 | r := uint32(int32(v1) >> v2) 454 | op[0].set(v, bm, r) 455 | if r == 0 { 456 | v.fl = flagZ 457 | } else { 458 | v.fl = r & flagS 459 | } 460 | if (v1>>(v2-1))&0x1 > 0 { 461 | v.fl |= flagC 462 | } 463 | } 464 | 465 | func neg(v *vm, bm bool, op []operand) { 466 | r := 0 - op[0].get(v, bm) 467 | op[0].set(v, bm, r) 468 | if r == 0 { 469 | v.fl = flagZ 470 | } else { 471 | v.fl = r&flagS | flagC 472 | } 473 | } 474 | 475 | func pusha(v *vm, bm bool, op []operand) { 476 | sp := opD(v.r[7]) 477 | for _, r := range v.r { 478 | sp = (sp - 4) & vmMask 479 | sp.set(v, false, r) 480 | } 481 | v.r[7] = uint32(sp) 482 | } 483 | 484 | func popa(v *vm, bm bool, op []operand) { 485 | sp := opD(v.r[7]) 486 | for i := 7; i >= 0; i-- { 487 | v.r[i] = sp.get(v, false) 488 | sp = (sp + 4) & vmMask 489 | } 490 | } 491 | 492 | func pushf(v *vm, bm bool, op []operand) { 493 | v.r[7] -= 4 494 | opRI(7).set(v, false, v.fl) 495 | } 496 | 497 | func popf(v *vm, bm bool, op []operand) { 498 | v.fl = opRI(7).get(v, 
false) 499 | v.r[7] += 4 500 | } 501 | 502 | func movzx(v *vm, bm bool, op []operand) { 503 | op[0].set(v, false, op[1].get(v, true)) 504 | } 505 | 506 | func movsx(v *vm, bm bool, op []operand) { 507 | op[0].set(v, false, uint32(int8(op[1].get(v, true)))) 508 | } 509 | 510 | func xchg(v *vm, bm bool, op []operand) { 511 | v1 := op[0].get(v, bm) 512 | op[0].set(v, bm, op[1].get(v, bm)) 513 | op[1].set(v, bm, v1) 514 | } 515 | 516 | func mul(v *vm, bm bool, op []operand) { 517 | r := op[0].get(v, bm) * op[1].get(v, bm) 518 | op[0].set(v, bm, r) 519 | } 520 | 521 | func div(v *vm, bm bool, op []operand) { 522 | div := op[1].get(v, bm) 523 | if div != 0 { 524 | r := op[0].get(v, bm) / div 525 | op[0].set(v, bm, r) 526 | } 527 | } 528 | 529 | func adc(v *vm, bm bool, op []operand) { 530 | v1 := op[0].get(v, bm) 531 | fc := v.fl & flagC 532 | r := v1 + op[1].get(v, bm) + fc 533 | if bm { 534 | r &= 0xFF 535 | } 536 | op[0].set(v, bm, r) 537 | 538 | if r == 0 { 539 | v.fl = flagZ 540 | } else { 541 | v.fl = r & flagS 542 | } 543 | if r < v1 || (r == v1 && fc > 0) { 544 | v.fl |= flagC 545 | } 546 | } 547 | 548 | func sbb(v *vm, bm bool, op []operand) { 549 | v1 := op[0].get(v, bm) 550 | fc := v.fl & flagC 551 | r := v1 - op[1].get(v, bm) - fc 552 | if bm { 553 | r &= 0xFF 554 | } 555 | op[0].set(v, bm, r) 556 | 557 | if r == 0 { 558 | v.fl = flagZ 559 | } else { 560 | v.fl = r & flagS 561 | } 562 | if r > v1 || (r == v1 && fc > 0) { 563 | v.fl |= flagC 564 | } 565 | } 566 | 567 | func print(v *vm, bm bool, op []operand) { 568 | // TODO: ignore print for the moment 569 | } 570 | 571 | func decodeArg(br *rarBitReader, byteMode bool) (operand, error) { 572 | n, err := br.readBits(1) 573 | if err != nil { 574 | return nil, err 575 | } 576 | if n > 0 { // Register 577 | n, err = br.readBits(3) 578 | return opR(n), err 579 | } 580 | n, err = br.readBits(1) 581 | if err != nil { 582 | return nil, err 583 | } 584 | if n == 0 { // Immediate 585 | if byteMode { 586 | n, err = 
br.readBits(8) 587 | } else { 588 | var m uint32 589 | m, err = br.readUint32() 590 | return opI(m), err 591 | } 592 | return opI(n), err 593 | } 594 | n, err = br.readBits(1) 595 | if err != nil { 596 | return nil, err 597 | } 598 | if n == 0 { 599 | // Register Indirect 600 | n, err = br.readBits(3) 601 | return opRI(n), err 602 | } 603 | n, err = br.readBits(1) 604 | if err != nil { 605 | return nil, err 606 | } 607 | if n == 0 { 608 | // Base + Index Indirect 609 | n, err = br.readBits(3) 610 | if err != nil { 611 | return nil, err 612 | } 613 | var i uint32 614 | i, err = br.readUint32() 615 | return opBI{r: uint32(n), i: i}, err 616 | } 617 | // Direct addressing 618 | m, err := br.readUint32() 619 | return opD(m & vmMask), err 620 | } 621 | 622 | func fixJumpOp(op operand, off int) operand { 623 | n, ok := op.(opI) 624 | if !ok { 625 | return op 626 | } 627 | if n >= 256 { 628 | return n - 256 629 | } 630 | if n >= 136 { 631 | n -= 264 632 | } else if n >= 16 { 633 | n -= 8 634 | } else if n >= 8 { 635 | n -= 16 636 | } 637 | return n + opI(off) 638 | } 639 | 640 | func readCommands(br *rarBitReader) ([]command, error) { 641 | var cmds []command 642 | 643 | for { 644 | code, err := br.readBits(4) 645 | if err != nil { 646 | return cmds, err 647 | } 648 | if code&0x08 > 0 { 649 | var n int 650 | n, err = br.readBits(2) 651 | if err != nil { 652 | return cmds, err 653 | } 654 | code = (code<<2 | n) - 24 655 | } 656 | 657 | if code >= len(ops) { 658 | return cmds, ErrInvalidVMInstruction 659 | } 660 | ins := ops[code] 661 | 662 | var com command 663 | 664 | if ins.byteMode { 665 | var n int 666 | n, err = br.readBits(1) 667 | if err != nil { 668 | return cmds, err 669 | } 670 | com.bm = n > 0 671 | } 672 | com.f = ins.f 673 | 674 | if ins.nops > 0 { 675 | com.op = make([]operand, ins.nops) 676 | com.op[0], err = decodeArg(br, com.bm) 677 | if err != nil { 678 | return cmds, err 679 | } 680 | if ins.nops == 2 { 681 | com.op[1], err = decodeArg(br, com.bm) 682 | 
if err != nil { 683 | return cmds, err 684 | } 685 | } else if ins.jop { 686 | com.op[0] = fixJumpOp(com.op[0], len(cmds)) 687 | } 688 | } 689 | cmds = append(cmds, com) 690 | } 691 | } 692 | -------------------------------------------------------------------------------- /volume.go: -------------------------------------------------------------------------------- 1 | package rardecode 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "io/fs" 10 | "math" 11 | "os" 12 | "path/filepath" 13 | "slices" 14 | "strconv" 15 | "strings" 16 | ) 17 | 18 | const ( 19 | maxSfxSize = 0x100000 // maximum number of bytes to read when searching for RAR signature 20 | sigPrefix = "Rar!\x1A\x07" 21 | ) 22 | 23 | var ( 24 | ErrNoSig = errors.New("rardecode: RAR signature not found") 25 | ErrVerMismatch = errors.New("rardecode: volume version mistmatch") 26 | ErrArchiveNameEmpty = errors.New("rardecode: archive name empty") 27 | ErrFileNameRequired = errors.New("rardecode: filename required for multi volume archive") 28 | 29 | defaultFS = osFS{} 30 | defaultBufSize = 4096 31 | ) 32 | 33 | type osFS struct{} 34 | 35 | func (fs osFS) Open(name string) (fs.File, error) { 36 | return os.Open(name) 37 | } 38 | 39 | type options struct { 40 | bsize int // size to be use for bufio.Reader 41 | fs fs.FS // filesystem to use to open files 42 | pass *string // password for encrypted volumes 43 | file string // filename for volume 44 | } 45 | 46 | // An Option is used for optional archive extraction settings. 47 | type Option func(*options) 48 | 49 | // BufferSize sets the size of the bufio.Reader used in reading the archive. 50 | func BufferSize(size int) Option { 51 | return func(o *options) { o.bsize = size } 52 | } 53 | 54 | // FileSystem sets the fs.FS to be used for opening archive volumes. 55 | func FileSystem(fs fs.FS) Option { 56 | return func(o *options) { o.fs = fs } 57 | } 58 | 59 | // Password sets the password to use for decrypting archives. 
60 | func Password(pass string) Option { 61 | return func(o *options) { o.pass = &pass } 62 | } 63 | 64 | func getOptions(opts []Option) options { 65 | opt := options{bsize: defaultBufSize} 66 | for _, f := range opts { 67 | f(&opt) 68 | } 69 | // truncate password 70 | if opt.pass != nil { 71 | runes := []rune(*opt.pass) 72 | if len(runes) > maxPassword { 73 | pw := string(runes[:maxPassword]) 74 | opt.pass = &pw 75 | } 76 | } 77 | return opt 78 | } 79 | 80 | // volume extends a fileBlockReader to be used across multiple 81 | // files in a multi-volume archive 82 | type volume struct { 83 | f io.Reader // current file handle 84 | br *bufio.Reader // buffered reader for current volume file 85 | dir string // current volume directory path 86 | files []string // file names for each volume 87 | num int // volume number 88 | old bool // uses old naming scheme 89 | off int64 // current file offset 90 | ver int // archive file format version 91 | fs fs.FS // filesystem to use to open files 92 | } 93 | 94 | func (v *volume) openFile(file string, volnum int) error { 95 | f, err := v.fs.Open(v.dir + file) 96 | if err != nil { 97 | return err 98 | } 99 | v.f = f 100 | v.num = volnum 101 | v.off = 0 102 | v.br.Reset(v.f) 103 | if volnum == len(v.files) { 104 | v.files = append(v.files, file) 105 | } 106 | version, err := v.findSig() 107 | if err != nil { 108 | _ = v.Close() 109 | return err 110 | } 111 | if version != v.ver { 112 | return ErrVerMismatch 113 | } 114 | return nil 115 | } 116 | 117 | func (v *volume) init() error { 118 | off := v.off 119 | err := v.openFile(v.files[v.num], v.num) 120 | if err != nil { 121 | return err 122 | } 123 | return v.discard(off - v.off) 124 | } 125 | 126 | func (v *volume) clone() *volume { 127 | nv := new(volume) 128 | *nv = *v 129 | nv.f = nil 130 | nv.br = bufio.NewReaderSize(bytes.NewReader(nil), nv.br.Size()) 131 | nv.files = slices.Clone(nv.files) 132 | return nv 133 | } 134 | 135 | func (v *volume) Close() error { 136 | // v.f may 
be nil if os.Open fails in next(). 137 | // We only close if we opened it (ie. name in v.files). 138 | if v.f != nil && len(v.files) > 0 { 139 | if c, ok := v.f.(io.Closer); ok { 140 | err := c.Close() 141 | v.f = nil // set to nil so we can only close v.f once 142 | return err 143 | } 144 | } 145 | return nil 146 | } 147 | 148 | func (v *volume) discard(n int64) error { 149 | var err error 150 | v.off += n 151 | l := int64(v.br.Buffered()) 152 | if n <= l { 153 | _, err = v.br.Discard(int(n)) 154 | } else if sr, ok := v.f.(io.Seeker); ok { 155 | n -= l 156 | _, err = sr.Seek(n, io.SeekCurrent) 157 | v.br.Reset(v.f) 158 | } else { 159 | for n > math.MaxInt && err == nil { 160 | _, err = v.br.Discard(math.MaxInt) 161 | n -= math.MaxInt 162 | } 163 | if err == nil && n > 0 { 164 | _, err = v.br.Discard(int(n)) 165 | } 166 | } 167 | if err == io.EOF { 168 | err = io.ErrUnexpectedEOF 169 | } 170 | return err 171 | } 172 | 173 | func (v *volume) peek(n int) ([]byte, error) { 174 | b, err := v.br.Peek(n) 175 | if err == io.EOF && len(b) > 0 { 176 | err = io.ErrUnexpectedEOF 177 | } 178 | return b, err 179 | } 180 | 181 | func (v *volume) readSlice(n int) ([]byte, error) { 182 | if n <= v.br.Size() { 183 | b, err := v.br.Peek(n) 184 | if err != nil { 185 | if err == io.EOF && len(b) > 0 { 186 | err = io.ErrUnexpectedEOF 187 | } 188 | return nil, err 189 | } 190 | n, err = v.br.Discard(n) 191 | v.off += int64(n) 192 | return b[:n:n], err 193 | } 194 | // bufio.Reader buffer is too small, create a new slice and copy to it 195 | b := make([]byte, n) 196 | if _, err := io.ReadFull(v.br, b); err != nil { 197 | return nil, err 198 | } 199 | v.off += int64(n) 200 | return b, nil 201 | } 202 | 203 | func (v *volume) Read(p []byte) (int, error) { 204 | n, err := v.br.Read(p) 205 | v.off += int64(n) 206 | return n, err 207 | } 208 | 209 | // findSig searches for the RAR signature and version at the beginning of a file. 210 | // It searches no more than maxSfxSize bytes. 
211 | func (v *volume) findSig() (int, error) { 212 | v.off = 0 213 | for v.off <= maxSfxSize { 214 | b, err := v.br.ReadSlice(sigPrefix[0]) 215 | v.off += int64(len(b)) 216 | if err == bufio.ErrBufferFull { 217 | continue 218 | } else if err != nil { 219 | if err == io.EOF { 220 | err = ErrNoSig 221 | } 222 | return 0, err 223 | } 224 | 225 | b, err = v.br.Peek(len(sigPrefix[1:]) + 2) 226 | if err != nil { 227 | if err == io.EOF { 228 | err = ErrNoSig 229 | } 230 | return 0, err 231 | } 232 | if !bytes.HasPrefix(b, []byte(sigPrefix[1:])) { 233 | continue 234 | } 235 | b = b[len(sigPrefix)-1:] 236 | 237 | ver := int(b[0]) 238 | if b[0] != 0 && b[1] != 0 { 239 | continue 240 | } 241 | b, err = v.br.ReadSlice('\x00') 242 | if err != nil { 243 | return 0, err 244 | } 245 | v.off += int64(len(b)) 246 | return ver, nil 247 | } 248 | return 0, ErrNoSig 249 | } 250 | 251 | func nextNewVolName(file string) string { 252 | var inDigit bool 253 | var m []int 254 | for i, c := range file { 255 | if c >= '0' && c <= '9' { 256 | if !inDigit { 257 | m = append(m, i) 258 | inDigit = true 259 | } 260 | } else if inDigit { 261 | m = append(m, i) 262 | inDigit = false 263 | } 264 | } 265 | if inDigit { 266 | m = append(m, len(file)) 267 | } 268 | if l := len(m); l >= 4 { 269 | // More than 1 match so assume name.part###of###.rar style. 270 | // Take the last 2 matches where the first is the volume number. 271 | m = m[l-4 : l] 272 | if strings.Contains(file[m[1]:m[2]], ".") || !strings.Contains(file[:m[0]], ".") { 273 | // Didn't match above style as volume had '.' between the two numbers or didnt have a '.' 274 | // before the first match. Use the second number as volume number. 
275 | m = m[2:] 276 | } 277 | } 278 | // extract and increment volume number 279 | lo, hi := m[0], m[1] 280 | n, err := strconv.Atoi(file[lo:hi]) 281 | if err != nil { 282 | n = 0 283 | } else { 284 | n++ 285 | } 286 | // volume number must use at least the same number of characters as previous volume 287 | vol := fmt.Sprintf("%0"+fmt.Sprint(hi-lo)+"d", n) 288 | return file[:lo] + vol + file[hi:] 289 | } 290 | 291 | func nextOldVolName(file string) string { 292 | // old style volume naming 293 | i := strings.LastIndex(file, ".") 294 | // get file extension 295 | b := []byte(file[i+1:]) 296 | 297 | // If 2nd and 3rd character of file extension is not a digit replace 298 | // with "00" and ignore any trailing characters. 299 | if len(b) < 3 || b[1] < '0' || b[1] > '9' || b[2] < '0' || b[2] > '9' { 300 | return file[:i+2] + "00" 301 | } 302 | 303 | // start incrementing volume number digits from rightmost 304 | for j := 2; j >= 0; j-- { 305 | if b[j] != '9' { 306 | b[j]++ 307 | break 308 | } 309 | // digit overflow 310 | if j == 0 { 311 | // last character before '.' 312 | b[j] = 'A' 313 | } else { 314 | // set to '0' and loop to next character 315 | b[j] = '0' 316 | } 317 | } 318 | return file[:i+1] + string(b) 319 | } 320 | 321 | func hasDigits(s string) bool { 322 | for _, c := range s { 323 | if c >= '0' && c <= '9' { 324 | return true 325 | } 326 | } 327 | return false 328 | } 329 | 330 | func fixFileExtension(file string) string { 331 | // check file extensions 332 | i := strings.LastIndex(file, ".") 333 | if i < 0 { 334 | // no file extension, add one 335 | return file + ".rar" 336 | } 337 | ext := strings.ToLower(file[i+1:]) 338 | // replace with .rar for empty extensions & self extracting archives 339 | if ext == "" || ext == "exe" || ext == "sfx" { 340 | file = file[:i+1] + "rar" 341 | } 342 | return file 343 | } 344 | 345 | // next opens the next volume file in the archive. 
346 | func (v *volume) next() error { 347 | if len(v.files) == 0 { 348 | return ErrFileNameRequired 349 | } 350 | err := v.Close() 351 | if err != nil { 352 | return err 353 | } 354 | 355 | nextVolNum := v.num + 1 356 | // check for cached volume name 357 | if nextVolNum < len(v.files) { 358 | return v.openFile(v.files[nextVolNum], nextVolNum) 359 | } 360 | 361 | file := v.files[v.num] 362 | if nextVolNum == 1 { 363 | file = fixFileExtension(file) 364 | // new naming scheme must have volume number in filename 365 | if !v.old && hasDigits(file) { 366 | // found digits, try using new naming scheme 367 | err = v.openFile(nextNewVolName(file), nextVolNum) 368 | if err == nil || !os.IsNotExist(err) { 369 | return err 370 | } 371 | // file didn't exist, try old naming scheme 372 | oldErr := v.openFile(nextOldVolName(file), nextVolNum) 373 | if oldErr == nil || !os.IsNotExist(err) { 374 | v.old = true 375 | return oldErr 376 | } 377 | return err 378 | } 379 | v.old = true 380 | } 381 | if v.old { 382 | file = nextOldVolName(file) 383 | } else { 384 | file = nextNewVolName(file) 385 | } 386 | return v.openFile(file, nextVolNum) 387 | } 388 | 389 | func newVolume(r io.Reader, options options) (*volume, error) { 390 | v := &volume{ 391 | f: r, 392 | br: bufio.NewReaderSize(r, options.bsize), 393 | fs: options.fs, 394 | } 395 | if options.file != "" { 396 | dir, file := filepath.Split(options.file) 397 | v.dir = dir 398 | v.files = []string{file} 399 | } 400 | var err error 401 | v.ver, err = v.findSig() 402 | if err != nil { 403 | _ = v.Close() 404 | return nil, err 405 | } 406 | return v, nil 407 | } 408 | --------------------------------------------------------------------------------