├── .gitignore ├── LICENSE ├── README.md └── src ├── Makefile ├── archiver ├── csa_adler32.cpp ├── csa_adler32.h ├── csa_common.cpp ├── csa_common.h ├── csa_file.cpp ├── csa_file.h ├── csa_indexpack.cpp ├── csa_indexpack.h ├── csa_io.h ├── csa_progress.cpp ├── csa_progress.h ├── csa_thread.h ├── csa_typedef.h ├── csa_worker.cpp ├── csa_worker.h └── csarc.cpp ├── libcsc ├── Types.h ├── csc.cpp ├── csc_analyzer.cpp ├── csc_analyzer.h ├── csc_coder.cpp ├── csc_coder.h ├── csc_common.h ├── csc_dec.cpp ├── csc_dec.h ├── csc_default_alloc.cpp ├── csc_default_alloc.h ├── csc_enc.cpp ├── csc_enc.h ├── csc_encoder_main.cpp ├── csc_encoder_main.h ├── csc_filters.cpp ├── csc_filters.h ├── csc_lz.cpp ├── csc_lz.h ├── csc_memio.cpp ├── csc_memio.h ├── csc_mf.cpp ├── csc_mf.h ├── csc_model.cpp ├── csc_model.h ├── csc_profiler.cpp ├── csc_profiler.h ├── csc_typedef.h └── decomp.cpp ├── others ├── dist_table.cpp └── make_dict.cpp └── test.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 
7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <https://unlicense.org> 25 | 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSC (CSArc) 2 | 3 | It includes 4 | * libcsc: A lossless data compression algorithm inspired by LZMA 5 | * csarc: Experimental command line archiver running on Linux/Windows based on libcsc 6 | 7 | 8 | ### Usage of CSArc: 9 | * Create a new archive: 10 | ```csarc a [options] archive_name file1 file2 ...``` 11 | [options] can be: 12 | ```-m[1..5] Compression level from most efficient to strongest``` 13 | ```-d##[k|m] Dictionary size, must be in range of [32k, 1024m]``` 14 | ```-r Recursively scan files in directories``` 15 | ```-f Forcibly overwrite existing archive``` 16 | ```-p## Only works with single file compression, split the work for multi-threading``` 17 | ```-t# Multithreading-number, ranges in [1,8], memory usage will be multiplied by this number``` 18 | 19 | * Extract file(s) from an archive: 20 | 
```csarc x [options] archive_name [file1_in_arc file2_in_arc ...]``` 21 | [options] can be: 22 | ```-t# Multithreading-number, ranges in [1,8], memory usage will be multiplied by this number``` 23 | ```-o out_dir Extraction output directory``` 24 | 25 | * List file(s) in archive: 26 | ```csarc l [options] archive_name [file1_in_arc file2_in_arc ...]``` 27 | [options] can be: 28 | ```-v Shows fragments information with Adler32 hash``` 29 | 30 | * Test to extract file(s) in archive: 31 | ```csarc t [options] archive_name [file1_in_arc file2_in_arc ...]``` 32 | [options] can be: 33 | ```-t# Multithreading-number, ranges in [1,8]``` 34 | 35 | * Examples: 36 | csarc a -m2 -d64m -r -t2 out.csa /disk2/* 37 | csarc x -t2 -o /tmp/ out.csa *.jpg 38 | csarc l out.csa 39 | csarc t out.csa *.dll 40 | 41 | ### Introduction for libcsc: 42 | * The whole compressor was mostly inspired by LZMA, with some ideas from others or my own mixed. 43 | * Based on LZ77 Sliding Window + Range Coder. Literals are with pure order-1, literal/match flag was coded with state of previous 3 flag. Match length / distance was coded into different slots. The schema is quite similar with LZMA. 44 | * Match finder for LZ77 is using 2-bytes hash table and 3-bytes hash table with only one candidate. And for -m1 .. -m4, another 6-bytes hash table was used, with width(candidates) of 1, 8, 2, 8. For -m5, there is binary tree with 6-bytes hash searching head. The implementation for binary tree was from LZMA. The binary tree match finder does not hold the whole dictionary range, instead it is smaller, to avoid 10x dictionary size memory usage. But the searching head table for BT does not have such limitation. 45 | * Two kinds of parser: Lazy one and advanced one. Advanced parser is calculating the lowest price path for every several KB chunk. -m1 , -m2 is using the lazy one, -m3 .. -m5 is with advanced. Details for m1 .. 
m5 configuration can be found in csc_enc.cpp 46 | * A simple data analyzer is used on every 8KB block of raw data. It calculates: the order-0 bits/Byte of the current block, data similarity of every 1,2,3,4,8 bytes for potential data tables, and a heuristic to tell EXE code or English text. 47 | * Continuous blocks analyzed as the same type will be compressed as one chunk; based on its type, one of the following may be applied to it: 48 | * e8e9 preprocessor, Eugene Shelwien gave me the code before. 49 | * A very simple & fast English preprocessor, just replacing some words with chars 128 to 255. The dictionary was from Sami Runsas. 50 | * Delta preprocessor on the corresponding channel, followed by order-1 direct coding instead of LZ77. The entropy of the delta-processed block is also calculated and compared beforehand to decide the coding and avoid false positives. 51 | * No compression is applied on high entropy data, which is measured by order-0 bpB. 52 | * For delta and high entropy data, the original data is stored into the LZ77 window and match finder. Before it is compressed, it will be checked whether the block is duplicated. So two identical blocks in one file are still compressed as one. 53 | * The libcsc API is very LZMA-like and uses the same Types.h as LZMA. 54 | * Memory usage is about 2 - 4x dictionary size. 55 | 56 | ### Introduction For CSArc: 57 | * Cross-platform thread / file / directory operation codes were from ZPAQ. 58 | * Files were sorted by extension and size (if it is not a very small file), then split into tasks by extension. 59 | * Each task is compressed by one thread, and there can be several threads working for multiple tasks. 60 | * Each worker task contains separate I/O thread and buffer. 61 | * Multi-threading also works in the same way when doing extraction. 62 | * For a single big file, the -p# switch can be used to force splitting the file into multiple tasks for multi-threading; however, this will hurt compression. 63 | * Memory usage will be multiplied by the thread number. 
64 | * Adler32 checksum is being used. 65 | * Meta info of all files is also compressed by libcsc, appended at the end of all other compressed data. 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | OPT=-O4 4 | #OPT=-Os 5 | #DEBUG=-g -DDEBUG #-fsanitize=undefined #-D_HAVE_PROFILER_ 6 | 7 | CFLAGS=-Wall -Wno-format -fPIC $(OPT) $(DEBUG) $(TEST) -D_7Z_TYPES_ 8 | 9 | CC=g++ 10 | INCLUDE_PATH = -I ./libcsc/ -I ./archiver/ 11 | 12 | LIBCSCOBJS=$(addprefix libcsc/, csc_analyzer.o csc_encoder_main.o csc_coder.o csc_filters.o csc_lz.o csc_memio.o csc_enc.o csc_dec.o csc_model.o csc_profiler.o csc_mf.o csc_default_alloc.o) 13 | 14 | LIBCSCDECOBJS=$(addprefix libcsc/, csc_dec.o csc_memio.o csc_filters.o csc_default_alloc.o) 15 | 16 | CSARCOBJS=$(addprefix archiver/, csa_indexpack.o csa_worker.o csa_file.o csa_adler32.o csa_common.o csa_progress.o) 17 | 18 | LIBOUTPUT=libcsc.a 19 | 20 | all: $(LIBOUTPUT) csc decomp csarc 21 | 22 | csc: ./libcsc/csc.cpp $(LIBOUTPUT) 23 | $(CC) $(INCLUDE_PATH) $(CFLAGS) -o $@ $^ 24 | 25 | decomp: ./libcsc/decomp.cpp $(LIBCSCDECOBJS) 26 | $(CC) $(INCLUDE_PATH) $(CFLAGS) -o $@ $^ 27 | 28 | csarc: ./archiver/csarc.cpp $(CSARCOBJS) $(LIBOUTPUT) 29 | $(CC) $(INCLUDE_PATH) $(CFLAGS) -o $@ $^ -pthread 30 | 31 | $(LIBOUTPUT): $(LIBCSCOBJS) 32 | ar rvs $@ $^ 33 | 34 | %.o: %.cpp 35 | $(CC) $(INCLUDE_PATH) -c $(CFLAGS) -o $@ $^ 36 | 37 | clean: 38 | rm -f $(CSARCOBJS) $(LIBCSCDECOBJS) $(LIBOUTPUT) $(LIBCSCOBJS) csc decomp csarc 39 | 40 | rebuild: clean all 41 | -------------------------------------------------------------------------------- /src/archiver/csa_adler32.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Modified from: 3 | * 4 | * 5 | * adler32.c -- compute the Adler-32 checksum of a data stream 6 | * Copyright (C) 1995-2007 Mark 
Adler 7 | * For conditions of distribution and use, see copyright notice in zlib.h 8 | */ 9 | 10 | /* @(#) $Id$ */ 11 | 12 | #include 13 | #include 14 | 15 | static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, uint64_t len2); 16 | 17 | #define BASE 65521UL /* largest prime smaller than 65536 */ 18 | #define NMAX 5552 19 | /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ 20 | 21 | #define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} 22 | #define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); 23 | #define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); 24 | #define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); 25 | #define DO16(buf) DO8(buf,0); DO8(buf,8); 26 | 27 | /* use NO_DIVIDE if your processor does not do division in hardware */ 28 | #ifdef NO_DIVIDE 29 | # define MOD(a) \ 30 | do { \ 31 | if (a >= (BASE << 16)) a -= (BASE << 16); \ 32 | if (a >= (BASE << 15)) a -= (BASE << 15); \ 33 | if (a >= (BASE << 14)) a -= (BASE << 14); \ 34 | if (a >= (BASE << 13)) a -= (BASE << 13); \ 35 | if (a >= (BASE << 12)) a -= (BASE << 12); \ 36 | if (a >= (BASE << 11)) a -= (BASE << 11); \ 37 | if (a >= (BASE << 10)) a -= (BASE << 10); \ 38 | if (a >= (BASE << 9)) a -= (BASE << 9); \ 39 | if (a >= (BASE << 8)) a -= (BASE << 8); \ 40 | if (a >= (BASE << 7)) a -= (BASE << 7); \ 41 | if (a >= (BASE << 6)) a -= (BASE << 6); \ 42 | if (a >= (BASE << 5)) a -= (BASE << 5); \ 43 | if (a >= (BASE << 4)) a -= (BASE << 4); \ 44 | if (a >= (BASE << 3)) a -= (BASE << 3); \ 45 | if (a >= (BASE << 2)) a -= (BASE << 2); \ 46 | if (a >= (BASE << 1)) a -= (BASE << 1); \ 47 | if (a >= BASE) a -= BASE; \ 48 | } while (0) 49 | # define MOD4(a) \ 50 | do { \ 51 | if (a >= (BASE << 4)) a -= (BASE << 4); \ 52 | if (a >= (BASE << 3)) a -= (BASE << 3); \ 53 | if (a >= (BASE << 2)) a -= (BASE << 2); \ 54 | if (a >= (BASE << 1)) a -= (BASE << 1); \ 55 | if (a >= BASE) a -= BASE; \ 56 | } while (0) 57 | #else 58 | # define MOD(a) a %= BASE 59 | # define MOD4(a) a %= BASE 60 | #endif 61 | 62 
/* ========================================================================= */
/*
 * Update a running Adler-32 checksum with len bytes starting at buf and
 * return the new checksum.
 *
 *   adler  checksum so far; pass 1 (the Adler-32 initial value) to start
 *   buf    bytes to add; a null pointer requests the initial value 1
 *   len    number of bytes readable at buf
 *
 * The low 16 bits of the result hold the running byte sum and the high
 * 16 bits hold the sum of those running sums, both reduced modulo 65521.
 *
 * This function reduces with % directly; it does not consult NO_DIVIDE.
 */
uint32_t adler32(uint32_t adler, const uint8_t *buf, uint64_t len)
{
    const uint32_t kBase = 65521U; /* largest prime smaller than 65536 */
    /* Largest n such that 255n(n+1)/2 + (n+1)(kBase-1) <= 2^32-1: the most
       bytes that can be accumulated before a 32-bit sum must be reduced. */
    const uint64_t kNmax = 5552;

    /* Answer the "initial value" request before touching buf, so a null
       buffer is never dereferenced whatever len says. */
    if (buf == NULL)
        return 1U;

    /* split Adler-32 into component sums */
    uint32_t sum1 = adler & 0xffff;
    uint32_t sum2 = (adler >> 16) & 0xffff;

    /* do-while: the sums are reduced at least once, even when len == 0 */
    do {
        uint64_t n = len < kNmax ? len : kNmax;
        len -= n;

        /* fixed-count inner loop so the compiler can unroll 16 steps */
        for (; n >= 16; n -= 16, buf += 16) {
            for (int i = 0; i < 16; ++i) {
                sum1 += buf[i];
                sum2 += sum1;
            }
        }
        for (; n > 0; --n) {
            sum1 += *buf++;
            sum2 += sum1;
        }

        /* one modulo per chunk of at most kNmax bytes */
        sum1 %= kBase;
        sum2 %= kBase;
    } while (len > 0);

    /* return recombined sums */
    return sum1 | (sum2 << 16);
}
// Convert seconds since 00:00:00 1 Jan 1970 to a 64-bit decimal number of
// the form YYYYMMDDHHMMSS. tt == -1 is treated as 0. The leap-year logic
// assumes every 4th year is a leap year, which holds from 1970 to 2099.
int64_t decimal_time(time_t tt) {
  if (tt==-1) tt=0;
  // A 4-byte time_t is read as unsigned so it does not go negative in 2038.
  int64_t t=(sizeof(tt)==4) ? unsigned(tt) : tt;
  const int second=t%60;
  const int minute=t/60%60;
  const int hour=t/3600%24;
  t/=86400;  // days since Jan 1 1970
  const int term=t/1461;  // 4 year terms since 1970
  t%=1461;
  t+=(t>=59);  // insert Feb 29 on non leap years
  t+=(t>=425);
  t+=(t>=1157);
  const int year=term*4+t/366+1970;  // actual year
  t%=366;
  t+=(t>=60)*2;  // make Feb. 31 days
  t+=(t>=123);   // insert Apr 31
  t+=(t>=185);   // insert June 31
  t+=(t>=278);   // insert Sept 31
  t+=(t>=340);   // insert Nov 31
  const int month=t/31+1;  // every month is now 31 days long
  const int day=t%31+1;
  return year*10000000000LL+month*100000000+day*1000000
         +hour*10000+minute*100+second;
}

// Convert a decimal date YYYYMMDDHHMMSS to seconds since 1 Jan 1970; the
// inverse of decimal_time(). Returns -1 if date<=0.
//
// The day count is computed in 64 bits: in int, days*86400 overflows for
// any date from 19 Jan 2038 on. The month field is mapped to 0..11 with
// (field+11)%12, which equals (field-1)%12 for fields 1..99 but keeps a
// field of 00 inside the table (it wraps to December, just as 13 wraps to
// January) instead of indexing days[-1].
time_t unix_time(int64_t date) {
  if (date<=0) return -1;
  // days before the first of each month in a non leap year
  static const int days[12]={0,31,59,90,120,151,181,212,243,273,304,334};
  const int year=date/10000000000LL%10000;
  const int month=(date/100000000%100+11)%12;
  const int day=date/1000000%100;
  const int hour=date/10000%100;
  const int min=date/100%100;
  const int sec=date%100;
  const int64_t daynum=int64_t(day)-1+days[month]
                       +(year%4==0 && month>1)  // past Feb 29 of a leap year
                       +((int64_t(year)-1970)*1461+1)/4;  // whole years
  return time_t(daynum*86400+hour*3600+min*60+sec);
}

// In Windows, convert 16-bit wide string to UTF-8 and \ to /
// Each 16-bit unit is encoded on its own (1 to 3 bytes); surrogate pairs
// are not combined. utow() decodes such 3-byte sequences back to the same
// 16-bit units.
#ifndef unix
std::string wtou(const wchar_t* s) {
  assert(sizeof(wchar_t)==2);  // Not true in Linux
  assert((wchar_t)(-1)==65535);
  std::string r;
  if (!s) return r;
  for (; *s; ++s) {
    if (*s=='\\') r+='/';
    else if (*s<128) r+=*s;
    else if (*s<2048) r+=192+*s/64, r+=128+*s%64;
    else r+=224+*s/4096, r+=128+*s/64%64, r+=128+*s%64;
  }
  return r;
}
#endif
58 | std::wstring utow(const char* ss, bool doslash) { 59 | assert(sizeof(wchar_t)==2); 60 | assert((wchar_t)(-1)==65535); 61 | std::wstring r; 62 | if (!ss) return r; 63 | const unsigned char* s=(const unsigned char*)ss; 64 | for (; s && *s; ++s) { 65 | if (s[0]=='/' && doslash) r+='\\'; 66 | else if (s[0]<128) r+=s[0]; 67 | else if (s[0]>=192 && s[0]<224 && s[1]>=128 && s[1]<192) 68 | r+=(s[0]-192)*64+s[1]-128, ++s; 69 | else if (s[0]>=224 && s[0]<240 && s[1]>=128 && s[1]<192 70 | && s[2]>=128 && s[2]<192) 71 | r+=(s[0]-224)*4096+(s[1]-128)*64+s[2]-128, s+=2; 72 | } 73 | return r; 74 | } 75 | 76 | // Print a UTF-8 string to f (stdout, stderr) so it displays properly 77 | void printUTF8(const char* s, FILE* f) { 78 | assert(f); 79 | assert(s); 80 | #ifdef unix 81 | fprintf(f, "%s", s); 82 | #else 83 | const HANDLE h=(HANDLE)_get_osfhandle(_fileno(f)); 84 | DWORD ft=GetFileType(h); 85 | if (ft==FILE_TYPE_CHAR) { 86 | fflush(f); 87 | std::wstring w=utow(s); // Windows console: convert to UTF-16 88 | DWORD n=0; 89 | WriteConsole(h, w.c_str(), w.size(), &n, 0); 90 | } 91 | else // stdout redirected to file 92 | fprintf(f, "%s", s); 93 | #endif 94 | } 95 | // Print error message 96 | void winError(const char* filename) { 97 | int err=GetLastError(); 98 | printUTF8(filename, stderr); 99 | if (err==ERROR_FILE_NOT_FOUND) 100 | fprintf(stderr, ": file not found\n"); 101 | else if (err==ERROR_PATH_NOT_FOUND) 102 | fprintf(stderr, ": path not found\n"); 103 | else if (err==ERROR_ACCESS_DENIED) 104 | fprintf(stderr, ": access denied\n"); 105 | else if (err==ERROR_SHARING_VIOLATION) 106 | fprintf(stderr, ": sharing violation\n"); 107 | else if (err==ERROR_BAD_PATHNAME) 108 | fprintf(stderr, ": bad pathname\n"); 109 | else if (err==ERROR_INVALID_NAME) 110 | fprintf(stderr, ": invalid name\n"); 111 | else 112 | fprintf(stderr, ": Windows error %d\n", err); 113 | } 114 | 115 | // Set the last-modified date of an open file handle 116 | void setDate(HANDLE out, int64_t date) { 117 
| if (date>0) { 118 | SYSTEMTIME st; 119 | FILETIME ft; 120 | st.wYear=date/10000000000LL%10000; 121 | st.wMonth=date/100000000%100; 122 | st.wDayOfWeek=0; // ignored 123 | st.wDay=date/1000000%100; 124 | st.wHour=date/10000%100; 125 | st.wMinute=date/100%100; 126 | st.wSecond=date%100; 127 | st.wMilliseconds=0; 128 | SystemTimeToFileTime(&st, &ft); 129 | if (!SetFileTime(out, NULL, NULL, &ft)) 130 | fprintf(stderr, "SetFileTime error %d\n", int(GetLastError())); 131 | } 132 | } 133 | 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /src/archiver/csa_common.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSA_COMMON_H_ 2 | #define _CSA_COMMON_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace std; 21 | 22 | #ifdef unix 23 | #define PTHREAD 1 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #ifdef unixtest 33 | struct termios { 34 | int c_lflag; 35 | }; 36 | #define ECHO 1 37 | #define ECHONL 2 38 | #define TCSANOW 4 39 | int tcgetattr(int, termios*) {return 0;} 40 | int tcsetattr(int, int, termios*) {return 0;} 41 | #else 42 | #include 43 | #endif 44 | 45 | #else // Assume Windows 46 | #include 47 | #include 48 | #include 49 | #endif 50 | 51 | #ifdef _MSC_VER // Microsoft C++ 52 | #define fseeko(a,b,c) _fseeki64(a,b,c) 53 | #define ftello(a) _ftelli64(a) 54 | #else 55 | #ifndef unix 56 | #ifndef fseeko 57 | #define fseeko(a,b,c) fseeko64(a,b,c) 58 | #endif 59 | #ifndef ftello 60 | #define ftello(a) ftello64(a) 61 | #endif 62 | #endif 63 | #endif 64 | 65 | // Convert seconds since 0000 1/1/1970 to 64 bit decimal YYYYMMDDHHMMSS 66 | // Valid from 1970 to 2099. 
67 | int64_t decimal_time(time_t tt); 68 | 69 | // Convert decimal date to time_t - inverse of decimal_time() 70 | time_t unix_time(int64_t date); 71 | 72 | inline int tolowerW(int c) { 73 | #ifndef unix 74 | if (c>='A' && c<='Z') return c-'A'+'a'; 75 | #endif 76 | return c; 77 | } 78 | 79 | 80 | static const string DUMMY_FILENAME = "****"; 81 | 82 | // In Windows, convert 16-bit wide string to UTF-8 and \ to / 83 | #ifndef unix 84 | string wtou(const wchar_t* s); 85 | 86 | // In Windows, convert UTF-8 string to wide string ignoring 87 | // invalid UTF-8 or >64K. If doslash then convert "/" to "\". 88 | std::wstring utow(const char* ss, bool doslash=false); 89 | 90 | // Print a UTF-8 string to f (stdout, stderr) so it displays properly 91 | void printUTF8(const char* s, FILE* f); 92 | 93 | // Print error message 94 | void winError(const char* filename); 95 | 96 | // Set the last-modified date of an open file handle 97 | void setDate(HANDLE out, int64_t date); 98 | 99 | #endif 100 | 101 | #endif 102 | 103 | 104 | -------------------------------------------------------------------------------- /src/archiver/csa_file.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | void makepath(string path, int64_t date, int64_t attr) { 5 | int quiet = 1; 6 | for (size_t i = 0; i < path.size(); ++i) { 7 | if (path[i]=='\\' || path[i]=='/') { 8 | path[i]=0; 9 | #ifdef unix 10 | int ok=!mkdir(path.c_str(), 0777); 11 | #else 12 | int ok=CreateDirectory(utow(path.c_str(), true).c_str(), 0); 13 | #endif 14 | if (ok && quiet<=0) { 15 | /* 16 | fprintf(con, "Created directory "); 17 | printUTF8(path.c_str(), con); 18 | fprintf(con, "\n"); 19 | */ 20 | } 21 | path[i]='/'; 22 | } 23 | } 24 | 25 | // Set date and attributes 26 | string filename=path; 27 | if (filename!="" && filename[filename.size()-1]=='/') { 28 | filename=filename.substr(0, filename.size()-1); // remove trailing slash 29 | #ifdef unix 30 | if (date>0) { 31 | 
// Binary input file with 64-bit positions, built on stdio (non-Windows
// implementation).
// NOTE(review): the class owns a FILE* but is still copyable; a copy would
// close the same stream twice. Confirm no caller copies it.
class InputFile {
  FILE* in;  // stream being read, or 0 when no file is open

public:
  InputFile(): in(0) {}

  // Open filename for binary reading. Return true if successful.
  // A file that is still open on this object is closed first, so calling
  // open() again does not leak the previous stream.
  bool open(const char* filename) {
    close();
    in=fopen(filename, "rb");
    return in!=0;
  }

  // True if open
  bool isopen() {return in!=0;}

  // Return file position. The file must be open.
  int64_t tell() {
    return ftello(in);
  }

  // Read up to size bytes into buf and return the number of bytes read.
  // The file must be open.
  uint32_t read(char *buf, uint32_t size) {
    return fread(buf, 1, size, in);
  }

  // Set file position. whence is SEEK_SET, SEEK_CUR, or SEEK_END.
  // A relative seek is turned into an absolute one from tell().
  void seek(int64_t pos, int whence) {
    if (whence==SEEK_CUR) {
      whence=SEEK_SET;
      pos+=tell();
    }
    fseeko(in, pos, whence);
  }

  // Close file if open
  void close() {
    if (in) fclose(in);
    in=0;
  }

  ~InputFile() {close();}
};
Set permissions if attr low byte is 'u' 126 | void close(int64_t date=0, int64_t attr=0) { 127 | if (dummy) { 128 | dummy = false; 129 | out = 0; 130 | return; 131 | } 132 | if (out) { 133 | fclose(out); 134 | } 135 | out=0; 136 | if (date>0) { 137 | struct utimbuf ub; 138 | ub.actime=time(NULL); 139 | ub.modtime=unix_time(date); 140 | utime(filename.c_str(), &ub); 141 | } 142 | if ((attr&255)=='u') 143 | chmod(filename.c_str(), attr>>8); 144 | } 145 | 146 | ~OutputFile() {close();} 147 | }; 148 | 149 | #else // Windows 150 | 151 | class InputFile { 152 | HANDLE in; // input file handle 153 | public: 154 | InputFile(): 155 | in(INVALID_HANDLE_VALUE) {} 156 | 157 | // Open for reading. Return true if successful. 158 | // Encrypt with aes+e if aes. 159 | bool open(const char* filename) { 160 | assert(in==INVALID_HANDLE_VALUE); 161 | std::wstring w=utow(filename, true); 162 | in=CreateFile(w.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, 163 | OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 164 | return in!=INVALID_HANDLE_VALUE; 165 | } 166 | 167 | bool isopen() {return in!=INVALID_HANDLE_VALUE;} 168 | 169 | // set file pointer 170 | void seek(int64_t pos, int whence) { 171 | if (whence==SEEK_SET) whence=FILE_BEGIN; 172 | else if (whence==SEEK_END) whence=FILE_END; 173 | else if (whence==SEEK_CUR) { 174 | whence=FILE_BEGIN; 175 | pos+=tell(); 176 | } 177 | LONG offhigh=pos>>32; 178 | SetFilePointer(in, pos, &offhigh, whence); 179 | } 180 | 181 | uint32_t read(char *buf, uint32_t size) { 182 | DWORD size_read; 183 | ReadFile(in, (LPVOID)buf, size, &size_read, NULL); 184 | return size_read; 185 | } 186 | 187 | // get file pointer 188 | int64_t tell() { 189 | LONG offhigh=0; 190 | DWORD r=SetFilePointer(in, 0, &offhigh, FILE_CURRENT); 191 | return (int64_t(offhigh)<<32)+r; 192 | } 193 | 194 | // Close handle if open 195 | void close() { 196 | if (in!=INVALID_HANDLE_VALUE) { 197 | CloseHandle(in); 198 | in=INVALID_HANDLE_VALUE; 199 | } 200 | } 201 | ~InputFile() {close();} 
202 | }; 203 | 204 | class OutputFile { 205 | HANDLE out; 206 | std::wstring filename; 207 | bool dummy; 208 | public: 209 | OutputFile(): out(INVALID_HANDLE_VALUE),dummy(false) {} 210 | 211 | // Return true if file is open 212 | bool isopen() { 213 | return out!=INVALID_HANDLE_VALUE; 214 | } 215 | 216 | // Open file ready to update or append, create if needed. 217 | // If aes then encrypt with aes+e. 218 | bool open(const char* filename_) { 219 | assert(!isopen()); 220 | dummy = (filename_ == DUMMY_FILENAME); 221 | if (dummy) { 222 | out = (HANDLE)1; 223 | return true; 224 | } 225 | filename=utow(filename_, true); 226 | out=CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE, 227 | FILE_SHARE_READ | FILE_SHARE_WRITE, 228 | NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); 229 | if (out==INVALID_HANDLE_VALUE) { 230 | fprintf(stderr, "File open error %s\n", filename.c_str()); 231 | DWORD x = GetLastError(); 232 | printf("%d", x); 233 | } else { 234 | LONG hi=0; 235 | SetFilePointer(out, 0, &hi, FILE_END); 236 | } 237 | return isopen(); 238 | } 239 | 240 | // Write bufp[0..size-1] 241 | void write(const char* buf, int size) { 242 | DWORD size_written; 243 | if (!dummy) 244 | WriteFile(out, (LPVOID)buf, size, &size_written, NULL); 245 | } 246 | 247 | // Write size bytes at offset 248 | void write(const char* buf, int64_t pos, int size) { 249 | assert(isopen()); 250 | if (dummy) 251 | return; 252 | if (pos!=tell()) seek(pos, SEEK_SET); 253 | write(buf, size); 254 | } 255 | 256 | // set file pointer 257 | void seek(int64_t pos, int whence) { 258 | if (whence==SEEK_SET) whence=FILE_BEGIN; 259 | else if (whence==SEEK_CUR) whence=FILE_CURRENT; 260 | else if (whence==SEEK_END) whence=FILE_END; 261 | LONG offhigh=pos>>32; 262 | if (!dummy) 263 | SetFilePointer(out, pos, &offhigh, whence); 264 | } 265 | 266 | // get file pointer 267 | int64_t tell() { 268 | LONG offhigh=0; 269 | if (dummy) 270 | return 0; 271 | DWORD r=SetFilePointer(out, 0, &offhigh, FILE_CURRENT); 272 
| return (int64_t(offhigh)<<32)+r; 273 | } 274 | 275 | // Truncate file and move file pointer to end 276 | void truncate(int64_t newsize=0) { 277 | if (dummy) 278 | return; 279 | seek(newsize, SEEK_SET); 280 | SetEndOfFile(out); 281 | } 282 | 283 | // Close file and set date if not 0. Set attr if low byte is 'w'. 284 | void close(int64_t date=0, int64_t attr=0) { 285 | if (isopen()) { 286 | if (dummy) { 287 | out=INVALID_HANDLE_VALUE; 288 | dummy = false; 289 | return; 290 | } 291 | setDate(out, date); 292 | CloseHandle(out); 293 | out=INVALID_HANDLE_VALUE; 294 | if ((attr&255)=='w') 295 | SetFileAttributes(filename.c_str(), attr>>8); 296 | filename=L""; 297 | } 298 | } 299 | ~OutputFile() {close();} 300 | }; 301 | 302 | #endif 303 | 304 | void makepath(string path, int64_t date=0, int64_t attr=0); 305 | 306 | #endif 307 | 308 | 309 | -------------------------------------------------------------------------------- /src/archiver/csa_indexpack.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using std::make_pair; 4 | 5 | char *Put8(uint64_t i, char *buf) 6 | { 7 | for(int j = 0; j < 8; j++) { 8 | *buf++ = (i & 0xFF); 9 | i >>= 8; 10 | } 11 | return buf; 12 | } 13 | 14 | char *Get8(uint64_t& i, char *buf) 15 | { 16 | i = 0; 17 | for(int j = 7; j >= 0; j--) { 18 | i = (i << 8) + (uint8_t)buf[j]; 19 | } 20 | return buf + 8; 21 | } 22 | 23 | char *Get8(int64_t& i, char *buf) 24 | { 25 | uint64_t j; 26 | char *ret = Get8(j, buf); 27 | i = j; 28 | return ret; 29 | } 30 | 31 | char *Put4(uint32_t i, char *buf) 32 | { 33 | for(int j = 0; j < 4; j++) { 34 | *buf++ = (i & 0xFF); 35 | i >>= 8; 36 | } 37 | return buf; 38 | } 39 | 40 | char *Get4(uint32_t& i, char *buf) 41 | { 42 | i = 0; 43 | for(int j = 3; j >= 0; j--) { 44 | i = (i << 8) + (uint8_t)buf[j]; 45 | } 46 | return buf + 4; 47 | } 48 | 49 | char *Put2(uint16_t i, char *buf) 50 | { 51 | *buf++ = (i & 0xFF); 52 | *buf++ = (i >> 8); 53 | return buf; 54 | 
} 55 | 56 | char *Get2(uint64_t& i, char *buf) 57 | { 58 | i = 0; 59 | for(int j = 1; j >= 0; j--) { 60 | i = (i << 8) + (uint8_t)buf[j]; 61 | } 62 | return buf + 2; 63 | } 64 | 65 | 66 | 67 | static uint64_t FileEntrySize(IterFileEntry it) 68 | { 69 | return 4 + it->first.size() 70 | // file name len 71 | + 3 * 8 + 1 72 | // data size eattr, frags num 73 | + it->second.frags.size() * (4 + 4 + 24); 74 | // frags info 75 | } 76 | 77 | static char *FileEntryToBuf(IterFileEntry it, char *buf) 78 | { 79 | buf = Put4(it->first.size(), buf); 80 | memcpy(buf, it->first.c_str(), it->first.size()); 81 | buf += it->first.size(); 82 | buf = Put8(it->second.edate, buf); 83 | buf = Put8(it->second.esize, buf); 84 | buf = Put8(it->second.eattr, buf); 85 | *buf++ = it->second.frags.size(); 86 | for(size_t i = 0; i < it->second.frags.size(); i++) { 87 | buf = Put4(it->second.frags[i].bid, buf); 88 | buf = Put4(it->second.frags[i].checksum, buf); 89 | buf = Put8(it->second.frags[i].posblock, buf); 90 | buf = Put8(it->second.frags[i].size, buf); 91 | buf = Put8(it->second.frags[i].posfile, buf); 92 | } 93 | return buf; 94 | } 95 | 96 | static char *BufToFileEntry(string& filename, FileEntry& fe, char *buf) 97 | { 98 | uint32_t u4; 99 | buf = Get4(u4, buf); 100 | filename.assign(buf, u4); 101 | buf += u4; 102 | buf = Get8(fe.edate, buf); 103 | buf = Get8(fe.esize, buf); 104 | buf = Get8(fe.eattr, buf); 105 | int8_t i1 = *buf++; 106 | fe.frags.clear(); 107 | for(int i = 0; i < i1; i++) { 108 | FileEntry::Frag f; 109 | buf = Get4(f.bid, buf); 110 | buf = Get4(f.checksum, buf); 111 | buf = Get8(f.posblock, buf); 112 | buf = Get8(f.size, buf); 113 | buf = Get8(f.posfile, buf); 114 | fe.frags.push_back(f); 115 | } 116 | return buf; 117 | } 118 | 119 | static uint64_t ArchiveBlocksSize(IterAB it) 120 | { 121 | return 8 + 4 + it->second.filename.size() 122 | // file name len 123 | + 4 + it->second.blocks.size() * 16; 124 | // blocks 125 | } 126 | 127 | static char *ArchiveBlocksToBuf(IterAB 
it, char *buf) 128 | { 129 | buf = Put8(it->first, buf); 130 | ArchiveBlocks &abs = it->second; 131 | //buf = Put4(abs.filename.size(), buf); 132 | //memcpy(buf, abs.filename.c_str(), abs.filename.size()); 133 | //buf += abs.filename.size(); 134 | buf = Put4(abs.blocks.size(), buf); 135 | for(size_t i = 0; i < abs.blocks.size(); i++) { 136 | buf = Put8(abs.blocks[i].off, buf); 137 | buf = Put8(abs.blocks[i].size, buf); 138 | } 139 | return buf; 140 | } 141 | 142 | static char *BufToArchiveBlocks(uint64_t& id, ArchiveBlocks& abs, char *buf) 143 | { 144 | buf = Get8(id, buf); 145 | uint32_t u4; 146 | //buf = Get4(u4, buf); 147 | //abs.filename.assign(buf, u4); 148 | //buf += u4; 149 | buf = Get4(u4, buf); 150 | abs.blocks.clear(); 151 | for(uint32_t i = 0; i < u4; i++) { 152 | ArchiveBlocks::Block b; 153 | buf = Get8(b.off, buf); 154 | buf = Get8(b.size, buf); 155 | abs.blocks.push_back(b); 156 | } 157 | return buf; 158 | } 159 | 160 | char *PackIndex(FileIndex &fi, ABIndex &abi, uint64_t &total_size) 161 | { 162 | total_size = 0; 163 | 164 | total_size += 4; 165 | for(IterFileEntry it = fi.begin(); it != fi.end(); it++) 166 | total_size += FileEntrySize(it); 167 | 168 | total_size += 4; 169 | for(IterAB it = abi.begin(); it != abi.end(); it++) 170 | total_size += ArchiveBlocksSize(it); 171 | 172 | char *buf = new char[total_size]; 173 | char *tmp = buf; 174 | tmp = Put4(fi.size(), tmp); 175 | for(IterFileEntry it = fi.begin(); it != fi.end(); it++) 176 | tmp = FileEntryToBuf(it, tmp); 177 | 178 | tmp = Put4(abi.size(), tmp); 179 | for(IterAB it = abi.begin(); it != abi.end(); it++) 180 | tmp = ArchiveBlocksToBuf(it, tmp); 181 | return buf; 182 | } 183 | 184 | bool UnpackIndex(FileIndex &fi, ABIndex &abi, char *buf, uint64_t total_size) 185 | { 186 | char *tmp = buf, *limit = buf + total_size; 187 | (void)total_size; 188 | 189 | (void)limit; 190 | fi.clear(); 191 | abi.clear(); 192 | uint32_t isize; 193 | tmp = Get4(isize, tmp); 194 | for(uint32_t i = 0; i < isize; 
i++) { 195 | string filename; 196 | FileEntry fe; 197 | tmp = BufToFileEntry(filename, fe, tmp); 198 | fi.insert(make_pair(filename, fe)); 199 | } 200 | 201 | tmp = Get4(isize, tmp); 202 | for(uint32_t i = 0; i < isize; i++) { 203 | uint64_t id; 204 | ArchiveBlocks abs; 205 | tmp = BufToArchiveBlocks(id, abs, tmp); 206 | abi.insert(make_pair(id, abs)); 207 | } 208 | return true; 209 | } 210 | 211 | 212 | -------------------------------------------------------------------------------- /src/archiver/csa_indexpack.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSA_INDEXPACK_H_ 2 | #define _CSA_INDEXPACK_H_ 3 | #include 4 | #include 5 | 6 | char *Put8(uint64_t i, char *buf); 7 | char *Get8(uint64_t& i, char *buf); 8 | char *Get8(int64_t& i, char *buf); 9 | char *Put4(uint32_t i, char *buf); 10 | char *Get4(uint32_t& i, char *buf); 11 | char *Put2(uint16_t i, char *buf); 12 | char *Get2(uint64_t& i, char *buf); 13 | char *PackIndex(FileIndex &fi, ABIndex &abi, uint64_t &total_size); 14 | bool UnpackIndex(FileIndex &fi, ABIndex &abi, char *buf, uint64_t total_size); 15 | 16 | 17 | #endif 18 | 19 | -------------------------------------------------------------------------------- /src/archiver/csa_io.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSA_IO_ 2 | #define _CSA_IO_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | class AsyncReader; 12 | class AsyncWriter; 13 | 14 | class AsyncReader { 15 | protected: 16 | ThreadID iothread_; 17 | static ThreadReturn entrance(void *arg) 18 | { 19 | AsyncReader *ar = (AsyncReader *)arg; 20 | return ar->run(); 21 | } 22 | 23 | Mutex lock_; 24 | Semaphore sem_full_; 25 | Semaphore sem_empty_; 26 | 27 | 28 | struct Block { 29 | char *buf; 30 | uint32_t size; 31 | uint32_t prog; 32 | }; 33 | 34 | std::deque queue_; 35 | uint32_t size_; 36 | uint32_t bufsize_; 37 | volatile bool finished_; 38 | 39 
| virtual ThreadReturn run() = 0; 40 | 41 | public: 42 | AsyncReader() : 43 | finished_(false) 44 | { 45 | init_mutex(lock_); 46 | sem_full_.init(0); 47 | sem_empty_.init(0); 48 | } 49 | 50 | virtual ~AsyncReader() { 51 | while(!queue_.empty()) { 52 | Block &b = queue_.front(); 53 | delete[] b.buf; 54 | queue_.pop_front(); 55 | } 56 | destroy_mutex(lock_); 57 | sem_full_.destroy(); 58 | sem_empty_.destroy(); 59 | } 60 | 61 | void Run() { 62 | ::run(iothread_, AsyncReader::entrance, this); 63 | } 64 | 65 | int Read(void *buf, size_t *size) { 66 | size_t prog = 0; 67 | for(; prog < *size;) { 68 | lock(lock_); 69 | if (size_ == 0) { 70 | release(lock_); 71 | if (finished_) { 72 | *size = prog; 73 | return 0; 74 | } 75 | sem_empty_.wait(); 76 | continue; 77 | } 78 | 79 | Block &b = queue_.front(); 80 | size_t cpy = std::min(*size - prog, b.size - b.prog); 81 | memcpy((char *)buf + prog, b.buf + b.prog, cpy); 82 | prog += cpy; 83 | b.prog += cpy; 84 | if (b.prog == b.size) { 85 | delete[] b.buf; 86 | size_ -= b.size; 87 | queue_.pop_front(); 88 | sem_full_.signal(); 89 | } 90 | release(lock_); 91 | } 92 | return 0; 93 | } 94 | 95 | virtual void Finish() { 96 | lock(lock_); 97 | finished_ = true; 98 | release(lock_); 99 | sem_full_.signal(); 100 | join(iothread_); 101 | } 102 | }; 103 | 104 | 105 | class AsyncWriter { 106 | protected: 107 | struct Block { 108 | char *buf; 109 | uint32_t size; 110 | uint32_t prog; 111 | }; 112 | 113 | private: 114 | Block curblock_; 115 | uint32_t cap_; 116 | 117 | protected: 118 | ThreadID iothread_; 119 | Mutex lock_; 120 | Semaphore sem_full_; 121 | Semaphore sem_empty_; 122 | uint32_t size_; 123 | uint32_t bufsize_; 124 | volatile bool finished_; 125 | 126 | static ThreadReturn entrance(void *arg) 127 | { 128 | AsyncWriter *ar = (AsyncWriter *)arg; 129 | return ar->run(); 130 | } 131 | 132 | std::deque queue_; 133 | virtual ThreadReturn run() = 0; 134 | 135 | void flush() { 136 | if (curblock_.size > 0) { 137 | 
queue_.push_back(curblock_); 138 | size_ += curblock_.size; 139 | } else { 140 | delete []curblock_.buf; 141 | } 142 | curblock_.buf = NULL; 143 | curblock_.size = 0; 144 | } 145 | 146 | public: 147 | void Run() { 148 | ::run(iothread_, AsyncWriter::entrance, this); 149 | } 150 | 151 | AsyncWriter(): 152 | finished_(false) 153 | { 154 | init_mutex(lock_); 155 | sem_full_.init(0); 156 | sem_empty_.init(0); 157 | curblock_.size = 0; 158 | cap_ = 1048576; 159 | curblock_.buf = new char[cap_]; 160 | } 161 | 162 | virtual ~AsyncWriter() { 163 | while(!queue_.empty()) { 164 | Block &b = queue_.front(); 165 | delete[] b.buf; 166 | queue_.pop_front(); 167 | } 168 | delete[] curblock_.buf; 169 | destroy_mutex(lock_); 170 | sem_full_.destroy(); 171 | sem_empty_.destroy(); 172 | } 173 | 174 | size_t Write(const void *buf, size_t size) { 175 | for(;;) { 176 | lock(lock_); 177 | if (finished_ == true) { 178 | release(lock_); 179 | return CSC_WRITE_ABORT; 180 | } 181 | 182 | if (size_ >= bufsize_) { 183 | release(lock_); 184 | sem_full_.wait(); 185 | continue; 186 | } 187 | 188 | if (curblock_.size + size > cap_) { 189 | flush(); 190 | cap_ = std::max(1048576, size); 191 | curblock_.buf = new char[cap_]; 192 | } 193 | 194 | memcpy(curblock_.buf + curblock_.size, buf, size); 195 | curblock_.size += size; 196 | release(lock_); 197 | sem_empty_.signal(); 198 | return size; 199 | } 200 | } 201 | 202 | virtual void Finish() { 203 | } 204 | 205 | }; 206 | 207 | class AsyncFileReader : public AsyncReader { 208 | 209 | vector &filelist_; 210 | size_t curfidx_; 211 | uint64_t curfprog_; 212 | uint64_t cumsize_; 213 | InputFile if_; 214 | 215 | ThreadReturn run() { 216 | while(1) { 217 | lock(lock_); 218 | if (size_ >= bufsize_) { 219 | release(lock_); 220 | sem_full_.wait(); 221 | continue; 222 | } 223 | uint64_t rdsize = bufsize_ - size_; 224 | release(lock_); 225 | 226 | // open next file if last was done 227 | if (!if_.isopen()) { 228 | if (curfidx_ >= filelist_.size()) { 229 | 
// Consumer side of decompression: takes the blocks queued through
// AsyncWriter::Write() and distributes their bytes over the output files
// listed in filelist_, verifying an Adler-32 checksum per entry.
//
// NOTE(review): this listing appears to have lost everything between angle
// brackets (`vector &filelist_`, `std::min(...)` with mixed argument types);
// presumably the real file has vector<FileBlock> and an explicit std::min<T>
// -- confirm against the repository.
class AsyncFileWriter : public AsyncWriter {
    vector &filelist_;       // output entries, processed in order
    uint64_t curfidx_;       // index into filelist_ of the entry being written
    uint64_t curfprog_;      // bytes already written for the current entry
    uint64_t curbprog_;      // bytes of the front block already consumed
    uint64_t cumsize_;       // total size of all blocks already popped
    bool done_;              // set once every entry in filelist_ was handled
    uint32_t curchecksum_;   // running adler32 of the current entry

    OutputFile of_;

    // I/O thread body. Runs until every entry was handled (done_) or until the
    // queue is empty and finished_ is set.
    ThreadReturn run() {
        while(1) {
            lock(lock_);
            if (done_) {
                release(lock_);
                break;
            }

            if (size_ == 0) {
                if (finished_) {
                    release(lock_);
                    break;
                }
                // Nothing queued yet: wait for Write()/Finish() to signal.
                release(lock_);
                sem_empty_.wait();
                continue;
            }

            Block& b = queue_.front();
            // Does the front block reach past the position (posblock) where
            // the current entry's data starts?
            if (cumsize_ + b.size > filelist_[curfidx_].posblock) {
                if (!of_.isopen()) {
                    //printf("Opening %s\n", filelist_[curfidx_].filename.c_str());
                    if (!of_.open(filelist_[curfidx_].filename.c_str())) {
                        // Could not open the output file: move on to the next
                        // entry, or stop when this was the last one.
                        curfidx_++;
                        if (curfidx_ >= filelist_.size()) {
                            done_ = true;
                            finished_ = true;
                            sem_full_.signal();
                        }
                        release(lock_);
                        continue;
                    }
                    curfprog_ = 0;
                    of_.seek(filelist_[curfidx_].off, SEEK_SET);
                    curchecksum_ = 0;
                    // skip the unnecessary data in front of current file
                    if (cumsize_ + curbprog_ < filelist_[curfidx_].posblock)
                        curbprog_ += filelist_[curfidx_].posblock - cumsize_ - curbprog_;
                }

                // Write as much as both the block and the entry still allow.
                uint64_t wrsize = std::min(b.size - curbprog_, filelist_[curfidx_].size - curfprog_);
                of_.write(b.buf + curbprog_, wrsize);
                curchecksum_ = adler32(curchecksum_, (const uint8_t *)b.buf + curbprog_, wrsize);
                curfprog_ += wrsize;
                curbprog_ += wrsize;
                if (curfprog_ == filelist_[curfidx_].size) {
                    // Entry complete: close it with the date/attributes from
                    // the index and compare checksums.
                    of_.close(filelist_[curfidx_].it->second.edate,
                            filelist_[curfidx_].it->second.eattr);
                    if (filelist_[curfidx_].checksum != curchecksum_)
                        fprintf(stderr, "******** %s extraction/verify failed\n", filelist_[curfidx_].it->first.c_str());

                    curfidx_++;
                    if (curfidx_ >= filelist_.size()) {
                        // Last entry written: make Write() return
                        // CSC_WRITE_ABORT from now on and wake a writer that
                        // may be waiting for room.
                        done_ = true;
                        finished_ = true;
                        sem_full_.signal();
                    }
                }
            } else {
                // The whole block lies before the current entry's data: drop it.
                curbprog_ = b.size;
            }

            if (curbprog_ == b.size) {
                // Front block fully consumed: free it and report free room.
                delete[] b.buf;
                size_ -= b.size;
                cumsize_ += b.size;
                queue_.pop_front();
                release(lock_);
                curbprog_ = 0;
                sem_full_.signal();
            } else {
                release(lock_);
            }
        }
        return NULL;
    }

public:

    // filelist: entries to write (held by reference);
    // bufsize:  byte budget for the block queue.
    AsyncFileWriter(vector &filelist, uint32_t bufsize)
        : filelist_(filelist),
        curfidx_(0),
        curfprog_(0),
        curbprog_(0),
        cumsize_(0),
        done_(false)
    {
        // NOTE(review): the comma below is the comma operator; both
        // assignments still execute, but a ';' was presumably intended.
        size_ = 0,
        bufsize_ = bufsize;
    }

    ~AsyncFileWriter() {
    }

    // Queue the partially filled block, mark the stream finished, wait for the
    // I/O thread to exit, then close an output file that is still open (its
    // checksum is compared first).
    void Finish() {
        lock(lock_);
        flush();
        finished_ = true;
        release(lock_);
        sem_empty_.signal();
        join(iothread_);
        if (of_.isopen()) {
            if (filelist_[curfidx_].checksum != curchecksum_)
                fprintf(stderr, "******** %s extraction/verify failed\n", filelist_[curfidx_].it->first.c_str());
            //filelist_[curfidx_].size = curfprog_;
            of_.close(filelist_[curfidx_].it->second.edate,
                    filelist_[curfidx_].it->second.eattr);
        }
    }
};
s; 464 | return s; 465 | } 466 | 467 | 468 | 469 | class AsyncArchiveReader; 470 | class AsyncArchiveWriter; 471 | 472 | class AsyncArchiveReader : public AsyncReader { 473 | ArchiveBlocks &ab_; 474 | size_t curbidx_; 475 | size_t curbprog_; 476 | InputFile if_; 477 | 478 | ThreadReturn run() { 479 | while(1) { 480 | lock(lock_); 481 | if (finished_) { 482 | release(lock_); 483 | break; 484 | } 485 | if (size_ >= bufsize_) { 486 | release(lock_); 487 | sem_full_.wait(); 488 | continue; 489 | } 490 | uint64_t rdsize = bufsize_ - size_; 491 | release(lock_); 492 | 493 | // how much to read? 494 | rdsize = std::min(rdsize, ab_.blocks[curbidx_].size - curbprog_); 495 | rdsize = std::min(rdsize, 2 * 1048576); 496 | char *buf = new char[rdsize]; 497 | // read and push to queue 498 | rdsize = if_.read(buf, rdsize); 499 | Block b; 500 | b.prog = 0; 501 | b.size = rdsize; 502 | b.buf = buf; 503 | 504 | lock(lock_); 505 | queue_.push_back(b); 506 | size_ += b.size; 507 | release(lock_); 508 | sem_empty_.signal(); 509 | 510 | curbprog_ += rdsize; 511 | // end of current file, close it 512 | if (curbprog_ == ab_.blocks[curbidx_].size) { 513 | curbprog_ = 0; 514 | curbidx_ ++; 515 | if (curbidx_ < ab_.blocks.size()) 516 | if_.seek(ab_.blocks[curbidx_].off, SEEK_SET); 517 | else 518 | break; 519 | } 520 | } 521 | return (ThreadReturn)NULL; 522 | } 523 | 524 | public: 525 | AsyncArchiveReader(ArchiveBlocks &ab ,uint32_t bufsize) 526 | : ab_(ab), 527 | curbidx_(0), 528 | curbprog_(0) 529 | { 530 | size_ = 0, 531 | bufsize_ = bufsize; 532 | if_.open(ab_.filename.c_str()); 533 | if_.seek(ab_.blocks[curbidx_].off, SEEK_SET); 534 | } 535 | 536 | ~AsyncArchiveReader() { 537 | if_.close(); 538 | } 539 | }; 540 | 541 | class AsyncArchiveWriter : public AsyncWriter { 542 | ArchiveBlocks &ab_; 543 | OutputFile of_; 544 | Mutex& arc_lock_; 545 | 546 | ThreadReturn run() { 547 | while(1) { 548 | lock(lock_); 549 | if (size_ == 0) { 550 | if (finished_) { 551 | release(lock_); 552 | break; 
// Complete the archive output: hand the partially filled block to the queue,
// mark the stream finished, wake the I/O thread in case it is waiting for
// data, wait for it to exit, and only then close the archive file.
void Finish() {
    lock(lock_);
    flush();                 // queue_/size_ are modified, hence under lock_
    finished_ = true;
    release(lock_);
    sem_empty_.signal();     // run() waits on sem_empty_ when the queue is empty
    join(iothread_);
    of_.close();
}
// Scale the fraction a / b to the integer range [0, base].
//
//   base  value that represents "complete" (50 bar cells, 100 percent, ...)
//   a     amount done
//   b     total amount
//
// Returns base when a >= b (this includes b == 0: nothing left to do),
// otherwise floor(a * base / b).
//
// The previous version divided a by floor(b / base). That is only accurate
// when b is much larger than base: a finished 30-byte job was reported as
// 30 %, and 100 of 199 bytes as 100 %.
static uint32_t get_ratio(uint32_t base, uint64_t a, uint64_t b) {
    if (base == 0)
        return 0;
    if (a >= b)
        return base;

    // From here on 0 <= a < b, so b is non-zero.
    const uint64_t max64 = ~(uint64_t)0;
    uint64_t r;
    if (a <= max64 / base) {
        r = a * base / b;
    } else {
        // a * base would overflow. b is larger than max64 / base here, so
        // b / base is at least 1 and dividing the denominator instead loses
        // no visible precision.
        r = a / (b / base);
    }
    if (r > base)
        r = base;
    return (uint32_t)r;
}
// Counting semaphore built from a pthread mutex and condition variable.
// init() must be called exactly once before wait()/signal(); the count is -1
// until then.
class Semaphore {
public:
  Semaphore() {sem=-1;}

  // Create the semaphore with an initial count of n (n >= 0).
  inline void init(int n) {
    assert(n>=0);
    assert(sem==-1);
    pthread_cond_init(&cv, 0);
    pthread_mutex_init(&mutex, 0);
    sem=n;
  }

  // Release the mutex and the condition variable.
  inline void destroy() {
    assert(sem>=0);
    pthread_mutex_destroy(&mutex);
    pthread_cond_destroy(&cv);
  }

  // Block until the count is positive, then decrement it.
  // Returns 0 on success or the error code of pthread_cond_wait; in the error
  // case the count is left untouched.
  inline int wait() {
    assert(sem>=0);
    pthread_mutex_lock(&mutex);
    int r=0;
    // pthread_cond_wait may return without a matching signal (spurious
    // wakeup), and another waiter may take the count first, so the predicate
    // has to be re-tested in a loop. The former single `if` let the count
    // drop below zero in those cases.
    while (sem==0) {
      r=pthread_cond_wait(&cv, &mutex);
      if (r!=0) {
        pthread_mutex_unlock(&mutex);
        return r;
      }
    }
    assert(sem>0);
    --sem;
    pthread_mutex_unlock(&mutex);
    return r;
  }

  // Increment the count and wake one waiter.
  inline void signal() {
    assert(sem>=0);
    pthread_mutex_lock(&mutex);
    ++sem;
    pthread_cond_signal(&cv);
    pthread_mutex_unlock(&mutex);
  }
private:
  pthread_cond_t cv;         // signalled whenever the count is incremented
  pthread_mutex_t mutex;     // protects sem and cv
  int sem;                   // semaphore count, -1 before init()
};
edate; // date of external file, 0=not found 15 | int64_t esize; // size of external file 16 | int64_t eattr; // external file attributes ('u' or 'w' in low byte) 17 | char ext[4]; 18 | struct Frag{ 19 | uint32_t bid; 20 | uint32_t checksum; 21 | uint64_t posblock; 22 | uint64_t size; 23 | uint64_t posfile; 24 | }; 25 | vector frags; 26 | }; 27 | 28 | typedef map FileIndex; 29 | typedef FileIndex::iterator IterFileEntry; 30 | 31 | 32 | // For raw files I/O 33 | struct FileBlock { 34 | string filename; 35 | uint32_t checksum; 36 | uint64_t off; 37 | uint64_t size; 38 | uint64_t posblock; 39 | // used only when decompression 40 | IterFileEntry it; 41 | }; 42 | 43 | 44 | // For archiver I/O 45 | struct ArchiveBlocks { 46 | struct Block { 47 | uint64_t off; 48 | uint64_t size; 49 | }; 50 | string filename; 51 | vector blocks; 52 | }; 53 | 54 | typedef map ABIndex; 55 | typedef ABIndex::iterator IterAB; 56 | 57 | struct MainTask { 58 | uint64_t total_size; 59 | vector filelist; 60 | uint32_t ab_id; 61 | bool finished; 62 | 63 | MainTask() 64 | : total_size(0), 65 | finished(false) {} 66 | 67 | void push_back(const string& filename, uint64_t off, uint64_t size, uint64_t posblock, uint32_t checksum, 68 | IterFileEntry it = IterFileEntry()) { 69 | FileBlock b; 70 | b.filename = filename; 71 | b.off = off; 72 | b.size = size; 73 | b.posblock = posblock; 74 | b.it = it; 75 | b.checksum = checksum; 76 | filelist.push_back(b); 77 | total_size += size; 78 | } 79 | 80 | void clear() { 81 | filelist.clear(); 82 | total_size = 0; 83 | } 84 | }; 85 | 86 | 87 | 88 | #endif 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/archiver/csa_worker.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | struct ProgressObject { 7 | ICompressProgress p; 8 | MainWorker *w; 9 | }; 10 | 11 | static int decompress_update_progress(void *p, UInt64 insize, 
// Compress the files of the current task (task_) into the archive block set
// abs_ and return the result of CSCEnc_Encode.
//
// NOTE(review): both std::min calls mix argument types as shown here;
// presumably an explicit template argument was lost from this listing --
// confirm against the repository.
int CompressionWorker::do_work()
{
    // Source: background reader over the task's files. Its queue budget is
    // capped at 32 MiB and at the task's total size.
    FileReader file_reader;
    file_reader.obj = new AsyncFileReader(task_->filelist,
            std::min(32 * 1048576, task_->total_size));
    file_reader.is.Read = file_read_proc;

    // Sink: background writer appending to the archive with an 8 MiB budget.
    FileWriter file_writer;
    file_writer.obj = new AsyncArchiveWriter(*abs_, 8 * 1048576, arc_lock_);
    file_writer.os.Write = file_write_proc;

    // The dictionary size is capped at the task's total size. The wrapper
    // structs are passed as stream pointers; their stream member comes first,
    // so the address of the wrapper is also the address of the stream.
    CSCProps p;
    CSCEncProps_Init(&p, std::min(dict_size_, task_->total_size), level_);
    CSCEncHandle h = CSCEnc_Create(&p, (ISeqOutStream*)&file_writer, NULL);
    uint8_t buf[CSC_PROP_SIZE];
    CSCEnc_WriteProperties(&p, buf, 0);

    file_reader.obj->Run();
    file_writer.obj->Run();
    // The serialized properties are the first bytes written to the output,
    // ahead of the compressed data.
    file_writer.obj->Write(buf, CSC_PROP_SIZE);

    // Progress callback: stores the consumed input size in processed_raw_.
    ProgressObject prog;
    prog.p.Progress = compress_update_progress;
    prog.w = this;

    int ret = CSCEnc_Encode(h, (ISeqInStream*)&file_reader, (ICompressProgress*)&prog);
    CSCEnc_Encode_Flush(h);
    CSCEnc_Destroy(h);
    // Stop and join both I/O threads before their objects are deleted.
    file_reader.obj->Finish();
    file_writer.obj->Finish();
    delete file_writer.obj;
    delete file_reader.obj;
    return ret;
}
77 | uint8_t buf[CSC_PROP_SIZE]; 78 | size_t prop_size = CSC_PROP_SIZE; 79 | file_reader.obj->Read(buf, &prop_size); 80 | CSCDec_ReadProperties(&p, buf); 81 | CSCDecHandle h = CSCDec_Create(&p, (ISeqInStream*)&file_reader, NULL); 82 | int ret = CSCDec_Decode(h, (ISeqOutStream*)&file_writer, (ICompressProgress*)&prog); 83 | CSCDec_Destroy(h); 84 | 85 | file_reader.obj->Finish(); 86 | file_writer.obj->Finish(); 87 | delete file_reader.obj; 88 | delete file_writer.obj; 89 | return ret; 90 | } 91 | 92 | -------------------------------------------------------------------------------- /src/archiver/csa_worker.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSA_WORKER_H_ 2 | #define _CSA_WORKER_H_ 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | class MainWorker { 9 | protected: 10 | ThreadID thread_; 11 | Semaphore& sem_finish_; 12 | Semaphore got_task_; 13 | 14 | MainTask *task_; 15 | ArchiveBlocks* abs_; 16 | int last_ret_; 17 | 18 | public: 19 | uint64_t processed_raw_; 20 | 21 | private: 22 | bool finished_; 23 | 24 | static ThreadReturn entrance(void *arg) 25 | { 26 | MainWorker *worker = (MainWorker *)arg; 27 | return (ThreadReturn)worker->run(); 28 | } 29 | 30 | virtual int do_work() = 0; 31 | 32 | void *run() { 33 | while(1) { 34 | got_task_.wait(); 35 | if (finished_) 36 | break; 37 | 38 | last_ret_ = do_work(); 39 | processed_raw_ = 0; 40 | 41 | task_ = NULL; 42 | sem_finish_.signal(); 43 | } 44 | return NULL; 45 | } 46 | 47 | public: 48 | MainWorker(Semaphore &sem): 49 | sem_finish_(sem), 50 | task_(NULL), 51 | last_ret_(0), 52 | processed_raw_(0), 53 | finished_(false) { 54 | got_task_.init(0); 55 | } 56 | 57 | virtual ~MainWorker() {} 58 | 59 | void Run() { 60 | ::run(thread_, MainWorker::entrance, this); 61 | } 62 | 63 | void PutTask(MainTask& task, ArchiveBlocks& abs) { 64 | task_ = &task; 65 | abs_ = &abs; 66 | got_task_.signal(); 67 | } 68 | 69 | bool TaskDone() { 70 | return task_ == NULL; 71 | 
} 72 | 73 | int LastReturn() { 74 | return last_ret_; 75 | } 76 | 77 | void Finish() { 78 | finished_ = true; 79 | got_task_.signal(); 80 | ::join(thread_); 81 | } 82 | 83 | uint64_t GetProcessedRawSize() const { 84 | return processed_raw_; 85 | } 86 | }; 87 | 88 | class CompressionWorker: public MainWorker { 89 | int level_; 90 | uint32_t dict_size_; 91 | Mutex& arc_lock_; 92 | int do_work(); 93 | 94 | static int update_progress(void *p, UInt64 insize, UInt64 outsize) { 95 | //CompressionWorker *worker = (CompressionWorker *)p; 96 | //worker->processed_raw_ = insize; 97 | return 0; 98 | } 99 | 100 | public: 101 | CompressionWorker(Semaphore &sem, Mutex& arc_lock, int level, int dict_size) : 102 | MainWorker(sem), 103 | level_(level), 104 | dict_size_(dict_size), 105 | arc_lock_(arc_lock) 106 | { 107 | } 108 | 109 | }; 110 | 111 | class DecompressionWorker: public MainWorker { 112 | int do_work(); 113 | 114 | static int update_progress(void *p, UInt64 insize, UInt64 outsize) { 115 | //DecompressionWorker *worker = (DecompressionWorker *)p; 116 | //worker->processed_raw_ = outsize; 117 | return 0; 118 | } 119 | 120 | public: 121 | DecompressionWorker(Semaphore &sem) : 122 | MainWorker(sem) 123 | {} 124 | }; 125 | 126 | 127 | 128 | #endif 129 | 130 | -------------------------------------------------------------------------------- /src/libcsc/Types.h: -------------------------------------------------------------------------------- 1 | /* Types.h -- Basic types 2 | 2010-10-09 : Igor Pavlov : Public domain */ 3 | 4 | #ifndef __7Z_TYPES_H 5 | #define __7Z_TYPES_H 6 | 7 | #include 8 | 9 | #ifdef _WIN32 10 | #include 11 | #endif 12 | 13 | #ifndef EXTERN_C_BEGIN 14 | #ifdef __cplusplus 15 | #define EXTERN_C_BEGIN extern "C" { 16 | #define EXTERN_C_END } 17 | #else 18 | #define EXTERN_C_BEGIN 19 | #define EXTERN_C_END 20 | #endif 21 | #endif 22 | 23 | EXTERN_C_BEGIN 24 | 25 | #define SZ_OK 0 26 | 27 | #define SZ_ERROR_DATA 1 28 | #define SZ_ERROR_MEM 2 29 | #define 
SZ_ERROR_CRC 3 30 | #define SZ_ERROR_UNSUPPORTED 4 31 | #define SZ_ERROR_PARAM 5 32 | #define SZ_ERROR_INPUT_EOF 6 33 | #define SZ_ERROR_OUTPUT_EOF 7 34 | #define SZ_ERROR_READ 8 35 | #define SZ_ERROR_WRITE 9 36 | #define SZ_ERROR_PROGRESS 10 37 | #define SZ_ERROR_FAIL 11 38 | #define SZ_ERROR_THREAD 12 39 | 40 | #define SZ_ERROR_ARCHIVE 16 41 | #define SZ_ERROR_NO_ARCHIVE 17 42 | 43 | typedef int SRes; 44 | 45 | #ifdef _WIN32 46 | typedef DWORD WRes; 47 | #else 48 | typedef int WRes; 49 | #endif 50 | 51 | #ifndef RINOK 52 | #define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } 53 | #endif 54 | 55 | typedef unsigned char Byte; 56 | typedef short Int16; 57 | typedef unsigned short UInt16; 58 | 59 | #ifdef _LZMA_UINT32_IS_ULONG 60 | typedef long Int32; 61 | typedef unsigned long UInt32; 62 | #else 63 | typedef int Int32; 64 | typedef unsigned int UInt32; 65 | #endif 66 | 67 | #ifdef _SZ_NO_INT_64 68 | 69 | /* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. 70 | NOTES: Some code will work incorrectly in that case! 
*/ 71 | 72 | typedef long Int64; 73 | typedef unsigned long UInt64; 74 | 75 | #else 76 | 77 | #if defined(_MSC_VER) || defined(__BORLANDC__) 78 | typedef __int64 Int64; 79 | typedef unsigned __int64 UInt64; 80 | #define UINT64_CONST(n) n 81 | #else 82 | typedef long long int Int64; 83 | typedef unsigned long long int UInt64; 84 | #define UINT64_CONST(n) n ## ULL 85 | #endif 86 | 87 | #endif 88 | 89 | #ifdef _LZMA_NO_SYSTEM_SIZE_T 90 | typedef UInt32 SizeT; 91 | #else 92 | typedef size_t SizeT; 93 | #endif 94 | 95 | typedef int Bool; 96 | #define True 1 97 | #define False 0 98 | 99 | 100 | #ifdef _WIN32 101 | #define MY_STD_CALL __stdcall 102 | #else 103 | #define MY_STD_CALL 104 | #endif 105 | 106 | #ifdef _MSC_VER 107 | 108 | #if _MSC_VER >= 1300 109 | #define MY_NO_INLINE __declspec(noinline) 110 | #else 111 | #define MY_NO_INLINE 112 | #endif 113 | 114 | #define MY_CDECL __cdecl 115 | #define MY_FAST_CALL __fastcall 116 | 117 | #else 118 | 119 | #define MY_CDECL 120 | #define MY_FAST_CALL 121 | 122 | #endif 123 | 124 | 125 | /* The following interfaces use first parameter as pointer to structure */ 126 | 127 | typedef struct 128 | { 129 | Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */ 130 | } IByteIn; 131 | 132 | typedef struct 133 | { 134 | void (*Write)(void *p, Byte b); 135 | } IByteOut; 136 | 137 | typedef struct 138 | { 139 | SRes (*Read)(void *p, void *buf, size_t *size); 140 | /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
141 | (output(*size) < input(*size)) is allowed */ 142 | } ISeqInStream; 143 | 144 | /* it can return SZ_ERROR_INPUT_EOF */ 145 | SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); 146 | SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); 147 | SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf); 148 | 149 | typedef struct 150 | { 151 | size_t (*Write)(void *p, const void *buf, size_t size); 152 | /* Returns: result - the number of actually written bytes. 153 | (result < size) means error */ 154 | } ISeqOutStream; 155 | 156 | typedef enum 157 | { 158 | SZ_SEEK_SET = 0, 159 | SZ_SEEK_CUR = 1, 160 | SZ_SEEK_END = 2 161 | } ESzSeek; 162 | 163 | typedef struct 164 | { 165 | SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ 166 | SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); 167 | } ISeekInStream; 168 | 169 | typedef struct 170 | { 171 | SRes (*Look)(void *p, const void **buf, size_t *size); 172 | /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 173 | (output(*size) > input(*size)) is not allowed 174 | (output(*size) < input(*size)) is allowed */ 175 | SRes (*Skip)(void *p, size_t offset); 176 | /* offset must be <= output(*size) of Look */ 177 | 178 | SRes (*Read)(void *p, void *buf, size_t *size); 179 | /* reads directly (without buffer). 
It's same as ISeqInStream::Read */ 180 | SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); 181 | } ILookInStream; 182 | 183 | SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); 184 | SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset); 185 | 186 | /* reads via ILookInStream::Read */ 187 | SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); 188 | SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); 189 | 190 | #define LookToRead_BUF_SIZE (1 << 14) 191 | 192 | typedef struct 193 | { 194 | ILookInStream s; 195 | ISeekInStream *realStream; 196 | size_t pos; 197 | size_t size; 198 | Byte buf[LookToRead_BUF_SIZE]; 199 | } CLookToRead; 200 | 201 | void LookToRead_CreateVTable(CLookToRead *p, int lookahead); 202 | void LookToRead_Init(CLookToRead *p); 203 | 204 | typedef struct 205 | { 206 | ISeqInStream s; 207 | ILookInStream *realStream; 208 | } CSecToLook; 209 | 210 | void SecToLook_CreateVTable(CSecToLook *p); 211 | 212 | typedef struct 213 | { 214 | ISeqInStream s; 215 | ILookInStream *realStream; 216 | } CSecToRead; 217 | 218 | void SecToRead_CreateVTable(CSecToRead *p); 219 | 220 | typedef struct 221 | { 222 | SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize); 223 | /* Returns: result. (result != SZ_OK) means break. 224 | Value (UInt64)(Int64)-1 for size means unknown value. 
*/ 225 | } ICompressProgress; 226 | 227 | typedef struct 228 | { 229 | void *(*Alloc)(void *p, size_t size); 230 | void (*Free)(void *p, void *address); /* address can be 0 */ 231 | } ISzAlloc; 232 | 233 | #define IAlloc_Alloc(p, size) (p)->Alloc((p), size) 234 | #define IAlloc_Free(p, a) (p)->Free((p), a) 235 | 236 | #ifdef _WIN32 237 | 238 | #define CHAR_PATH_SEPARATOR '\\' 239 | #define WCHAR_PATH_SEPARATOR L'\\' 240 | #define STRING_PATH_SEPARATOR "\\" 241 | #define WSTRING_PATH_SEPARATOR L"\\" 242 | 243 | #else 244 | 245 | #define CHAR_PATH_SEPARATOR '/' 246 | #define WCHAR_PATH_SEPARATOR L'/' 247 | #define STRING_PATH_SEPARATOR "/" 248 | #define WSTRING_PATH_SEPARATOR L"/" 249 | 250 | #endif 251 | 252 | EXTERN_C_END 253 | 254 | #endif 255 | -------------------------------------------------------------------------------- /src/libcsc/csc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | struct StdioSeqStream 10 | { 11 | union { 12 | ISeqInStream is; 13 | ISeqOutStream os; 14 | }; 15 | FILE *f; 16 | }; 17 | 18 | 19 | int stdio_read(void *p, void *buf, size_t *size) 20 | { 21 | StdioSeqStream *sss = (StdioSeqStream *)p; 22 | *size = fread(buf, 1, *size, sss->f); 23 | return 0; 24 | } 25 | 26 | size_t stdio_write(void *p, const void *buf, size_t size) 27 | { 28 | StdioSeqStream *sss = (StdioSeqStream *)p; 29 | return fwrite(buf, 1, size, sss->f); 30 | } 31 | 32 | int show_progress(void *p, UInt64 insize, UInt64 outsize) 33 | { 34 | (void)p; 35 | fprintf(stderr, "\r%llu -> %llu\t\t\t\t", insize, outsize); 36 | fflush(stdout); 37 | return 0; 38 | } 39 | 40 | void ShowUsage(char *me) 41 | { 42 | fprintf(stderr, "Usage: %s c/d [options] input output\n", me); 43 | fprintf(stderr, " options:\n"); 44 | fprintf(stderr, " -m{1..5}, compression level from fast to best\n"); 45 | fprintf(stderr, " -d{###[k|m], dictionary size, ### Bytes [or KB/MB], 
/**
 * Parses one command-line token for the two basic encoder options.
 *
 *   -m<digit>     compression level; the digit is stored in *level
 *   -d<N>[k|m]    dictionary size in bytes, KiB or MiB, stored in *dict_size;
 *                 the resulting size must satisfy 32 KiB <= size < 1 GiB
 *
 * Tokens that start with neither prefix are ignored and reported as success.
 *
 * @param argv       the option token (NUL-terminated)
 * @param dict_size  receives the dictionary size; untouched unless a valid
 *                   -d option was parsed
 * @param level      receives the level; untouched unless a valid -m option
 *                   was parsed
 * @return 0 on success or when the token is not a basic option, -1 when the
 *         token is a malformed or out-of-range -m / -d option
 */
int ParseBasicOpt(char *argv, uint32_t *dict_size, int *level)
{
    if (strncmp(argv, "-m", 2) == 0) {
        // Require an actual digit; a missing or non-numeric level is an error.
        if (argv[2] < '0' || argv[2] > '9')
            return -1;
        *level = argv[2] - '0';
    } else if (strncmp(argv, "-d", 2) == 0) {
        const uint64_t kMinDict = 32ull * 1024;
        const uint64_t kMaxDict = 1024ull * 1024 * 1024;    // exclusive upper bound

        // Parse and scale in 64 bits so that large counts cannot wrap around
        // into the accepted range (e.g. "-d4097m" used to become 1 MiB).
        const long long count = strtoll(argv + 2, NULL, 10);
        if (count < 0 || (uint64_t)count >= kMaxDict)
            return -1;

        uint64_t bytes = (uint64_t)count;
        const size_t slen = strlen(argv);                   // >= 2 because of the "-d" prefix
        const char unit = (char)(argv[slen - 1] | 0x20);    // fold the suffix to lower case
        if (unit == 'k')
            bytes *= 1024;
        else if (unit == 'm')
            bytes *= 1024 * 1024;

        if (bytes < kMinDict || bytes >= kMaxDict)
            return -1;
        *dict_size = (uint32_t)bytes;
    }
    return 0;
}
CSCProps p; 125 | uint32_t dict_size = 64000000; 126 | uint64_t filesize = GetFileSize(fin); 127 | int level = 2; 128 | for(int i = 2; i < argc - 2; i++) { 129 | if (ParseBasicOpt(argv[i], &dict_size, &level) < 0) 130 | ShowUsage(argv[0]); 131 | } 132 | 133 | if (filesize < dict_size) 134 | dict_size = filesize; 135 | 136 | // init the default settings 137 | CSCEncProps_Init(&p, dict_size, level); 138 | // Then make extra settings 139 | for(int i = 2; i < argc - 2; i++) { 140 | if (ParseOpt(&p, argv[i]) < 0) 141 | ShowUsage(argv[0]); 142 | } 143 | 144 | fprintf(stderr, "Estimated memory usage: %llu MB\n", CSCEnc_EstMemUsage(&p) / 1048576ull); 145 | unsigned char buf[CSC_PROP_SIZE]; 146 | CSCEnc_WriteProperties(&p, buf, 0); 147 | (void)(fwrite(buf, 1, CSC_PROP_SIZE, fout) + 1); 148 | CSCEncHandle h = CSCEnc_Create(&p, (ISeqOutStream*)&osss, NULL); 149 | CSCEnc_Encode(h, (ISeqInStream*)&isss, &prog); 150 | CSCEnc_Encode_Flush(h); 151 | CSCEnc_Destroy(h); 152 | } else { 153 | CSCProps p; 154 | unsigned char buf[CSC_PROP_SIZE]; 155 | (void)(fread(buf, 1, CSC_PROP_SIZE, fin) + 1); 156 | CSCDec_ReadProperties(&p, buf); 157 | CSCDecHandle h = CSCDec_Create(&p, (ISeqInStream*)&isss, NULL); 158 | if (!h) { 159 | printf("Invalid csc compressed file\n"); 160 | return 1; 161 | } 162 | CSCDec_Decode(h, (ISeqOutStream*)&osss, &prog); 163 | CSCDec_Destroy(h); 164 | } 165 | fclose(fin); 166 | fclose(fout); 167 | 168 | printf("\n"); 169 | return 0; 170 | } 171 | 172 | 173 | -------------------------------------------------------------------------------- /src/libcsc/csc_analyzer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define AYZDEBUG 7 | 8 | 9 | void Analyzer::Init() 10 | { 11 | for(uint32_t i = 0; i< (MinBlockSize >> 4); i++) 12 | logTable[i] = (double)100 * log((double)i * 16 + 8)/ log((double)2); 13 | 14 | logTable[MinBlockSize >> 4] = 
(double)100*log((double)(MinBlockSize))/log((double)2); 15 | //fLog=fopen("r:\\dataLog.txt","w"); 16 | } 17 | 18 | uint32_t Analyzer::AnalyzeHeader(uint8_t *src, uint32_t size) //,uint32_t *typeArg1,uint32_t *typeArg2,uint32_t *typeArg3) 19 | { 20 | 21 | /* 22 | if (size<128) 23 | return DT_NONE; 24 | 25 | if (src[0]=='B' && src[1]=='M') 26 | { 27 | uint32_t width=*(uint32_t*)(src+0x12); 28 | uint32_t colorbits=*(uint32_t*)(src+0x1C); 29 | uint32_t compressed=*(uint32_t*)(src+0x1E); 30 | 31 | #ifdef AYZDEBUG 32 | printf(" ===== Detect BM: width:%u colorbits:%u compressed:%u\n",width,colorbits,compressed); 33 | #endif 34 | if (width<16384 && 35 | (colorbits==8|| 36 | colorbits==16|| 37 | colorbits==24|| 38 | colorbits==32) && 39 | compressed==0 && 40 | *(uint16_t*)(src+0x1A)==1 41 | ) 42 | { 43 | *typeArg1=width; 44 | *typeArg2=colorbits; 45 | return DT_RGB; 46 | } 47 | } 48 | 49 | if (src[0]==0 && src[1]==0 &&src[3]==0) 50 | { 51 | if ( (src[2]==2 || src[2]==10) 52 | && (src[16]==24 || src[16]==32) ) 53 | { 54 | uint32_t width=*(uint16_t*)(src+0x0C); 55 | 56 | #ifdef AYZDEBUG 57 | printf(" ===== Detect TGA: width:%u colorbits:%u\n",width,src[16]); 58 | #endif 59 | *typeArg1=width; 60 | *typeArg2=src[16]; 61 | return DT_RGB; 62 | } 63 | } 64 | 65 | 66 | if (src[0]=='P' && src[1]=='6' &&src[2]==0x0a) 67 | { 68 | uint8_t *pTmp=src+3; 69 | bool inComent=false; 70 | while(*pTmp==0 || *pTmp=='#' || inComent) 71 | { 72 | if (*pTmp==0x0a) inComent=false; 73 | else if (*pTmp=='#') inComent=true; 74 | pTmp++; 75 | if (pTmp-src>127) 76 | break; 77 | } 78 | 79 | uint32_t width; 80 | sscanf((char*)pTmp,"%d",&width); 81 | 82 | #ifdef AYZDEBUG 83 | printf(" ===== Detect PPM - P6: width:%u\n",width); 84 | #endif 85 | *typeArg1=width; 86 | *typeArg2=24; 87 | return DT_RGB; 88 | } 89 | 90 | // if (src[0]=='R' && src[1]=='I' && src[2]=='F' && src[3]=='F' && 91 | // src[8]=='W' && src[9]=='A' && src[10]=='V' && src[11]=='E') 92 | // { 93 | //#ifdef AYZDEBUG 94 | // printf(" ===== 
Detect Riff Wav \n"); 95 | //#endif 96 | // return DT_AUDIO; 97 | // } 98 | if (src[0]==80 && src[2]==10 && (src[1]==53||src[1]==54)) 99 | { 100 | P5623 101 | } 102 | 103 | //if (*(tufx)==80 && *(tufx+1)==54 && *(tufx+2)==10 ){PPMread();Decont=1;chn=1;deltarange=1;} 104 | //if (*(tufx)==80 && *(tufx+1)==53 && *(tufx+2)==10 ){PPMread();Decont=1;chn=1;deltarange=1;} 105 | //if (Peekl(tufx+8)==1163280727) {Decont=1;processed=1;chn=Peekw(tufx+22);deltarange=(Peekw(tufx+34)>>3);}// 106 | //if (Peekl(tufx)==1297239878 && Peekl(tufx+8)==1179011393 ) {Decont=1;chn=Peekb(tufx+21);deltarange=Peekb(tufx+27)>>3;} //AIFF AUDIO 107 | 108 | //if (*(tufx)==80 && *(tufx+1)==54 && *(tufx+2)==10 ){ CTX1=3;CTX2=6;processed=0;chn=1;deltarange=Deltac;ROLZ=0;RZCLEVEL=2;PPMread();} //PPM 109 | 110 | *.wav ----- if (Peekl(tufx)==1179011410) { processed=1;ROLZ=0;RZCLEVEL=2;chn=1;deltarange=1;} //RIFF WAVE 111 | if (Peekl(tufx)==1297239878 && Peekl(tufx+8)==1179011393 ) {processed=Deltac;ROLZ=0;RZCLEVEL=2;chn=Peekb(tufx+21);CTX1=chn*(Peekb(tufx+27)>>3);CTX2=CTX1*2;deltarange=Peekb(tufx+27)>>3;} //AIFF AUDIO 112 | if (Peekl(tufx+8)==1163280727) { processed=Deltac;ROLZ=0;RZCLEVEL=2;chn=Peekw(tufx+22);CTX1=chn;CTX2=CTX1*2;deltarange=(Peekw(tufx+34)>>3);}// wave // 113 | //if (*(tufx)==0 && *(tufx+1)==0 && *(tufx+2)==2 && *(tufx+3)==0 ) { CTX1=3;CTX2=6;processed=Deltac;chn=1;deltarange=1;ROLZ=0;RZCLEVEL=2;} // TARGA image // 114 | //if (*(tufx)==0 && *(tufx+1)==0 && *(tufx+2)==10 && *(tufx+3)==0) { CTX1=3;CTX2=6;processed=Deltac;chn=1;deltarange=1;ROLZ=0;RZCLEVEL=2;} // TARGA image RLE// 115 | //if (*(tufx)==80 && *(tufx+1)==53 && *(tufx+2)==10 ) { processed=Deltac;chn=1;deltarange=1;ROLZ=1;RZCLEVEL=2;PPMread();} // PGM image // 116 | */ 117 | return DT_NONE; 118 | } 119 | 120 | uint32_t lastType=0; 121 | 122 | int32_t Analyzer::get_channel_idx(uint8_t *src,uint32_t size) 123 | { 124 | uint32_t sameDist[DLT_CHANNEL_MAX]={0},succValue[DLT_CHANNEL_MAX]={0}; 125 | uint32_t 
maxSame,maxSucc,minSame,minSucc,bestChnNum; 126 | 127 | for(uint32_t i = 0; i + 16 < size; i++) { 128 | sameDist[0] += (src[i]==src[i+1]); 129 | sameDist[1] += (src[i]==src[i+2]); 130 | sameDist[2] += (src[i]==src[i+3]); 131 | sameDist[3] += (src[i]==src[i+4]); 132 | sameDist[4] += (src[i]==src[i+8]); 133 | succValue[0] += abs((signed)src[i]-(signed)src[i+1]); 134 | succValue[1] += abs((signed)src[i]-(signed)src[i+2]); 135 | succValue[2] += abs((signed)src[i]-(signed)src[i+3]); 136 | succValue[3] += abs((signed)src[i]-(signed)src[i+4]); 137 | succValue[4] += abs((signed)src[i]-(signed)src[i+8]); 138 | } 139 | 140 | maxSame=minSame = sameDist[0]; 141 | maxSucc=minSucc = succValue[0]; 142 | bestChnNum = 0; 143 | 144 | for (uint32_t i = 0;i < DLT_CHANNEL_MAX; i++) { 145 | if (sameDist[i] < minSame) minSame=sameDist[i]; 146 | if (sameDist[i] > maxSame) maxSame=sameDist[i]; 147 | if (succValue[i] > maxSucc) maxSucc=succValue[i]; 148 | if (succValue[i] < minSucc) { 149 | minSucc = succValue[i]; 150 | bestChnNum = i; 151 | } 152 | } 153 | 154 | 155 | if ( ((maxSucc > succValue[bestChnNum] * 4) || (maxSucc > succValue[bestChnNum] + 40 * size)) 156 | && (sameDist[bestChnNum] > minSame * 3) 157 | //&& (entropy>700*size || diffNum>245) 158 | && (sameDist[0] < 0.3 * size)) 159 | { 160 | //printf("delta:%d %d %d %d %d %dr:%d \n",succValue[0],succValue[1],succValue[2],succValue[3],succValue[4],sameDist[5],bestChnNum); 161 | return bestChnNum; 162 | } 163 | return -1; 164 | } 165 | 166 | uint32_t Analyzer::GetDltBpb(uint8_t *src, uint32_t size, uint32_t chn) 167 | { 168 | uint32_t freq[256] = {0}; 169 | uint8_t prev = 0; 170 | uint32_t bpb = 0; 171 | for(uint32_t i = 0; i < chn; i++) 172 | for(uint32_t j = i; j < size; j += chn) { 173 | freq[uint8_t(src[j] - prev)]++; 174 | prev = src[j]; 175 | } 176 | 177 | bpb = size * logTable[size>>4]; 178 | for(uint32_t i = 0; i < 256; i++) 179 | bpb -= freq[i] * logTable[freq[i] >> 4]; 180 | bpb /= size; 181 | return bpb; 182 | } 183 | 184 
| uint32_t Analyzer::Analyze(uint8_t *src, uint32_t size, uint32_t *bpb) 185 | { 186 | uint32_t avgFreq,freq[256]={0}; 187 | uint32_t freq0x80[2]={0}; 188 | uint32_t entropy,alpha_num,diffNum; 189 | 190 | if (size>MinBlockSize) 191 | size=MinBlockSize; 192 | 193 | if (size<512) 194 | return DT_SKIP; 195 | 196 | for(uint32_t i = 0; i < size; i++) 197 | freq[src[i]]++; 198 | 199 | diffNum = 0; 200 | entropy = size * logTable[size>>4]; 201 | 202 | for(uint32_t i = 0; i < 256; i++) { 203 | entropy -= freq[i] * logTable[freq[i] >> 4]; 204 | diffNum += (freq[i] > 0); 205 | freq0x80[i >> 7] += freq[i]; 206 | } 207 | *bpb = entropy / size; 208 | avgFreq = size >> 8; 209 | 210 | alpha_num = 0; 211 | for(uint32_t i='a';i<='z';i++) 212 | alpha_num += freq[i]; 213 | 214 | if (freq0x80[1] < (size >> 3) && (freq[' '] + freq['\n'] + freq[':'] + freq['.'] + freq['/'] > (size >> 4)) 215 | && (freq['a'] + freq['e'] + freq['t'] > (size >> 4)) 216 | && entropy > 300 * size 217 | && alpha_num > (size / 3)) 218 | return DT_ENGTXT; 219 | 220 | if (freq[0x8b] > avgFreq && freq[0x00] > avgFreq * 2 && freq[0xE8] > 6) 221 | return DT_EXE; 222 | 223 | if (entropy > (log((double)diffNum - 2) / log((double)2) - 0.6) * 100.0 * size && diffNum < 16 && diffNum >= 6) 224 | return DT_ENTROPY; 225 | 226 | if (entropy < 400 * size && diffNum < 200) 227 | return DT_NORMAL; 228 | 229 | int32_t dltIdx = get_channel_idx(src,size); 230 | if (dltIdx != -1) 231 | return DT_DLT + dltIdx; 232 | 233 | if (entropy > 795 * size) 234 | return DT_BAD; 235 | else if (entropy > 780 * size) 236 | return DT_FAST; 237 | 238 | return DT_NORMAL;//DT_NORMAL; 239 | } 240 | 241 | 242 | -------------------------------------------------------------------------------- /src/libcsc/csc_analyzer.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_ANALYZER_H_ 2 | #define _CSC_ANALYZER_H_ 3 | 4 | #include 5 | 6 | 7 | class Analyzer 8 | { 9 | public: 10 | void Init(); 11 | 
//~Analyzer(); 12 | uint32_t Analyze(uint8_t* src, uint32_t size, uint32_t *bpb); 13 | uint32_t AnalyzeHeader(uint8_t *src, uint32_t size);//,uint32_t *typeArg1,uint32_t *typeArg2,uint32_t *typeArg3); 14 | uint32_t GetDltBpb(uint8_t *src, uint32_t size, uint32_t chn); 15 | 16 | private: 17 | uint32_t logTable[(MinBlockSize >> 4) + 1]; 18 | int32_t get_channel_idx(uint8_t *src, uint32_t size); 19 | }; 20 | 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/libcsc/csc_coder.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | int Coder::Init(MemIO *io, ISzAlloc *alloc) 7 | { 8 | alloc_ = alloc; 9 | rc_low_ = 0; 10 | rc_range_ = 0xFFFFFFFF; 11 | rc_cachesize_ = 1; 12 | rc_cache_ = 0; 13 | rc_code_=0; 14 | 15 | rc_size_ = bc_size_ = 0; 16 | bc_curbits_ = bc_curval_ =0; 17 | 18 | outsize_ = 0; 19 | 20 | io_ = io; 21 | rc_bufsize_ = bc_bufsize_ = io->GetBlockSize(); 22 | prc_ = rc_buf_ = (uint8_t *)alloc_->Alloc(alloc_, rc_bufsize_); 23 | pbc_ = bc_buf_ = (uint8_t *)alloc_->Alloc(alloc_, bc_bufsize_); 24 | 25 | if (rc_buf_ && bc_buf_) { 26 | return 0; 27 | } else { 28 | alloc_->Free(alloc_, rc_buf_); 29 | alloc_->Free(alloc_, bc_buf_); 30 | return -1; 31 | } 32 | } 33 | 34 | void Coder::Destroy() 35 | { 36 | alloc_->Free(alloc_, rc_buf_); 37 | alloc_->Free(alloc_, bc_buf_); 38 | } 39 | 40 | void Coder::Flush() 41 | { 42 | for (int i=0;i<5;i++) // One more byte for EOF 43 | { 44 | RC_ShiftLow(); 45 | } 46 | prc_++; 47 | rc_size_++; 48 | 49 | //one more byte for bitcoder is to prevent overflow while reading 50 | for(int i = 0; i < 2; i++) { 51 | if (i == 1) 52 | *pbc_++=0; 53 | else 54 | *pbc_++ = (bc_curval_ << (8 - bc_curbits_)) & 0xFF; 55 | bc_size_++; 56 | BCWCheckBound(); 57 | } 58 | 59 | outsize_ += rc_size_ + bc_size_; 60 | if (io_->WriteRCData(rc_buf_, rc_size_) != (int)rc_size_ 61 | || io_->WriteBCData(bc_buf_, bc_size_) 
!= (int)bc_size_) { 62 | throw (int)WRITE_ERROR; 63 | } 64 | 65 | rc_low_ = 0; 66 | rc_range_ = 0xFFFFFFFF; 67 | rc_cachesize_ = 1; 68 | rc_cache_ = 0; 69 | rc_code_=0; 70 | rc_size_ = bc_size_ = 0; 71 | bc_curbits_ = bc_curval_ = 0; 72 | prc_ = rc_buf_; 73 | pbc_ = bc_buf_; 74 | } 75 | 76 | void Coder::EncDirect16(uint32_t val,uint32_t len) 77 | { 78 | bc_curval_ = (bc_curval_ << len) | val; 79 | bc_curbits_ += len; 80 | while(bc_curbits_ >= 8) 81 | { 82 | *pbc_++ = (bc_curval_ >> (bc_curbits_ - 8)) & 0xFF; 83 | bc_size_++; 84 | BCWCheckBound(); 85 | bc_curbits_ -= 8; 86 | } 87 | } 88 | 89 | void Coder::RC_ShiftLow(void) 90 | { 91 | uint8_t temp; 92 | if ((uint32_t)rc_low_ < (uint32_t)0xFF000000 || (int32_t)(rc_low_ >> 32) != 0) { 93 | temp = rc_cache_; 94 | do { 95 | *prc_++ = (uint8_t)(temp + (uint8_t)(rc_low_ >> 32)); 96 | rc_size_++; 97 | if (rc_size_ == rc_bufsize_) { 98 | outsize_+=rc_size_; 99 | if (io_->WriteRCData(rc_buf_,rc_bufsize_) != (int)rc_bufsize_) { 100 | throw (int)WRITE_ERROR; 101 | } 102 | rc_size_=0; 103 | prc_=rc_buf_; 104 | } 105 | temp = 0xFF; 106 | } 107 | while (--rc_cachesize_ != 0); 108 | rc_cache_ = (uint8_t)((uint32_t)rc_low_ >> 24); 109 | } 110 | rc_cachesize_++; 111 | rc_low_ = (uint32_t)rc_low_ << 8; 112 | } 113 | 114 | -------------------------------------------------------------------------------- /src/libcsc/csc_coder.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_CODER_H_ 2 | #define _CSC_CODER_H_ 3 | #include 4 | #include 5 | #include 6 | 7 | #define BCWCheckBound() do{if (bc_size_ == bc_bufsize_) \ 8 | {\ 9 | outsize_ += bc_size_;\ 10 | if (io_->WriteBCData(bc_buf_, bc_bufsize_) != (int)bc_bufsize_) {\ 11 | throw (int)WRITE_ERROR; \ 12 | } \ 13 | bc_size_ = 0;\ 14 | pbc_ = bc_buf_;\ 15 | }}while(0) 16 | 17 | class Coder 18 | { 19 | MemIO *io_; 20 | ISzAlloc *alloc_; 21 | 22 | public: 23 | 24 | int Init(MemIO *io, ISzAlloc *alloc); 25 | void Destroy(); 26 | 27 | //flush the 
coder 28 | void Flush(); 29 | 30 | //Get coded length 31 | uint32_t GetCodedLength(void) { 32 | return bc_size_ + rc_size_; 33 | } 34 | 35 | void EncDirect16(uint32_t val,uint32_t len); 36 | 37 | void RC_ShiftLow(void); 38 | 39 | //the main buffer for encoding/decoding range/bit coder 40 | uint8_t *rc_buf_; 41 | uint8_t *bc_buf_; 42 | 43 | //indicates the full size of buffer range/bit coder 44 | uint32_t rc_bufsize_; 45 | uint32_t bc_bufsize_; 46 | 47 | uint64_t rc_low_,rc_cachesize_; 48 | uint32_t rc_range_,rc_code_; 49 | uint8_t rc_cache_; 50 | 51 | // for bit coder 52 | uint32_t bc_curbits_; 53 | uint32_t bc_curval_; 54 | 55 | //the i/o pointer of range coder and bit coder 56 | uint8_t *prc_; 57 | uint8_t *pbc_; 58 | 59 | //byte counts of output bytes by range coder and bit coder 60 | uint32_t bc_size_; 61 | uint32_t rc_size_; 62 | int64_t outsize_; 63 | uint32_t bnum_; 64 | }; 65 | 66 | 67 | #define EncodeBit(coder,v,p) do {\ 68 | uint32_t newBound = (coder->rc_range_ >> 12) * p;\ 69 | if (v) {\ 70 | coder->rc_range_ = newBound;\ 71 | p += (0xFFF - p) >> 5;\ 72 | } else {\ 73 | coder->rc_low_ += newBound;\ 74 | coder->rc_range_ -= newBound;\ 75 | p -= p >> 5;\ 76 | }\ 77 | \ 78 | if (coder->rc_range_ < (1 << 24)) {\ 79 | coder->rc_range_ <<= 8;\ 80 | coder->RC_ShiftLow();\ 81 | }}while(0) 82 | 83 | #define EncodeDirect(x,v,l) do{if ((l) <= 16)\ 84 | x->EncDirect16(v,l);\ 85 | else {\ 86 | x->EncDirect16((v) >> 16,(l) - 16);\ 87 | x->EncDirect16((v) & 0xFFFF,16);\ 88 | }}while(0) 89 | 90 | #endif 91 | 92 | -------------------------------------------------------------------------------- /src/libcsc/csc_common.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_COMMON_H_ 2 | #define _CSC_COMMON_H_ 3 | 4 | #include 5 | 6 | #ifdef _7Z_TYPES_ 7 | #include 8 | #else 9 | #endif 10 | 11 | #define CSC_PROP_SIZE (4 + 3 + 3) 12 | 13 | #define DECODE_ERROR (-96) 14 | #define WRITE_ERROR (-97) 15 | #define READ_ERROR (-98) 16 
| 17 | const size_t CSC_WRITE_ABORT = (size_t)-1; 18 | 19 | typedef struct _CSCProps { 20 | // LZ77 dictionary size, 32 KB to 1GB, real size is 8KB less 21 | size_t dict_size; 22 | 23 | // compressed stream block size, leaving it as default is good 24 | uint32_t csc_blocksize; // must be < 16M 25 | 26 | // uncompressed block size, it determines how much data 27 | // can be tried to process in one comp/decom call back. 28 | // default 1 * MB 29 | uint32_t raw_blocksize; // must be < 16M 30 | 31 | /******* Below only works while compression *****/ 32 | // hash table match finder hash bits, advice value 17 - 24 33 | // depending on dict_size 34 | uint8_t hash_bits; 35 | 36 | // hash table match finder candidates num. Work well with lz_mode = 0 or 1. 37 | uint8_t hash_width; 38 | 39 | // binary tree entrance hash bits, advice value 17 - 24 40 | // depending on dict_size, 0 to disable bt match finder 41 | uint8_t bt_hash_bits; 42 | 43 | // Binary tree match finder working range, 44 | // must be less than dict_size, 0 to disable bt match finder 45 | uint32_t bt_size; 46 | 47 | // max steps binary tree match finder will try 48 | uint32_t bt_cyc; 49 | 50 | // default 32 should be good enough, does not benefit much with higher value 51 | uint8_t good_len; 52 | 53 | // lz engine mode: 54 | // 0 fastest, 1 efficient, 2 advanced ( 3+ times than 2) 55 | // for 0 and 1, better to turn bt match finder off 56 | // and 2 with bt match finder on 57 | uint8_t lz_mode; 58 | 59 | // 3 kinds of filters supported so far 60 | uint8_t DLTFilter; 61 | uint8_t TXTFilter; 62 | uint8_t EXEFilter; 63 | } CSCProps; 64 | 65 | #endif 66 | 67 | -------------------------------------------------------------------------------- /src/libcsc/csc_dec.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_DEC_H_ 2 | #define _CSC_DEC_H_ 3 | #include 4 | 5 | EXTERN_C_BEGIN 6 | 7 | // DecProps usually needs to be read from existing data 8 | void 
CSCDec_ReadProperties(CSCProps *props, uint8_t *stream); 9 | 10 | typedef void * CSCDecHandle; 11 | 12 | // alloc can be NULL, so default malloc/free will be used 13 | CSCDecHandle CSCDec_Create(const CSCProps *props, 14 | ISeqInStream *instream, 15 | ISzAlloc *alloc); 16 | 17 | void CSCDec_Destroy(CSCDecHandle p); 18 | 19 | int CSCDec_Decode(CSCDecHandle p, 20 | ISeqOutStream *outstream, 21 | ICompressProgress *progress); 22 | 23 | 24 | /* 25 | int CSCDec_SimpleDecode(uint8_t *dest, 26 | size_t *destLen, 27 | const uint8_t *src, 28 | size_t srcLen, 29 | ICompressProgress *progress, 30 | ISzAlloc *alloc); 31 | */ 32 | 33 | EXTERN_C_END 34 | 35 | #endif 36 | 37 | -------------------------------------------------------------------------------- /src/libcsc/csc_default_alloc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | static void *Alloc(void *p, size_t size) 6 | { 7 | return malloc(size); 8 | } 9 | 10 | static void Free(void *p, void *address) 11 | { 12 | free(address); 13 | } 14 | 15 | 16 | ISzAlloc st_default_alloc = {Alloc, Free}; 17 | ISzAlloc *default_alloc = &st_default_alloc; 18 | 19 | -------------------------------------------------------------------------------- /src/libcsc/csc_default_alloc.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_DEFAULT_ALLOC_H_ 2 | #define _CSC_DEFAULT_ALLOC_H_ 3 | #include 4 | 5 | extern ISzAlloc *default_alloc; 6 | 7 | #endif 8 | 9 | -------------------------------------------------------------------------------- /src/libcsc/csc_enc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | struct CSCEncInstance 9 | { 10 | CSCEncoder *encoder; 11 | MemIO *io; 12 | ISzAlloc *alloc; 13 | uint32_t raw_blocksize; 14 | }; 15 | 16 | void CSCEncProps_Init(CSCProps *p, uint32_t dict_size, int 
level) 17 | { 18 | dict_size += 10 * KB; // a little more, real size is 8KB smaller than set number 19 | if (dict_size < 32 * KB) dict_size = 32 * KB; 20 | if (dict_size > 1024 * MB) dict_size = 1024 * MB; 21 | p->dict_size = dict_size; 22 | if (level < 1) level = 1; 23 | if (level > 5) level = 5; 24 | p->DLTFilter = 1; 25 | p->TXTFilter = 1; 26 | p->EXEFilter = 1; 27 | p->csc_blocksize = 64 * KB; 28 | p->raw_blocksize = 2 * MB; 29 | 30 | uint32_t hbits = 20; 31 | if (dict_size < MB) 32 | hbits = 19; 33 | else if (dict_size <= 4 * MB) 34 | hbits = 20; 35 | else if (dict_size <= 16 * MB) 36 | hbits = 21; 37 | else if (dict_size <= 64 * MB) 38 | hbits = 22; 39 | else if (dict_size <= 256 * MB) 40 | hbits = 23; 41 | else 42 | hbits = 24; 43 | while(((uint32_t)1 << hbits) > dict_size) hbits--; 44 | 45 | if (dict_size <= 16 * MB) 46 | p->bt_size = dict_size; 47 | else if (dict_size <= 64 * MB) 48 | p->bt_size = (dict_size - 16 * MB) / 2 + 16 * MB; 49 | else if (dict_size <= 256 * MB) 50 | p->bt_size = (dict_size - 64 * MB) / 4 + 40 * MB; 51 | else 52 | p->bt_size = (dict_size - 256 * MB) / 8 + 88 * MB; 53 | 54 | p->good_len = 32; 55 | p->hash_bits = hbits; 56 | p->bt_hash_bits = hbits + 1; 57 | switch (level) { 58 | case 1: 59 | p->hash_width = 1; 60 | p->lz_mode = 2; 61 | p->bt_size = 0; 62 | p->hash_bits ++; 63 | break; 64 | case 2: 65 | p->hash_width = 8; 66 | p->lz_mode = 2; 67 | p->bt_size = 0; 68 | p->good_len = 24; 69 | p->hash_bits --; 70 | break; 71 | case 3: 72 | p->hash_width = 2; 73 | p->lz_mode = 3; 74 | p->bt_size = 0; 75 | p->good_len = 16; 76 | p->hash_bits ++; 77 | break; 78 | case 4: 79 | p->hash_width = 8; 80 | p->lz_mode = 3; 81 | p->bt_size = 0; 82 | p->good_len = 24; 83 | p->hash_bits --; 84 | break; 85 | case 5: 86 | p->lz_mode = 3; 87 | p->good_len = 48; 88 | p->bt_cyc = 32; 89 | //p->bt_size = p->dict_size; 90 | p->hash_width = 0; 91 | break; 92 | } 93 | 94 | if (p->bt_size == p->dict_size) { 95 | p->hash_width = 0; 96 | } 97 | } 98 | 99 | 
uint64_t CSCEnc_EstMemUsage(const CSCProps *p) 100 | { 101 | uint64_t ret = 0; 102 | ret += p->dict_size; 103 | ret += p->csc_blocksize * 2; 104 | if (p->bt_size) 105 | ret += ((1 << p->bt_hash_bits) + 2 * p->bt_size) * sizeof(uint32_t); 106 | if (p->hash_width) 107 | ret += (p->hash_width * (1 << p->hash_bits)) * sizeof(uint32_t); 108 | ret += 80 * KB *sizeof(uint32_t); 109 | ret += 256 * 256 * sizeof(uint32_t) * 2; 110 | ret += 2 * MB; 111 | return ret; 112 | } 113 | 114 | CSCEncHandle CSCEnc_Create(const CSCProps *props, 115 | ISeqOutStream *outstream, 116 | ISzAlloc *alloc) 117 | { 118 | if (alloc == NULL) { 119 | alloc = default_alloc; 120 | } 121 | CSCEncInstance *csc = (CSCEncInstance *)alloc->Alloc(alloc, sizeof(CSCEncInstance)); 122 | 123 | csc->io = (MemIO *)alloc->Alloc(alloc, sizeof(MemIO)); 124 | csc->io->Init(outstream, props->csc_blocksize, alloc); 125 | csc->raw_blocksize = props->raw_blocksize; 126 | csc->encoder = (CSCEncoder *)alloc->Alloc(alloc, sizeof(CSCEncoder)); 127 | csc->alloc = alloc; 128 | if (csc->encoder->Init(props, csc->io, alloc) < 0) { 129 | CSCEnc_Destroy((void *)csc); 130 | return NULL; 131 | } else 132 | return (void*)csc; 133 | } 134 | 135 | void CSCEnc_Destroy(CSCEncHandle p) 136 | { 137 | CSCEncInstance *csc = (CSCEncInstance *)p; 138 | csc->encoder->Destroy(); 139 | ISzAlloc *alloc = csc->alloc; 140 | alloc->Free(alloc, csc->encoder); 141 | alloc->Free(alloc, csc->io); 142 | alloc->Free(alloc, csc); 143 | } 144 | 145 | void CSCEnc_WriteProperties(const CSCProps *props, uint8_t *s, int full) 146 | { 147 | (void)full; 148 | s[0] = ((props->dict_size >> 24) & 0xff); 149 | s[1] = ((props->dict_size >> 16) & 0xff); 150 | s[2] = ((props->dict_size >> 8) & 0xff); 151 | s[3] = ((props->dict_size) & 0xff); 152 | s[4] = ((props->csc_blocksize >> 16) & 0xff); 153 | s[5] = ((props->csc_blocksize >> 8) & 0xff); 154 | s[6] = ((props->csc_blocksize) & 0xff); 155 | s[7] = ((props->raw_blocksize >> 16) & 0xff); 156 | s[8] = 
((props->raw_blocksize >> 8) & 0xff); 157 | s[9] = ((props->raw_blocksize) & 0xff); 158 | } 159 | 160 | int CSCEnc_Encode(CSCEncHandle p, 161 | ISeqInStream *is, 162 | ICompressProgress *progress) 163 | { 164 | int ret = 0; 165 | CSCEncInstance *csc = (CSCEncInstance *)p; 166 | uint8_t *buf = (uint8_t *)csc->alloc->Alloc(csc->alloc, csc->raw_blocksize); 167 | uint64_t insize = 0; 168 | 169 | for(;;) { 170 | size_t size = csc->raw_blocksize; 171 | ret = is->Read(is, buf, &size); 172 | if (ret >= 0 && size) { 173 | insize += size; 174 | ret = 0; 175 | try { 176 | csc->encoder->Compress(buf, size); 177 | } catch (int errcode) { 178 | ret = errcode; 179 | } 180 | if (progress) 181 | progress->Progress(progress, insize, csc->encoder->GetCompressedSize()); 182 | } else if (ret < 0) { 183 | ret = READ_ERROR; 184 | } 185 | 186 | if (ret < 0 || size == 0) 187 | break; 188 | } 189 | csc->alloc->Free(csc->alloc, buf); 190 | return ret; 191 | } 192 | 193 | int CSCEnc_Encode_Flush(CSCEncHandle p) 194 | { 195 | CSCEncInstance *csc = (CSCEncInstance *)p; 196 | try { 197 | csc->encoder->WriteEOF(); 198 | csc->encoder->Flush(); 199 | } catch (int errcode) { 200 | return errcode; 201 | } 202 | return 0; 203 | } 204 | 205 | -------------------------------------------------------------------------------- /src/libcsc/csc_enc.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_ENC_H_ 2 | #define _CSC_ENC_H_ 3 | #include 4 | 5 | EXTERN_C_BEGIN 6 | 7 | 8 | // set default parameters with dict_size and level 9 | // further parameter changes must be after this call 10 | // level should be 1 to 4 11 | void CSCEncProps_Init(CSCProps *p, uint32_t dict_size = 64000000, int level = 2); 12 | 13 | void CSCEnc_WriteProperties(const CSCProps *props, uint8_t *stream, int full); 14 | 15 | uint64_t CSCEnc_EstMemUsage(const CSCProps *props); 16 | 17 | typedef void * CSCEncHandle; 18 | 19 | // alloc can be NULL, so default malloc/free will be used 20 | 
CSCEncHandle CSCEnc_Create(const CSCProps *props, 21 | ISeqOutStream *outstream, 22 | ISzAlloc *alloc); 23 | 24 | void CSCEnc_Destroy(CSCEncHandle p); 25 | 26 | int CSCEnc_Encode(CSCEncHandle p, 27 | ISeqInStream *instream, 28 | ICompressProgress *progress); 29 | 30 | int CSCEnc_Encode_Flush(CSCEncHandle p); 31 | 32 | /* 33 | int CSCEnc_SimpleEncode(uint8_t *dest, 34 | size_t *destLen, 35 | const uint8_t *src, 36 | size_t srcLen, 37 | ICompressProgress *progress, 38 | ISzAlloc *alloc); 39 | */ 40 | 41 | EXTERN_C_END 42 | 43 | #endif 44 | 45 | -------------------------------------------------------------------------------- /src/libcsc/csc_encoder_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int CSCEncoder::Init(const CSCProps *p, MemIO *io, ISzAlloc *alloc) 6 | { 7 | alloc_ = alloc; 8 | fixed_datatype_=DT_NONE; 9 | 10 | rawblock_limit_ = p->raw_blocksize; 11 | 12 | analyzer_.Init(); 13 | filters_.Init(alloc_); 14 | coder_.Init(io, alloc_); 15 | 16 | if (model_.Init(&coder_, alloc_) < 0) 17 | return -1; 18 | 19 | p_ = *p; 20 | if (lz_.Init(p, &model_, alloc_) < 0) { 21 | filters_.Destroy(); 22 | coder_.Destroy(); 23 | model_.Destroy(); 24 | return -1; 25 | } 26 | 27 | if (p->DLTFilter + p->EXEFilter + p->TXTFilter == 0) { 28 | use_filters_ = false; 29 | } else { 30 | use_filters_ = true; 31 | } 32 | return 0; 33 | } 34 | 35 | void CSCEncoder::compress_block(uint8_t *src,uint32_t size, uint32_t type) 36 | { 37 | if (size == 0) 38 | return; 39 | 40 | uint8_t cur_lz_mode = p_.lz_mode; 41 | 42 | if (type == DT_NORMAL) { 43 | model_.EncodeInt(type); 44 | lz_.EncodeNormal(src, size, cur_lz_mode); 45 | } else if (type == DT_EXE) { 46 | model_.EncodeInt(type); 47 | filters_.Forward_E89(src, size); 48 | lz_.EncodeNormal(src, size, cur_lz_mode); 49 | } else if (type == DT_ENGTXT) { 50 | if (filters_.Foward_Dict(src, size)) { 51 | model_.EncodeInt(type); 52 | model_.EncodeInt(size); 53 
| } else 54 | model_.EncodeInt(DT_NORMAL); 55 | lz_.EncodeNormal(src, size, cur_lz_mode); 56 | } else if (type == DT_FAST) { 57 | // Now actually disabled fast mode 58 | model_.EncodeInt(DT_NORMAL); 59 | lz_.EncodeNormal(src, size, cur_lz_mode); 60 | } else if (type == DT_BAD) { 61 | model_.EncodeInt(type); 62 | lz_.EncodeNormal(src, size, 5); 63 | model_.CompressBad(src, size); 64 | } else if (type == DT_ENTROPY) { 65 | model_.EncodeInt(type); 66 | lz_.EncodeNormal(src, size, 5); 67 | model_.CompressLiterals(src, size); 68 | } else if (type >= DT_DLT && type < DT_DLT + DLT_CHANNEL_MAX) { 69 | uint32_t chnNum = DltIndex[type - DT_DLT]; 70 | model_.EncodeInt(type); 71 | lz_.EncodeNormal(src, size, 5); 72 | if (0) { 73 | FILE *f = fopen("dlt.dat", "ab"); 74 | fwrite(src, 1, size, f); 75 | fclose(f); 76 | } 77 | filters_.Forward_Delta(src, size, chnNum); 78 | model_.CompressRLE(src,size); 79 | //model_.CompressBad(src, size); 80 | } else { 81 | printf("Bad data type:%d\n", type); 82 | } 83 | } 84 | 85 | void CSCEncoder::Compress(uint8_t *src,uint32_t size) 86 | { 87 | uint32_t last_type, this_type; 88 | uint32_t last_begin, last_size; 89 | uint32_t cur_block_size; 90 | uint32_t bpb; 91 | 92 | last_begin = last_size=0; 93 | last_type = DT_NORMAL; 94 | 95 | for(uint32_t i = 0; i < size; ) { 96 | cur_block_size = MIN(MinBlockSize, size - i); 97 | 98 | if (use_filters_) { 99 | if (fixed_datatype_ == DT_NONE) 100 | this_type = analyzer_.Analyze(src + i, cur_block_size, &bpb); 101 | else 102 | this_type=fixed_datatype_; 103 | } else 104 | this_type=DT_NORMAL; 105 | 106 | if (this_type == DT_SKIP) 107 | this_type = last_type; 108 | 109 | if (this_type != DT_NORMAL) { 110 | if (this_type == DT_EXE && p_.EXEFilter==0) 111 | this_type = DT_NORMAL; 112 | else if (this_type == DT_ENGTXT && p_.TXTFilter==0) 113 | this_type = DT_NORMAL; 114 | else if (this_type >= DT_DLT && p_.DLTFilter==0) 115 | this_type = DT_NORMAL; 116 | } 117 | 118 | if (this_type >= DT_DLT 119 | && 
analyzer_.GetDltBpb(src + i, cur_block_size, DltIndex[this_type - DT_DLT]) >= bpb * 0.95) { 120 | this_type = DT_NORMAL; 121 | } 122 | 123 | if (this_type >= DT_NO_LZ) { 124 | if (lz_.IsDuplicateBlock(src + i, cur_block_size)) 125 | this_type = DT_NORMAL; 126 | } 127 | 128 | if (last_type != this_type || last_size + cur_block_size > rawblock_limit_) { 129 | if (last_size) { 130 | compress_block(src + last_begin, last_size, last_type); 131 | model_.EncodeInt(0); 132 | } 133 | last_begin = i; 134 | last_size = 0; 135 | } 136 | 137 | last_type = this_type; 138 | last_size += cur_block_size; 139 | i += cur_block_size; 140 | } 141 | if (last_size) { 142 | compress_block(src + last_begin, last_size, last_type); 143 | model_.EncodeInt(1); 144 | coder_.Flush(); 145 | } 146 | } 147 | 148 | 149 | void CSCEncoder::Flush() 150 | { 151 | coder_.Flush(); 152 | } 153 | 154 | void CSCEncoder::WriteEOF() 155 | { 156 | model_.EncodeInt(SIG_EOF); 157 | } 158 | 159 | 160 | void CSCEncoder::CheckFileType(uint8_t *src, uint32_t size) 161 | { 162 | fixed_datatype_=DT_NONE; 163 | return; 164 | } 165 | 166 | void CSCEncoder::Destroy() 167 | { 168 | coder_.Destroy(); 169 | lz_.Destroy(); 170 | model_.Destroy(); 171 | filters_.Destroy(); 172 | } 173 | 174 | int64_t CSCEncoder::GetCompressedSize() 175 | { 176 | return (coder_.outsize_ + coder_.rc_size_ + coder_.bc_size_); 177 | } 178 | 179 | -------------------------------------------------------------------------------- /src/libcsc/csc_encoder_main.h: -------------------------------------------------------------------------------- 1 | #ifndef _CCSC_H 2 | #define _CCSC_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | class MemIO; 11 | 12 | class CSCEncoder 13 | { 14 | public: 15 | int Init(const CSCProps *p, MemIO *io, ISzAlloc *alloc); 16 | 17 | 18 | void WriteEOF(); 19 | //Should be called when finished compression of one part. 
20 | 21 | void Flush(); 22 | //Should be called when finished the whole compression. 23 | 24 | void Destroy(); 25 | 26 | void Compress(uint8_t *src, uint32_t size); 27 | 28 | int Decompress(uint8_t *src, uint32_t *size); 29 | //*size==0 means meets the EOF in raw stream. 30 | 31 | void CheckFileType(uint8_t *src, uint32_t size); 32 | //Should be called before compress a file.src points 33 | //to first several bytes of file. 34 | 35 | int64_t GetCompressedSize(); 36 | //Get current compressed size. 37 | 38 | 39 | private: 40 | ISzAlloc *alloc_; 41 | uint32_t fixed_datatype_; 42 | CSCProps p_; 43 | 44 | Filters filters_; 45 | Coder coder_; 46 | Model model_; 47 | LZ lz_; 48 | Analyzer analyzer_; 49 | 50 | uint32_t rawblock_limit_; 51 | //This determines how much maximumly the CSCEncoder:Decompress can decompress 52 | // in one time. 53 | 54 | bool use_filters_; 55 | void compress_block(uint8_t *src,uint32_t size,uint32_t type); 56 | //compress the buffer and treat them in one type.It's called after analyze the data. 
57 | 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /src/libcsc/csc_filters.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | const uint32_t wordNum=123; 7 | 8 | uint8_t wordList[wordNum][8]= 9 | { 10 | "", 11 | "ac","ad","ai","al","am", 12 | "an","ar","as","at","ea", 13 | "ec","ed","ee","el","en", 14 | "er","es","et","id","ie", 15 | "ig","il","in","io","is", 16 | "it","of","ol","on","oo", 17 | "or","os","ou","ow","ul", 18 | "un","ur","us","ba","be", 19 | "ca","ce","co","ch","de", 20 | "di","ge","gh","ha","he", 21 | "hi","ho","ra","re","ri", 22 | "ro","rs","la","le","li", 23 | "lo","ld","ll","ly","se", 24 | "si","so","sh","ss","st", 25 | "ma","me","mi","ne","nc", 26 | "nd","ng","nt","pa","pe", 27 | "ta","te","ti","to","th", 28 | "tr","wa","ve", 29 | "all","and","but","dow", 30 | "for","had","hav","her", 31 | "him","his","man","mor", 32 | "not","now","one","out", 33 | "she","the","was","wer", 34 | "whi","whe","wit","you", 35 | "any","are", 36 | "that","said","with","have", 37 | "this","from","were","tion", 38 | //================== 39 | /* 40 | "the", "th", "he", "in", "er", "an", 41 | "on", "re", "tion", "and", "ion", "or", 42 | "ti", "at", "te", "en", "es", "ing", 43 | "al", "is", "ar", "nd", "st", "quot", 44 | "tio", "ed", "nt", "ent", "it", "of", 45 | "le", "ri", "to", "ng", "atio", "io", 46 | "ic", "me", "as", "ter", "ati", "se", 47 | "co", "ra", "de", "ve", "ro", "quo", 48 | "uot", "ate", "ot", "li", "la", "om", 49 | "ha", "ne", "ce", "ea", "for", "si", 50 | "ta", "ma", "ou", "hi", "ment", "ll", 51 | "am", "el", "con", "ca", "ch", "her", 52 | "ver", "all", "us", "ers", "ther", "ns", 53 | "com", "na", "tr", "qu", "that", "ist", 54 | "ge", "di", "ur", "with", "ons", "ted", 55 | "be", "ec", "men", "ere", "res", "ia", 56 | "ni", "ica", "ol", "ac", "il", "est", 57 | "ct", "rs", "nce", "amp", "mp", "sion", 58 
| "pe", "et", "id", "nc", "sta", "ie", 59 | "age", "his", "tha", "rt", "fo", "tor", 60 | "ly", "ive", 61 | "the", "and", "ing", "tion", "that", "with", 62 | "hat", "ion", "her", "ther", "atio", "ent", 63 | "for", "ith", "ight", "tio", "tha", "his", 64 | "thou", "all", "here", "ter", "ere", "ough", 65 | "wit", "ver", "res", "ment", "ati", "ght", 66 | "igh", "from", "thi", "ate", "ess", "ould", 67 | "hou", "this", "nce", "not", "ers", "heir", 68 | "ear", "our", "ting", "thin", "est", "rea", 69 | "sion", "ave", "tho", "hing", "hich", "pro", 70 | "ect", "thei", "ill", "ence", "ound", "they", 71 | "con", "ted", "one", "com", "form", "what", 72 | "ring", "ions", "ever", "you", "comp", "pres", 73 | "reat", "will", "whic", "are", "eave", "ugh", 74 | "int", "eve", "hin", "ning", "ons", "ore", 75 | "have", "wor", "work", "ive", "said", "men", 76 | "rom", "ain", "houg", "ine", "othe", "nte", 77 | "aven", "hen", "oun", "eat", "lect", "oug", 78 | "able", "age", "nter", "ture", "text", "per", 79 | "rese", "ove", "use", "ven", "them", "she", 80 | "ble", "ught", "more", "ous", "und", "out", 81 | "nder", "ese", 82 | */ 83 | }; 84 | 85 | 86 | 87 | void Filters::MakeWordTree() 88 | { 89 | uint32_t i,j; 90 | uint32_t treePos; 91 | uint8_t symbolIndex=0x82; 92 | 93 | nodeMum=1; 94 | memset(wordTree,0,sizeof(wordTree)); 95 | for (i=1;i 0) { 126 | alloc_->Free(alloc_, m_fltSwapBuf); 127 | } 128 | m_fltSwapBuf = 0; 129 | } 130 | 131 | 132 | void Filters::Forward_Delta(uint8_t *src,uint32_t size,uint32_t chnNum) 133 | { 134 | uint32_t dstPos,i,j; 135 | uint8_t prevByte = 0; 136 | 137 | if (size<512) 138 | return; 139 | 140 | if (m_fltSwapSize < size) { 141 | if (m_fltSwapSize > 0) { 142 | alloc_->Free(alloc_, m_fltSwapBuf); 143 | } 144 | m_fltSwapBuf = (uint8_t*)alloc_->Alloc(alloc_, size); 145 | m_fltSwapSize = size; 146 | } 147 | 148 | memcpy(m_fltSwapBuf, src, size); 149 | 150 | dstPos = 0; 151 | 152 | for (i=0;i0) 180 | free(m_fltSwapBuf); 181 | 182 | 
m_fltSwapBuf=(uint8_t*)malloc(size+(width+1)*channelNum); 183 | m_fltSwapSize=size; 184 | } 185 | 186 | memset(m_fltSwapBuf,0,(width+1)*channelNum); 187 | memcpy(m_fltSwapBuf+(width+1)*channelNum,src,size); 188 | uint8_t *newSrc=m_fltSwapBuf+(width+1)*channelNum; 189 | uint32_t dstPos=0; 190 | 191 | 192 | for (int i=0;i255) 211 | vPredict=255; 212 | if (vPredict<0) 213 | vPredict=0; 214 | src[dstPos++]=vPredict-newSrc[j]; 215 | 216 | totalTest+=abs(newSrc[j]-vPredict); 217 | } 218 | } 219 | 220 | printf("size:%d --- %f\n",size,(float)totalTest/size); 221 | 222 | 223 | } 224 | 225 | void Filters::Inverse_RGB(uint8_t *src,uint32_t size,uint32_t width,uint32_t colorBits) 226 | { 227 | if (size<512) 228 | return; 229 | 230 | if (m_fltSwapSize0) 232 | free(m_fltSwapBuf); 233 | 234 | m_fltSwapBuf=(uint8_t*)malloc(size); 235 | m_fltSwapSize=size; 236 | } 237 | 238 | memcpy(m_fltSwapBuf,src,size); 239 | 240 | int channelNum=colorBits/8; 241 | 242 | } 243 | 244 | 245 | 246 | void Filters::Forward_Audio(uint8_t *src,uint32_t size,uint32_t width,uint32_t colorBits) 247 | { 248 | } 249 | 250 | void Filters::Inverse_Audio(uint8_t *src,uint32_t size,uint32_t width,uint32_t colorBits) 251 | { 252 | } 253 | 254 | */ 255 | 256 | uint32_t Filters::Foward_Dict(uint8_t *src,uint32_t size) 257 | { 258 | if (size < 16384) { 259 | return 0; 260 | } 261 | 262 | if (m_fltSwapSize < size) { 263 | if (m_fltSwapSize > 0) { 264 | alloc_->Free(alloc_, m_fltSwapBuf); 265 | } 266 | 267 | m_fltSwapBuf = (uint8_t*)alloc_->Alloc(alloc_, size); 268 | m_fltSwapSize = size; 269 | } 270 | 271 | uint8_t *dst = m_fltSwapBuf; 272 | uint32_t i, j, treePos = 0; 273 | uint32_t dstSize = 0; 274 | uint32_t idx; 275 | 276 | 277 | for(i=0;im_fltSwapSize-16) { 279 | return 0; 280 | } 281 | 282 | if (src[i]>='a'&& src[i]<='z') { 283 | uint32_t matchSymbol = 0,longestWord = 0; 284 | treePos = 0; 285 | for(j = 0;;) { 286 | idx = src[i+j]-'a'; 287 | if (idx > 25 || wordTree[treePos].next[idx] == 0) { 288 | break; 289 
| } 290 | 291 | treePos = wordTree[treePos].next[idx]; 292 | j++; 293 | if (wordTree[treePos].symbol) { 294 | matchSymbol = wordTree[treePos].symbol; 295 | longestWord = j; 296 | } 297 | } 298 | 299 | if (matchSymbol) { 300 | dst[dstSize++] = matchSymbol; 301 | i+=longestWord; 302 | continue; 303 | } 304 | dst[dstSize++] = src[i]; 305 | i++; 306 | } else { 307 | if (src[i] >= 0x82) { 308 | dst[dstSize++] = 254; 309 | dst[dstSize++] = src[i]; 310 | } else { 311 | dst[dstSize++] = src[i]; 312 | } 313 | treePos = 0; 314 | i++; 315 | } 316 | 317 | } 318 | 319 | for (;i=0x82) { 321 | dst[dstSize++]=254; 322 | dst[dstSize++]=src[i]; 323 | } else { 324 | dst[dstSize++]=src[i]; 325 | } 326 | } 327 | 328 | if (dstSize > size * 0.82) { 329 | return 0; 330 | } 331 | 332 | memset(dst + dstSize, 0x20, size - dstSize); 333 | memcpy(src, dst, size); 334 | return 1; 335 | } 336 | 337 | void Filters::Inverse_Dict(uint8_t *src,uint32_t size) 338 | { 339 | 340 | if (m_fltSwapSize 0) { 342 | alloc_->Free(alloc_, m_fltSwapBuf); 343 | } 344 | 345 | m_fltSwapBuf = (uint8_t*)alloc_->Alloc(alloc_, size); 346 | m_fltSwapSize = size; 347 | } 348 | 349 | uint8_t *dst=m_fltSwapBuf; 350 | uint32_t i=0,j; 351 | uint32_t dstPos=0,idx; 352 | 353 | while(dstPos < size) { 354 | if (src[i] >= 0x82 && src[i] < maxSymbol) { 355 | idx = wordIndex[src[i]]; 356 | for(j = 0; wordList[idx][j] && dstPos < size; j++) { 357 | dst[dstPos++] = wordList[idx][j]; 358 | } 359 | } else if (src[i] == 254 && (i + 1 < size && src[i+1] >= 0x82)) { 360 | i++; 361 | dst[dstPos++] = src[i]; 362 | } else { 363 | dst[dstPos++] = src[i]; 364 | } 365 | i++; 366 | } 367 | memcpy(src, dst, size); 368 | } 369 | 370 | 371 | void Filters::Inverse_Delta(uint8_t *src,uint32_t size,uint32_t chnNum) 372 | { 373 | uint32_t dstPos,i,j,prevByte; 374 | 375 | if (size<512) 376 | return; 377 | 378 | if (m_fltSwapSize0) { 380 | alloc_->Free(alloc_, m_fltSwapBuf); 381 | } 382 | 383 | m_fltSwapBuf = (uint8_t*)alloc_->Alloc(alloc_, size); 384 | 
m_fltSwapSize=size; 385 | } 386 | 387 | memcpy(m_fltSwapBuf,src,size); 388 | 389 | dstPos = 0; 390 | prevByte = 0; 391 | for (i = 0; i < chnNum; i++) { 392 | for(j = i; j < size;j += chnNum) 393 | { 394 | src[j] = m_fltSwapBuf[dstPos++] + prevByte; 395 | prevByte = src[j]; 396 | } 397 | } 398 | } 399 | 400 | 401 | //void Filters::Forward_Audio4(uint8_t *src,uint32_t size) 402 | //{ 403 | // 404 | // uint32_t dstPos,i,j,prevByte; 405 | // uint32_t chnNum=4; 406 | // 407 | // if (m_fltSwapSize0) 410 | // { 411 | // SAFEFREE(m_fltSwapBuf); 412 | // } 413 | // m_fltSwapBuf=(uint8_t*)malloc(size); 414 | // m_fltSwapSize=size; 415 | // } 416 | // 417 | // memcpy(m_fltSwapBuf,src,size); 418 | // 419 | // dstPos=0; 420 | // prevByte=0; 421 | // for (i=0;i0) 434 | // { 435 | // SAFEFREE(m_fltSwapBuf); 436 | // } 437 | // m_fltSwapBuf=(uint8_t*)malloc(size); 438 | // m_fltSwapSize=size; 439 | // } 440 | // 441 | // memcpy(m_fltSwapBuf,src,size); 442 | // 443 | // SrcData=m_fltSwapBuf; 444 | // DestData=src; 445 | // 446 | // for (int CurChannel=0;CurChannel>3) & 0xff; 461 | // 462 | // 463 | // unsigned int CurByte=SrcData[I]; 464 | // 465 | // PrevDelta=(signed char)(CurByte-PrevByte); 466 | // *DestData++=Predicted-CurByte; 467 | // PrevByte=CurByte; 468 | // 469 | // int D=((signed char)(Predicted-CurByte))<<3; 470 | // 471 | // Dif[0]+=abs(D); 472 | // Dif[1]+=abs(D-D1); 473 | // Dif[2]+=abs(D+D1); 474 | // Dif[3]+=abs(D-D2); 475 | // Dif[4]+=abs(D+D2); 476 | // Dif[5]+=abs(D-D3); 477 | // Dif[6]+=abs(D+D3); 478 | // 479 | // if ((ByteCount & 0x1f)==0) 480 | // { 481 | // unsigned int MinDif=Dif[0],NumMinDif=0; 482 | // Dif[0]=0; 483 | // for (int J=1;J=-16) K1--; break; 495 | // case 2: if (K1 < 16) K1++; break; 496 | // case 3: if (K2>=-16) K2--; break; 497 | // case 4: if (K2 < 16) K2++; break; 498 | // case 5: if (K3>=-16) K3--; break; 499 | // case 6: if (K3 < 16) K3++; break; 500 | // } 501 | // } 502 | // } 503 | // }*/ 504 | //} 505 | 506 | 507 | 508 | void 
Filters::E89init( void ) 509 | { 510 | cs = 0xFF; 511 | x0 = x1 = 0; 512 | i = 0; 513 | k = 5; 514 | } 515 | 516 | int32_t Filters::E89cache_byte( int32_t c ) 517 | { 518 | int32_t d = cs&0x80 ? -1 : (uint8_t)(x1); 519 | x1>>=8; 520 | x1|=(x0<<24); 521 | x0>>=8; 522 | x0|=(c <<24); 523 | cs<<=1; i++; 524 | return d; 525 | } 526 | 527 | uint32_t Filters::E89xswap( uint32_t x ) 528 | { 529 | x<<=7; 530 | return (x>>24)|((uint8_t)(x>>16)<<8)|((uint8_t)(x>>8)<<16)|((uint8_t)(x)<<(24-7)); 531 | } 532 | 533 | uint32_t Filters::E89yswap( uint32_t x ) 534 | { 535 | x = ((uint8_t)(x>>24)<<7)|((uint8_t)(x>>16)<<8)|((uint8_t)(x>>8)<<16)|(x<<24); 536 | return x>>7; 537 | } 538 | 539 | 540 | int32_t Filters::E89forward( int32_t c ) 541 | { 542 | uint32_t x; 543 | if( i>=k ) { 544 | if( (x1&0xFE000000)==0xE8000000 ) { 545 | k = i+4; 546 | x= x0 - 0xFF000000; 547 | if( x<0x02000000 ) { 548 | x = (x+i) & 0x01FFFFFF; 549 | x = E89xswap(x); 550 | x0 = x + 0xFF000000; 551 | } 552 | } 553 | } 554 | return E89cache_byte(c); 555 | } 556 | 557 | int32_t Filters::E89inverse( int32_t c ) 558 | { 559 | uint32_t x; 560 | if( i>=k ) { 561 | if( (x1&0xFE000000)==0xE8000000 ) { 562 | k = i+4; 563 | x = x0 - 0xFF000000; 564 | if( x<0x02000000 ) { 565 | x = E89yswap(x); 566 | x = (x-i) & 0x01FFFFFF; 567 | x0 = x + 0xFF000000; 568 | } 569 | } 570 | } 571 | return E89cache_byte(c); 572 | } 573 | 574 | int32_t Filters::E89flush() 575 | { 576 | int32_t d; 577 | if( cs!=0xFF ) { 578 | while( cs&0x80 ) E89cache_byte(0),++cs; 579 | d = E89cache_byte(0); ++cs; 580 | return d; 581 | } else { 582 | E89init(); 583 | return -1; 584 | } 585 | } 586 | 587 | 588 | void Filters::Forward_E89(uint8_t* src, uint32_t size) 589 | { 590 | uint32_t i, j; 591 | int32_t c; 592 | E89init(); 593 | for(i=0,j=0; i=0 ) src[j++]=c; 596 | } 597 | while( (c=E89flush())>=0 ) src[j++]=c; 598 | } 599 | 600 | void Filters::Inverse_E89(uint8_t* src, uint32_t size) 601 | { 602 | uint32_t i, j; 603 | int32_t c; 604 | E89init(); 605 | 
for(i=0,j=0; i=0 ) src[j++] = c; 608 | } 609 | while( (c=E89flush())>=0 ) src[j++]=c; 610 | } 611 | -------------------------------------------------------------------------------- /src/libcsc/csc_filters.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_FILTERS_H_ 2 | #define _CSC_FILTERS_H_ 3 | 4 | #include 5 | #define MAX_WORDTREE_NODE_NUM 300 //Enough now! 6 | 7 | class Filters 8 | { 9 | public: 10 | void Forward_E89( uint8_t* src, uint32_t size); 11 | void Inverse_E89( uint8_t* src, uint32_t size); 12 | void Init(ISzAlloc *alloc); 13 | void Destroy(); 14 | 15 | uint32_t Foward_Dict(uint8_t *src,uint32_t size); 16 | void Inverse_Dict(uint8_t *src,uint32_t size); 17 | 18 | void Forward_Delta(uint8_t *src,uint32_t size,uint32_t chnNum); 19 | void Inverse_Delta(uint8_t *src,uint32_t size,uint32_t chnNum); 20 | //void Forward_RGB(uint8_t *src,uint32_t size,uint32_t width,uint32_t colorBits); 21 | //void Inverse_RGB(uint8_t *src,uint32_t size,uint32_t width,uint32_t colorBits); 22 | //void Forward_Audio(uint8_t *src,uint32_t size,uint32_t wavChannels,uint32_t sampleBits); 23 | //void Inverse_Audio(uint8_t *src,uint32_t size,uint32_t wavChannels,uint32_t sampleBits); 24 | //void Forward_Audio4(uint8_t *src,uint32_t size); 25 | //void Inverse_Audio4(uint8_t *src,uint32_t size); 26 | 27 | 28 | private: 29 | ISzAlloc *alloc_; 30 | 31 | typedef struct { 32 | uint32_t next[26]; 33 | uint8_t symbol; 34 | } CTreeNode; 35 | CTreeNode wordTree[MAX_WORDTREE_NODE_NUM]; 36 | uint32_t nodeMum; 37 | uint8_t maxSymbol; 38 | //Used for DICT transformer. Words are stored in trees. 39 | 40 | uint32_t wordIndex[256]; 41 | //Used for DICT untransformer.choose words by symbols. 42 | void MakeWordTree(); //Init the DICT transformer 43 | 44 | //Swap buffer for all filters. 
45 | uint8_t *m_fltSwapBuf; 46 | uint32_t m_fltSwapSize; 47 | 48 | /* 49 | Below is Shelwien's E89 filter 50 | */ 51 | void E89init(void); 52 | int32_t E89cache_byte(int32_t c); 53 | uint32_t E89xswap(uint32_t x); 54 | uint32_t E89yswap(uint32_t x); 55 | int32_t E89forward(int32_t c); 56 | int32_t E89inverse(int32_t c); 57 | int32_t E89flush(void); 58 | 59 | uint32_t x0,x1; 60 | uint32_t i,k; 61 | uint8_t cs; // cache size, F8 - 5 bytes 62 | }; 63 | 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /src/libcsc/csc_lz.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int LZ::Init(const CSCProps *p, Model *model, ISzAlloc *alloc) 11 | { 12 | model_ = model; 13 | alloc_ = alloc; 14 | 15 | wnd_size_ = p->dict_size; 16 | 17 | if(wnd_size_ < MinDictSize) wnd_size_ = MinDictSize; 18 | if(wnd_size_ > MaxDictSize) wnd_size_ = MaxDictSize; 19 | 20 | wnd_ = (uint8_t*)alloc_->Alloc(alloc_, wnd_size_ + 8); 21 | if (!wnd_) 22 | goto FREE_ON_ERROR; 23 | 24 | if (mf_.Init(wnd_, wnd_size_, p->bt_size, p->bt_hash_bits, 25 | p->hash_width, p->hash_bits, alloc_)) 26 | goto FREE_ON_ERROR; 27 | 28 | good_len_ = p->good_len; 29 | bt_cyc_ = p->bt_cyc; 30 | ht_cyc_ = p->hash_width; 31 | mf_.SetArg(bt_cyc_, ht_cyc_, 1, good_len_); 32 | appt_ = (MFUnit *)alloc_->Alloc(alloc_, sizeof(MFUnit) * good_len_ + 1); 33 | 34 | Reset(); 35 | return 0; 36 | 37 | FREE_ON_ERROR: 38 | alloc_->Free(alloc_, wnd_); 39 | return -1; 40 | } 41 | 42 | 43 | void LZ::Reset(void) 44 | { 45 | wnd_curpos_=0; 46 | rep_dist_[0] = 47 | rep_dist_[1] = 48 | rep_dist_[2] = 49 | rep_dist_[3] = wnd_size_; 50 | memset(wnd_, 0, wnd_size_ + 8); 51 | model_->Reset(); 52 | } 53 | 54 | void LZ::Destroy(void) 55 | { 56 | mf_.Destroy(); 57 | alloc_->Free(alloc_, wnd_); 58 | alloc_->Free(alloc_, appt_); 59 | } 60 | 61 | void 
LZ::EncodeNormal(uint8_t *src, uint32_t size, uint32_t lz_mode) 62 | { 63 | for(uint32_t i = 0; i < size; ) { 64 | uint32_t cur_block_size; 65 | cur_block_size = MIN(wnd_size_ - wnd_curpos_, size - i); 66 | cur_block_size = MIN(cur_block_size, MinBlockSize); 67 | memcpy(wnd_ + wnd_curpos_, src + i, cur_block_size); 68 | if (lz_mode == 1) // fast, with no lazy parser 69 | compress_normal(cur_block_size, false); 70 | else if (lz_mode == 2) 71 | compress_normal(cur_block_size, true); 72 | else if (lz_mode == 3) 73 | compress_advanced(cur_block_size); 74 | /* 75 | else if (lz_mode == 4) { 76 | mf_.SetArg(1, 2, 0, good_len_); 77 | compress_normal(cur_block_size, false); 78 | mf_.SetArg(bt_cyc_, ht_cyc_, 1, good_len_); 79 | 80 | }*/ else if (lz_mode == 5) { 81 | // only copy the block data to window and put it into MF 82 | // No encoded output 83 | mf_.SetArg(1, 1, 0, good_len_); 84 | compress_mf_skip(cur_block_size); 85 | mf_.SetArg(bt_cyc_, ht_cyc_, 1, good_len_); 86 | } else { 87 | printf("Error!"); 88 | exit(0); 89 | } 90 | 91 | if (wnd_curpos_ >= wnd_size_) 92 | wnd_curpos_ = 0; 93 | i += cur_block_size; 94 | } 95 | if (lz_mode != 5) { 96 | // for lz_mode == 5, Encode nothing, non terminator 97 | model_->EncodeMatch(64, 0); 98 | } 99 | return; 100 | } 101 | 102 | bool LZ::IsDuplicateBlock(uint8_t *src, uint32_t size) 103 | { 104 | uint32_t mc = 0; 105 | for(uint32_t i = 0; i < size; i ++) 106 | if (mf_.TestFind(wnd_curpos_, src + i, size - i)) { 107 | mc++; 108 | if (mc) 109 | return true; 110 | } 111 | return false; 112 | } 113 | 114 | void LZ::DuplicateInsert(uint8_t *src,uint32_t size) 115 | { 116 | for(uint32_t i = 0; i < size; ) { 117 | uint32_t cur_block_size = MIN(wnd_size_ - wnd_curpos_, size - i); 118 | cur_block_size = MIN(cur_block_size, MinBlockSize); 119 | memcpy(wnd_ + wnd_curpos_, src + i, cur_block_size); 120 | wnd_curpos_ += cur_block_size; 121 | if (wnd_curpos_ >= wnd_size_) wnd_curpos_=0; 122 | i += cur_block_size; 123 | } 124 | return; 125 | } 
126 | 127 | void LZ::encode_nonlit(MFUnit u) 128 | { 129 | if (u.dist <= 4) { 130 | if (u.len == 1 && u.dist == 1) 131 | model_->EncodeRep0Len1(); 132 | else { 133 | model_->EncodeRepDistMatch(u.dist - 1, u.len - 2); 134 | uint32_t dist = rep_dist_[u.dist - 1]; 135 | switch (u.dist) { 136 | case 4: 137 | rep_dist_[3] = rep_dist_[2]; 138 | case 3: 139 | rep_dist_[2] = rep_dist_[1]; 140 | case 2: 141 | rep_dist_[1] = rep_dist_[0]; 142 | case 1: 143 | rep_dist_[0] = dist; 144 | break; 145 | } 146 | } 147 | } else { 148 | model_->EncodeMatch(u.dist - 5, u.len - 2); 149 | rep_dist_[3] = rep_dist_[2]; 150 | rep_dist_[2] = rep_dist_[1]; 151 | rep_dist_[1] = rep_dist_[0]; 152 | rep_dist_[0] = u.dist - 4; 153 | } 154 | } 155 | 156 | void LZ::compress_normal(uint32_t size, bool lazy) 157 | { 158 | MFUnit u1, u2; 159 | bool got_u1 = false; 160 | for(uint32_t i = 0; i < size; ) { 161 | if (!got_u1) 162 | u1 = mf_.FindMatch(rep_dist_, wnd_curpos_, size - i); 163 | 164 | if (u1.len == 1 || !lazy || u1.len >= good_len_) { 165 | if (u1.dist == 0) 166 | model_->EncodeLiteral(wnd_[wnd_curpos_]);//,wnd_curpos_-1 167 | else 168 | encode_nonlit(u1); 169 | mf_.SlidePos(wnd_curpos_, u1.len, size - i); 170 | i += u1.len; wnd_curpos_ += u1.len; 171 | if (u1.dist) { 172 | model_->SetLiteralCtx(wnd_[wnd_curpos_ - 1]); 173 | //if (u1.dist <= 4 && rep_dist_[u1.dist - 1] < 20) 174 | // model_->SetLiteralCtx(wnd_[wnd_curpos_ - rep_dist_[u1.dist - 1]]); 175 | } 176 | got_u1 = false; 177 | continue; 178 | } 179 | 180 | u2 = mf_.FindMatch(rep_dist_, wnd_curpos_ + 1, size - i - 1); 181 | if (mf_.SecondMatchBetter(u1, u2)) { 182 | // choose literal output 183 | model_->EncodeLiteral(wnd_[wnd_curpos_]);//,wnd_curpos_-1 184 | mf_.SlidePos(wnd_curpos_, 1, size - i - 1); 185 | i++; wnd_curpos_++; 186 | u1 = u2; 187 | got_u1 = true; 188 | } else { 189 | encode_nonlit(u1); 190 | mf_.SlidePos(wnd_curpos_ + 1, u1.len - 1, size - i - 1); 191 | i += u1.len; wnd_curpos_ += u1.len; 192 | 
// Encodes the next `size` bytes of the window (starting at wnd_curpos_) with
// price-based parsing.
//
// For every start position the match finder returns, in appt_:
//   appt_[0]       the longest candidate (len/dist),
//   appt_[len]     the price and dist of the cheapest candidate of that length.
// If nothing matched, a literal is emitted directly.  Otherwise a forward
// dynamic-programming run is started over apunits_[]: apunits_[k] describes the
// cheapest known way to reach offset k from the start of the run.  When the run
// ends, ap_backward() walks the back_pos links and emits the chosen packets.
//
// Packet kinds stored in APUnit::dist:
//   0      literal
//   1..4   repeat-distance match using rep_dist[dist - 1]
//          (dist == 1 with a step of length 1 is the "rep0 len1" packet)
//   > 4    normal match whose real distance is dist - 4
//
// size: number of bytes to encode; also passed (minus the bytes already
//       consumed) to the match finder as the match length limit.
void LZ::compress_advanced(uint32_t size)
{
    uint32_t apend = 0, apcur = 0;

    for(uint32_t i = 0; i < size; ) {
        mf_.FindMatchWithPrice(model_, model_->state_, 
                appt_, rep_dist_, wnd_curpos_, size - i);
        if (appt_[0].dist == 0) {
            // No candidate at all (appt_[0] is the literal placeholder).
            model_->EncodeLiteral(wnd_[wnd_curpos_]);
            mf_.SlidePos(wnd_curpos_, 1, size - i);
            i++; wnd_curpos_++;
        } else {
            // Start a DP run.  Node 0 is the current encoder state; apend is
            // one past the furthest node whose price has been initialised.
            apcur = 0;
            apend = 1;
            apunits_[0].price = 0;
            apunits_[0].back_pos = 0;
            apunits_[0].rep_dist[0] = rep_dist_[0];
            apunits_[0].rep_dist[1] = rep_dist_[1];
            apunits_[0].rep_dist[2] = rep_dist_[2];
            apunits_[0].rep_dist[3] = rep_dist_[3];
            apunits_[0].state = model_->state_;
            // The run never extends past the apunits_ array or the input.
            uint32_t aplimit = MIN(AP_LIMIT, size - i);
            for(;;) {
                apunits_[apcur].lit = wnd_[wnd_curpos_];
                // fix cur state
                // Node apcur is now final: derive its model state and rep
                // distances from its predecessor l and the packet kind that
                // reaches it.  The state keeps the last three packet kinds,
                // two bits each: 0 literal, 1 match, 2 rep0len1, 3 rep match.
                if (apcur) {
                    int l = apunits_[apcur].back_pos;
                    apunits_[apcur].rep_dist[0] = apunits_[l].rep_dist[0];
                    apunits_[apcur].rep_dist[1] = apunits_[l].rep_dist[1];
                    apunits_[apcur].rep_dist[2] = apunits_[l].rep_dist[2];
                    apunits_[apcur].rep_dist[3] = apunits_[l].rep_dist[3];
                    if (apunits_[apcur].dist == 0) {
                        apunits_[apcur].state = (apunits_[l].state * 4) & 0x3F;
                    } else if (apunits_[apcur].dist <= 4) {
                        uint32_t len = apcur - l;
                        if (len == 1 && apunits_[apcur].dist == 1)
                            apunits_[apcur].state = (apunits_[l].state * 4 + 2) & 0x3F;
                        else {
                            apunits_[apcur].state = (apunits_[l].state * 4 + 3) & 0x3F;
                            // Move the used rep distance to the front.  The
                            // cases fall through on purpose so that every
                            // entry in front of it shifts down by one;
                            // dist == 1 (rep0) needs no reordering.
                            uint32_t tmp = apunits_[apcur].rep_dist[apunits_[apcur].dist - 1];
                            switch (apunits_[apcur].dist) {
                                case 4:
                                    apunits_[apcur].rep_dist[3] = apunits_[apcur].rep_dist[2];
                                case 3:
                                    apunits_[apcur].rep_dist[2] = apunits_[apcur].rep_dist[1];
                                case 2:
                                    apunits_[apcur].rep_dist[1] = apunits_[apcur].rep_dist[0];
                                    apunits_[apcur].rep_dist[0] = tmp;
                                    break;
                            }
                        }
                    } else {
                        // Normal match: its real distance becomes rep0 and the
                        // previous distances shift down by one.
                        apunits_[apcur].state = (apunits_[l].state * 4 + 1) & 0x3F;
                        apunits_[apcur].rep_dist[0] = apunits_[apcur].dist- 4;
                        apunits_[apcur].rep_dist[1] = apunits_[l].rep_dist[0];
                        apunits_[apcur].rep_dist[2] = apunits_[l].rep_dist[1];
                        apunits_[apcur].rep_dist[3] = apunits_[l].rep_dist[2];
                    }

                    // Price the candidates at this position under the state
                    // this node would have.
                    if (apcur < aplimit)
                        mf_.FindMatchWithPrice(model_, apunits_[apcur].state,
                                appt_, apunits_[apcur].rep_dist, wnd_curpos_, size - i - apcur);
                }

                // End of run: the limit was reached, flush the best path.
                if (apcur == aplimit) {
                    ap_backward(apcur);
                    i += apcur;
                    break;
                }

                // End of run: the longest candidate here has length 1 and no
                // pending path reaches beyond this node.  Flush, then emit
                // this byte as a literal.
                if (appt_[0].len == 1 && apcur + 1 == apend) {
                    ap_backward(apcur);
                    model_->EncodeLiteral(apunits_[apcur].lit);
                    i += apcur;
                    mf_.SlidePos(wnd_curpos_, 1, size - i);
                    wnd_curpos_ ++;
                    i++;
                    break;
                }

                // Make sure node apcur + 1 has an initialised (infinite) price.
                if (apcur + 1 >= apend)
                    apunits_[apend++].price = 0xFFFFFFFF;

                // End of run: a match that is long enough (or that reaches the
                // run limit) is taken immediately without pricing.
                if (appt_[0].len >= good_len_ || (appt_[0].len > 1 && appt_[0].len + apcur >= aplimit)) {
                    ap_backward(apcur);
                    i += apcur;
                    encode_nonlit(appt_[0]);
                    mf_.SlidePos(wnd_curpos_, appt_[0].len, size - i);
                    i += appt_[0].len;
                    wnd_curpos_ += appt_[0].len;
                    model_->SetLiteralCtx(wnd_[wnd_curpos_ - 1]);
                    break;
                }

                // Relax apcur -> apcur + 1 with a literal.
                uint32_t lit_ctx = wnd_curpos_? wnd_[wnd_curpos_ - 1] : 0;
                uint32_t cprice = model_->GetLiteralPrice(apunits_[apcur].state, lit_ctx, wnd_[wnd_curpos_]);
                if (cprice + apunits_[apcur].price < apunits_[apcur + 1].price) {
                    apunits_[apcur + 1].dist = 0;
                    apunits_[apcur + 1].back_pos = apcur;
                    apunits_[apcur + 1].price = cprice + apunits_[apcur].price;
                }

                // Relax apcur -> apcur + 1 with rep0len1 (appt_[1].dist is
                // non-zero only when that candidate was found).
                if (appt_[1].dist && appt_[1].price + apunits_[apcur].price < apunits_[apcur + 1].price) {
                    apunits_[apcur + 1].dist = 1;
                    apunits_[apcur + 1].back_pos = apcur;
                    apunits_[apcur + 1].price = appt_[1].price + apunits_[apcur].price;
                }

                // Extend the initialised range to cover the longest candidate.
                uint32_t len = appt_[0].len;
                while (apcur + len >= apend)
                    apunits_[apend++].price = 0xFFFFFFFF;

                // Relax apcur -> apcur + len for every priced match length
                // (appt_[len].dist == 0 marks lengths without a usable match).
                while(len > 1) {
                    if (appt_[len].dist && appt_[len].price + apunits_[apcur].price < apunits_[apcur + len].price) {
                        apunits_[apcur + len].dist = appt_[len].dist;
                        apunits_[apcur + len].back_pos = apcur;
                        apunits_[apcur + len].price = appt_[len].price + apunits_[apcur].price;
                    }
                    len--;
                }
                // Advance to the next node / window position.
                apcur++;
                mf_.SlidePos(wnd_curpos_, 1, size - i - apcur);
                wnd_curpos_++;
            }
        }
    }
}
// LZ77 front end of the encoder: owns the sliding window, drives the match
// finder and feeds literals / matches to the Model.
class LZ
{
public:
    int Init(const CSCProps *p, Model *model, ISzAlloc *alloc);
    void EncodeNormal(uint8_t *src, uint32_t size, uint32_t lz_mode);
    bool IsDuplicateBlock(uint8_t *src,uint32_t size);
    void DuplicateInsert(uint8_t *src,uint32_t size);

    void Reset(void);
    void Destroy(void);
    // Index into wnd_ of the next byte to encode; advanced by the compress_*
    // routines as bytes are consumed.
    uint32_t wnd_curpos_;

private:
    ISzAlloc *alloc_;
    uint8_t  *wnd_;          // sliding window buffer
    uint32_t rep_dist_[4];   // the four repeat distances, [0] most recent
    uint32_t wnd_size_;
    uint32_t curblock_endpos;
    uint32_t good_len_;      // a match at least this long is taken immediately
    uint32_t bt_cyc_;
    uint32_t ht_cyc_;
    Model *model_;

    // New LZ77 Algorithm====================================
    // ============== OPTIMAL ====
    // One node of the price-based parser used by compress_advanced():
    // node k is the cheapest known way to reach offset k of the current run.
    struct APUnit {
        uint32_t dist;        // packet reaching this node: 0 literal,
                              // 1..4 rep index + 1, > 4 match distance + 4
        uint32_t state;       // model state after that packet
        int back_pos;         // predecessor node on the cheapest path
        int next_pos;         // successor node, filled in by ap_backward()
        uint32_t price;       // accumulated price of the cheapest path
        uint32_t lit;         // window byte at this node's position
        uint32_t rep_dist[4]; // repeat distances after reaching this node
    };
    // ===========================
    // Maximum number of positions covered by one parser run.
    static const int AP_LIMIT = 2048;
    APUnit apunits_[AP_LIMIT + 1];

    void compress_normal(uint32_t size, bool lazy);
    void compress_mf_skip(uint32_t size);
    void encode_nonlit(MFUnit u);

    void compress_advanced(uint32_t size);
    // Emits the packets of the cheapest path ending at node idx and copies
    // that node's repeat distances into rep_dist_.
    void ap_backward(int idx);

    MatchFinder mf_;
    // Output buffer of MatchFinder::FindMatchWithPrice(): [0] is the longest
    // candidate, [len] the priced candidate for that match length.
    // NOTE(review): allocation/size is not visible here - presumably done in Init.
    MFUnit *appt_;
};
5 | int MemIO::ReadBlock(uint8_t *buffer, uint32_t &size, int rc1bc0) 6 | { 7 | DataBlock **blist; 8 | blist = rc1bc0 ? &rc_blocks_ : &bc_blocks_; 9 | 10 | if (*blist) { 11 | // already read into buffer 12 | size = (*blist)->size; 13 | memcpy(buffer, (*blist)->buf, size); 14 | DataBlock *tb = (*blist)->next; 15 | alloc_->Free(alloc_, *blist); 16 | (*blist) = tb; 17 | } else { 18 | // read fresh, if meet the other kind of block, keep it in its buffer 19 | for(;;) { 20 | uint8_t fb; 21 | size_t iosize; 22 | iosize = 1; 23 | is_->Read(is_, &fb, &iosize); 24 | if (iosize != 1) 25 | return -1; 26 | 27 | uint32_t cur_bsize; 28 | if ((fb >> 6) & 0x1) { 29 | // full block size 30 | cur_bsize = bsize_; 31 | } else { 32 | // extra 3 bytes denoting size 33 | uint8_t size_bytes[3]; 34 | iosize = 3; 35 | is_->Read(is_, size_bytes, &iosize); 36 | if (iosize != 3) 37 | return -1; 38 | cur_bsize = ((uint32_t)size_bytes[0] << 16) 39 | + (size_bytes[1] << 8) 40 | + size_bytes[2]; 41 | } 42 | 43 | if (!cur_bsize || cur_bsize > bsize_) { 44 | // must be a abnormal stream 45 | return -1; 46 | } 47 | 48 | iosize = size = (int)cur_bsize; 49 | if (((fb >> 7) & 0x1) == rc1bc0) { 50 | // this is the block to read out 51 | is_->Read(is_, buffer, &iosize); 52 | if (iosize != cur_bsize) 53 | return -1; 54 | break; 55 | } else { 56 | // other kind of block, append it to list 57 | // keep looping until meet the block it wants 58 | DataBlock *newblock = (DataBlock *)alloc_->Alloc(alloc_, sizeof(*newblock) + cur_bsize); 59 | newblock->size = cur_bsize; 60 | newblock->next = NULL; 61 | is_->Read(is_, newblock->buf, &iosize); 62 | if (iosize != cur_bsize) { 63 | alloc_->Free(alloc_, newblock); 64 | return -1; 65 | } 66 | 67 | DataBlock dummy; 68 | dummy.next = rc1bc0 ? 
bc_blocks_ : rc_blocks_; 69 | DataBlock *p = &dummy; 70 | while(p->next) p = p->next; 71 | p->next = newblock; 72 | if (rc1bc0) { 73 | bc_blocks_ = dummy.next; 74 | } else { 75 | rc_blocks_ = dummy.next; 76 | } 77 | } 78 | } 79 | } 80 | return (int)size; 81 | } 82 | 83 | int MemIO::WriteBlock(uint8_t *buffer, uint32_t size, int rc1bc0) 84 | { 85 | uint8_t fb = 0; 86 | size_t iosize; 87 | fb |= ((uint8_t)rc1bc0 << 7); 88 | if (size == bsize_) 89 | fb |= (1 << 6); 90 | 91 | iosize = 1; 92 | if (os_->Write(os_, &fb, iosize) != 1) 93 | return -1; 94 | 95 | if (size != bsize_) { 96 | uint8_t size_bytes[3]; 97 | size_bytes[0] = ((size >> 16) & 0xff); 98 | size_bytes[1] = ((size >> 8) & 0xff); 99 | size_bytes[2] = (size & 0xff); 100 | iosize = 3; 101 | if (os_->Write(os_, size_bytes, iosize) != 3) 102 | return -1; 103 | } 104 | iosize = size; 105 | if (iosize && os_->Write(os_, buffer, iosize) != iosize) 106 | return -1; 107 | return size; 108 | } 109 | 110 | void MemIO::Init(void *iostream, uint32_t bsize, ISzAlloc *alloc) 111 | { 112 | ios_ = iostream; 113 | bsize_ = bsize; 114 | rc_blocks_ = NULL; 115 | bc_blocks_ = NULL; 116 | alloc_ = alloc; 117 | } 118 | 119 | 120 | void MemIO::Destroy() { 121 | while (rc_blocks_) { 122 | DataBlock *next = rc_blocks_->next; 123 | alloc_->Free(alloc_, rc_blocks_); 124 | rc_blocks_ = next; 125 | } 126 | 127 | while (bc_blocks_) { 128 | DataBlock *next = bc_blocks_->next; 129 | alloc_->Free(alloc_, bc_blocks_); 130 | bc_blocks_ = next; 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/libcsc/csc_memio.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_MEMIO_H_ 2 | #define _CSC_MEMIO_H_ 3 | #include 4 | 5 | // rc/RC means for range coder 6 | // bc/BC means for bit coder 7 | 8 | class MemIO 9 | { 10 | uint32_t bsize_; 11 | ISzAlloc *alloc_; 12 | 13 | 14 | struct DataBlock { 15 | DataBlock *next; 16 | uint32_t size; 17 | char 
// Hashes the two bytes at p into a 14-bit value (0 .. 0x3FFF).
inline uint32_t HASH2(uint8_t *p)
{
    // memcpy performs the 16-bit load without alignment or aliasing issues.
    uint16_t pair = 0;
    memcpy(&pair, p, sizeof(pair));

    const uint32_t mixed = pair * 65521u;
    return mixed & 0x3FFF;
}
uint32_t HASH3(uint8_t *p) 31 | { 32 | return (*(p) << 8) ^ (*(p + 1) << 5) ^ *(p + 2); 33 | } 34 | 35 | inline uint32_t HASH6(uint8_t *p, uint32_t bits) 36 | { 37 | uint32_t v = 0; 38 | uint16_t v2 = 0; 39 | memcpy(&v, p, 4); 40 | memcpy(&v2, p + 4, 2); 41 | return (((v ^ ((uint32_t)v2 << 13)) * 2654435761u) >> (32 - bits)); 42 | } 43 | 44 | 45 | int MatchFinder::Init(uint8_t *wnd, 46 | uint32_t wnd_size, 47 | uint32_t bt_size, 48 | uint32_t bt_bits, 49 | uint32_t ht_width, 50 | uint32_t ht_bits, 51 | ISzAlloc *alloc 52 | ) 53 | { 54 | wnd_ = wnd; 55 | wnd_size_ = wnd_size; 56 | vld_rge_ = wnd_size_ - MinBlockSize - 4; 57 | pos_ = vld_rge_; 58 | bt_pos_ = 0; 59 | 60 | ht_bits_ = ht_bits; 61 | ht_width_ = ht_width; 62 | bt_bits_ = bt_bits; 63 | bt_size_ = bt_size; 64 | alloc_ = alloc; 65 | 66 | if (!bt_bits_ || !bt_size_) 67 | bt_bits_ = bt_size_ = 0; 68 | if (!ht_bits_ || !ht_width_) 69 | ht_bits_ = ht_width_ = 0; 70 | 71 | size_ = HT2_SIZE_ + HT3_SIZE_ + (1 << ht_bits_) * ht_width_; 72 | if (bt_bits_) { 73 | size_ += (1 << (uint64_t)bt_bits_); 74 | size_ += (uint64_t)bt_size_ * 2; 75 | } 76 | 77 | mfbuf_raw_ = (uint32_t *)alloc_->Alloc(alloc_, sizeof(uint32_t) * size_ + 128); 78 | if (!mfbuf_raw_) 79 | return -1; 80 | mfbuf_ = (uint32_t*)((uint8_t *)mfbuf_raw_ + (64 - ((uint64_t)(mfbuf_raw_) & 0x3F))); 81 | memset(mfbuf_, 0, size_ * sizeof(uint32_t)); 82 | 83 | uint64_t cpos = 0; 84 | ht2_ = mfbuf_ + cpos; 85 | cpos += HT2_SIZE_; 86 | ht3_ = mfbuf_ + cpos; 87 | cpos += HT3_SIZE_; 88 | 89 | if (ht_bits_ && ht_width_) { 90 | ht6_ = mfbuf_ + cpos; 91 | cpos += ht_width_ * (1 << ht_bits_); 92 | } else { 93 | ht6_ = NULL; 94 | } 95 | 96 | if (bt_bits_) { 97 | bt_head_ = mfbuf_ + cpos; 98 | cpos += (1 << (uint64_t)bt_bits_); 99 | bt_nodes_ = mfbuf_ + cpos; 100 | cpos += bt_size_ * 2; 101 | } else { 102 | bt_head_ = NULL; 103 | } 104 | 105 | return 0; 106 | } 107 | 108 | void MatchFinder::normalize() 109 | { 110 | uint32_t diff = pos_ - vld_rge_ + 1; 111 | for(uint32_t 
i = 0; i < size_; i++) 112 | mfbuf_[i] = mfbuf_[i] > diff? mfbuf_[i] - diff : 0; 113 | pos_ -= diff; 114 | } 115 | 116 | void MatchFinder::Destroy() 117 | { 118 | alloc_->Free(alloc_, mfbuf_raw_); 119 | } 120 | 121 | void MatchFinder::SetArg(int bt_cyc, int ht_cyc, int ht_low, int good_len) 122 | { 123 | bt_cyc_ = bt_cyc; 124 | ht_cyc_ = ht_cyc; 125 | ht_low_ = ht_low; 126 | good_len_ = good_len; 127 | } 128 | 129 | void bug() 130 | { 131 | printf("!"); 132 | } 133 | 134 | void MatchFinder::SlidePos(uint32_t wnd_pos, uint32_t len, uint32_t limit) 135 | { 136 | uint32_t h2, h3, hbt, h6, lasth6 = 0; 137 | for(uint32_t i = 1; i < len; ) { 138 | uint32_t wpos = wnd_pos + i; 139 | if (pos_ >= 0xFFFFFFF0) normalize(); 140 | h2 = HASH2(wnd_ + wpos); 141 | h3 = HASH3(wnd_ + wpos); 142 | ht2_[h2] = pos_; 143 | ht3_[h3] = pos_; 144 | 145 | if (i + 128 < len) {i += 4; pos_ += 4; bt_pos_ += 4; continue;} 146 | 147 | if (ht_width_) { 148 | h6 = HASH6(wnd_ + wpos, ht_bits_); 149 | uint32_t *ht6 = ht6_ + h6 * ht_width_; 150 | if (h6 != lasth6) { 151 | uint32_t cands = MIN(ht_width_, ht_cyc_); 152 | for(uint32_t j = cands - 1; j > 0; j--) 153 | ht6[j] = ht6[j - 1]; 154 | } 155 | ht6[0] = pos_; 156 | lasth6 = h6; 157 | } 158 | 159 | if (!bt_head_) { pos_++; i++; continue; } 160 | hbt = HASH6(wnd_ + wpos, bt_bits_); 161 | if (bt_pos_ >= bt_size_) bt_pos_ -= bt_size_; 162 | uint32_t dist = pos_ - bt_head_[hbt]; 163 | uint32_t *l = &bt_nodes_[bt_pos_ * 2], *r = &bt_nodes_[bt_pos_ * 2 + 1]; 164 | uint32_t lenl = 0, lenr = 0; 165 | 166 | for(uint32_t cyc = 0; ; cyc++) { 167 | if (cyc >= bt_cyc_ || dist >= bt_size_ || dist >= vld_rge_) { *l = *r = 0; break; } 168 | uint32_t cmp_pos = wpos >= dist ? wpos - dist : wpos + wnd_size_ - dist; 169 | uint32_t clen = MIN(lenl, lenr); 170 | uint32_t climit = MIN(limit - i, wnd_size_ - cmp_pos); 171 | if (clen >= climit) { *l = *r = 0; break; } 172 | 173 | uint32_t bt_npos = bt_pos_ >= dist ? 
// Collects match candidates for the window position wpos and inserts that
// position into every enabled table.
//
// Candidates are written to ret[0 .. cnt-1] in the order they are found; a
// candidate is only recorded when it is longer than everything recorded
// before it (the rep0len1 entry excepted), so lengths are non-decreasing.
// Candidate encoding:
//   dist 1..4  repeat distance rep_dist[dist - 1] (len 1 / dist 1 is rep0len1)
//   dist > 4   real distance + 4
// Short matches (len <= 6) at a distance >= bound[len] are discarded.
//
// ret:      output array; at most MF_CAND_LIMIT - 2 slots are advanced past,
//           after that the last slot is overwritten.
// rep_dist: the four current repeat distances.
// wpos:     position in wnd_ to search at.
// limit:    maximum match length to consider.
// Returns the number of candidates stored.  pos_ is advanced by one.
uint32_t MatchFinder::find_match(MFUnit *ret, uint32_t *rep_dist, uint32_t wpos, uint32_t limit)
{
    static const uint32_t bound[] = {0, 0, 64, 1024, 16 * KB, 256 * KB, 4 * MB};
    uint32_t h2 = HASH2(wnd_ + wpos);
    uint32_t h3 = HASH3(wnd_ + wpos);
    uint32_t h6 = 0;
    uint32_t hbt = 0;
    // minlen: longest length seen so far; dist: distance of the last probe,
    // later probes are skipped unless they are further away.
    uint32_t minlen = 1, cnt = 0, dist = 0;
    if (ht_width_) {
        h6 = HASH6(wnd_ + wpos, ht_bits_);
        PREFETCH_T0(ht6_ + h6 * ht_width_);
    }

    if (bt_head_) {
        hbt = HASH6(wnd_ + wpos, bt_bits_);
        PREFETCH_T0(bt_head_ + hbt);
    }

    if (ht_low_) {
        PREFETCH_T0(ht2_ + h2);
        PREFETCH_T0(ht3_ + h3);
    }

    // 1) Repeat distances.
    for(uint32_t i = 0; i < 4; i++) {
        if (rep_dist[i] >= vld_rge_) continue;
        // Source position, wrapping around the start of the window.
        uint32_t cmp_pos = wpos >= rep_dist[i] ? wpos - rep_dist[i] : wpos + wnd_size_ - rep_dist[i];
        uint32_t climit = MIN(limit, wnd_size_ - cmp_pos);
        uint8_t *pcur = wnd_ + wpos, *pmatch = wnd_ + cmp_pos, *pend = pmatch + climit;
        // Quick reject: the byte that would make this match longer must agree.
        if (minlen >= climit || pmatch[minlen] != pcur[minlen]) continue;
        // Byte-wise compare (a word-wise variant was disabled to avoid
        // alignment warnings).
        while(pmatch < pend && *pcur == *pmatch) { pmatch++; pcur++; }
        uint32_t match_len = (pcur - wnd_) - wpos;
        if (match_len && i == 0) {
            // rep0len1
            ret[cnt].len = 1;
            ret[cnt].dist = 1;
            if (cnt + 2 < MF_CAND_LIMIT)
                cnt++;
        }
        if (match_len > minlen) {
            minlen = match_len;
            ret[cnt].len = match_len;
            ret[cnt].dist = 1 + i;
            if (cnt + 2 < MF_CAND_LIMIT)
                cnt++;
            if (match_len >= good_len_) {
                dist = 0xFFFFFFFF; //disable all further find
                break;
            }
        }
    }

    if (!ht_low_) goto MAIN_MF;

    // 2) 2-byte hash table (single entry, so each for(;;) runs at most once).
    if (pos_ - ht2_[h2] > dist) for(;;) {
        dist = pos_ - ht2_[h2];
        if (dist >= vld_rge_) break;
        // NOTE(review): every other probe uses `wpos >= dist`; with `>` a
        // candidate at wpos == dist yields cmp_pos == wnd_size_ and is
        // rejected by the climit check below - confirm whether intended.
        uint32_t cmp_pos = wpos > dist ? wpos - dist : wpos + wnd_size_ - dist;
        uint32_t climit = MIN(limit, wnd_size_ - cmp_pos);
        uint8_t *pcur = wnd_ + wpos, *pmatch = wnd_ + cmp_pos, *pend = pmatch + climit;
        if (minlen >= climit || pmatch[minlen] != pcur[minlen]) break;
        // Byte-wise compare (word-wise variant disabled, see above).
        while(pmatch < pend && *pcur == *pmatch) { pmatch++; pcur++; }
        uint32_t match_len = (pcur - wnd_) - wpos;
        if (match_len > minlen) {
            minlen = match_len;
            if (match_len <= 6 && dist >= bound[match_len]) break;
            ret[cnt].len = match_len;
            ret[cnt].dist = 4 + dist;
            if (cnt + 2 < MF_CAND_LIMIT)
                cnt++;
            if (match_len >= good_len_) {
                dist = 0xFFFFFFFF; //disable all further find
                break;
            }
        }
        break;
    }

    // 3) 3-byte hash table.
    if (pos_ - ht3_[h3] > dist) for(;;) {
        dist = pos_ - ht3_[h3];
        if (dist >= vld_rge_) break;
        uint32_t cmp_pos = wpos >= dist ? wpos - dist : wpos + wnd_size_ - dist;
        uint32_t climit = MIN(limit, wnd_size_ - cmp_pos);
        uint8_t *pcur = wnd_ + wpos, *pmatch = wnd_ + cmp_pos, *pend = pmatch + climit;
        if (minlen >= climit || pmatch[minlen] != pcur[minlen]) break;
        // Byte-wise compare (word-wise variant disabled, see above).
        while(pmatch < pend && *pcur == *pmatch) { pmatch++; pcur++; }

        uint32_t match_len = (pcur - wnd_) - wpos;
        if (match_len > minlen) {
            minlen = match_len;
            if (match_len <= 6 && dist >= bound[match_len]) break;
            ret[cnt].len = match_len;
            ret[cnt].dist = 4 + dist;
            if (cnt + 2 < MF_CAND_LIMIT)
                cnt++;
            if (match_len >= good_len_) {
                dist = 0xFFFFFFFF; //disable all further find
                break;
            }
        }
        break;
    }
    // Record the current position in both small tables.
    ht2_[h2] = pos_;
    ht3_[h3] = pos_;

MAIN_MF:
    // 4) Binary tree: search and insert the current position in one pass.
    if (bt_head_) {
        dist = pos_ - bt_head_[hbt];
        // l / r are the link slots of the node being inserted.
        uint32_t *l = &bt_nodes_[bt_pos_ * 2], *r = &bt_nodes_[bt_pos_ * 2 + 1];

        // Runs at most once: probes the head candidate when it lies outside
        // the tree's node range but still inside the valid window range.
        for(; dist >= bt_size_ && dist < vld_rge_; ) {
            // candidate in hash head of binary tree does not have match distance limit
            uint32_t cmp_pos = wpos >= dist ? wpos - dist : wpos + wnd_size_ - dist;
            uint32_t climit = MIN(limit, wnd_size_ - cmp_pos);
            uint8_t *pcur = wnd_ + wpos, *pmatch = wnd_ + cmp_pos, *pend = pmatch + climit;
            if (minlen >= climit || pmatch[minlen] != pcur[minlen]) break;
            // Byte-wise compare (word-wise variant disabled, see above).
            while(pmatch < pend && *pcur == *pmatch) { pmatch++; pcur++; }

            uint32_t match_len = (pcur - wnd_) - wpos;
            if (match_len > minlen) {
                minlen = match_len;
                if (match_len <= 6 && dist >= bound[match_len]) break;
                ret[cnt].len = match_len;
                ret[cnt].dist = 4 + dist;
                if (cnt + 2 < MF_CAND_LIMIT)
                    cnt++;
                if (match_len >= good_len_) {
                    dist = 0xFFFFFFFF; //disable all further find
                    break;
                }
            }
            break;
        }

        // lenl / lenr: bytes known to match along the left / right descent;
        // the comparison at each node resumes from the smaller of the two.
        uint32_t lenl = 0, lenr = 0;
        for(uint32_t cyc = 0; ; cyc++) {
            // Out of budget or candidate out of range: terminate both links.
            if (cyc >= bt_cyc_ || dist >= bt_size_ || dist >= vld_rge_) { *l = *r = 0; break; }
            uint32_t cmp_pos = wpos >= dist ? wpos - dist : wpos + wnd_size_ - dist;
            uint32_t clen = MIN(lenl, lenr);
            uint32_t climit = MIN(limit, wnd_size_ - cmp_pos);
            if (clen >= climit) { *l = *r = 0; break; }

            // Node slot of the candidate inside the cyclic node buffer.
            uint32_t bt_npos = bt_pos_ >= dist ? bt_pos_ - dist : bt_pos_ + bt_size_ - dist;
            uint32_t *tlast = &bt_nodes_[bt_npos * 2];
            PREFETCH_T0(tlast);
            uint8_t *pcur = wnd_ + wpos, *pmatch = wnd_ + cmp_pos ;
            if (pcur[clen] == pmatch[clen]) {
                //uint32_t climit2 = MIN(good_len_, climit);
                uint32_t climit2 = climit;
                clen++;
                while(clen < climit2 && pcur[clen] == pmatch[clen])
                    clen++;

                if (clen > minlen) {
                    minlen = clen;
                    if (clen > 6 || dist < bound[clen]) {
                        ret[cnt].len = clen;
                        ret[cnt].dist = 4 + dist;
                        if (cnt + 2 < MF_CAND_LIMIT) cnt++;
                    }
                }

                if (clen >= good_len_) {
                    // Long enough: take over the candidate's links and stop.
                    *l = tlast[0]; *r = tlast[1]; dist = 0xFFFFFFFF; break;
                } else if (clen >= climit2) {
                    *l = *r = 0; break;
                }
            }

            // Hang the candidate on the proper side of the new node and
            // descend into the candidate's other subtree.
            if (pmatch[clen] < pcur[clen]) {
                *l = pos_ - dist;
                dist = pos_ - *(l = &tlast[1]);
                lenl = clen;
            } else {
                *r = pos_ - dist;
                dist = pos_ - *(r = &tlast[0]);
                lenr = clen;
            }
        }
        bt_head_[hbt] = pos_;
        if (++bt_pos_ >= bt_size_) bt_pos_ -= bt_size_;
    }

    // 5) 6-byte hash bucket: probe up to cands entries, only those further
    // away than the last probed distance.
    uint32_t *ht6 = ht6_ + h6 * ht_width_;
    uint32_t cands = MIN(ht_width_, ht_cyc_);
    for(uint32_t i = 0; i < cands; i++) {
        if (pos_ - ht6[i] <= dist) continue;
        dist = pos_ - ht6[i];
        if (dist >= vld_rge_) continue;
        uint32_t cmp_pos = wpos >= dist ? wpos - dist : wpos + wnd_size_ - dist;
        uint32_t climit = MIN(limit, wnd_size_ - cmp_pos);
        uint8_t *pcur = wnd_ + wpos, *pmatch = wnd_ + cmp_pos, *pend = pmatch + climit;
        if (minlen >= climit || pmatch[minlen] != pcur[minlen]) continue;
        // Byte-wise compare (word-wise variant disabled, see above).
        while(pmatch < pend && *pcur == *pmatch) { pmatch++; pcur++; }

        uint32_t match_len = (pcur - wnd_) - wpos;
        if (match_len > minlen) {
            minlen = match_len;
            if (match_len <= 6 && dist >= bound[match_len]) continue;
            ret[cnt].len = match_len;
            ret[cnt].dist = 4 + dist;
            if (cnt + 2 < MF_CAND_LIMIT)
                cnt++;
            if (match_len >= good_len_) {
                dist = 0xFFFFFFFF; //disable all further find
                break;
            }
        }
    }

    // Insert the current position at the front of the bucket.
    // NOTE(review): if cands is 0 (ht_cyc_ == 0) `cands - 1` wraps around and
    // this loop runs far past the bucket - confirm ht_cyc_ is always >= 1.
    if (ht_width_) {
        for(uint32_t i = cands - 1; i > 0; i--)
            ht6[i] = ht6[i-1];
        ht6[0] = pos_;
    }

    // Rebase all stored positions before pos_ can overflow.
    if (++pos_ >= 0xFFFFFFF0) normalize();
    return cnt;
}
u1.dist) 517 | || (u2.len < u1.len && u2.len + 2 >= u1.len && u1.dist > 4 518 | && (u1.dist >> cof[u1.len - u2.len]) > u2.dist) 519 | )) 520 | bestidx = i; 521 | 522 | } 523 | return mfcand_[bestidx]; 524 | } 525 | 526 | bool MatchFinder::TestFind(uint32_t wpos, uint8_t *src, uint32_t limit) 527 | { 528 | uint32_t dists[9] = {wnd_size_, wnd_size_}; 529 | uint32_t depth = 0; 530 | uint32_t h = HASH2(src); 531 | if (h % 16) 532 | // for 'BAD' data, only a small subset of data will be test by MF 533 | return false; 534 | 535 | if (ht_width_) { 536 | h = HASH6(src, ht_bits_); 537 | for(uint32_t i = 0; i < ht_width_ && i < 8; i++) 538 | dists[depth++] = pos_ - ht6_[h * ht_width_]; 539 | } 540 | 541 | if (bt_head_) { 542 | h = HASH6(src, bt_bits_); 543 | dists[depth++] = pos_ - bt_head_[h]; 544 | } 545 | 546 | for(uint32_t i = 0; i < depth; i++) { 547 | uint32_t dist = dists[i]; 548 | if (dist >= vld_rge_) continue; 549 | uint32_t cmp_pos = wpos >= dist ? wpos - dist : wpos + wnd_size_ - dist; 550 | uint32_t climit = MIN(limit, 24); 551 | climit = MIN(limit, wnd_size_ - cmp_pos); 552 | uint8_t *pcur = src, *pmatch = wnd_ + cmp_pos, *pend = pmatch + climit; 553 | 554 | /* avoid alignment warning) 555 | while(pmatch + 4 <= pend && *(uint32_t *)pcur == *(uint32_t *)pmatch) { 556 | pmatch += 4; pcur += 4; } 557 | if (pmatch + 2 <= pend && *(uint16_t *)pcur == *(uint16_t *)pmatch) { 558 | pmatch += 2; pcur += 2; } 559 | if (pmatch < pend && *pcur == *pmatch) { pmatch++; pcur++; } 560 | */ 561 | while(pmatch < pend && *pcur == *pmatch) { pmatch++; pcur++; } 562 | 563 | if (pcur - src > 18) 564 | // enough long match 565 | return true; 566 | } 567 | return false; 568 | } 569 | 570 | bool MatchFinder::SecondMatchBetter(MFUnit u1, MFUnit u2) 571 | { 572 | static const uint32_t cof[] = {0, 4, 8, 12}; 573 | return (u2.len > 1 && ( 574 | (u2.len > u1.len + 3) 575 | || (u2.len > u1.len && u2.dist <= 4) 576 | || (u2.len + 2 > u1.len && u2.dist <= 4 && u1.dist > 4) 577 | || (u2.len >= 
u1.len //&& u1.dist > 4 578 | && (u2.dist >> cof[u2.len - u1.len]) <= u1.dist) 579 | || (u2.len < u1.len && u2.len + 2 >= u1.len && u1.dist > 4 580 | && (u1.dist >> cof[u1.len - u2.len]) > u2.dist) 581 | )); 582 | } 583 | 584 | void MatchFinder::FindMatchWithPrice(Model *model, uint32_t state, MFUnit *ret, uint32_t *rep_dist, uint32_t wnd_pos, uint32_t limit) 585 | { 586 | static const uint32_t bound[] = {0, 0, 64, 1024, 16 * KB, 256 * KB, 4 * MB}; 587 | mfcand_[0].len = 1; 588 | mfcand_[0].dist = 0; 589 | 590 | // ret[0] is the longest match 591 | // ret[1 .. n] are price tables by match length as index 592 | uint32_t n = find_match(mfcand_ + 1, rep_dist, wnd_pos, limit); 593 | ret[0] = mfcand_[n]; 594 | 595 | if (ret[0].len >= good_len_) 596 | return; 597 | 598 | ret[1].dist = 0; 599 | uint32_t lpos = 1; 600 | for(uint32_t i = 1; i <= n; i++) { 601 | uint32_t distprice = 0; 602 | uint32_t rdist = 0; 603 | if (mfcand_[i].len == 1 && mfcand_[i].dist == 1) { 604 | ret[1].price = model->GetRep0Len1Price(state); 605 | ret[1].dist = 1; 606 | continue; 607 | } else if (mfcand_[i].dist <= 4) { 608 | distprice = model->GetRepDistPrice(state, mfcand_[i].dist - 1); 609 | rdist = 0; 610 | } else { 611 | distprice = model->GetMatchDistPrice(state, mfcand_[i].dist - 5); 612 | rdist = mfcand_[i].dist - 4; 613 | } 614 | 615 | while(lpos < mfcand_[i].len) { 616 | lpos++; 617 | if (lpos <= 6 && rdist >= bound[lpos]) { 618 | ret[lpos].dist = 0; 619 | continue; 620 | } 621 | ret[lpos].dist = mfcand_[i].dist; 622 | ret[lpos].price = distprice + model->GetMatchLenPrice(state, lpos - 2); 623 | } 624 | } 625 | } 626 | 627 | 628 | -------------------------------------------------------------------------------- /src/libcsc/csc_mf.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_MF_H_ 2 | #define _CSC_MF_H_ 3 | #include 4 | 5 | 6 | class Model; 7 | 8 | struct MFUnit { 9 | union { 10 | uint32_t len; 11 | uint32_t price; 12 | }; 13 | 
// Match finder over a sliding window.  Combines 2-byte and 3-byte hash
// tables, an optional 6-byte hash table with ht_width_ entries per bucket,
// and an optional binary tree.  Table entries store values of the running
// counter pos_, so a distance is always `pos_ - entry`.
class MatchFinder {
    // Entry counts of the 3-byte and 2-byte hash tables.
    static const uint32_t HT3_SIZE_ = 64 * KB;
    static const uint32_t HT2_SIZE_ = 16 * KB;

    ISzAlloc *alloc_;
    uint8_t *wnd_;        // window buffer being searched
    uint32_t wnd_size_;
    uint32_t vld_rge_; // valid range, wnd_size - blocksize


    uint32_t *mfbuf_raw_; // pointer returned by the allocator (passed to Free)
    uint32_t *mfbuf_;     // 64-byte aligned start of the tables inside mfbuf_raw_
    uint32_t *ht2_;
    uint32_t *ht3_;
    uint32_t *ht6_;       // NULL when the 6-byte hash table is disabled
    uint32_t ht_bits_;
    uint32_t ht_width_;
    uint32_t ht_low_;     // non-zero: use ht2_/ht3_ in find_match
    // Capacity of the candidate scratch list; [0] is the literal placeholder.
    static const uint32_t MF_CAND_LIMIT = 32;
    MFUnit mfcand_[MF_CAND_LIMIT];

    uint32_t *bt_head_;   // hash -> most recent position; NULL when the tree is disabled
    uint32_t bt_bits_;
    uint32_t *bt_nodes_;  // two links per node, used as a cyclic buffer
    uint32_t bt_size_;

    uint32_t bt_pos_;     // current slot in the cyclic node buffer
    uint64_t size_;       // total number of uint32_t entries in mfbuf_

    uint32_t ht_cyc_;
    uint32_t bt_cyc_;
    uint32_t good_len_;

    // Rebases every stored position so that pos_ does not overflow.
    void normalize();

    uint32_t find_match(MFUnit *ret, uint32_t *rep_dist, uint32_t wnd_pos, uint32_t limit);

public:
    int Init(uint8_t *wnd, 
            uint32_t wnd_size,
            uint32_t bt_size,
            uint32_t bt_bits,
            uint32_t ht_width,
            uint32_t ht_bits,
            ISzAlloc *alloc
            );

    void SetArg(int bt_cyc, int ht_cyc, int ht_low, int good_len);
    void Destroy();

    // limit is useful only for binary tree mf
    void SlidePos(uint32_t wnd_pos, uint32_t len, uint32_t limit = 0xFFFFFFFF);
    void SlidePosFast(uint32_t wnd_pos, uint32_t len);
    MFUnit FindMatch(uint32_t *rep_dist, uint32_t wnd_pos, uint32_t limit);
    bool TestFind(uint32_t wnd_pos, uint8_t *src, uint32_t limit);
    bool SecondMatchBetter(MFUnit u1, MFUnit u2);
    void FindMatchWithPrice(Model *model, uint32_t state, MFUnit *ret, uint32_t *rep_dist, uint32_t wnd_pos, uint32_t limit);
    // Running position counter; advanced by one for every window byte passed
    // through find_match / SlidePos / SlidePosFast.
    uint32_t pos_;
};
| #include 5 | #include 6 | 7 | /* 8 | 9 | slot extra_bits dist_range 10 | 0 0 bits 0 - 0 11 | 1 0 bits 1 - 1 12 | 2 0 bits 2 - 2 13 | 3 1 bits 3 - 4 14 | 4 2 bits 5 - 8 15 | 5 3 bits 9 - 16 16 | 6 4 bits 17 - 32 17 | 7 5 bits 33 - 64 18 | 8 6 bits 65 - 128 19 | 9 7 bits 129 - 256 20 | 10 8 bits 257 - 512 21 | 11 9 bits 513 - 1024 22 | 12 10 bits 1025 - 2048 23 | 13 11 bits 2049 - 4096 24 | 14 12 bits 4097 - 8192 25 | 15 13 bits 8193 - 16384 26 | 16 14 bits 16385 - 32768 27 | 17 15 bits 32769 - 65536 28 | 18 16 bits 65537 - 131072 29 | 19 17 bits 131073 - 262144 30 | 20 18 bits 262145 - 524288 31 | 21 19 bits 524289 - 1048576 32 | 22 20 bits 1048577 - 2097152 33 | 23 21 bits 2097153 - 4194304 34 | 24 22 bits 4194305 - 8388608 35 | 25 23 bits 8388609 - 16777216 36 | 26 24 bits 16777217 - 33554432 37 | 27 25 bits 33554433 - 67108864 38 | 28 26 bits 67108865 - 134217728 39 | 29 27 bits 134217729 - 268435456 40 | 30 28 bits 268435457 - 536870912 41 | 31 29 bits 536870913 - 1073741824 42 | 43 | */ 44 | 45 | const uint32_t Model::dist_table_[33] = { 46 | 0, 1, 2, 3, 47 | 5, 9, 17, 33, 48 | 65, 129, 257, 513, 49 | 1025, 2049, 4097, 8193, 50 | 16385, 32769, 65537, 131073, 51 | 262145, 524289, 1048577, 2097153, 52 | 4194305, 8388609, 16777217, 33554433, 53 | 67108865, 134217729, 268435457, 536870913, 54 | 1073741825, 55 | }; 56 | 57 | const uint32_t Model::rev16_table_[16] = { 58 | 0, 8, 4, 12, 59 | 2, 10, 6, 14, 60 | 1, 9, 5, 13, 61 | 3, 11, 7, 15, 62 | }; 63 | 64 | int Model::Init(Coder *coder, ISzAlloc *alloc) 65 | { 66 | coder_ = coder; 67 | alloc_ = alloc; 68 | for (int i = 0; i < (4096 >> 3); i++) 69 | p_2_bits_[i]= (uint32_t)(128 * 70 | log((float)(i * 8 + 4) / 4096) / log(0.5)); 71 | 72 | p_delta_=NULL; 73 | p_lit_ = (uint32_t*)alloc_->Alloc(alloc_, 256 * 256 * sizeof(uint32_t)); 74 | if (!p_lit_) 75 | return -1; 76 | 77 | return 0; 78 | } 79 | 80 | void Model::Destroy() 81 | { 82 | alloc_->Free(alloc_, p_lit_); 83 | alloc_->Free(alloc_, p_delta_); 84 | PWriteLog(); 
85 | } 86 | 87 | 88 | void Model::Reset(void) 89 | { 90 | alloc_->Free(alloc_, p_delta_); 91 | p_delta_ = NULL; 92 | #define INIT_PROB(P, K) do{for(int i = 0; i < K; i++) P[i] = 2048;}while(0) 93 | INIT_PROB(p_state_, 64 * 3); 94 | INIT_PROB(p_lit_, 256 * 256); 95 | INIT_PROB(p_repdist_, 64 * 3); 96 | INIT_PROB(p_dist_, 8 + 16 * 2 + 32 * 4); 97 | INIT_PROB(p_rle_len_, 16); 98 | 99 | INIT_PROB(p_matchlen_slot_, 2); 100 | INIT_PROB(p_matchlen_extra1_, 8); 101 | INIT_PROB(p_matchlen_extra2_, 8); 102 | INIT_PROB(p_matchlen_extra3_, 128); 103 | INIT_PROB(p_matchdist_extra_, 29 * 16); 104 | #undef INIT_PROB 105 | 106 | p_longlen_ = 2048; 107 | p_rle_flag_ = 2048; 108 | state_ = 0; 109 | ctx_ = 0; 110 | lp_rebuild_int_ = 0; 111 | } 112 | 113 | void Model::encode_matchlen_1(uint32_t len) 114 | { 115 | /* 0 - 7, 8 - 15, 16 - 143, if 143 denoting more than 143 */ 116 | uint32_t *p; 117 | if (len < 16) { 118 | if (len < 8) { 119 | EncodeBit(coder_, 0, p_matchlen_slot_[0]); 120 | p = p_matchlen_extra1_; 121 | } else { 122 | EncodeBit(coder_, 1, p_matchlen_slot_[0]); 123 | EncodeBit(coder_, 0, p_matchlen_slot_[1]); 124 | len -= 8; 125 | p = p_matchlen_extra2_; 126 | } 127 | 128 | uint32_t c = len | 0x08; 129 | do { 130 | EncodeBit(coder_, (c >> 2) & 1, p[c >> 3]); 131 | c <<= 1; 132 | } while (c < 0x40); 133 | } else { 134 | EncodeBit(coder_, 1, p_matchlen_slot_[0]); 135 | EncodeBit(coder_, 1, p_matchlen_slot_[1]); 136 | len -= 16; 137 | 138 | p = p_matchlen_extra3_; 139 | uint32_t c = len | 0x80; 140 | do { 141 | EncodeBit(coder_, (c >> 6) & 1, p[c >> 7]); 142 | c <<= 1; 143 | } while (c < 0x4000); 144 | } 145 | } 146 | 147 | void Model::encode_matchlen_2(uint32_t len) 148 | { 149 | if (len >= 143) { 150 | encode_matchlen_1(143); 151 | len -= 143; 152 | while(len >= 143) { 153 | len -= 143; 154 | EncodeBit(coder_, 0, p_longlen_); 155 | } 156 | EncodeBit(coder_, 1, p_longlen_); 157 | } 158 | encode_matchlen_1(len); 159 | } 160 | 161 | #define FEncodeBit(price,v,p) do\ 162 | {\ 
163 | if (v)\ 164 | price += p_2_bits_[p>>3];\ 165 | else\ 166 | price += p_2_bits_[(4096-p)>>3];\ 167 | }while(0) 168 | 169 | void Model::EncodeLiteral(uint32_t c)//,uint32_t pos 170 | { 171 | PEncodeLiteral(c); 172 | //printf("[%u](%u)\n", c, ctx_); 173 | EncodeBit(coder_, 0, p_state_[state_ * 3 + 0]); 174 | 175 | state_ = (state_ * 4) & 0x3F; 176 | uint32_t *p = &p_lit_[ctx_ * 256]; 177 | ctx_ = (c >> 0); 178 | c = c | 0x100; 179 | do { 180 | EncodeBit(coder_,(c >> 7) & 1,p[c>>8]); 181 | c <<= 1; 182 | } while (c < 0x10000); 183 | } 184 | 185 | uint32_t Model::GetLiteralPrice(uint32_t fstate, uint32_t fctx, uint32_t c) 186 | { 187 | uint32_t ret = 0; 188 | FEncodeBit(ret, 0, p_state_[fstate * 3 + 0]); 189 | uint32_t *p = &p_lit_[fctx * 256]; 190 | c = c | 0x100; 191 | do { 192 | FEncodeBit(ret, (c >> 7) & 1, p[c>>8]); 193 | c <<= 1; 194 | } while (c < 0x10000); 195 | return ret; 196 | } 197 | 198 | void Model::EncodeRep0Len1(void) 199 | { 200 | PEncode1BMatch(); 201 | //printf("Rep0Len1\n"); 202 | EncodeBit(coder_, 1, p_state_[state_ *3 + 0]); 203 | EncodeBit(coder_, 0, p_state_[state_ *3 + 1]); 204 | EncodeBit(coder_, 0, p_state_[state_ *3 + 2]); 205 | ctx_ = 0; 206 | state_ = (state_ * 4 + 2) & 0x3F; 207 | } 208 | 209 | uint32_t Model::GetRep0Len1Price(uint32_t fstate) 210 | { 211 | uint32_t ret = 0; 212 | FEncodeBit(ret, 1, p_state_[fstate *3 + 0]); 213 | FEncodeBit(ret, 0, p_state_[fstate *3 + 1]); 214 | FEncodeBit(ret, 0, p_state_[fstate *3 + 2]); 215 | return ret; 216 | } 217 | 218 | void Model::EncodeRepDistMatch(uint32_t rep_idx, uint32_t match_len) 219 | { 220 | PEncodeRepMatch(match_len, rep_idx); 221 | //printf("Rep %u %u\n", rep_idx, match_len); 222 | EncodeBit(coder_, 1, p_state_[state_ *3 + 0]); 223 | EncodeBit(coder_, 0, p_state_[state_ *3 + 1]); 224 | EncodeBit(coder_, 1, p_state_[state_ *3 + 2]); 225 | 226 | uint32_t i = 1, j; 227 | j = (rep_idx >> 1) & 1; EncodeBit(coder_, j, p_repdist_[state_ * 3 + i - 1]); i += i + j; 228 | j = rep_idx & 1; 
EncodeBit(coder_, j, p_repdist_[state_ * 3 + i - 1]); 229 | 230 | encode_matchlen_2(match_len); 231 | state_ = (state_ * 4 + 3) & 0x3F; 232 | } 233 | 234 | void Model::len_price_rebuild() 235 | { 236 | for(int i = 0; i < 32; i++) { 237 | uint32_t ret = 0, len = i; 238 | uint32_t *p; 239 | if (len < 16) { 240 | if (len < 8) { 241 | FEncodeBit(ret , 0, p_matchlen_slot_[0]); 242 | p = p_matchlen_extra1_; 243 | } else { 244 | FEncodeBit(ret , 1, p_matchlen_slot_[0]); 245 | FEncodeBit(ret , 0, p_matchlen_slot_[1]); 246 | len -= 8; 247 | p = p_matchlen_extra2_; 248 | } 249 | 250 | uint32_t c = len | 0x08; 251 | do { 252 | FEncodeBit(ret, (c >> 2) & 1, p[c >> 3]); 253 | c <<= 1; 254 | } while (c < 0x40); 255 | } else { 256 | FEncodeBit(ret, 1, p_matchlen_slot_[0]); 257 | FEncodeBit(ret, 1, p_matchlen_slot_[1]); 258 | len -= 16; 259 | 260 | p = p_matchlen_extra3_; 261 | uint32_t c = len | 0x80; 262 | do { 263 | FEncodeBit(ret, (c >> 6) & 1, p[c >> 7]); 264 | c <<= 1; 265 | } while (c < 0x4000); 266 | } 267 | len_price_[i] = ret; 268 | } 269 | lp_rebuild_int_ = 4096; 270 | } 271 | 272 | 273 | uint32_t Model::GetRepDistPrice(uint32_t fstate,uint32_t rep_idx) 274 | { 275 | uint32_t ret = 0; 276 | FEncodeBit(ret, 1, p_state_[fstate *3 + 0]); 277 | FEncodeBit(ret, 0, p_state_[fstate *3 + 1]); 278 | FEncodeBit(ret, 1, p_state_[fstate *3 + 2]); 279 | 280 | uint32_t i = 1, j; 281 | j = (rep_idx >> 1) & 1; FEncodeBit(ret, j, p_repdist_[fstate * 3 + i - 1]); i += i + j; 282 | j = rep_idx & 1; FEncodeBit(ret, j, p_repdist_[fstate * 3 + i - 1]); 283 | return ret; //ret > 256 ? 
ret - 256 : 0; 284 | } 285 | 286 | uint32_t Model::GetMatchLenPrice(uint32_t fstate,uint32_t matchLen) 287 | { 288 | uint32_t ret = 0; 289 | (void)fstate; 290 | if (matchLen >= 32) 291 | //long enough, some random reasonable price 292 | ret += 128 * 6; 293 | else { 294 | if (lp_rebuild_int_-- == 0) 295 | len_price_rebuild(); 296 | ret += len_price_[matchLen]; 297 | } 298 | return ret; 299 | } 300 | 301 | void Model::EncodeMatch(uint32_t dist, uint32_t len) 302 | { 303 | //printf("%u %u\n", dist, len); 304 | PEncodeMatch(len, dist); 305 | EncodeBit(coder_, 1,p_state_[state_ * 3 + 0]); 306 | EncodeBit(coder_, 1,p_state_[state_ * 3 + 1]); 307 | encode_matchlen_2(len); 308 | uint32_t pdist_pos, sbits; 309 | switch(len) { 310 | case 0: 311 | pdist_pos = 0; 312 | sbits = 3; 313 | break; 314 | case 1: 315 | case 2: 316 | pdist_pos = 16 * (len - 1) + 8; 317 | sbits = 4; 318 | break; 319 | case 3: 320 | case 4: 321 | case 5: 322 | pdist_pos = 32 * (len - 3) + 8 + 16 * 2; 323 | sbits = 5; 324 | break; 325 | default: 326 | pdist_pos = 32 * 3 + 8 + 16 * 2; 327 | sbits = 5; 328 | break; 329 | }; 330 | 331 | uint32_t l = 0, r = 32; 332 | while(l + 1 < r) { 333 | uint32_t mid = (l + (r - l) / 2); 334 | if (dist_table_[mid] > dist) 335 | r = mid; 336 | else if (dist_table_[mid] < dist) 337 | l = mid; 338 | else 339 | l = r = mid; 340 | } 341 | 342 | uint32_t slot = l, c = slot | (1 << sbits); 343 | uint32_t extra_len = 0, extra_bits = slot > 2? 
slot - 2 : 0; 344 | uint32_t *p = p_dist_ + pdist_pos; 345 | do { 346 | EncodeBit(coder_,(c >> (sbits - 1)) & 1, p[c >> sbits]); 347 | c <<= 1; 348 | } while (c < (1u << (sbits * 2))); 349 | 350 | if (extra_bits) { 351 | extra_len = dist - (1 << extra_bits) - 1; 352 | if (extra_bits > 4) { 353 | uint32_t dlen = (extra_len >> 4); 354 | uint32_t bits = (extra_bits - 4); 355 | EncodeDirect(coder_, dlen, bits); 356 | } 357 | c = rev16_table_[extra_len & 0x0F] | 0x10; 358 | p = &p_matchdist_extra_[(extra_bits - 1) * 16]; 359 | do { 360 | EncodeBit(coder_,(c >> 3) & 1, p[c >> 4]); 361 | c <<= 1; 362 | } while (c < (1 << 8)); 363 | } 364 | 365 | state_ = (state_ * 4 + 1) & 0x3F; 366 | } 367 | 368 | uint32_t Model::GetMatchDistPrice(uint32_t fstate, uint32_t dist) 369 | { 370 | uint32_t ret = 0; 371 | FEncodeBit(ret, 1,p_state_[fstate * 3 + 0]); 372 | FEncodeBit(ret, 1,p_state_[fstate * 3 + 1]); 373 | 374 | // quick estimation, 4bit for slot + extrabits 375 | uint32_t l = 0, r = 32; 376 | while(l + 1 < r) { 377 | uint32_t mid = (l + (r - l) / 2); 378 | if (dist_table_[mid] > dist) 379 | r = mid; 380 | else if (dist_table_[mid] < dist) 381 | l = mid; 382 | else 383 | l = r = mid; 384 | } 385 | ret += (l > 2? l + 2 : 2) * 128; 386 | return ret; 387 | } 388 | 389 | void Model::EncodeInt(uint32_t num) 390 | { 391 | /* slot, extra_bits, range_end_close 392 | 0 - 1 1 393 | 1 - 1 3 394 | 2 - 2 7 395 | 3 - 3 15 396 | ... 
397 | 31 - 31 2^32 - 1 398 | */ 399 | 400 | uint32_t tmp = num; 401 | uint32_t slot = 0; 402 | while(tmp) { 403 | tmp >>= 1; 404 | slot++; 405 | } 406 | if (slot) 407 | slot--; 408 | 409 | EncodeDirect(coder_, slot, 5); 410 | if (slot == 0) 411 | EncodeDirect(coder_, num, 1); 412 | else 413 | EncodeDirect(coder_, num - (1 << slot), slot); 414 | } 415 | 416 | void Model::CompressDelta(uint8_t *src,uint32_t size) 417 | { 418 | uint32_t i; 419 | uint32_t *p; 420 | uint32_t c; 421 | uint32_t sCtx=0; 422 | 423 | if (p_delta_ == NULL) { 424 | p_delta_ = (uint32_t*)alloc_->Alloc(alloc_, 256*256*4); 425 | for (i = 0; i < 256 * 256; i++) { 426 | p_delta_[i] = 2048; 427 | } 428 | } 429 | 430 | 431 | EncodeInt(size); 432 | for(i=0;i> 7) & 1,p[c>>8]); 439 | c <<= 1; 440 | } 441 | while (c < 0x10000); 442 | 443 | sCtx=src[i]; 444 | } 445 | return; 446 | } 447 | 448 | void Model::CompressLiterals(uint8_t *src,uint32_t size) 449 | { 450 | EncodeInt(size); 451 | for(uint32_t i = 0; i < size; i++) { 452 | uint32_t c = src[i]; 453 | uint32_t *p = &p_lit_[ctx_ * 256]; 454 | ctx_ = (c >> 0); 455 | c = c | 0x100; 456 | do { 457 | EncodeBit(coder_,(c >> 7) & 1,p[c>>8]); 458 | c <<= 1; 459 | } while (c < 0x10000); 460 | } 461 | } 462 | 463 | void Model::CompressBad(uint8_t *src,uint32_t size) 464 | { 465 | EncodeInt(size); 466 | for(uint32_t i = 0; i< size; i++) 467 | coder_->EncDirect16(src[i],8); 468 | return; 469 | } 470 | 471 | void Model::CompressRLE(uint8_t *src, uint32_t size) 472 | { 473 | uint32_t i,j,c,len; 474 | uint32_t sCtx=0; 475 | uint32_t *p=NULL; 476 | 477 | EncodeInt(size); 478 | if (p_delta_==NULL) { 479 | p_delta_=(uint32_t*)alloc_->Alloc(alloc_, 256*256*4); 480 | for (i=0;i<256*256;i++) 481 | p_delta_[i]=2048; 482 | } 483 | 484 | for (i = 0; i < size;) { 485 | if (i > 0 && size - i > 3 486 | && src[i - 1] == src[i] 487 | && src[i] == src[i + 1] 488 | && src[i] == src[i + 2]) { 489 | j = i + 3; 490 | len = 3; 491 | while(j < size && src[j] == src[j-1]) { len++; j++; } 
492 | 493 | if (len > 10) { 494 | sCtx = src[j-1]; 495 | len -= 11; 496 | EncodeBit(coder_,1,p_rle_flag_); 497 | encode_matchlen_2(len); 498 | i=j; 499 | continue; 500 | } 501 | } 502 | EncodeBit(coder_,0,p_rle_flag_); 503 | p=&p_delta_[sCtx*256]; 504 | c=src[i]|0x100; 505 | do { 506 | EncodeBit(coder_,(c >> 7) & 1,p[c>>8]); 507 | c <<= 1; 508 | } while (c < 0x10000); 509 | sCtx = src[i]; 510 | i++; 511 | } 512 | return; 513 | } 514 | 515 | 516 | -------------------------------------------------------------------------------- /src/libcsc/csc_model.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_MODEL_H_ 2 | #define _CSC_MODEL_H_ 3 | #include 4 | #include 5 | 6 | 7 | /* Current specific 8 | The compression stream is made up by myriads of small data packs. 9 | Each pack: 10 | 11 | 0-- 0 Rawbyte Literal byte 12 | 1-- 1 1 MatchDist MatchLen Ordinary match 13 | disabled-(2-- 1 0 0 0 Last match pair) 14 | disabled-(3-- 1 0 0 1 1-Byte match with last repeat MatchDist) 15 | 3-- 1 0 0 1-Byte match with last repeat MatchDist 16 | 4-- 1 0 1 0 0 MatchLen A match with last repeat MatchDist(0) 17 | 5-- 1 0 1 0 1 MatchLen A match with repeat MatchDist[1] 18 | 6-- 1 0 1 1 0 MatchLen A match with repeat MatchDist[2] 19 | 7-- 1 0 1 1 1 MatchLen A match with repeat MatchDist[3] 20 | 21 | MatchDist 22 | 64 buckets with different num of extra direct bits. 23 | probDists[64] is the statistical model. 24 | 25 | MatchLen 26 | 16 buckets with different num of extra direct bits. 27 | p_len_[16] is the statistical model. 28 | 29 | // 0, 1 - 8 30 | // 1 0, 9 - 16 31 | // 1 1, 17 - 144 32 | 33 | Rawbyte 34 | Order-1 coder with only 3-MSBs as context 35 | p_lit_[8][256] is the model 36 | 37 | About state type: 38 | pack 0 --- current type 0 39 | pack 1 --- current type 1 40 | pack 2,3 --- current type 2 41 | pack 4,5,6,7 --- current type 3 42 | 43 | The state: 44 | stores last 4 state type. 45 | p_state_ is the model. 
46 | */ 47 | 48 | class Model 49 | { 50 | void encode_matchlen_1(uint32_t len); 51 | void encode_matchlen_2(uint32_t len); 52 | void encode_matchdist(uint32_t dist); 53 | 54 | uint32_t len_price_[32]; 55 | void len_price_rebuild(); 56 | uint32_t lp_rebuild_int_; 57 | 58 | public: 59 | Coder *coder_; 60 | 61 | void Reset(void); 62 | int Init(Coder *coder, ISzAlloc *alloc); 63 | void Destroy(); 64 | 65 | void EncodeLiteral(uint32_t c); 66 | void SetLiteralCtx(uint32_t c) {ctx_ = (c >> 0);} 67 | uint32_t GetLiteralCtx() {return ctx_;} 68 | 69 | 70 | void EncodeMatch(uint32_t matchDist,uint32_t matchLen); 71 | 72 | void EncodeRepDistMatch(uint32_t repIndex,uint32_t matchLen); 73 | 74 | 75 | void EncodeRep0Len1(void); 76 | void CompressDelta(uint8_t *src,uint32_t size); 77 | void CompressLiterals(uint8_t *src,uint32_t size); 78 | void CompressBad(uint8_t *src,uint32_t size); 79 | 80 | void CompressRLE(uint8_t *src,uint32_t size); 81 | 82 | void EncodeInt(uint32_t num); 83 | 84 | uint32_t p_state_[4*4*4*3];//Original [64][3] 85 | // public for advanced parser 86 | uint32_t state_; 87 | 88 | //Fake Encode --- to get price 89 | uint32_t GetLiteralPrice(uint32_t fstate,uint32_t fctx,uint32_t c); 90 | uint32_t GetRep0Len1Price(uint32_t fstate); 91 | uint32_t GetRepDistPrice(uint32_t fstate,uint32_t repIndex); 92 | uint32_t GetMatchDistPrice(uint32_t fstate,uint32_t matchDist); 93 | uint32_t GetMatchLenPrice(uint32_t fstate,uint32_t matchLen); 94 | 95 | 96 | private: 97 | ISzAlloc *alloc_; 98 | 99 | uint32_t p_rle_flag_; 100 | uint32_t p_rle_len_[16]; 101 | 102 | // prob of literals 103 | uint32_t *p_lit_;//[256][256] 104 | uint32_t ctx_; 105 | uint32_t *p_delta_; 106 | 107 | uint32_t p_repdist_[64 * 4]; 108 | // len 0 use 8 slots 109 | // len 1 - 2 use 16 slots 110 | // len 3, 4, 5, >= 6 use 32 slots 111 | uint32_t p_dist_[8 + 16 * 2 + 32 * 4]; //len 0 , < 64 112 | 113 | uint32_t p_longlen_; 114 | 115 | // prob to bits num 116 | uint32_t p_2_bits_[4096>>3]; 117 | 118 | 
uint32_t p_matchlen_slot_[2]; 119 | uint32_t p_matchlen_extra1_[8]; 120 | uint32_t p_matchlen_extra2_[8]; 121 | uint32_t p_matchlen_extra3_[128]; 122 | uint32_t p_matchdist_extra_[29 * 16]; 123 | 124 | static const uint32_t dist_table_[33]; 125 | static const uint32_t rev16_table_[16]; 126 | }; 127 | 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /src/libcsc/csc_profiler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #ifdef _HAVE_PROFILER_ 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | 10 | 11 | uint32_t matchlen_cnt[32] = {0}; 12 | uint32_t repmatch_idx_cnt[5] = {0}; 13 | uint32_t lits_cnt = 0; 14 | uint32_t match1b_cnt = 0; 15 | string lits; 16 | 17 | 18 | struct DistMatch { 19 | uint32_t match_len; 20 | uint32_t match_dist; 21 | }; 22 | 23 | void PEncodeLiteral(uint32_t c) 24 | { 25 | lits_cnt++; 26 | 27 | if (c == '\r') 28 | lits += "\\r"; 29 | else if (c == '\n') 30 | lits += "\\n"; 31 | else 32 | lits.append(1, c); 33 | 34 | if (lits.size() % 120 == 0) 35 | lits.append(1, '\n'); 36 | } 37 | 38 | void PEncodeRepMatch(uint32_t len, uint32_t idx) 39 | { 40 | repmatch_idx_cnt[idx]++; 41 | if (len < 31) 42 | matchlen_cnt[len]++; 43 | else 44 | matchlen_cnt[31]++; 45 | } 46 | 47 | void PEncodeMatch(uint32_t len, uint32_t dist) 48 | { 49 | repmatch_idx_cnt[4]++; 50 | if (len < 31) 51 | matchlen_cnt[len]++; 52 | else 53 | matchlen_cnt[31]++; 54 | } 55 | 56 | void PEncode1BMatch() 57 | { 58 | match1b_cnt++; 59 | } 60 | 61 | void PWriteLog() 62 | { 63 | printf("Literal %u: \n\n\n\n\n", lits_cnt, lits.c_str()); 64 | for(int i = 0; i < 4; i++) 65 | printf("Rep Match idx %d cnt: %u\n", i, repmatch_idx_cnt[i]); 66 | printf("Match cnt: %u\n", repmatch_idx_cnt[4]); 67 | printf("\n\n1B Match Cnt: %u\n", match1b_cnt); 68 | for(int i = 0; i < 32; i++) 69 | printf("Match len of %d cnt: %u\n", i, matchlen_cnt[i]); 70 | printf("Match len 
>= 31 cnt: %u\n", matchlen_cnt[31]); 71 | } 72 | 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /src/libcsc/csc_profiler.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_PROFILE_H_ 2 | #define _CSC_PROFILE_H_ 3 | #include 4 | 5 | 6 | #ifdef _HAVE_PROFILER_ 7 | void PEncodeLiteral(uint32_t c); 8 | void PEncodeRepMatch(uint32_t len, uint32_t idx); 9 | void PEncodeMatch(uint32_t len, uint32_t dist); 10 | void PEncode1BMatch(); 11 | void PWriteLog(); 12 | #else 13 | #define PEncodeLiteral(c) 14 | #define PEncodeRepMatch(len, idx) 15 | #define PEncodeMatch(len, dist) 16 | #define PEncode1BMatch() 17 | #define PWriteLog() 18 | #endif 19 | 20 | #endif 21 | 22 | -------------------------------------------------------------------------------- /src/libcsc/csc_typedef.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSC_TYPEDEF_H_ 2 | #define _CSC_TYPEDEF_H_ 3 | 4 | #include 5 | #include 6 | 7 | const uint32_t KB = 1024; 8 | const uint32_t MB = 1048576; 9 | const uint32_t MinBlockSize = 8 * KB; 10 | 11 | 12 | const uint32_t MaxDictSize = 1024 * MB;//Don't change 13 | const uint32_t MinDictSize = 32 * KB;//Don't change 14 | 15 | #define MIN(a,b) ((a)<(b)?(a):(b)) 16 | #define MAX(a,b) ((a)>(b)?(a):(b)) 17 | 18 | 19 | /******Block Type*************/ 20 | const uint32_t DT_NONE = 0x00; 21 | const uint32_t DT_NORMAL = 0x01; 22 | const uint32_t DT_ENGTXT = 0x02; 23 | const uint32_t DT_EXE = 0x03; 24 | const uint32_t DT_FAST = 0x04; 25 | 26 | /////////////////////////// 27 | const uint32_t DT_NO_LZ = 0x05; 28 | 29 | //const uint32_t DT_AUDIO = 0x06; 30 | //const uint32_t DT_AUDIO = 0x06; 31 | const uint32_t DT_ENTROPY = 0x07; 32 | const uint32_t DT_BAD = 0x08; 33 | const uint32_t SIG_EOF = 0x09; 34 | const uint32_t DT_DLT = 0x10; 35 | const uint32_t DLT_CHANNEL_MAX = 5; 36 | const uint32_t DltIndex[DLT_CHANNEL_MAX]={1,2,3,4,8}; 
37 | 38 | // DT_SKIP means same with last one 39 | const uint32_t DT_SKIP = 0x1E; 40 | const uint32_t DT_MAXINVALID = 0x1F; 41 | /******Block Type*************/ 42 | 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /src/libcsc/decomp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | struct StdioSeqStream 8 | { 9 | union { 10 | ISeqInStream is; 11 | ISeqOutStream os; 12 | }; 13 | FILE *f; 14 | }; 15 | 16 | 17 | int stdio_read(void *p, void *buf, size_t *size) 18 | { 19 | StdioSeqStream *sss = (StdioSeqStream *)p; 20 | *size = fread(buf, 1, *size, sss->f); 21 | return 0; 22 | } 23 | 24 | size_t stdio_write(void *p, const void *buf, size_t size) 25 | { 26 | StdioSeqStream *sss = (StdioSeqStream *)p; 27 | return fwrite(buf, 1, size, sss->f); 28 | } 29 | 30 | int show_progress(void *p, UInt64 insize, UInt64 outsize) 31 | { 32 | (void)p; 33 | printf("\r%llu -> %llu\t\t\t\t", insize, outsize); 34 | return 0; 35 | } 36 | 37 | int main(int argc, char *argv[]) 38 | { 39 | FILE *fin, *fout; 40 | fin = fopen(argv[1], "rb"); 41 | fout = fopen(argv[2], "wb"); 42 | if (fin == NULL || fout == NULL) { 43 | fprintf(stderr, "File open failed\n"); 44 | return -1; 45 | } 46 | 47 | StdioSeqStream isss, osss; 48 | isss.f = fin; 49 | isss.is.Read = stdio_read; 50 | osss.f = fout; 51 | osss.os.Write = stdio_write; 52 | ICompressProgress prog; 53 | prog.Progress = show_progress; 54 | 55 | CSCProps p; 56 | unsigned char buf[CSC_PROP_SIZE]; 57 | (void)(fread(buf, 1, CSC_PROP_SIZE, fin) + 1); 58 | CSCDec_ReadProperties(&p, buf); 59 | CSCDecHandle h = CSCDec_Create(&p, (ISeqInStream*)&isss, NULL); 60 | CSCDec_Decode(h, (ISeqOutStream*)&osss, &prog); 61 | CSCDec_Destroy(h); 62 | fclose(fin); 63 | fclose(fout); 64 | 65 | printf("\n"); 66 | return 0; 67 | } 68 | 69 | -------------------------------------------------------------------------------- 
/src/others/dist_table.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | static const uint32_t dist_table_[33] = { 6 | 0, 1, 2, 3, 7 | 5, 9, 17, 33, 8 | 65, 129, 257, 513, 9 | 1025, 2049, 4097, 8193, 10 | 16385, 32769, 65537, 131073, 11 | 262145, 524289, 1048577, 2097153, 12 | 4194305, 8388609, 16777217, 33554433, 13 | 67108865, 134217729, 268435457, 536870913, 14 | 1073741825, 15 | }; 16 | 17 | int main() 18 | { 19 | uint32_t last_dist = 0; 20 | for(uint32_t i = 0; i <= 32; i++) { 21 | uint32_t dist = 0; 22 | uint32_t extra = 0; 23 | if (i < 3) { 24 | dist = i; 25 | last_dist = dist; 26 | } else { 27 | extra = i - 2; 28 | dist = last_dist + (1 << extra); 29 | } 30 | //printf("%3u\t%2u bits\t%9u - %9u\n", i, extra, i < 3? last_dist : last_dist + 1, dist); 31 | printf("%u,\t", i < 3? last_dist : last_dist + 1); 32 | if ((i + 1) % 4 == 0) printf("\n"); 33 | last_dist = dist; 34 | } 35 | 36 | printf("\n\n"); 37 | 38 | for(uint32_t i = 0; i < 16; i++) { 39 | uint32_t r = 0; 40 | if (i & 1) r += 8; 41 | if (i & 2) r += 4; 42 | if (i & 4) r += 2; 43 | if (i & 8) r += 1; 44 | printf("%u,\t", r); 45 | if ((i + 1) % 4 == 0) printf("\n"); 46 | } 47 | 48 | printf("\n\n"); 49 | uint32_t dist; 50 | while(scanf("%u", &dist) != EOF) { 51 | uint32_t l = 0, r = 32, mid; 52 | while(l + 1 < r) { 53 | uint32_t mid = (l + (r - l) / 2); 54 | if (dist_table_[mid] > dist) 55 | r = mid; 56 | else if (dist_table_[mid] < dist) 57 | l = mid; 58 | else 59 | l = r = mid; 60 | } 61 | uint32_t slot = l; 62 | 63 | uint32_t extra_len = 0, extra_bits = slot > 2? 
slot - 2 : 0; 64 | if (extra_bits) { 65 | extra_len = dist - (1 << extra_bits) - 1; 66 | } 67 | printf("%u -> slot: %u, extra_bits %u, extra_len: %u\n", dist, slot, extra_bits, extra_len); 68 | } 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /src/others/make_dict.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | using namespace std; 10 | 11 | 12 | #define BSIZE 10000000 13 | int main(int argc, char *argv[]) 14 | { 15 | if (argc < 2) return 1; 16 | 17 | FILE *f = fopen(argv[1], "rb"); 18 | if (f == NULL) return 1; 19 | 20 | 21 | unordered_map m; 22 | #define LC(x) (x >= 'a' && x <= 'z') 23 | 24 | uint8_t *buf = new uint8_t[BSIZE + 3]; 25 | for(size_t p = 0;;) { 26 | size_t n = fread(buf, 1, BSIZE, f); 27 | for(size_t i = 0; i < n; i++) { 28 | if (LC(buf[i]) && LC(buf[i + 1])) { 29 | string key; 30 | key.append(1, buf[i]); 31 | key.append(1, buf[i + 1]); 32 | } 33 | if (LC(buf[i]) && LC(buf[i + 1]) && LC(buf[i + 2])) { 34 | string key; 35 | key.append(1, buf[i]); 36 | key.append(1, buf[i + 1]); 37 | key.append(1, buf[i + 2]); 38 | m[key] += 1; 39 | } 40 | if (LC(buf[i]) && LC(buf[i + 1]) 41 | && LC(buf[i + 2]) && LC(buf[i + 3])) { 42 | string key; 43 | key.append(1, buf[i]); 44 | key.append(1, buf[i + 1]); 45 | key.append(1, buf[i + 2]); 46 | key.append(1, buf[i + 3]); 47 | m[key] += 1; 48 | } 49 | } 50 | if (n < BSIZE) 51 | break; 52 | p += n; 53 | fprintf(stderr, "\r%u\t\t", p); 54 | } 55 | fclose(f); 56 | delete[] buf; 57 | 58 | fprintf(stderr, "\n%u keys got\n", m.size()); 59 | vector> sorted; 60 | for(const auto &i : m) 61 | sorted.push_back(i); 62 | 63 | sort(sorted.begin(), sorted.end(), [&](const auto& a, const auto& b) { return a.second > b.second; }); 64 | for(int i = 0; i < 256; i++) 65 | printf("%3d: %6s\t\t%u\n", i, sorted[i].first.c_str(), sorted[i].second); 66 | 67 | 
printf("\n\n"); 68 | for(int i = 1; i < 123; i++) { 69 | if ((i - 1) % 6 == 0) printf("\n"); 70 | printf("\"%s\", ", sorted[i - 1].first.c_str()); 71 | } 72 | 73 | return 0; 74 | } 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /src/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | 4 | md5sum /disk2/D/tar6 5 | echo "encoding..." 6 | ./test c /disk2/D/tar6 /tmp/_t.csc 7 | echo "encode done" 8 | echo "decoding..." 9 | ./test d /tmp/_t.csc /tmp/out 10 | echo "decode done" 11 | md5sum /tmp/out 12 | 13 | --------------------------------------------------------------------------------