├── vs ├── anscdf_avx2.c ├── transpose_avx2.c ├── vs2022 │ ├── TurboRC.sln │ └── TurboRCapp.vcxproj ├── getopt.h ├── stdint.h └── inttypes.h ├── .gitignore ├── .gitmodules ├── libdivsufsort ├── unbwt.h ├── lib │ └── daware.cpp ├── include │ ├── sort │ │ ├── LICENCE.md │ │ ├── inplace.h │ │ ├── copy.h │ │ ├── detail │ │ │ ├── suffix.h │ │ │ └── misc.h │ │ └── suffix.h │ ├── divsufsort.h │ └── divsufsort_private.h ├── LICENSE └── unbwt.c ├── makefile.vs ├── rccm_ss.c ├── rcqlfc_s.c ├── rc_s.c ├── bec_.h ├── rc_ss.c ├── rccm_sf.c ├── rcqlfc_ss.c ├── rcqlfc_sf.c ├── include_ ├── bec.h ├── vlcbit.h ├── rcutil.h └── time_.h ├── .github └── workflows │ └── build.yaml ├── rccm_s.c ├── rc_sf.c ├── mbc_s.h ├── mbc_sf.h ├── mbc_ss.h ├── bec_b.c ├── mb_on.h ├── FSM0.txt ├── rcqlfc_.c ├── bec_bstm.h ├── makefile ├── bec_.c ├── rccdf_.h ├── rcbwt.c ├── cdf_.h ├── rccm_.c └── include └── anscdf.h /vs/anscdf_avx2.c: -------------------------------------------------------------------------------- 1 | #include "../anscdf.c" 2 | -------------------------------------------------------------------------------- /vs/transpose_avx2.c: -------------------------------------------------------------------------------- 1 | #include "../transpose.c" 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs/ 2 | msvc.build/ 3 | *.vcxproj.user 4 | *.obj 5 | *.exe 6 | *.lib 7 | *.o 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libsais"] 2 | path = libsais 3 | url = https://github.com/IlyaGrebnov/libsais.git 4 | -------------------------------------------------------------------------------- /libdivsufsort/unbwt.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | 
extern "C" { 3 | #endif 4 | int obwt_unbwt_biPSIv2(const unsigned char *T, unsigned char *U, unsigned *PSI, int n, int pidx); 5 | #ifdef __cplusplus 6 | } 7 | #endif 8 | -------------------------------------------------------------------------------- /libdivsufsort/lib/daware.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/divsufsort_private.h" 2 | #include "../include/sort/suffix.h" 3 | 4 | // Call the library 5 | void daware(saidx_t* SAf, saidx_t* SAl, saidx_t* ISAf, saidx_t* Af, saidx_t* Al) { 6 | auto* Sf = reinterpret_cast*>(Af); 7 | auto* Sl = Sf + (Al - Af) / sizeof(decltype(*Sf)) * sizeof(saidx_t); 8 | sort::suffix::daware(SAf, SAl, ISAf, Sf, Sl); 9 | } 10 | -------------------------------------------------------------------------------- /libdivsufsort/include/sort/LICENCE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Christoph Diegelmann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /libdivsufsort/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2003 Yuta Mori All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /makefile.vs: -------------------------------------------------------------------------------- 1 | # Copyright (C) powturbo 2015-2022 2 | # nmake "NCODEC1=1" "NCODEC2=1" /f makefile.msc 3 | # or 4 | # nmake /f makefile.msc 5 | 6 | .SUFFIXES: .c .obj .dllobj 7 | 8 | CC = cl /nologo 9 | LD = link /nologo 10 | AR = lib /nologo 11 | CFLAGS = /MD -I. 12 | LDFLAGS = 13 | 14 | LIB_LIB = libttrc.lib 15 | LIB_DLL = turborc.dll 16 | LIB_IMP = turborc.lib 17 | 18 | OBJS = rc_ss.obj rc_s.obj rccdf.obj rccm_s.obj rccm_ss.obj rcutil.obj bec_b.obj 19 | 20 | !IF "$(AVX2)" == "1" 21 | DEFS = $(DEFS) /D__AVX2__ 22 | !endif 23 | 24 | !IF "$(NSSE2)" != "1" 25 | DEFS = $(DEFS) /D__SSE__ 26 | !endif 27 | 28 | DLL_OBJS = $(OBJS:.obj=.dllobj) 29 | 30 | all: $(LIB_LIB) turborc.exe 31 | 32 | #$(LIB_DLL) $(LIB_IMP) 33 | 34 | .c.obj: 35 | $(CC) -c /Fo$@ /O2 $(CFLAGS) $(DEFS) $** 36 | 37 | .cc.obj: 38 | $(CC) -c /Fo$@ /O2 $(CFLAGS) $(DEFS) $** 39 | 40 | .c.dllobj: 41 | $(CC) -c /Fo$@ /O2 $(CFLAGS) $(DEFS) /DLIB_DLL $** 42 | 43 | $(LIB_LIB): $(OBJS) 44 | $(AR) $(ARFLAGS) -out:$@ $(OBJS) 45 | 46 | $(LIB_DLL): $(DLL_OBJS) 47 | $(LD) $(LDFLAGS) -out:$@ -dll -implib:$(LIB_IMP) $(DLL_OBJS) 48 | 49 | $(LIB_IMP): $(LIB_DLL) 50 | 51 | turborc.exe: turborc.obj vs/getopt.obj $(LIB_LIB) 52 | $(LD) $(LDFLAGS) -out:$@ $** 53 | 54 | clean: 55 | -del *.dll *.exe *.exp *.obj *.dllobj *.lib *.manifest 2>nul 56 | 57 | -------------------------------------------------------------------------------- /rccm_ss.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder - CM predictor (16 bits counters) 25 | #include "include_/conf.h" 26 | 27 | #define RC_MACROS 28 | #define RC_BITS 16 // RC_SIZE + RC_IO: set in turborc_.h 29 | #include "turborc_.h" 30 | 31 | #include "mbc_ss.h" // simple predictor 32 | #include "rccm_.c" // template functions 33 | -------------------------------------------------------------------------------- /rcqlfc_s.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder bwt - simple predictor (16 bits counters) 25 | #include 26 | #include "include_/conf.h" 27 | 28 | #define RC_MACROS 29 | #define RC_BITS 11 // RC_SIZE + RC_IO: set in turborc_.h 30 | #include "turborc_.h" 31 | #include "mbc_s.h" // simple predictor 32 | #include "rcqlfc_.c" // bwt template functions 33 | -------------------------------------------------------------------------------- /rc_s.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder - simple predictor (16 bits counters) 25 | 26 | #include 27 | #include "include_/conf.h" 28 | 29 | #define RC_MACROS 30 | #define RC_BITS 15 // RC_SIZE + RC_IO: set in turborc_.h 31 | #include "turborc_.h" 32 | 33 | #include "mbc_s.h" // simple predictor 34 | #include "rc_.c" // template functions 35 | -------------------------------------------------------------------------------- /bec_.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // Bit entropy coder : 8+16 bits 25 | #define ECN 12 26 | #pragma pack(1) 27 | typedef struct _PACKED ectab_t { uint8_t cw, cl; } _PACKED ectab_t; // LUT entry 28 | #pragma pack() 29 | 30 | #ifndef IN_ECTAB 31 | extern ectab_t bectab[ECN][ECN][ECN][ECN]; // encoding acceleration LUT 32 | #endif 33 | 34 | typedef struct { unsigned l,n,lx,hx; } cw_t; 35 | 36 | void bectabini(); 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /rc_ss.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder encode/decode functions using dual speed predictor "ss" (two 16 bits counters) 25 | 26 | #include 27 | #include "include_/conf.h" 28 | 29 | #define RC_MACROS 30 | #define RC_BITS 16 // RC_SIZE=64 + RC_IO=32 : set in turborc_.h 31 | #include "turborc_.h" 32 | #include "mbc_ss.h" // dual speed predictor 33 | 34 | #include "rc_.c" // template functions 35 | -------------------------------------------------------------------------------- /rccm_sf.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder - CM predictor (16 bits counters) 25 | 26 | #include 27 | #include "include/turborc.h" 28 | #include "include_/conf.h" 29 | 30 | #define RC_MACROS 31 | #define RC_BITS 16 // RC_SIZE + RC_IO: set in turborc_.h 32 | #include "turborc_.h" 33 | extern fsm_t fsm[]; 34 | 35 | #include "mbc_sf.h" // simple predictor 36 | #include "rccm_.c" // template functions 37 | -------------------------------------------------------------------------------- /rcqlfc_ss.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder - dual speed predictor (two 16 bits counters) 25 | // Reference: http://cbloomrants.blogspot.com/2017/01/order-0-estimators-for-data-compression.html 26 | #include 27 | #include "include_/conf.h" 28 | 29 | #define RC_MACROS 30 | #define RC_BITS 16 // RC_SIZE=64 + RC_IO=32 : set in turborc_.h 31 | #include "turborc_.h" 32 | #include "mbc_ss.h" // dual speed predictor 33 | #include "rcqlfc_.c" // bwt template functions 34 | -------------------------------------------------------------------------------- /rcqlfc_sf.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder - fsm 25 | // Reference: http://cbloomrants.blogspot.com/2017/01/order-0-estimators-for-data-compression.html 26 | 27 | #include 28 | #include "include/turborc.h" 29 | #include "include_/conf.h" 30 | 31 | #define RC_MACROS 32 | #define RC_BITS 15 // RC_SIZE=64 + RC_IO=32 : set in turborc_.h 33 | #include "turborc_.h" 34 | #include "mbc_sf.h" // fsm predictor 35 | 36 | extern fsm_t fsm[]; 37 | #include "rcqlfc_.c" // template functions 38 | -------------------------------------------------------------------------------- /include_/bec.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC Range Coder : bit entropy coder include header 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | unsigned becdec8(unsigned char *__restrict in, unsigned outlen, uint8_t *__restrict out); // 8 bits 30 | unsigned becenc8(uint8_t *__restrict in, unsigned inlen, unsigned char *__restrict out); 31 | unsigned becdec16(unsigned char *__restrict in, unsigned outlen, uint16_t *__restrict out); // 16 bits 32 | unsigned becenc16(uint16_t *__restrict in, unsigned inlen, unsigned char *__restrict out); 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | linux: 11 | runs-on: '${{ matrix.os }}' 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | os: [ubuntu-latest] 16 | compiler: ["gcc", "clang"] 17 | env: 18 | CC: ${{ matrix.compiler }} 19 | steps: 20 | - uses: actions/checkout@v3 21 | with: 22 | submodules: 'true' 23 | - run: git submodule update --init --recursive 24 | - run: make 25 | - run: lscpu 26 | - run: ./turborc -e0 turborc -V2 27 | 28 | macos: 29 | runs-on: '${{ matrix.os }}' 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | os: [macos-latest] 34 | compiler: ["clang"] 35 | env: 36 | CC: ${{ matrix.compiler }} 37 | steps: 38 | - uses: actions/checkout@v3 39 | with: 40 | submodules: 'true' 41 | - run: git submodule update --init --recursive 42 | - run: make 43 | - run: sysctl -n machdep.cpu.brand_string 44 | - run: ./turborc -e0 turborc -V2 45 | 46 | windows: 47 | runs-on: windows-latest 48 | defaults: 49 | run: 50 | 
shell: msys2 {0} 51 | steps: 52 | - uses: actions/checkout@v3 53 | with: 54 | submodules: 'true' 55 | - uses: msys2/setup-msys2@v2 56 | with: 57 | msystem: MINGW64 58 | install: git make mingw-w64-x86_64-gcc 59 | update: true 60 | - run: git submodule update --init --recursive 61 | - run: make 62 | - run: ./turborc -e0 turborc.exe -V2 63 | 64 | 65 | -------------------------------------------------------------------------------- /rccm_s.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
// Fill `rows` SSE rows with the initial linear probability ramp.
//
// Each row has 17 interpolation points (j = 0..16). Point j is set to
// j/16 scaled to RC_BITS bits, i.e. j << (RC_BITS-4). The last point would be
// exactly 1 << RC_BITS, which is not a valid probability (and does not fit in
// an unsigned short when RC_BITS == 16), so it is reduced by one.
//
// The clamp is keyed on the last bucket index (16), not on RC_BITS: the two
// only coincide when RC_BITS == 16, so the ramp stays correct if RC_BITS is
// ever changed for this translation unit.
static void sse_linear_fill(unsigned short (*sse)[17], unsigned rows) {
  for (unsigned i = 0; i < rows; ++i)
    for (unsigned j = 0; j <= 16; ++j)
      sse[i][j] = (unsigned short)((j << (RC_BITS - 4)) - (j == 16));
}

// Initialise the two-bank SSE table (2 banks x 256 contexts, 17 points each).
// Bank r / context i lives at row (r << 8 | i), so the banks are simply the
// 512 consecutive rows of the array.
void ssebinit(unsigned short sse2[1<<(1+8)][17]) {
  sse_linear_fill(sse2, 1u << (1 + 8));
}

// Initialise the single-bank SSE table (256 contexts, 17 points each).
void sseinit(unsigned short sse[1<<8][17]) {
  sse_linear_fill(sse, 1u << 8);
}
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder encode/decode functions using fsm predictor 25 | 26 | #include 27 | #include "include_/conf.h" 28 | #include "include/turborc.h" 29 | 30 | #define RC_MACROS 31 | #define RC_BITS 15 // RC_SIZE=64 + RC_IO=32 : set in turborc_.h 32 | #include "turborc_.h" 33 | #include "mbc_sf.h" // fsm predictor 34 | 35 | fsm_t fsm[N_STATES]; 36 | 37 | #define BUFSIZE (1u<<20) 38 | void fsm_init(int id) { 39 | char s[256], buf[BUFSIZE]; 40 | unsigned l; 41 | sprintf(s, "FSM%d.txt", id); 42 | FILE *f = fopen(s, "r"); if(!f) { fprintf(stderr, "FSM file '%s' not found\n", s); exit(0); } 43 | if((l = fread(buf, 1, BUFSIZE, f)) == -1) 44 | perror("fread failed\n"); 45 | printf("fsm file '%s' read.length=%d\n", s, l); 46 | buf[l] = 0; 47 | fsminit_(buf, fsm, N_STATES); 48 | } 49 | 50 | #include "rc_.c" // template functions 51 | -------------------------------------------------------------------------------- /vs/vs2022/TurboRC.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.28307.757 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TurboRC", "TurboRC.vcxproj", "{A162F37F-183F-4250-88AB-9B9FBDE30B04}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TurboRCApp", "TurboRCApp.vcxproj", "{6876BEB8-2B45-48B9-8381-1D4094FE8868}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|x64 = Debug|x64 13 | Debug|x86 = Debug|x86 14 | Release|x64 = Release|x64 15 | Release|x86 = Release|x86 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | 
{A162F37F-183F-4250-88AB-9B9FBDE30B04}.Debug|x64.ActiveCfg = Debug|x64 19 | {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Debug|x64.Build.0 = Debug|x64 20 | {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Debug|x86.ActiveCfg = Debug|Win32 21 | {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Debug|x86.Build.0 = Debug|Win32 22 | {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Release|x64.ActiveCfg = Release|x64 23 | {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Release|x64.Build.0 = Release|x64 24 | {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Release|x86.ActiveCfg = Release|Win32 25 | {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Release|x86.Build.0 = Release|Win32 26 | {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Debug|x64.ActiveCfg = Debug|x64 27 | {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Debug|x64.Build.0 = Debug|x64 28 | {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Debug|x86.ActiveCfg = Debug|Win32 29 | {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Debug|x86.Build.0 = Debug|Win32 30 | {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Release|x64.ActiveCfg = Release|x64 31 | {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Release|x64.Build.0 = Release|x64 32 | {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Release|x86.ActiveCfg = Release|Win32 33 | {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Release|x86.Build.0 = Release|Win32 34 | EndGlobalSection 35 | GlobalSection(SolutionProperties) = preSolution 36 | HideSolutionNode = FALSE 37 | EndGlobalSection 38 | GlobalSection(ExtensibilityGlobals) = postSolution 39 | SolutionGuid = {A02524FA-10E2-4E1C-BE79-0AE7B077D2CE} 40 | EndGlobalSection 41 | EndGlobal 42 | -------------------------------------------------------------------------------- /mbc_s.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your 
option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder - simple predictor (16 bits counter) 25 | #define RC_PRDID 1 26 | #define RC_PRD s 27 | 28 | #ifdef RC_MACROS 29 | #define mbu unsigned short 30 | #define mbu_p(_mb_,_prm0_) (*(_mb_)) 31 | #else 32 | typedef unsigned short mbu; 33 | static inline int mbu_p(mbu *mb, int _prm0_) { return (*mb); } // get probability 34 | #endif 35 | 36 | #define mbu_probinit() (1<<(RC_BITS-1)) // initial probability 0.5 37 | #define mbu_init(_m_, _p0_) { *(_m_) = _p0_; } // predictor init 38 | 39 | #ifdef RATE_S // rate (=_prm0_) as parameter 40 | #define RCPRM //,unsigned RCPRM0 41 | #define RCPRMC //,RCPRM0 42 | #define mbu_update( _mb_, _mbp_, _prm0_, _prm1_, _bit_) *(_mb_) = (_mbp_) - ((((_mbp_) - (-_bit_ & (1<> _prm0_) + _bit_) 43 | #define mbu_update0(_mb_, _mbp_, _prm0_, _prm1_) mbu_update( _mb_, _mbp_, _prm0_, _prm1_, 0) 44 | #define mbu_update1(_mb_, _mbp_, _prm0_, _prm1_) mbu_update( _mb_, _mbp_, _prm0_, _prm1_, 1) 45 | #else 46 | #define RCPRM 47 | #define RCPRMC 48 | #if 0 49 | #define mbu_update1(_mb_, _mbp_, _prm0_, _prm1_) (*(_mb_) = (_mbp_) + (((1u<> 5)) // Predictor update for bit 0 50 | #define mbu_update0(_mb_, _mbp_, _prm0_, _prm1_) (*(_mb_) = (_mbp_) - ((_mbp_) >> 5)) // Predictor update for bit 1 51 | #define mbu_update( _mb_, 
_mbp_, _prm0_, _prm1_, _bit_) (_bit_)?mbu_update1(_mb_, _mbp_, _prm0_, _prm1_):mbu_update0(_mb_, _mbp_, _prm0_, _prm1_) 52 | #else 53 | #define mbu_update0(_mb_, _mbp_, _prm0_, _prm1_) *(_mb_) = (_mbp_) - ((_mbp_) >> 5) 54 | #define mbu_update1(_mb_, _mbp_, _prm0_, _prm1_) *(_mb_) = (_mbp_) - ( (((_mbp_) - (1<> 5) + 1) 55 | #define mbu_update( _mb_, _mbp_, _prm0_, _prm1_, _bit_) *(_mb_) = (_mbp_) - ((((_mbp_) - (-_bit_ & (1<> 5) + _bit_) 56 | #endif 57 | #endif 58 | 59 | #include "mbc.h" 60 | -------------------------------------------------------------------------------- /mbc_sf.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder - fsm predictor 25 | // Reference: https://encode.su/threads/3681-GDCC-21-T5-FSM-Counter-Optimization 26 | #include // strtol 27 | 28 | #define RC_PRDID 3 29 | #define RC_PRD sf 30 | #define RCPRM1 0 31 | #define RCPRM ,fsm_t *RCPRM0 32 | #define RCPRMC ,RCPRM0 33 | 34 | #if RC_BITS < 15 35 | #error "RC_BITS must be >= 15" 36 | #endif 37 | #define MBU_PRM 0 38 | #define mbu_probinit() 0 39 | 40 | #define N_STATES 32768 41 | 42 | typedef unsigned short mbu; 43 | 44 | #define mbu_p(_mb_,_fsm_) (_fsm_[*(_mb_)].p) 45 | #define mbu_init(_m_, _p0_) { *(_m_) = _p0_; } //static inline unsigned mbu_p(mbu *mb, fsm_t *_fsm_) { return fsm[*mb].p; } 46 | 47 | #define mbu_update0(_mb_, _mbp_, _fsm_, _prm1_) (*(_mb_) = _fsm_[*(_mb_)].s[0]) 48 | #define mbu_update1(_mb_, _mbp_, _fsm_, _prm1_) (*(_mb_) = _fsm_[*(_mb_)].s[1]) 49 | 50 | #define mbu_update( _mb_, _mbp_, _fsm_, _prm1_, _bit_) (*(_mb_) = _fsm_[*(_mb_)].s[_bit_]) 51 | 52 | #ifndef min 53 | #define min(x,y) (((x)<(y)) ? (x) : (y)) 54 | #define max(x,y) (((x)>(y)) ? 
// Read the next unsigned decimal number from a NUL-terminated text buffer.
//
//   _p : in/out cursor into the text. On return it points just past the
//        digits that were consumed (or at the terminating NUL when no
//        number was left).
//
// Every byte that is not an ASCII digit ('0'..'9') is treated as a separator
// and skipped, so "193,100,17664" yields 193, 100 and 17664 on successive
// calls. Returns 0 when the end of the text is reached without finding a digit.
static inline unsigned fsmget_(unsigned char **_p) {
  unsigned char *p = *_p;
  char          *e;                                  // strtoul() reports the end position through a char **
  unsigned long  r;

  while(*p && (*p < '0' || *p > '9')) p++;           // skip separators, stop at a digit or at the NUL

  // p now addresses a digit or the NUL, so strtoul never sees a sign or
  // leading white space. The casts only change the signedness of the
  // pointee; the bytes that are parsed are the same.
  r   = strtoul((const char *)p, &e, 10);
  *_p = (unsigned char *)e;
  return (unsigned)r;
}
-------------------------------------------------------------------------------- /bec_b.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // Bit entropy coder with bitio 25 | #include "include_/conf.h" 26 | #include "rcutil_.h" 27 | #define IN_BECTAB 28 | #include "include_/bec.h" 29 | #include "bec_.h" 30 | #include "bec_bstm.h" 31 | 32 | ectab_t bectab[ECN][ECN][ECN][ECN] 33 | #if ECN == 8 34 | = 35 | #include "bectab8_.h" 36 | #elif ECN == 12 37 | = 38 | #include "bectab12_.h" 39 | #elif ECN == 16 40 | = 41 | #include "bectab16_.h" 42 | #else 43 | ; 44 | #endif 45 | static unsigned becini_; 46 | void bectabini() { unsigned lx,hx,li,hi,lhi; if(becini_) return; becini_++; 47 | #if ECN != 8 && ECN != 12 && ECN != 16 48 | for( lx = 0; lx < ECN; lx++) 49 | for( hx = 0; hx < ECN; hx++) 50 | for( li = 0; li <= lx; li++) 51 | for(hi = 0; hi <= hx; hi++) 52 | if((lhi = li + hi) < ECN) { 53 | unsigned l = li, r = hi, ml = lx, mr = hx, t 
= (l + r), cl, msb, cw; 54 | if(t > ml) { unsigned ih = t - ml; mr -= ih; t -= ih; } 55 | if(t > mr) { unsigned il = t - mr; l -= il; t -= il; } 56 | cl = __bsr32(t|1); 57 | msb = (l | (1u << cl)) <= t; 58 | cw = l << msb | l >> cl; 59 | cl += msb; cw &= (1u << cl) - 1; 60 | ectab_t *e = &bectab[lx][hx][li][lhi]; e->cw = cw; e->cl = cl; 61 | } 62 | #if 0 // generate LUT table 8,12,16: "turborc file 2>becttabN_.h" 63 | fprintf(stderr,"{"); 64 | for( unsigned i = 0; i < ECN; ++i) { fprintf(stderr,"\n{"); 65 | for( unsigned j = 0; j < ECN; ++j) { fprintf(stderr,"\n{"); 66 | for( unsigned k = 0; k < ECN; ++k) { fprintf(stderr,"\n{"); 67 | for(unsigned l = 0; l < ECN; ++l) { 68 | ectab_t *e = &bectab[i][j][k][l]; 69 | fprintf(stderr,"{%u,%u}%c", e->cw, e->cl,l==ECN-1?' ':','); 70 | } fprintf(stderr,"}%c", k==ECN-1?' ':','); 71 | } fprintf(stderr,"}%c", j==ECN-1?' ':','); 72 | } fprintf(stderr,"}%c", i==ECN-1?' ':','); 73 | } fprintf(stderr,"};/*%d*/\n", 1< 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #include "detail/misc.h" 40 | #include "detail/inplace.h" 41 | 42 | namespace sort { 43 | namespace inplace { 44 | 45 | // Fast general purpose multi pivot introsort 46 | // with index function and optimal callback on each equal ranges. 
47 | // One of these implementation should already be faster than std::sort 48 | // and especially for highly repetitive data 49 | 50 | // If branch misses slow down your sort use block() 51 | // In any other case (slow index function) use quick() 52 | // as it reduces the number of index() calls 53 | 54 | // Average runtime is O(n * log(m)) 55 | // Worst case is O(n * log(n)) 56 | // where m is the number of destinct values 57 | template 58 | inline void quick(T first, T last, I index, C cb) { 59 | int budget = 3 * detail::misc::ilogb(last - first + 1) >> 1; 60 | detail::inplace::quick(first, last, index, cb, budget); 61 | } 62 | 63 | template 64 | inline void quick(T first, T last, I index) { 65 | int budget = detail::misc::ilogb(last - first + 1); 66 | detail::inplace::quick(first, last, index, [](auto a, auto b) { 67 | (void) a; (void) b; 68 | }, budget); 69 | } 70 | 71 | template 72 | inline void block(T first, T last, I index, C cb) { 73 | int budget = 2 * detail::misc::ilogb(last - first + 1); 74 | detail::inplace::quick(first, last, index, cb, budget); 75 | } 76 | 77 | template 78 | inline void block(T first, T last, I index) { 79 | int budget = 2 * detail::misc::ilogb(last - first + 1); 80 | detail::inplace::quick(first, last, index, [](auto a, auto b) { 81 | (void) a; (void) b; 82 | }, budget); 83 | } 84 | 85 | } // inplace 86 | } // sort 87 | 88 | #endif // SORT_INPLACE_H 89 | -------------------------------------------------------------------------------- /vs/getopt.h: -------------------------------------------------------------------------------- 1 | #ifndef __GETOPT_H__ 2 | /** 3 | * DISCLAIMER 4 | * This file has no copyright assigned and is placed in the Public Domain. 5 | * This file is a part of the w64 mingw-runtime package. 6 | * 7 | * The w64 mingw-runtime package and its code is distributed in the hope that it 8 | * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR 9 | * IMPLIED ARE HEREBY DISCLAIMED. 
This includes but is not limited to 10 | * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 11 | */ 12 | 13 | #define __GETOPT_H__ 14 | 15 | /* All the headers include this file. */ 16 | #if _MSC_VER >= 1300 17 | #include 18 | #endif 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | extern int optind; /* index of first non-option in argv */ 25 | extern int optopt; /* single option character, as parsed */ 26 | extern int opterr; /* flag to enable built-in diagnostics... */ 27 | /* (user may set to zero, to suppress) */ 28 | 29 | extern char *optarg; /* pointer to argument of current option */ 30 | 31 | extern int getopt(int nargc, char * const *nargv, const char *options); 32 | 33 | #ifdef _BSD_SOURCE 34 | /* 35 | * BSD adds the non-standard `optreset' feature, for reinitialisation 36 | * of `getopt' parsing. We support this feature, for applications which 37 | * proclaim their BSD heritage, before including this header; however, 38 | * to maintain portability, developers are advised to avoid it. 39 | */ 40 | # define optreset __mingw_optreset 41 | extern int optreset; 42 | #endif 43 | #ifdef __cplusplus 44 | } 45 | #endif 46 | /* 47 | * POSIX requires the `getopt' API to be specified in `unistd.h'; 48 | * thus, `unistd.h' includes this header. However, we do not want 49 | * to expose the `getopt_long' or `getopt_long_only' APIs, when 50 | * included in this manner. Thus, close the standard __GETOPT_H__ 51 | * declarations block, and open an additional __GETOPT_LONG_H__ 52 | * specific block, only when *not* __UNISTD_H_SOURCED__, in which 53 | * to declare the extended API. 54 | */ 55 | #endif /* !defined(__GETOPT_H__) */ 56 | 57 | #if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) 58 | #define __GETOPT_LONG_H__ 59 | 60 | #ifdef __cplusplus 61 | extern "C" { 62 | #endif 63 | 64 | struct option /* specification for a long form option... 
*/ 65 | { 66 | const char *name; /* option name, without leading hyphens */ 67 | int has_arg; /* does it take an argument? */ 68 | int *flag; /* where to save its status, or NULL */ 69 | int val; /* its associated status value */ 70 | }; 71 | 72 | enum /* permitted values for its `has_arg' field... */ 73 | { 74 | no_argument = 0, /* option never takes an argument */ 75 | required_argument, /* option always requires an argument */ 76 | optional_argument /* option may take an argument */ 77 | }; 78 | 79 | extern int getopt_long(int nargc, char * const *nargv, const char *options, 80 | const struct option *long_options, int *idx); 81 | extern int getopt_long_only(int nargc, char * const *nargv, const char *options, 82 | const struct option *long_options, int *idx); 83 | /* 84 | * Previous MinGW implementation had... 85 | */ 86 | #ifndef HAVE_DECL_GETOPT 87 | /* 88 | * ...for the long form API only; keep this for compatibility. 89 | */ 90 | # define HAVE_DECL_GETOPT 1 91 | #endif 92 | 93 | #ifdef __cplusplus 94 | } 95 | #endif 96 | 97 | #endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ 98 | -------------------------------------------------------------------------------- /libdivsufsort/unbwt.c: -------------------------------------------------------------------------------- 1 | #include "../conf.h" 2 | #include 3 | #include "unbwt.h" 4 | 5 | #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 6 | #define BSWAP16(a) a 7 | #else 8 | #define BSWAP16(a) bswap16(a) 9 | #endif 10 | 11 | // Reference: obwt_unbwt_biPSIv2 optimized by powturbo (15% faster) 12 | // openbwt: https://encode.su/threads/104-libBWT?p=22903&viewfull=1#post22903 13 | 14 | // bi-gram based PSI array + binary search algorithm v2 (t=NUM_TOPBITS). space: 4(n+1)+4*257+4*256*256+4*(2**t) bytes. time: O(n log 256)? 
15 | #define NUM_TOPBITS 17 16 | int obwt_unbwt_biPSIv2(const unsigned char *__restrict T, unsigned char *__restrict U, unsigned *__restrict PSI, int n, int pidx) { 17 | unsigned C[257] ={0}; 18 | unsigned D[1<<16]={0}, *LD; 19 | unsigned B2S[1U << NUM_TOPBITS]; 20 | unsigned u; 21 | int i, t, v, w, x; 22 | int c, d, e, mask, lastc; 23 | int lowshift, highshift; 24 | 25 | /* Check arguments. */ 26 | if((T == NULL) || (U == NULL) || (n < 0) || (pidx < 0) || 27 | (n < pidx) || ((0 < n) && (pidx == 0))) { return -1; } 28 | if(n <= 1) { if(T != U) { U[0] = T[0]; } return 0; } 29 | 30 | i = __bsr32(n+1)+1; //for(i = 0; 0 < (n >> i); ++i) { } 31 | highshift = (NUM_TOPBITS <= i) ? i - NUM_TOPBITS : 0; 32 | lowshift = (16 < i) ? (32 - i) : 16; 33 | mask = (1u << lowshift) - 1; 34 | 35 | // Compute the cumulative frequency of uni-grams 36 | histrcalc8(T, n, C); 37 | for(c = 0, i = 1; c < 256; ++c) { t = C[c]; C[c] = i; i += t; } C[c] = i; //R[0] = 0; PREFIXSUM(R, 256); //for(c=0; c < 257; c++) printf("%d,%d ", R[c], C[c]); exit(0); 38 | 39 | // Count the frequency of bi-grams 40 | for(c = 0; c < 256; ++c) { 41 | LD = D + (c << 8); 42 | for(i = C[c]; i < C[c + 1]; ++i) 43 | if(i != pidx) { 44 | d = T[i - (pidx < i)]; 45 | ++LD[d]; 46 | } 47 | } 48 | 49 | // Compute the cumulative frequency of bi-grams 50 | lastc = T[0]; 51 | for(c = 0, i = 1, w = 0; c < 256; ++c) { 52 | if(c == lastc) i += 1; 53 | LD = D + c; 54 | for(d = 0; d < 256; ++d) { 55 | t = LD[d << 8]; 56 | if(0 < t) { 57 | e = (c << 8) | d; 58 | for(v = i >> highshift; v < w; ++v) B2S[v] = ((unsigned short)B2S[v]) | (((unsigned )e) << 16); 59 | for(w = ((i + t - 1) >> highshift) + 1; v < w; ++v) B2S[v] = ((unsigned )e) | (((unsigned )e) << 16); 60 | } 61 | LD[d << 8] = i; i += t; 62 | } 63 | } 64 | for(v = 0; v < w; ++v) { 65 | c = (B2S[v] >> 16) ^ ((unsigned short)B2S[v]); 66 | x = bsr32(c); //for(x = 0; c != 0; ++x, c >>= 1) { } 67 | B2S[v] = (unsigned)((((unsigned short)B2S[v]) & (0xffffU << x)) | (x << 16)); 68 
| } 69 | 70 | // Compute bi-PSI array 71 | for(i = 0; i < n; ++i) { 72 | d = T[i]; 73 | t = C[d]++; 74 | if(t != pidx) { 75 | c = T[t - (pidx < t)]; 76 | t = D[(d << 8) | c]++; 77 | u = i + (pidx <= i); 78 | PSI[t] = (u << lowshift) | (((c << 8) | d) & mask); 79 | } 80 | } 81 | 82 | 83 | // Inverse BW-transform. 84 | for(c = 0; c < 256; ++c) { 85 | for(d = 0; d < c; ++d) { 86 | t = D[(d << 8) | c]; 87 | D[(d << 8) | c] = D[(c << 8) | d]; 88 | D[(c << 8) | d] = t; 89 | } 90 | } 91 | 92 | for(i = 0, t = pidx; i < n-1; i += 2) { 93 | unsigned c = B2S[t >> highshift], 94 | half = ((1u << (c >> 16)) - 1)>>1; 95 | c = (unsigned short)c; //int h=__bsr32(half+1)+1; h=h>lowshift?h-lowshift:1; 96 | #define ST { c += -(D[c + half] <= t) & (half + 1); if(half <= mask) break; half >>= 1; } 97 | for(;;) { ST;ST;ST;ST;/*ST;ST;ST;ST;*/ } //unsigned c = (unsigned short)B2S[t >> highshift]; while(D[c] <= t) c++; 98 | ctou16(&U[i]) = BSWAP16(c | (PSI[t] & mask)); 99 | t = PSI[t] >> lowshift; 100 | } 101 | if(i == n-1) { U[n - 1] = (unsigned char)lastc; } 102 | return 0; 103 | } 104 | -------------------------------------------------------------------------------- /mb_on.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 
// TurboRC: Range Coder - Sliding context order N context bits bitwise range coder
// Unlike other entropy coders, you can specify the context in bits (4 - 24 bits)
// and not only in bytes.

// Predictor declaration. _cxbits_ must be >= 4
// The table has 2^(_cxbits_+1) rows of 64 predictors: the first 2^_cxbits_ rows
// are used for the high nibble of a byte, the second 2^_cxbits_ rows for the low nibble.
#define MBC_DEF(  _m_, _cxbits_) mbu _m_[1<<(_cxbits_+1)][1<<(4+2)]
// Predictor init
#define MBC_INIT( _m_, _cxbits_) mbu_init2(_m_,1<<(_cxbits_+1), 1<<(4+2))
// Declare + init in one step
#define MBC_DEC(  _m_, _cxbits_) MBC_DEF( _m_, _cxbits_); MBC_INIT( _m_, _cxbits_)

// Context bits above the lowest _cxbits_ bits, moved to bits 4 and up.
// NOTE: the expansion deliberately ends with '|' - the nibble to be combined must follow.
#define LITSX8(_cx_, _cxbits_) (((_cx_) >> _cxbits_) << 4) |
// Row selector: the lowest _cxbits_ bits of the context
#define MBC_(_cx_, _cxbits_)   (_cxbits_==8?(unsigned char)(_cx_):((_cx_) & ((1<<_cxbits_)-1)))

// Encode char 'x' with context 'cx' of size 'cxbits' bits
// The byte is coded as two nibbles, most significant bit first, 4 binary decisions each.
// _cx_ is shifted left by 4 and receives the nibble after each half.
#define mbcenc(rcrange,rclow,rcilow, _cx_, _cxbits_, _m_,_prm0_,_prm1_,_op_, _x_) {\
  unsigned _y = _x_;                                                                            /*encode high nibble*/\
  mbu *_mh = _m_[ MBC_(_cx_, _cxbits_)]; unsigned _cxh = LITSX8(_cx_, _cxbits_) ((_y) >> 4); _cx_ = _cx_ << 4 | (_y)>>4;\
  { mbu *_m = &_mh[(_cxh >> 2) & 0x3c | 3]; mbu_enc(rcrange,rclow,rcilow, _m,_prm0_,_prm1_,_op_, RCB(_cxh,3)); }\
  { mbu *_m = &_mh[(_cxh >> 1) & 0x3c | 2]; mbu_enc(rcrange,rclow,rcilow, _m,_prm0_,_prm1_,_op_, RCB(_cxh,2)); }\
  { mbu *_m = &_mh[(_cxh     ) & 0x3c | 1]; mbu_enc(rcrange,rclow,rcilow, _m,_prm0_,_prm1_,_op_, RCB(_cxh,1)); }\
  { mbu *_m = &_mh[(_cxh << 1) & 0x3c    ]; mbu_enc(rcrange,rclow,rcilow, _m,_prm0_,_prm1_,_op_, RCB(_cxh,0)); }\
                                                                                                /*slide context then encode low nibble*/\
  mbu *_ml = _m_[(1<<_cxbits_)+MBC_(_cx_, _cxbits_)]; unsigned _cxl = LITSX8(_cx_, _cxbits_) ((_y) & 0xf); _cx_ = _cx_ << 4 | (_y) & 0xf;\
  { mbu *_m = &_ml[(_cxl >> 2) & 0x3c | 3]; mbu_enc(rcrange,rclow,rcilow, _m,_prm0_,_prm1_,_op_, RCB(_cxl,3)); }\
  { mbu *_m = &_ml[(_cxl >> 1) & 0x3c | 2]; mbu_enc(rcrange,rclow,rcilow, _m,_prm0_,_prm1_,_op_, RCB(_cxl,2)); }\
  { mbu *_m = &_ml[(_cxl     ) & 0x3c | 1]; mbu_enc(rcrange,rclow,rcilow, _m,_prm0_,_prm1_,_op_, RCB(_cxl,1)); }\
  { mbu *_m = &_ml[(_cxl << 1) & 0x3c    ]; mbu_enc(rcrange,rclow,rcilow, _m,_prm0_,_prm1_,_op_, RCB(_cxl,0)); }\
}

// Decode char with context 'cx' of size 'cxbits' bits. Decoded char = (unsigned char)cx
// Mirror of mbcenc: the same predictor rows and the same 4-bit sub-contexts are used.
// mbu_dec is defined elsewhere; it presumably shifts the decoded bit into _y - TODO confirm.
// The row pointers are taken as _m_[row] (array-to-pointer decay), exactly as in mbcenc.
// '&_m_[row]' would have type 'mbu (*)[64]' for a table declared with MBC_DEF, which is
// not compatible with 'mbu *'.
#define mbcdec(rcrange,rccode, _cx_, _cxbits_, _m_,_prm0_,_prm1_,_ip_) { \
  mbu *_mh = _m_[MBC_(_cx_, _cxbits_)],*_m;\
  unsigned _y = (_cx_) >> _cxbits_;\
  _m = &_mh[(_y & 0xf) << 2 | 3]; mbu_dec(rcrange,rccode, _m,_prm0_,_prm1_,_ip_, _y);          /* high nibble*/\
  _m = &_mh[(_y & 0xf) << 2 | 2]; mbu_dec(rcrange,rccode, _m,_prm0_,_prm1_,_ip_, _y);\
  _m = &_mh[(_y & 0xf) << 2 | 1]; mbu_dec(rcrange,rccode, _m,_prm0_,_prm1_,_ip_, _y);\
  _m = &_mh[(_y & 0xf) << 2    ]; mbu_dec(rcrange,rccode, _m,_prm0_,_prm1_,_ip_, _y); _cx_ = _cx_ << 4 | _y & 0xf; _mh = _m_[1<<_cxbits_ | MBC_(_cx_, _cxbits_)]; _y = (_cx_) >> _cxbits_;\
  _m = &_mh[(_y & 0xf) << 2 | 3]; mbu_dec(rcrange,rccode, _m,_prm0_,_prm1_,_ip_, _y);          /* low nibble*/\
  _m = &_mh[(_y & 0xf) << 2 | 2]; mbu_dec(rcrange,rccode, _m,_prm0_,_prm1_,_ip_, _y);\
  _m = &_mh[(_y & 0xf) << 2 | 1]; mbu_dec(rcrange,rccode, _m,_prm0_,_prm1_,_ip_, _y);\
  _m = &_mh[(_y & 0xf) << 2    ]; mbu_dec(rcrange,rccode, _m,_prm0_,_prm1_,_ip_, _y); _cx_ = _cx_ << 4 | _y & 0xf;\
}
94, 2, 64 7 | 16, 13, 80 8 | 83, 26, 128 9 | 84, 3, 1156 10 | 57, 1, 147 11 | 114, 9, 150 12 | 79, 10, 153 13 | 106, 36, 157 14 | 69, 94, 160 15 | 83, 7, 162 16 | 83, 14, 165 17 | 101, 15, 168 18 | 113, 71, 176 19 | 108, 50, 197 20 | 125, 18, 202 21 | 116, 39, 218 22 | 24, 20, 224 23 | 9, 21, 230 24 | 24, 96, 244 25 | 0,106, 256 26 | 22, 8, 260 27 | 83, 11, 264 28 | 36, 25, 268 29 | 35, 31, 309 30 | 106, 28, 321 31 | 9, 66, 324 32 | 58, 12, 386 33 | 110, 32, 113 34 | 110, 32, 673 35 | 118, 33, 692 36 | 33,102, 512 37 | 104, 27, 512 38 | 32, 29, 0 39 | 23, 6, 0 40 | 116, 93, 648 41 | 75, 34, 546 42 | 143,113, 862 43 | 110, 55, 768 44 | 110, 46, 884 45 | 108, 65, 892 46 | 117, 42, 772 47 | 124, 45, 640 48 | 117, 44, 897 49 | 118, 59, 940 50 | 117, 73, 960 51 | 117, 60, 977 52 | 117, 56, 995 53 | 118, 43, 754 54 | 110, 63, 864 55 | 118, 47, 784 56 | 110, 40, 608 57 | 117, 57, 1028 58 | 108, 61, 1031 59 | 117, 49, 1037 60 | 117, 51, 1040 61 | 108, 38, 1048 62 | 108, 67, 937 63 | 144, 19, 1058 64 | 118, 52, 704 65 | 118, 53, 736 66 | 118, 64, 720 67 | 56, 68, 1088 68 | 118, 54, 896 69 | 120, 72, 256 70 | 124, 58, 1273 71 | 124, 74, 1282 72 | 99, 16, 1312 73 | 32, 37, 40 74 | 124, 48, 1156 75 | 124, 69, 1252 76 | 121, 76, 1808 77 | 121, 77, 1564 78 | 124, 70, 1374 79 | 131, 62, 1609 80 | 230, 14, 1698 81 | 135, 41, 1806 82 | 104, 82, 2080 83 | 126, 92, 1969 84 | 4, 97, 1988 85 | 121, 75, 2304 86 | 122, 81, 2045 87 | 122, 87, 2594 88 | 122, 88, 2536 89 | 122, 85, 2274 90 | 126, 75, 1824 91 | 121, 8, 2049 92 | 122, 86, 2512 93 | 104, 89, 1856 94 | 93, 19, 2056 95 | 95, 30, 1024 96 | 122, 91, 2717 97 | 38, 22, 2344 98 | 122, 95, 3056 99 | 132,107, 3464 100 | 122, 97, 3086 101 | 171, 78, 2947 102 | 122, 99, 3536 103 | 139, 80, 3164 104 | 132,105, 4288 105 | 125,107, 4160 106 | 132, 98, 3912 107 | 105,101, 4020 108 | 125,101, 3712 109 | 121, 99, 4164 110 | 128,112, 6018 111 | 154, 84, 4280 112 | 130,109, 5716 113 | 132,103, 4992 114 | 136, 6, 1066 115 | 130,111, 6533 116 | 
125,104, 4634 117 | 172,110, 5424 118 | 154, 97, 6001 119 | 154, 95, 5120 120 | 130,109, 7424 121 | 137,114, 7713 122 | 154,108, 9472 123 | 138,115, 6656 124 | 137,120, 8100 125 | 154,108, 6410 126 | 138,115, 8033 127 | 138,101, 4224 128 | 143,123,10240 129 | 137,123, 9600 130 | 137,128,10944 131 | 142,128,11442 132 | 175,136,11525 133 | 142,119, 9956 134 | 172,116, 9110 135 | 148,127,12288 136 | 148,134,13764 137 | 150,132,11776 138 | 143,129,12872 139 | 147,119,13392 140 | 153,133, 9217 141 | 158,134,15556 142 | 149,135,15616 143 | 153,128,15414 144 | 149,137,16085 145 | 142,120,16384 146 | 157,141,16926 147 | 173,133,16384 148 | 164,132,19456 149 | 149,141,17280 150 | 159,141,18908 151 | 155,140,18344 152 | 166,139,13192 153 | 158,141,18025 154 | 166,130,19456 155 | 179,122,20480 156 | 167,150,21696 157 | 174,150,22208 158 | 165,150,20117 159 | 161,152,20064 160 | 161,148,20296 161 | 175,151,19424 162 | 167,152,21600 163 | 174,150,22080 164 | 186,160,23754 165 | 181,138,22994 166 | 176,158,23680 167 | 180,150,23204 168 | 165,158,22888 169 | 200,133,23576 170 | 184,160,24448 171 | 184,146,23692 172 | 202,133,21664 173 | 183,151,23040 174 | 188,160,24512 175 | 176,157,23488 176 | 180,155,25504 177 | 175,161,25088 178 | 189,146,25472 179 | 195,146,25792 180 | 198,133,26384 181 | 185,156,25808 182 | 179,138,27328 183 | 189,160,25376 184 | 195,160,26112 185 | 189,174,27104 186 | 184,162,26528 187 | 184,169,26624 188 | 189,147,28440 189 | 191,163,28066 190 | 188,166,27640 191 | 191,146,25600 192 | 192,169,28488 193 | 194,163,28930 194 | 201,138,29812 195 | 196,182,29121 196 | 200,177,29560 197 | 197,182,29698 198 | 195,169,29712 199 | 238,146,30064 200 | 202,177,30276 201 | 199,190,29828 202 | 210,160,30736 203 | 203,177,30016 204 | 205,190,30361 205 | 211,190,30849 206 | 206,190,30592 207 | 204,192,30752 208 | 208,190,30912 209 | 212,173,31234 210 | 214,178,31264 211 | 249,171,31552 212 | 207,190,31010 213 | 209,190,31492 214 | 217,195,31525 215 | 216,190,31059 216 | 
213,168,31560 217 | 215,190,31564 218 | 219,183,31637 219 | 245,249,31639 220 | 231,178,31368 221 | 242,168,31932 222 | 232,168,32265 223 | 233,178,31714 224 | 229,168,32032 225 | 239,171,31416 226 | 254,215,31770 227 | 243,168,31772 228 | 222,168,31780 229 | 237,195,31800 230 | 236,190,32057 231 | 227,196,31488 232 | 230,178,31648 233 | 241,168,32064 234 | 228,195,31816 235 | 235,168,31920 236 | 223,178,31830 237 | 240,168,31879 238 | 238,190,31880 239 | 224,168,31312 240 | 226,171,31728 241 | 220,171,31968 242 | 244,168,31969 243 | 221,168,32036 244 | 234,168,32017 245 | 247,168,32024 246 | 250,179,32044 247 | 225,183,32144 248 | 248,179,31970 249 | 248,179,32676 250 | 253,169,32382 251 | 246,189,32644 252 | 252,179,32767 253 | 251,226,32767 254 | 251,239,32766 255 | 249,223,32766 256 | 252,237,32766 257 | -------------------------------------------------------------------------------- /rcqlfc_.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // Turbo Range Coder bwt: templates include 25 | #include "include/turborc.h" 26 | #include "include_/rcutil.h" 27 | 28 | #include "rcutil_.h" 29 | #include "mb_vint.h" 30 | //-------------------------- Post bwt stage: QLFC entropy coding ------------------------------------------------------------------ 31 | #ifndef RCPRM 32 | #define RCPRM 33 | #define RCPRMC 34 | #endif 35 | 36 | #if RC_PRDID == 1 // optimal predictor parameters 37 | #define RCPRM0K 5 38 | #define RCPRM1K 5 39 | #elif RC_PRDID == 2 40 | #define RCPRM0K 4 // 4,8 41 | #define RCPRM1K 8 42 | #define RCPRM0R 5 //5,8 43 | #define RCPRM1R 8 44 | #else 45 | #define RCPRM0K RCPRM0 46 | #define RCPRM1K RCPRM1 47 | #define RCPRM0R RCPRM0 48 | #define RCPRM1R RCPRM1 49 | #endif 50 | 51 | #define PREDEMAK(_avg_,_x_) EMA(3, _avg_, 5, _x_) // 3 bits 52 | #define PREDEMAR(_avg_,_x_) EMA(5, _avg_, 23, (_x_)>31?31:(_x_)) // 5 bits 53 | #define CXK unsigned cxk = RICEK(K[u]>31?31:K[u]) << 8 | u // cxk: 3+8bits 54 | #define CXR unsigned cxr = RICEK(R[u]) << 8 | u, ku = RICEK(K[u]>14?14:K[u]) // cxr: 3+8 = 11, ku:2bits 55 | enum { KU0=11, KU=11, KB=11, 56 | RU0=13, RU=12, RB= 8 }; 57 | #ifndef NCOMP 58 | size_t T3(rcqlfc,RC_PRD,enc)(uint8_t *in, size_t inlen, unsigned char *out RCPRM) { 59 | uint8_t _r2cr[(1<<8)+32], *r2cr = &_r2cr[32], *ip = in, *in_,*op = out, *_rk = vmalloc((inlen+1)*sizeof(in[0])), *rk; if(!_rk) die("malloc failed. 
size=%zu\n", inlen); 60 | MBG_DEC( mbg0a, mbgua, mbgba, 33, 33); //initial mtf r2c 61 | MBG_DEC2(mbg0c, 1<0)<<11|cxr], mbgbr[u ], RCPRM0R,RCPRM1R,op, r); R[u] = PREDEMAR(R[u],r); 74 | OVERFLOW(in,inlen, out, op, goto e); 75 | } 76 | rceflush(rcrange,rclow,rcilow, op); OVERFLOW(in,inlen, out,op,;); 77 | e:vfree(_rk); 78 | return op - out; 79 | } 80 | #endif 81 | 82 | #ifndef NDECOMP 83 | size_t T3(rcqlfc,RC_PRD,dec)(uint8_t *in, size_t outlen, uint8_t *out RCPRM) { 84 | uint8_t r2c[257+O], *op, *ip = in, *p, K[256], R[256] = {1};; 85 | unsigned i; 86 | MBG_DEC( mbg0a, mbgua, mbgba, 33, 33); 87 | MBG_DEC2(mbg0c, 1<0)<<11|cxr], mbgbr[u ], RCPRM0R,RCPRM1R,ip, r); R[u] = PREDEMAR(R[u],r); 97 | r++; memset_(op, u, r); 98 | } 99 | return outlen; 100 | } 101 | #endif 102 | -------------------------------------------------------------------------------- /libdivsufsort/include/sort/copy.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Christoph Diegelmann 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom the 8 | // Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | // IN THE SOFTWARE. 20 | 21 | #ifndef SORT_COPY_H 22 | #define SORT_COPY_H 23 | 24 | #ifdef __INTEL_COMPILER 25 | #pragma warning disable 3373 26 | #endif 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "detail/misc.h" 36 | #include "inplace.h" 37 | 38 | namespace sort { 39 | 40 | // Make pair accessible from outside 41 | template 42 | using pair = detail::misc::pair; 43 | 44 | namespace copy { 45 | 46 | // Opportunistic version of the quicksort 47 | // Uses free space given to it to copy together key and value 48 | // then sorting it. [first, last) is the range to sort, [Sf, Sl) is the scratch space, index maps an element to its sort key, and cb is called (through icb below) with the matching sub-range of [first, last) right after that sub-range's values were copied back. Falls back to sort::inplace::quick when the range is shorter than COPY_MIN, when the scratch space is not strictly larger than the range, or when the median7_copy check below rejects the copy. 49 | template 50 | inline void quick(T first, T last, U Sf, U Sl, I index, C cb) { 51 | using typeA = std::remove_reference_t; 52 | using typeB = std::remove_reference_t; 53 | using typeC = std::remove_reference_t; 54 | static_assert(std::is_same>::value, "Type mismatch"); 55 | 56 | if (detail::misc::COPY_MIN <= std::distance(first, last) 57 | && std::distance(first, last) < std::distance(Sf, Sl)) { 58 | Sl = Sf + std::distance(first, last); 59 | 60 | // get a pivot 61 | typeC pivot; int equals; 62 | std::tie(pivot, equals) = detail::misc::median7_copy(first, index); 63 | if (std::distance(first, last) * (6 - equals) < detail::misc::COPY_MIN * 7) 64 | return sort::inplace::quick(first, last, index, cb); 65 | 66 | // copy together + initial partitioning 67 | auto a = Sf, b = Sl; 68 | for (auto it = first; it != last; ++it) { 69 | auto v = detail::misc::make_pair(index(*it), *it); 70 | *(v.first < pivot ? 
a++ : --b) = v; 71 | } 72 | /* here a == b: [Sf, a) holds the pairs with key < pivot, [a, Sl) holds all remaining pairs */ 73 | auto idx = [](auto a) { return a.first; }; 74 | auto icb = [first, Sf, cb](auto a, auto b) { 75 | // copy back the values (pair.second) to the same offsets in the original range 76 | for (auto it = a; it != b; ++it) 77 | first[it - Sf] = it->second; 78 | 79 | // call the cb 80 | cb(first + (a - Sf), first + (b - Sf)); 81 | }; 82 | /* LR is not declared in the visible text (presumably a template parameter - TODO confirm); it only selects which partition is processed first */ 83 | if (LR) { 84 | sort::inplace::block(Sf, a, idx, icb); 85 | sort::inplace::block(a, Sl, idx, icb); 86 | } else { 87 | sort::inplace::block(a, Sl, idx, icb); 88 | sort::inplace::block(Sf, a, idx, icb); 89 | } 90 | } else // not enough space 91 | sort::inplace::quick(first, last, index, cb); 92 | } 93 | /* Callback-free overload: same copy-and-partition scheme, but the pivot is simply the key of the first element and the values are copied back here after each block sort. */ 94 | template 95 | inline void quick(T first, T last, U Sf, U Sl, I index) { 96 | using typeA = std::remove_reference_t; 97 | using typeB = std::remove_reference_t; 98 | using typeC = std::remove_reference_t; 99 | static_assert(std::is_same>::value, "Type mismatch"); 100 | 101 | if (detail::misc::COPY_MIN <= std::distance(first, last) 102 | && std::distance(first, last) < std::distance(Sf, Sl)) { 103 | Sl = Sf + std::distance(first, last); 104 | 105 | // get a pivot 106 | typeC pivot = index(*first); 107 | 108 | // copy together + initial partition 109 | auto a = Sf, b = Sl; 110 | for (auto it = first; it != last; ++it) { 111 | auto v = detail::misc::make_pair(index(*it), *it); 112 | *(v.first < pivot ? 
a++ : --b) = v; 113 | } 114 | 115 | auto idx = [](auto a) { return a.first; }; 116 | /* sort each partition inside the scratch space, then copy its values back to the same offsets of [first, last); LR only changes the order */ 117 | if (LR) { 118 | sort::inplace::block(Sf, a, idx); 119 | for (auto it = Sf; it != a; ++it) 120 | first[std::distance(Sf, it)] = it->second; 121 | sort::inplace::block(a, Sl, idx); 122 | for (auto it = a; it != Sl; ++it) 123 | first[std::distance(Sf, it)] = it->second; 124 | } else { 125 | sort::inplace::block(a, Sl, idx); 126 | for (auto it = a; it != Sl; ++it) 127 | first[std::distance(Sf, it)] = it->second; 128 | sort::inplace::block(Sf, a, idx); 129 | for (auto it = Sf; it != a; ++it) 130 | first[std::distance(Sf, it)] = it->second; 131 | } 132 | 133 | } else // not enough space 134 | sort::inplace::quick(first, last, index); 135 | } 136 | 137 | } // copy 138 | } // sort 139 | 140 | #endif // SORT_COPY_H 141 | -------------------------------------------------------------------------------- /bec_bstm.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // bit entropy coder with bitio 25 | #include "include_/conf.h" 26 | //---- BitIO encode - Little Endian ------------------------------------------------------ 27 | #define bitput_t T3(uint, __WORDSIZE, _t) 28 | #define bitebr_t unsigned 29 | #define bitedec( _bw_,_br_) bitput_t _bw_; bitebr_t _br_ 30 | #define bitedef( _bw_,_br_) bitput_t _bw_=0; unsigned _br_=0 31 | #define biteini( _bw_,_br_) _bw_=_br_=0 32 | #define bitput( _bw_,_br_,_nb_,_x_) (_bw_) += (bitput_t)(_x_) << (_br_), (_br_) += (_nb_) 33 | /* bitput adds _x_ at bit position _br_ of the accumulator _bw_ and advances _br_ by _nb_; the value is not masked here */ 34 | // Encode renorm Right->Left: bitenorm stores the accumulator at _op_ through ctou64 (presumably an unaligned 64-bit store from conf.h - confirm), advances _op_ by the number of complete bytes and keeps the remaining 0..7 bits. NOTE(review): its expansion contains a ';', so it is not a single expression. 35 | #define bitenorm( _bw_,_br_,_op_) ctou64(_op_) = _bw_; _op_ += ((_br_)>>3), (_bw_) >>=((_br_)&~7), (_br_) &= 7 36 | #define bitflush( _bw_,_br_,_op_) ctou64(_op_) = _bw_, _op_ += ((_br_)+7)>>3, _bw_=_br_=0 37 | 38 | //---- BitIO decode - Big Endian ------------------------------------------------------- 39 | #define bitget_t T3(uint, __WORDSIZE, _t) 40 | #define bitdbr_t unsigned char 41 | #define bitddef( _bw_,_br_) bitget_t _bw_; bitdbr_t _br_ 42 | #define bitdini( _bw_,_br_) _bw_ = _br_ = 0 43 | #define bitbw( _bw_,_br_) (_bw_ << _br_) // _bw_// 44 | #define bitrmv( _bw_,_br_,_nb_) (_br_ += (_nb_)) // _bw_ <<= (_nb_), _br_ += (_nb_)// 45 | #define bitpeek( _bw_,_br_,_nb_) (bitbw(_bw_,_br_)>>(sizeof(_bw_)*8- (_nb_))) 46 | #define bitget( _bw_,_br_,_nb_,_x_) _x_ = bitpeek(_bw_, _br_, _nb_), bitrmv(_bw_, _br_, _nb_) 47 | 48 | // Decode renorm Right->Left: bitdnormr moves _ip_ backwards by the number of whole bytes consumed (_br_>>3), reloads the accumulator from there and keeps the 0..7 leftover bit offset 49 | #define bitdinir( _bw_,_br_,_ip_) bitdini(_bw_,_br_),_ip_ -= sizeof(_bw_) 50 | #define bitdnormr(_bw_,_br_,_ip_) _bw_ = *(bitget_t *)(_ip_ -= _br_>>3), _br_ &= 7 //, _bw_ <<= _br_ 51 | 52 | //----------------------- Bit entropy stream i/o using bitio ------------------------------------------------- 53 | typedef struct { bitedec(bw,br); unsigned char *_p,*p; 
} stm_t; // bitio: bw/br = bit accumulator and bit count, _p = start of the buffer, p = current position 54 | #define STMSAVE(_s_) bitedef(bw,br); bw = (_s_)->bw; br = (_s_)->br; unsigned char *p = (_s_)->p 55 | #define STMREST(_s_) (_s_)->bw = bw; (_s_)->br = br; (_s_)->p = p 56 | /* stmetell: number of bytes between the buffer start and the current position. stmseek: advances the current position by x bytes and always returns 0; the o argument is not used. */ 57 | static size_t stmetell(stm_t *s) { return s->p - s->_p; } 58 | static size_t stmseek(stm_t* s, unsigned o, unsigned x) { s->p += x; return 0; } 59 | 60 | //------ Stream encode --- NOTE(review): MSBREV reads a variable named _n from its expansion site, and BSR expands to an unparenthesized ?: expression; both are only safe in the contexts used below 61 | #define MSBREV(_cl_,_l_) { unsigned _msb = (_l_ | (1u << _cl_)) <= _n; _l_ = _l_ << _msb | _l_ >> _cl_; _cl_ += _msb; _l_ = bzhi32(_l_,_cl_); } //#define MSBREV(_cl,_l) cl++ 62 | 63 | #define BSR(_n_) (_n_)==1?1:__bsr32(_n_) 64 | 65 | #define stmput0_(bw,br,p, _l_, _n_, _lx_, _hx_) {\ 66 | if(_n_ < ECN) { ectab_t *e = &bectab[_lx_>ECN-1?ECN-1:_lx_][_hx_>ECN-1?ECN-1:_hx_][_l_][_n_]; bitput(bw,br, e->cl,e->cw); }\ 67 | else { unsigned _hx=_hx_, _n=_n_, _l=_l_, _x;\ 68 | if(_n > _lx_) _x = _n-_lx_, _hx-=_x, _n-=_x;\ 69 | if(_n > _hx ) _x = _n-_hx, _l -=_x, _n-=_x;\ 70 | if(_n) { unsigned _msb,_cl = BSR(_n); MSBREV(_cl,_l); bitput(bw,br, _cl,_l); }\ 71 | } bitenorm(bw,br,p);\ 72 | } 73 | /* stmeini: points both the start and the current pointer at op and zeroes the bit accumulator and counter (via the decoder macro bitdini, which expands to the same assignment as biteini); outsize is not used */ 74 | static void stmeini(stm_t *s, unsigned char *op, size_t outsize) { s->_p = s->p = op; bitdini(s->bw, s->br); } 75 | 76 | static ALWAYS_INLINE void stmput( stm_t *s, unsigned l, unsigned n, unsigned lx, unsigned hx) { stmput0_(s->bw,s->br,s->p, l, n, lx, hx); } 77 | 78 | static ALWAYS_INLINE void stmputx(stm_t *s, unsigned l, unsigned x) { bitput(s->bw,s->br, l,x); bitenorm(s->bw,s->br,s->p); } 79 | /* stmflush: writes the pending bits, appends one byte holding (64 - br) & 7 as computed before the flush, stores the total byte count at the start of the buffer through ctou32 and returns that count */ 80 | static unsigned stmflush(stm_t *s) { 81 | unsigned _br = (64-s->br)&7; 82 | bitflush(s->bw,s->br,s->p); 83 | s->p[0] = _br; s->p++; ctou32(s->_p) = s->p - s->_p; 84 | return stmetell(s); 85 | } 86 | 87 | //------ Stream decode --- 88 | #define stmget0_(bw,br,p,_n_, _lx_, _hx_, _x_) do { unsigned _n = _n_, _hx = _hx_;\ 89 | if(_n > (_lx_)) { unsigned ifh = _n - (_lx_); _hx -= ifh; _n -= ifh; }\ 90 | _x_ = _n > _hx?_n-_hx:0;\ 91 | if(_n-=_x_) { bitdnormr(bw,br,p);\ 92 | unsigned _cw, _cl = BSR(_n); _cw = 
bitpeek(bw,br, _cl);\ 93 | if(likely((1u << _cl | _cw) > _n)) bitrmv(bw,br,_cl); else { bitget(bw,br, _cl+1,_cw); _cw = (_cw&1) << _cl | _cw>>1; }\ 94 | _x_ += _cw;\ 95 | }\ 96 | } while(0) 97 | /* stmdini: reads the stream length from the start of in through ctou32, takes the last byte of the stream as the initial bit offset and positions p sizeof(bw) bytes before that byte, so the stream is consumed backwards; inlen is not used. The stmgetx_/stmget_ variants below work on local variables named bw, br and p (as declared by STMSAVE); stmgetx/stmget work directly on the stm_t fields. */ 98 | static void stmdini(stm_t *s, unsigned char *in, size_t inlen) { unsigned char *ip = in+ctou32(in), sbr = *--ip; bitdinir(s->bw,s->br,ip); s->br = sbr; s->_p = in; s->p = ip; } 99 | #define stmgetx_(_l_,_x_) bitget(bw,br, _l_,_x_) 100 | #define stmget_(_n_, _lx_, _hx_, _x_) stmget0_(bw,br,p,_n_, _lx_, _hx_, _x_) 101 | #define stmgetx(_s_, _l_, _x_) { bitdnormr((_s_)->bw,(_s_)->br,(_s_)->p); bitget((_s_)->bw,(_s_)->br, _l_,_x_); } 102 | #define stmget(_s_,_n_, _lx_, _hx_, _x_) stmget0_((_s_)->bw,(_s_)->br,(_s_)->p,_n_, _lx_, _hx_, _x_) 103 | -------------------------------------------------------------------------------- /libdivsufsort/include/sort/detail/suffix.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Christoph Diegelmann 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom the 8 | // Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | // IN THE SOFTWARE. 20 | 21 | // Linear Time Suffix Sorting By Depth Awareness 22 | 23 | #ifndef SORT_DETAIL_SUFFIX_H 24 | #define SORT_DETAIL_SUFFIX_H 25 | 26 | #ifdef __INTEL_COMPILER 27 | #pragma warning disable 3373 28 | #endif 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "misc.h" 39 | #include "../inplace.h" 40 | #include "../copy.h" 41 | 42 | // define if additional space may be used 43 | #define USE_COPY 44 | 45 | namespace sort { 46 | namespace detail { 47 | namespace suffix { 48 | /* induce: fills the middle range [b, e) from its neighbours [a, b) and [e, f) and returns the updated b. With an empty middle range it only complements the last entry of the lower range (if any). NOTE(review): the local `index` below is created but not used in the visible body. */ 49 | template 50 | inline T induce(T SA, U ISA, T a, T b, T e, T f, D depth, G group) { 51 | if (b == e) { 52 | if (a != b) b[-1] = ~b[-1]; 53 | return b; 54 | } 55 | 56 | // [a, b) = lower range = group 57 | // [b, e) = range to be induced = group 58 | // [e, f) = high range > group 59 | T c = b, d = e; 60 | auto index = detail::misc::index(ISA, depth); 61 | auto castToIndex = detail::misc::castTo(); 62 | 63 | // Induce upper part 64 | while (e != f) { 65 | for (auto it = f; it != e; --it) { 66 | auto v = it[-1] < 0 ? 
~it[-1] : it[-1]; 67 | // (v is the entry with a possible complement flag removed) If the prev element is in the group 68 | if (depth <= v && ISA[v = (v - depth)] == group) 69 | *--d = v; // put it into the bucket 70 | } 71 | // update the beginning of the group 72 | if (d != e) ISA[e[-1]] = castToIndex(d - SA); 73 | // Iterate into the left part 74 | f = e; e = d; 75 | } 76 | 77 | // Induce lower part 78 | auto ndepth = depth; 79 | while (b != d) { 80 | ndepth += depth; 81 | auto cgroup = castToIndex(b - SA); 82 | for (auto it = a; it != b; ++it) { 83 | auto v = *it; 84 | // If the prev element is in the group 85 | if (depth <= v && ISA[v = (v - depth)] == group) { 86 | ISA[*c++ = v] = cgroup; 87 | ISA[v + 1] = -ndepth; 88 | } 89 | } 90 | // Flag end of lower part to mark it type F 91 | b[-1] = ~b[-1]; 92 | // Iterate into the right part 93 | a = b; b = c; 94 | } 95 | // Flag the center 96 | if (a != b) b[-1] = ~b[-1]; 97 | 98 | return b; 99 | } 100 | 101 | // Partition into type L, T and S 102 | template 103 | static T partition(T SA, U ISA, T first, T last, D depth) { 104 | // Index function: return the next element according to the current depth 105 | auto index = detail::misc::index(ISA, depth); 106 | 107 | // Name of the group equals the index of the first element 108 | // Only sort items leading to groups bigger than the current 109 | T a, b; 110 | static_assert(!std::is_reference::value, "T is a reference"); 111 | std::tie(a, b) = detail::inplace::partition(first, last, index, first - SA); 112 | 113 | // update the beginning of the group 114 | auto castToIndex = detail::misc::castTo(); 115 | if (b != last) ISA[last[-1]] = castToIndex(b - SA); 116 | 117 | // Induce sort the tandem repeats part (= equal partition) into type L and S 118 | return detail::suffix::induce(SA, ISA, first, a, b, last, depth, first - SA); 119 | } 120 | /* name: returns a callback taking a range [a, b) of SA; the callback works with depth + 1, renames the group in ISA and hands it to partition() */ 121 | template 122 | inline auto name(T SA, U ISA, D depth) { 123 | return [SA, ISA, depth = depth + 1](auto a, auto b) { 124 | auto castToIndex = detail::misc::castTo(); 125 | if 
(std::distance(a, b) < 2) { 126 | ISA[*a] = castToIndex(a - SA); 127 | return (void) (*a = ~*a); // Group is unique - flag it as type F 128 | } 129 | 130 | // Get the element following the current 131 | auto n = ISA[*a + depth]; 132 | // If it's negative it was already sorted and contains the current sorting depth (the expression below is the branchless form of the commented-out line after it: assuming an arithmetic right shift, the shift yields an all-ones mask for negative n, and ~n == -n - 1) 133 | auto ndepth = depth + ((n >> (sizeof(decltype(n)) * CHAR_BIT - 1)) & ~n); 134 | // auto ndepth = depth + (ISA[*a + depth] < 0 ? -ISA[*a + depth] - 1 : 0); 135 | 136 | // Naming Stage: rename and update depth 137 | std::for_each(a, b, [ISA, cgroup = castToIndex(a - SA), ndepth](auto c) { 138 | // Storing the depth in the next item is possible due to 139 | // the fact that we will never sort this pair again. 140 | // That this works strongly indicates O(n) time for radix sort implementation 141 | // because it shows that every pair is sorted at max once and there are 142 | // only O(n) pairs.The only problem arises with tandem repeats which need to 143 | // be sorted using induction 144 | ISA[c + 0] = +cgroup; 145 | ISA[c + 1] = -ndepth; 146 | }); 147 | 148 | // Partition and return 149 | return (void) sort::detail::suffix::partition(SA, ISA, a, b, ndepth); 150 | }; 151 | } 152 | 153 | } // suffix 154 | } // detail 155 | } // sort 156 | 157 | #endif // SORT_DETAIL_SUFFIX_H 158 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | # powturbo (c) Copyright 2013-2023 2 | # Download or clone TurboRC: 3 | # git clone git://github.com/powturbo/Turbo-Range-Coder.git 4 | 5 | # fsm predictor 6 | #SF=1 7 | # include BWT 8 | BWT=1 9 | #BWTDIV=1 10 | LIBSAIS16=1 11 | #V8 12 | #BWTSATAN=1 13 | ANS=1 14 | #EXT=1 15 | #NOCOMP=1 16 | #AVX2=1 17 | #NZ=1 18 | #SF=1 19 | #TURBORLE=1 20 | TRANSPOSE=1 21 | #---------------------------------------------- 22 | CC ?= gcc 23 | CXX ?= g++ 24 | #CC=clang 25 | #CXX=clang++ 26 | 27 | 
#CC=powerpc64le-linux-gnu-gcc 28 | #CL = $(CC) 29 | #DEBUG=-DDEBUG -g 30 | DEBUG=-DNDEBUG -s 31 | 32 | PREFIX ?= /usr/local 33 | DIRBIN ?= $(PREFIX)/bin 34 | DIRINC ?= $(PREFIX)/include 35 | DIRLIB ?= $(PREFIX)/lib 36 | 37 | OPT=-fstrict-aliasing 38 | ifeq (,$(findstring clang, $(CC))) 39 | OPT+=-falign-loops 40 | endif 41 | 42 | #------- OS/ARCH ------------------- (Windows hosts: OS is normalized to `Windows` and gcc/g++ are forced; elsewhere OS and ARCH come from uname, with ARCH overridden for aarch64/ppc64le cross compilers) 43 | ifneq (,$(filter Windows%,$(OS))) 44 | OS := Windows 45 | CC=gcc 46 | CXX=g++ 47 | # CC=clang 48 | ARCH=x86_64 49 | LDFLAGS=-Wl,--stack,33554432 50 | else 51 | OS := $(shell uname -s) 52 | ARCH := $(shell uname -m) 53 | 54 | ifneq (,$(findstring aarch64,$(CC))) 55 | ARCH = aarch64 56 | else ifneq (,$(findstring arm64,$(ARCH))) 57 | ARCH = aarch64 58 | else ifneq (,$(findstring powerpc64le,$(CC))) 59 | ARCH = ppc64le 60 | endif 61 | endif 62 | 63 | ifeq ($(ARCH),ppc64le) 64 | _SSE=-D__SSSE3__ 65 | MARCH=-mcpu=power9 -mtune=power9 $(_SSE) 66 | CFLAGS+=-D_NAVX2 -D_NSCALAR 67 | else ifeq ($(ARCH),aarch64) 68 | MARCH=-march=armv8-a 69 | CFLAGS+=-D_NAVX2 -D_NSCALAR 70 | ifneq (,$(findstring clang, $(CC))) 71 | OPT+=-fomit-frame-pointer 72 | #-fmacro-backtrace-limit=0 73 | endif 74 | _SSE=-march=armv8-a 75 | else ifeq ($(ARCH),$(filter $(ARCH),x86_64)) 76 | _SCALAR=-mno-sse2 77 | # set minimum arch sandy bridge SSE4.1 + AVX 78 | _SSE=-march=corei7-avx -mtune=corei7-avx 79 | # _SSE+=-mno-avx -mno-aes 80 | _AVX2=-march=haswell 81 | endif 82 | 83 | ifeq ($(AVX2),1) 84 | MARCH=$(_AVX2) 85 | else 86 | MARCH=$(_SSE) 87 | endif 88 | 89 | CFLAGS+= $(DEBUG) $(OPT) -w -Wall 90 | #-pedantic NOTE(review): -w on the line above inhibits all warnings, so -Wall has no effect 91 | ifeq ($(PGO), 1) 92 | CFLAGS+=-fprofile-generate 93 | LDFLAGS+=-lgcov 94 | else ifeq ($(PGO), 2) 95 | CFLAGS+=-fprofile-use 96 | endif 97 | 98 | ifeq ($(OS),$(filter $(OS),Darwin Linux GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku)) 99 | #LDFLAGS+=-lrt 100 | LDFLAGS+=-lm 101 | #-Wl,--stack_size -Wl,20971520 102 | endif 103 | 104 | ifeq ($(EXTRC), 1) 105 | CFLAGS+=-DEXTRC 106 | endif 107 | 108 | ifeq 
($(STATIC),1) 109 | LDFLAGS+=-static 110 | endif 111 | 112 | #-------- bwt -------------------------- (backend selection: BWTDIV=1 -> libdivsufsort, BWTX=1 -> ../bwt, otherwise libsais) 113 | ifeq ($(BWTSATAN), 1) 114 | CFLAGS+=-D_BWTSATAN 115 | BWT=1 116 | endif 117 | 118 | ifeq ($(BWT), 1) 119 | CFLAGS+=-D_BWT 120 | ifeq ($(BWTDIV), 1) 121 | CFLAGS+=-DPROJECT_VERSION_FULL="20137" -DINLINE=inline -Ilibdivsufsort/include -Ilibdivsufsort/build/include 122 | CFLAGS+=-D_BWTDIV 123 | LIBBWT+=libdivsufsort/unbwt.o 124 | ifeq ($(NOCOMP), 1) 125 | else 126 | LIBBWT+=libdivsufsort/lib/sssort.o libdivsufsort/lib/utils.o libdivsufsort/lib/daware.o 127 | LIBBWT+=libdivsufsort/lib/divsufsort.o 128 | endif 129 | else 130 | ifeq ($(BWTX), 1) 131 | LIBBWT =../bwt/sssort.o ../bwt/bwtxinv.o ../bwt/divsufsort.o ../bwt/trsort.o 132 | CFLAGS+=-D_BWTX 133 | else 134 | CFLAGS+=-D_LIBSAIS -Ilibsais/include 135 | LIBBWT+=$(B)libsais/src/libsais.o 136 | ifeq ($(LIBSAIS16), 1) 137 | CFLAGS+=-D_LIBSAIS16 138 | LIBBWT+=$(B)libsais/src/libsais16.o 139 | endif 140 | endif 141 | endif 142 | endif 143 | 144 | 145 | all: turborc 146 | 147 | ifneq ($(NOCOMP), 1) 148 | LIB=rc_ss.o rc_s.o rccdf.o rcutil.o bec_b.o rccm_s.o rccm_ss.o rcqlfc_s.o rcqlfc_ss.o rcqlfc_sf.o 149 | 150 | #ifeq ($(DELTA), 1) 151 | #CFLAGS+=-D_DELTA 152 | #LIB+=transform.o 153 | #endif 154 | 155 | ifeq ($(ANS), 1) 156 | CFLAGS+=-D_ANS 157 | L=./ 158 | $(L)anscdf0.o: $(L)anscdf.c $(L)anscdf_.h 159 | $(CC) -c -O3 $(CFLAGS) $(_SCALAR) -falign-loops=32 $(L)anscdf.c -o $(L)anscdf0.o 160 | 161 | $(L)anscdfs.o: $(L)anscdf.c $(L)anscdf_.h 162 | $(CC) -c -O3 $(CFLAGS) $(_SSE) -falign-loops=32 $(L)anscdf.c -o $(L)anscdfs.o 163 | 164 | LIB+=$(L)anscdfs.o 165 | ifeq ($(ARCH), x86_64) 166 | $(L)anscdfx.o: $(L)anscdf.c $(L)anscdf_.h 167 | $(CC) -c -O3 $(CFLAGS) -march=haswell -falign-loops=32 $(L)anscdf.c -o $(L)anscdfx.o 168 | 169 | LIB+=$(L)anscdfx.o 170 | #$(L)anscdf0.o 171 | endif 172 | endif 173 | 174 | ifeq ($(TURBORLE), 1) 175 | CFLAGS+=-D_TURBORLE 176 | LIB+=trlec.o trled.o 177 | endif 178 | 179 | ifeq ($(TRANSPOSE), 
1) 180 | transpose_avx2.o: transpose.c 181 | $(CC) -O3 -w -mavx2 $(OPT) -c transpose.c -o transpose_avx2.o 182 | 183 | CFLAGS+=-D_TRANSPOSE -D_NCPUISA 184 | LIB+=transpose.o transpose_.o 185 | 186 | ifeq ($(ARCH), x86_64) 187 | LIB+=transpose_avx2.o 188 | endif 189 | endif 190 | 191 | ifeq ($(V8), 1) 192 | CFLAGS+=-D_V8 193 | LIB+=v8.o 194 | endif 195 | #trlec.o trled.o $(L)anscdfx.o $(L)anscdfs.o $(L)anscdf0.o 196 | endif 197 | ifeq ($(BWT), 1) 198 | LIB+=rcbwt.o 199 | endif 200 | 201 | ifeq ($(SF), 1) 202 | CFLAGS+=-D_SF 203 | LIB+=rc_sf.o rccm_sf.o rcqlfc_sf.o 204 | endif 205 | 206 | ifeq ($(NZ), 1) 207 | LIB+=rc_nz.o rccm_nz.o rcqlfc_nz.o 208 | CFLAGS+=-D_NZ 209 | endif 210 | 211 | ifeq ($(SH), 1) 212 | LIB+=rc_sh.o 213 | CFLAGS+=-D_SH 214 | endif 215 | 216 | ifeq ($(EXT), 1) 217 | CFLAGS+=-D_EXT 218 | #LIB+=xrc.o 219 | ifeq ($(BWT), 1) 220 | #LIB+=xrcbwt_sf.o 221 | endif 222 | endif 223 | 224 | ifeq ($(NOCOMP), 1) 225 | CFLAGS+=-DNO_COMP 226 | endif 227 | 228 | 229 | #librc.a: $(LIB) 230 | # ar cr $@ $+ 231 | turborc.o: turborc.c 232 | $(CC) -O3 $(CFLAGS) $(MARCH) -c turborc.c -o turborc.o 233 | 234 | turborc: $(LIB) $(LIBBWT) turborc.o 235 | $(CC) $^ $(LDFLAGS) -o turborc 236 | # NOTE(review): LIBDIV is not assigned anywhere in this makefile, so the rule below only depends on reorder.o unless it is set externally 237 | reorder: $(LIBDIV) reorder.o 238 | $(CC) $^ $(LDFLAGS) -o reorder 239 | 240 | .c.o: 241 | $(CC) -O3 $(CFLAGS) $(MARCH) $< -c -o $@ 242 | 243 | .cpp.o: 244 | $(CXX) -O3 $(MARCH) $(CXXFLAGS) $< -c -o $@ 245 | # NOTE(review): OS is reassigned to `Windows` in the OS/ARCH section, so the Windows_NT comparison below appears never to match - confirm 246 | ifeq ($(OS),Windows_NT) 247 | clean: 248 | del /S *.o 249 | del /S *~ 250 | else 251 | clean: 252 | @find . -type f -name "*\.o" -delete -or -name "*\~" -delete -or -name "core" -delete -or -name "turborc" -delete -or -name "librc.a" -delete 253 | endif 254 | -------------------------------------------------------------------------------- /libdivsufsort/include/divsufsort.h: -------------------------------------------------------------------------------- 1 | /* 2 | * divsufsort.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #ifndef _DIVSUFSORT_H 28 | #define _DIVSUFSORT_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | #include 35 | /* NOTE(review): both branches below define DIVSUFSORT_API as empty, so DIVSUFSORT_BUILD_DLL currently has no effect */ 36 | #ifndef DIVSUFSORT_API 37 | # ifdef DIVSUFSORT_BUILD_DLL 38 | # define DIVSUFSORT_API 39 | # else 40 | # define DIVSUFSORT_API 41 | # endif 42 | #endif 43 | 44 | /*- Datatypes -*/ 45 | #ifndef SAUCHAR_T 46 | #define SAUCHAR_T 47 | typedef uint8_t sauchar_t; 48 | #endif /* SAUCHAR_T */ 49 | #ifndef SAINT_T 50 | #define SAINT_T 51 | typedef int32_t saint_t; 52 | #endif /* SAINT_T */ 53 | #ifndef SAIDX_T 54 | #define SAIDX_T 55 | typedef int32_t saidx_t; 56 | #endif /* SAIDX_T */ 57 | #ifndef PRIdSAINT_T 58 | #define PRIdSAINT_T PRId32 59 | #endif /* PRIdSAINT_T */ 60 | #ifndef PRIdSAIDX_T 61 | #define PRIdSAIDX_T PRId32 62 | #endif /* PRIdSAIDX_T */ 63 | 64 | 65 | /*- Prototypes -*/ 66 | 67 | /** 68 | * Constructs the suffix array of a given string. 69 | * @param T[0..n-1] The input string. 70 | * @param SA[0..n-1] The output array of suffixes. 71 | * @param n The length of the given string. 72 | * @return 0 if no error occurred, -1 or -2 otherwise. 73 | */ 74 | DIVSUFSORT_API 75 | saint_t 76 | divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n); 77 | 78 | /** 79 | * Constructs the burrows-wheeler transformed string of a given string. 80 | * @param T[0..n-1] The input string. 81 | * @param U[0..n-1] The output string. (can be T) 82 | * @param A[0..n-1] The temporary array. (can be NULL) 83 | * @param n The length of the given string. 84 | * @return The primary index if no error occurred, -1 or -2 otherwise. 85 | */ 86 | DIVSUFSORT_API 87 | saidx_t 88 | divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n); 89 | 90 | /** 91 | * Returns the version of the divsufsort library. 92 | * @return The version number string. 
93 | */ 94 | DIVSUFSORT_API 95 | const char * 96 | divsufsort_version(void); 97 | 98 | 99 | /** 100 | * Constructs the burrows-wheeler transformed string of a given string and suffix array. 101 | * @param T[0..n-1] The input string. 102 | * @param U[0..n-1] The output string. (can be T) 103 | * @param SA[0..n-1] The suffix array. (can be NULL) 104 | * @param n The length of the given string. 105 | * @param idx The output primary index. 106 | * @return 0 if no error occurred, -1 or -2 otherwise. 107 | */ 108 | DIVSUFSORT_API 109 | saint_t 110 | bw_transform(const sauchar_t *T, sauchar_t *U, 111 | saidx_t *SA /* can be NULL */, 112 | saidx_t n, saidx_t *idx); 113 | 114 | /** 115 | * Inverse BW-transforms a given BWTed string. 116 | * @param T[0..n-1] The input string. 117 | * @param U[0..n-1] The output string. (can be T) 118 | * @param A[0..n-1] The temporary array. (can be NULL) 119 | * @param n The length of the given string. 120 | * @param idx The primary index. 121 | * @return 0 if no error occurred, -1 or -2 otherwise. 122 | */ 123 | DIVSUFSORT_API 124 | saint_t 125 | inverse_bw_transform(const sauchar_t *T, sauchar_t *U, 126 | saidx_t *A /* can be NULL */, 127 | saidx_t n, saidx_t idx); 128 | 129 | /** 130 | * Checks the correctness of a given suffix array. 131 | * @param T[0..n-1] The input string. 132 | * @param SA[0..n-1] The input suffix array. 133 | * @param n The length of the given string. 134 | * @param verbose The verbose mode. 135 | * @return 0 if no error occurred. 136 | */ 137 | DIVSUFSORT_API 138 | saint_t 139 | sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose); 140 | 141 | /** 142 | * Search for the pattern P in the string T. 143 | * @param T[0..Tsize-1] The input string. 144 | * @param Tsize The length of the given string. 145 | * @param P[0..Psize-1] The input pattern string. 146 | * @param Psize The length of the given pattern string. 147 | * @param SA[0..SAsize-1] The input suffix array. 
148 | * @param SAsize The length of the given suffix array. 149 | * @param left The output index. 150 | * @return The count of matches if no error occurred, -1 otherwise. 151 | */ 152 | DIVSUFSORT_API 153 | saidx_t 154 | sa_search(const sauchar_t *T, saidx_t Tsize, 155 | const sauchar_t *P, saidx_t Psize, 156 | const saidx_t *SA, saidx_t SAsize, 157 | saidx_t *left); 158 | 159 | /** 160 | * Search for the character c in the string T. 161 | * @param T[0..Tsize-1] The input string. 162 | * @param Tsize The length of the given string. 163 | * @param SA[0..SAsize-1] The input suffix array. 164 | * @param SAsize The length of the given suffix array. 165 | * @param c The input character. 166 | * @param left The output index. 167 | * @return The count of matches if no error occurred, -1 otherwise. 168 | */ 169 | DIVSUFSORT_API 170 | saidx_t 171 | sa_simplesearch(const sauchar_t *T, saidx_t Tsize, 172 | const saidx_t *SA, saidx_t SAsize, 173 | saint_t c, saidx_t *left); 174 | 175 | 176 | #ifdef __cplusplus 177 | } /* extern "C" */ 178 | #endif /* __cplusplus */ 179 | 180 | #endif /* _DIVSUFSORT_H */ 181 | -------------------------------------------------------------------------------- /bec_.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 
14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // Bit entropy coder 25 | // Based on: "A quick explaination of the M99 compression algorithm" http://www.michael-maniscalco.com/blog.htm 26 | // https://github.com/michaelmaniscalco/m99 27 | #define uint_t T3(uint,USIZE,_t) 28 | #define sym_t T2(sym_t,USIZE) 29 | #define bec_t T2(bec_t,USIZE) 30 | typedef struct { unsigned n; uint16_t c; } _PACKED sym_t; // occurrence n, symbol c 31 | typedef struct { stm_t s; sym_t *lo, *hi, _lo[32*(1 << USIZE)], _hi[32*(1 << USIZE)]; cw_t cw[(1 << USIZE)]; } bec_t; // stream object, pointer stack low/high to symbol table cw 32 | /* BECENC_: recursive encoder for in[0..n). lon is the length of the first partition, lor the length of the run of equal symbols at the start of in, and r receives the (symbol, count) table of this block. Blocks made of a single symbol and blocks of fewer than 3 symbols are handled directly; otherwise both partitions are encoded first and their tables are merged into r while the split counts are queued in t->cw. */ 33 | static void T2(BECENC_,USIZE)(bec_t *t, uint_t *in, unsigned n, unsigned lon, sym_t *r, unsigned lor) { 34 | if(lor >= n) r->c = *in, r->n = n; 35 | else if(n < 3) { 36 | if(n == 1) r->c = *in, r->n = 1; // 1 symbol 37 | else { int u = (int)in[0] - (int)in[1]; // 2 symbols 38 | r->c = in[u >= 0]; r->n = !u + 1; 39 | (r+1)->c = in[u < 0]; (r+1)->n = 1; if(u) stmputx(&t->s, 1, u<0); 40 | } 41 | } else { unsigned hir, m; sym_t *c; // recursive partitioning 42 | if(likely(lor <= lon)) hir = T2(memrun,USIZE)(in+lon, in+n); // run 43 | else hir = lor - lon; 44 | sym_t *lo = t->lo, *hi = t->hi; t->lo += (1 << USIZE); t->hi +=(1 << USIZE);// stack 45 | T2(BECENC_,USIZE)(t, in+lon, n-lon, (n-lon) >> 1, hi, hir); // encode high partition in[lon..n), its symbol table goes to hi 46 | T2(BECENC_,USIZE)(t, in, lon, lon >> 1, lo, lor); // encode low partition in[0..lon), its symbol table goes to lo 47 | 48 | sym_t *lohi[2]; cw_t *cw=t->cw; unsigned loc,hic,ln=lon,hn=n-lon; lohi[0] = lo; lohi[1] = hi; 49 | do { 50 | unsigned c0 = lohi[0]->c, c1 = lohi[1]->c; 51 
| loc = -(c0 <= c1) & lohi[0]->n; 52 | hic = -(c1 <= c0) & lohi[1]->n; 53 | r->c = lohi[!loc]->c; 54 | lohi[0] += (loc!=0); 55 | lohi[1] += (hic!=0); 56 | cw->l = loc; cw->n = (r++)->n = loc + hic; cw->hx = hn; hn-=hic; (cw++)->lx = ln; ln-=loc; // store in temp array cw 57 | } while(ln && hn); 58 | for(m = ln + hn, c = lohi[ln == 0]; m > 0;) m -= c->n, *r++ = *c++; 59 | while(cw-- != t->cw) stmput(&t->s,cw->l, cw->n, cw->lx, cw->hx); // encode cw in reverse order 60 | t->lo -= (1 << USIZE); t->hi -= (1 << USIZE); //stack 61 | } 62 | } 63 | /* BECENC: encodes the input into out and returns the byte count reported by stmflush. The first OV bytes of out are skipped before any bit is written. For USIZE == 16 the length is halved (rounded up), see the TODO below. NOTE(review): *in is read before the length check, so _inlen == 0 still dereferences in. */ 64 | unsigned T2(BECENC,USIZE)(uint_t *__restrict in, unsigned _inlen, unsigned char *__restrict out) { 65 | #define OV 8 66 | unsigned inlen = USIZE==16?((_inlen+1)/2):_inlen,len[(1 << USIZE)],i; //TODO:better processing for 16bits 67 | sym_t stab[(1 << USIZE)] = {0}; 68 | bec_t t; t.lo = t._lo; t.hi = t._hi; bectabini(bectab); stm_t *s = (stm_t *)&t.s; stmeini(s, out, _inlen); stmseek(s, 0, OV); 69 | uint_t *in_=in+inlen, *ip = in, c = *ip; while(ip < in_ && *ip == c) ip++; unsigned lor = ip - in; 70 | T2(BECENC_,USIZE)(&t, in, inlen, pow2next(inlen) >> 1, stab, lor); // encode 71 | 72 | for(i = 0; inlen; inlen -= stab[i++].n) len[i] = inlen; // write the symbol table 73 | for(int j = i-1; j >= 0; --j) { stmputx(s,USIZE, stab[j].c); stmput(s,stab[j].n, len[j], len[j], len[j]); } 74 | return stmflush(s); 75 | } 76 | /* BECDEC_: mirror of BECENC_. Splits the symbol table stab of out[0..n) into the tables of the two partitions (lo for out[0..lon), hi for out[lon..n)) by reading the split counts from the stream, then recurses into both partitions. */ 77 | static void T2(BECDEC_,USIZE)(bec_t *t, uint_t *out, unsigned n, unsigned lon, sym_t *stab) { 78 | if(stab->n >= n) { unsigned c = stab->c; memset_(out, c, n); } 79 | else if(n < 3) { 80 | if(n == 1) *out = stab->c; 81 | else { unsigned c; stmgetx(&t->s, 1, c); 82 | out[!c] = stab->c; 83 | out[ c] = (stab+1)->c; 84 | } 85 | } else { unsigned hin = n-lon, loc,hic, ln=lon, hn=hin; sym_t *lo=t->lo, *hi=t->hi; stm_t *s = &t->s; STMSAVE(s); 86 | do { stmget_(stab->n, ln, hn, loc); 87 | lo->c = hi->c = stab->c; hic = (stab++)->n - loc; 88 | lo += (lo->n = loc) != 0; ln -= loc; 89 | hi += (hi->n = hic) != 0; hn -= 
hic; 90 | } while(ln && hn); STMREST(s); 91 | for(lo = ln?lo:hi, ln += hn; ln > 0; ln -= stab->n, *lo++ = *stab++); 92 | 93 | lo = t->lo; hi = t->hi; t->lo += (1 << USIZE); t->hi += (1 << USIZE); // symbol table stack 94 | T2(BECDEC_,USIZE)(t, out, lon, lon >> 1, lo); // decode low partition 95 | T2(BECDEC_,USIZE)(t, out+lon, hin, hin >> 1, hi); // decode high partition 96 | t->lo -= (1 << USIZE); t->hi -= (1 << USIZE); // symbol table stack 97 | } 98 | } 99 | /* BECDEC: reads the symbol table from the stream, then decodes into out; always returns _outlen. For USIZE == 16 the working length is (_outlen+1)/2, matching the encoder. */ 100 | unsigned T2(BECDEC,USIZE)(unsigned char *__restrict in, unsigned _outlen, uint_t *__restrict out) { 101 | unsigned outlen = USIZE==16?((_outlen+1)/2):_outlen, i,n = outlen; 102 | sym_t stab[(1 << USIZE)]; 103 | bec_t t; t.lo = t._lo; t.hi = t._hi; stm_t *s = &t.s; stmdini(s, in, 0); STMSAVE(s); // read the symbol table 104 | for(i = 0; n; n -= stab[i++].n) { stmget_(n, n, n, stab[i].n); stmgetx_(USIZE, stab[i].c); } STMREST(s); 105 | T2(BECDEC_,USIZE)(&t, out, outlen, pow2next(outlen) >> 1, stab); // Decode 106 | return _outlen; 107 | } 108 | -------------------------------------------------------------------------------- /include_/vlcbit.h: -------------------------------------------------------------------------------- 1 | //-- Turbo VLC : Novel Variable Length Coding for large integers with exponent + mantissa 2 | #include "conf.h" 3 | 4 | // Exponent base for the bit size vlcbits: 1=63 2=123, 3=239 4=463 5=895 6=1727 7=3327 5 | #define VLC_VN6 ( 6-5) 6 | #define VLC_VN7 ( 7-5) 7 | #define VLC_VN8 ( 8-5) 8 | #define VLC_VN9 ( 9-5) 9 | #define VLC_VN10 (10-5) 10 | #define VLC_VN11 (11-5) 11 | #define VLC_VN12 (12-5) 12 | #define VLC_VN15 (15-5) 13 | #define VLC_VN16 (16-5) 14 | 15 | #define VLC_VB6 0 16 | #define VLC_VB7 4 17 | #define VLC_VB8 16 18 | #define VLC_VB9 48 19 | #define VLC_VB10 128 20 | #define VLC_VB11 296 21 | #define VLC_VB12 768 22 | #define VLC_VB16 768 //??? 
23 | /* vlcbits: width in bits of the stored exponent field. vlcfirst: first value that is split into exponent + mantissa (smaller values are stored as is, see bitvput). vlcmbits: number of mantissa bits for a given exponent code. */ 24 | #define vlcbits(_vn_) (5+_vn_) 25 | #define vlcfirst(_vn_) (1u<<(_vn_+1)) //1,0,4 2,4,8 3,16,16 26 | #define vlcmbits(_expo_, _vn_) (((_expo_) >> _vn_)-1) 27 | #define _vlcexpo_(_x_, _vn_,_expo_) { unsigned _f = __bsr32(_x_)-_vn_; _expo_ = ((_f+1)<<_vn_) + bextr32((_x_),_f,_vn_); } 28 | #ifndef VLCBIT_H_ 29 | #define VLCBIT_H_ 30 | static inline int vlcexpo(unsigned x, unsigned vn) { unsigned expo; _vlcexpo_(x, vn, expo); return expo; } 31 | #endif 32 | 33 | // return exponent, mantissa + bits length for x the value 34 | #define vlcenc( _x_, _vn_,_expo_, _mb_, _ma_) { unsigned _x = _x_; _vlcexpo_(_x, _vn_, _expo_); _mb_ = vlcmbits(_expo_, _vn_); _ma_ = bzhi32(_x,_mb_); } 35 | 36 | // build value from exponent, mantissa + length 37 | #define vlcdec(_expo_, _mb_, _ma_, _vn_) ((((1u << _vn_) + bzhi32(_expo_,_vn_))<<(_mb_)) + (_ma_)) 38 | 39 | // encode the mantissa in bitio (R->L) and return the exponent in u (bitenormr is not defined in this header and is expected to come from the including file) 40 | #define bitvrput(_bw_,_br_,_ep_, _vn_,_vb_, _u_) do { \ 41 | if((_u_) >= vlcfirst(_vn_)+_vb_) {\ 42 | unsigned _expo, _mb, _ma;\ 43 | vlcenc((_u_)-_vb_, _vn_, _expo, _mb, _ma); \ 44 | bitput(_bw_,_br_, _mb, _ma); bitenormr(_bw_,_br_,_ep_);\ 45 | _u_ = _expo+_vb_;\ 46 | }\ 47 | } while(0) 48 | 49 | // get mantissa and bitio (R->L) decode value from x 50 | /*#define bitvrget( _bw_,_br_,_ip_, _vn_,_vb_,_x_) \ 51 | if(_x_ >= vlcfirst(_vn_)+_vb_) { \ 52 | _x_ -= _vb_; \ 53 | int _mb = vlcmbits(_x_, _vn_), _ma; \ 54 | bitdnormr(_bw_,_br_,_ip_);\ 55 | bitget(_bw_,_br_, _mb,_ma);\ 56 | _x_ = vlcdec(_x_, _mb, _ma, _vn_)+_vb_;\ 57 | }*/ 58 | #define bitvrget(_bw_,_br_,_ip_,_vn_,_vb_,_x_) \ 59 | if(_x_ >= vlcfirst(_vn_)+_vb_) { \ 60 | bitdnormr(_bw_,_br_,_ip_); \ 61 | unsigned _mb = vlcmbits(_x_ -= _vb_, _vn_), _ma = bitpeek(_bw_,_br_, _mb); /*bitget(_bw_,_br_, _mb, _ma);*/\ 62 | _x_ = vlcdec(_x_, _mb, _ma, _vn_)+_vb_; bitrmv(_bw_,_br_, _mb);\ 63 | } 64 | 65 | //--------- Expo & mantissa with bitio ------------------------------ 66 | #define 
bitvput(_bw_,_br_,_vn_,_vb_, _u_) do { \ 67 | if((_u_) >= vlcfirst(_vn_)+_vb_) {\ 68 | unsigned _expo, _mb, _ma;\ 69 | vlcenc((_u_)-_vb_, _vn_, _expo, _mb, _ma); \ 70 | bitput(_bw_,_br_, vlcbits(_vn_), _expo+_vb_); \ 71 | bitput(_bw_,_br_, _mb, _ma); \ 72 | } else bitput(_bw_,_br_, vlcbits(_vn_), _u_);\ 73 | } while(0) 74 | 75 | #define bitvget( _bw_,_br_, _vn_,_vb_,_x_) do {\ 76 | bitget(_bw_,_br_, vlcbits(_vn_),_x_);\ 77 | if(_x_ >= vlcfirst(_vn_)+_vb_) { \ 78 | _x_ -= _vb_; \ 79 | int _mb = vlcmbits(_x_, _vn_), _ma; \ 80 | bitget(_bw_,_br_, _mb,_ma);\ 81 | _x_ = vlcdec(_x_, _mb, _ma, _vn_)+_vb_;\ 82 | }\ 83 | } while(0) 84 | 85 | //--------- Branchless, expo with byte io, mantissa with bitio --------------- (every value is offset by vlcfirst so it always carries a mantissa; _bp_ and _vb_ are not used by these two macros) 86 | #define bitvbput(_bw_,_br_,_cp_,_bp_,_vn_,_vb_, _u_) do { \ 87 | unsigned _expo, _mb, _ma;\ 88 | vlcenc((_u_)+vlcfirst(_vn_), _vn_, _expo, _mb, _ma); \ 89 | *_cp_++ = _expo; \ 90 | bitput(_bw_,_br_, _mb, _ma); \ 91 | } while(0) 92 | 93 | #define bitvbget(_bw_,_br_,_cp_,_bp_,_vn_,_vb_,_x_) do { _x_ = *_cp_++; \ 94 | unsigned _mb = vlcmbits(_x_, _vn_), _ma;\ 95 | _ma = bitpeek(_bw_, _br_, _mb); bitrmv(_bw_, _br_, _mb);\ 96 | _x_ = vlcdec(_x_, _mb, _ma, _vn_)-vlcfirst(_vn_);\ 97 | } while(0) 98 | 99 | //-- expo with byte io, mantissa with bitio; only values >= vlcfirst+vb carry a mantissa 100 | #define bitvcput(_bw_,_br_,_cp_,_bp_,_vn_,_vb_, _u_) do { \ 101 | if((_u_) >= vlcfirst(_vn_)+_vb_) {\ 102 | unsigned _expo, _mb, _ma;\ 103 | vlcenc((_u_)-_vb_, _vn_, _expo, _mb, _ma); \ 104 | *_cp_++ = _expo+_vb_; \ 105 | bitput(_bw_,_br_, _mb, _ma); /*bitenormr(_bw_,_br_,_bp_);*/\ 106 | } else *_cp_++ = _u_; \ 107 | } while(0) 108 | 109 | #define bitvcget(_bw_,_br_,_cp_,_bp_,_vn_,_vb_,_x_) do { /*bitdnormr(_bw_,_br_,_bp_);*/ _x_ = *_cp_++; \ 110 | if(likely(_x_ >= vlcfirst(_vn_)+_vb_)) {\ 111 | _x_ -= _vb_;\ 112 | int _mb = vlcmbits(_x_, _vn_), _ma; /*_ma = bitpeek(_bw_, _br_, _mb); bitrmv(_bw_,_br_, _mb);*/\ 113 | bitget(_bw_,_br_, _mb,_ma);\ 114 | _x_ = vlcdec(_x_, _mb, _ma, _vn_)+_vb_;\ 115 | }\ 116 | } while(0) 117 | 118 | 119 | #define 
vhifirst(_k_) (1<<(_k_)) 120 | 121 | #define vhimbits(_expo_,_k_,_i_,_j_) (_k_ - (_i_ + _j_) + ((_expo_ - (1<<_k_)) >> (_i_ + _j_))) 122 | // Hybrid integer https://www.lucaversari.it/phd/main.pdf 123 | #define VHI_K 4 124 | #define VHI_I 2 125 | #define VHI_J 1 126 | /* NOTE(review): vhienc and bithcget use BZHI32 while the other macros in this header use bzhi32 - confirm both are provided by conf.h. vhidec declares a local named after its own _mb_ macro parameter, so the caller's _mb_ argument becomes the name of a new variable. */ 127 | //#define _vlcexpo_(_x_, _k_,_i_,_j, _expo_) 128 | #define vhienc(_x_, _k_, _i_, _j_, _expo_, _mb_, _ma_) {\ 129 | unsigned n = __bsr32(_x_), m = _x_ - (1 << n);\ 130 | _expo_ = (1<<_k_) + ((n - _k_) << (_i_ + _j_)) + ((m >> (n - _i_)) << _j_) + BZHI32(m,_j_);\ 131 | _mb_ = n - _i_ - _j_;\ 132 | _ma_ = bzhi32(_x_ >> _j_,_mb_);\ 133 | } 134 | 135 | #define vhidec(_x_, _mb_, _ma_, _k_, _i_, _j_) {\ 136 | unsigned _mb_ = vhimbits(_x_,_k_,_i_,_j_), low_bits = _x_ & ((1 << _j_) - 1);\ 137 | _x_ >>= _j_;\ 138 | unsigned high_bits = (1 << _i_) | (_x_ & ((1 << _i_) - 1));\ 139 | _x_ = (((high_bits << _mb_) | _ma_) << _j_) | low_bits;\ 140 | } 141 | 142 | #define bithcput(_bw_,_br_,_cp_,_bp_,_k_,_i_,_j_, _u_) do { \ 143 | if((_u_) >= vhifirst(_k_)) {\ 144 | unsigned _expo, _mb, _ma;\ 145 | vhienc(_u_, _k_, _i_, _j_, _expo, _mb, _ma); \ 146 | *_cp_++ = _expo; \ 147 | bitput(_bw_,_br_, _mb, _ma); /*bitenormr(_bw_,_br_,_bp_);*/\ 148 | } else *_cp_++ = _u_; \ 149 | } while(0) 150 | 151 | #define bithcget(_bw_,_br_,_cp_,_bp_,_k_,_i_,_j_,_x_) do { /*bitdnormr(_bw_,_br_,_bp_);*/ _x_ = *_cp_++; \ 152 | if(likely(_x_ >= vhifirst(_k_))) {\ 153 | unsigned _mb = _k_ - (_i_ + _j_) + ((_x_ - (1<<_k_)) >> (_i_ + _j_)), _ma; bitget(_bw_,_br_, _mb,_ma);\ 154 | _x_ = (((((1 << _i_) | BZHI32(_x_ >> _j_,_i_)) << _mb) | _ma) << _j_) | BZHI32(_x_,_j_);\ 155 | }\ 156 | } while(0) 157 | 158 | -------------------------------------------------------------------------------- /libdivsufsort/include/divsufsort_private.h: -------------------------------------------------------------------------------- 1 | /* 2 | * divsufsort_private.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #ifndef _DIVSUFSORT_PRIVATE_H 28 | #define _DIVSUFSORT_PRIVATE_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | #if HAVE_CONFIG_H 35 | # include "config.h" 36 | #endif 37 | #include <assert.h> 38 | #include <stdio.h> 39 | #if HAVE_STRING_H 40 | # include <string.h> 41 | #endif 42 | #if HAVE_STDLIB_H 43 | # include <stdlib.h> 44 | #endif 45 | #if HAVE_MEMORY_H 46 | # include <memory.h> 47 | #endif 48 | #if HAVE_STDDEF_H 49 | # include <stddef.h> 50 | #endif 51 | #if HAVE_STRINGS_H 52 | # include <strings.h> 53 | #endif 54 | #if HAVE_INTTYPES_H 55 | # include <inttypes.h> 56 | #else 57 | # if HAVE_STDINT_H 58 | # include <stdint.h> 59 | # endif 60 | #endif 61 | #if defined(BUILD_DIVSUFSORT64) 62 | # include "divsufsort64.h" 63 | # ifndef SAIDX_T 64 | # define SAIDX_T 65 | # define saidx_t saidx64_t 66 | # endif /* SAIDX_T */ 67 | # ifndef PRIdSAIDX_T 68 | # define PRIdSAIDX_T PRIdSAIDX64_T 69 | # endif /* PRIdSAIDX_T */ 70 | # define divsufsort divsufsort64 71 | # define divbwt divbwt64 72 | # define divsufsort_version divsufsort64_version 73 | # define bw_transform bw_transform64 74 | # define inverse_bw_transform inverse_bw_transform64 75 | # define sufcheck sufcheck64 76 | # define sa_search sa_search64 77 | # define sa_simplesearch sa_simplesearch64 78 | # define sssort sssort64 79 | # define trsort trsort64 80 | #else 81 | # include "divsufsort.h" 82 | #endif 83 | 84 | 85 | /*- Constants -*/ 86 | #if !defined(UINT8_MAX) 87 | # define UINT8_MAX (255) 88 | #endif /* UINT8_MAX */ 89 | #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) 90 | # undef ALPHABET_SIZE 91 | #endif 92 | #if !defined(ALPHABET_SIZE) 93 | # define ALPHABET_SIZE (UINT8_MAX + 1) 94 | #endif 95 | /* for divsufsort.c */ 96 | #define BUCKET_A_SIZE (ALPHABET_SIZE) 97 | #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) 98 | /* for sssort.c */ 99 | #if defined(SS_INSERTIONSORT_THRESHOLD) 100 | # if SS_INSERTIONSORT_THRESHOLD < 1 101 | # undef SS_INSERTIONSORT_THRESHOLD 102 | # define SS_INSERTIONSORT_THRESHOLD (1) 103 | # endif 104 |
#else 105 | # define SS_INSERTIONSORT_THRESHOLD (8) 106 | #endif 107 | #if defined(SS_BLOCKSIZE) 108 | # if SS_BLOCKSIZE < 0 109 | # undef SS_BLOCKSIZE 110 | # define SS_BLOCKSIZE (0) 111 | # elif 32768 <= SS_BLOCKSIZE 112 | # undef SS_BLOCKSIZE 113 | # define SS_BLOCKSIZE (32767) 114 | # endif 115 | #else 116 | # define SS_BLOCKSIZE (1024) 117 | #endif 118 | /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ 119 | #if SS_BLOCKSIZE == 0 120 | # if defined(BUILD_DIVSUFSORT64) 121 | # define SS_MISORT_STACKSIZE (96) 122 | # else 123 | # define SS_MISORT_STACKSIZE (64) 124 | # endif 125 | #elif SS_BLOCKSIZE <= 4096 126 | # define SS_MISORT_STACKSIZE (16) 127 | #else 128 | # define SS_MISORT_STACKSIZE (24) 129 | #endif 130 | #if defined(BUILD_DIVSUFSORT64) 131 | # define SS_SMERGE_STACKSIZE (64) 132 | #else 133 | # define SS_SMERGE_STACKSIZE (32) 134 | #endif 135 | /* for trsort.c */ 136 | #define TR_INSERTIONSORT_THRESHOLD (8) 137 | #if defined(BUILD_DIVSUFSORT64) 138 | # define TR_STACKSIZE (96) 139 | #else 140 | # define TR_STACKSIZE (64) 141 | #endif 142 | 143 | 144 | /*- Macros -*/ 145 | #ifndef SWAP 146 | # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) 147 | #endif /* SWAP */ 148 | #ifndef MIN 149 | # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) 150 | #endif /* MIN */ 151 | #ifndef MAX 152 | # define MAX(_a, _b) (((_a) > (_b)) ? 
(_a) : (_b)) 153 | #endif /* MAX */ 154 | #define STACK_PUSH(_a, _b, _c, _d)\ 155 | do {\ 156 | assert(ssize < STACK_SIZE);\ 157 | stack[ssize].a = (_a), stack[ssize].b = (_b),\ 158 | stack[ssize].c = (_c), stack[ssize++].d = (_d);\ 159 | } while(0) 160 | #define STACK_PUSH5(_a, _b, _c, _d, _e)\ 161 | do {\ 162 | assert(ssize < STACK_SIZE);\ 163 | stack[ssize].a = (_a), stack[ssize].b = (_b),\ 164 | stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ 165 | } while(0) 166 | #define STACK_POP(_a, _b, _c, _d)\ 167 | do {\ 168 | assert(0 <= ssize);\ 169 | if(ssize == 0) { return; }\ 170 | (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ 171 | (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ 172 | } while(0) 173 | #define STACK_POP5(_a, _b, _c, _d, _e)\ 174 | do {\ 175 | assert(0 <= ssize);\ 176 | if(ssize == 0) { return; }\ 177 | (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ 178 | (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ 179 | } while(0) 180 | /* for divsufsort.c */ 181 | #define BUCKET_A(_c0) bucket_A[(_c0)] 182 | #if ALPHABET_SIZE == 256 183 | #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) 184 | #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) 185 | #else 186 | #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) 187 | #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) 188 | #endif 189 | 190 | 191 | /*- Private Prototypes -*/ 192 | /* sssort.c */ 193 | void 194 | sssort(const sauchar_t *Td, const saidx_t *PA, 195 | saidx_t *first, saidx_t *last, 196 | saidx_t *buf, saidx_t bufsize, 197 | saidx_t depth, saidx_t n, saint_t lastsuffix); 198 | 199 | /* daware.cpp */ 200 | void daware(saidx_t* SAf, saidx_t* SAl, saidx_t* ISAf, saidx_t* Sf, saidx_t* Sl); 201 | 202 | 203 | #ifdef __cplusplus 204 | } /* extern "C" */ 205 | #endif /* __cplusplus */ 206 | 207 | #endif /* _DIVSUFSORT_PRIVATE_H */ 208 | 
-------------------------------------------------------------------------------- /rccdf_.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC: Range Coder - CDF include 25 | #ifdef __AVX2__ 26 | #include <immintrin.h> 27 | #elif defined(__AVX__) 28 | #include <immintrin.h> 29 | #elif defined(__SSE4_1__) 30 | #include <smmintrin.h> 31 | #elif defined(__SSSE3__) 32 | #ifdef __powerpc64__ 33 | #define __SSE__ 1 34 | #define __SSE2__ 1 35 | #define __SSE3__ 1 36 | #define NO_WARN_X86_INTRINSICS 1 37 | #endif 38 | #include <tmmintrin.h> 39 | #elif defined(__SSE2__) 40 | #include <emmintrin.h> 41 | #elif defined(__ARM_NEON) 42 | #include <arm_neon.h> 43 | #define __m128i uint32x4_t 44 | #define _mm_loadu_si128( _ip_) vld1q_u32(_ip_) 45 | #define _mm_add_epi16( _a_,_b_) (__m128i)vaddq_u16((uint16x8_t)(_a_), (uint16x8_t)(_b_)) 46 | #define _mm_srai_epi16( _a_,_m_) (__m128i)vshrq_n_s16((int16x8_t)(_a_), _m_) 47 | #define _mm_sub_epi16( _a_,_b_) (__m128i)vsubq_u16((uint16x8_t)(_a_), (uint16x8_t)(_b_)) 48 | #define
_mm_storeu_si128(_ip_,_a_) vst1q_u32((__m128i *)(_ip_),_a_) 49 | #endif 50 | 51 | #include "cdf_.h" 52 | 53 | #define cdf4e(_rcrange_,_rclow_,_rcilow_,_cdf_,_x_,_op_) { cdfenc(_rcrange_,_rclow_,_rcilow_, _cdf_, _x_, _op_); cdf16upd(_cdf_,_x_); } 54 | 55 | #define cdf8e(_rcrange_,_rclow_,_rcilow_,_cdfh_, _cdfl_, _x_, _op_) { \ 56 | unsigned _x = _x_, _xh = _x>>4, _xl=_x & 0xf;\ 57 | cdf4e(_rcrange_,_rclow_,_rcilow_, _cdfh_, _xh, _op_);\ 58 | cdf4e(_rcrange_,_rclow_,_rcilow_, _cdfl_[_xh], _xl, _op_);\ 59 | } 60 | #if 1 61 | #define cdf8e2(_rcrange0_,_rclow0_,_rcilow0_,_rcrange1_,_rclow1_,_rcilow1_,_cdfh_, _cdfl_, _x_, _op0_, _op1_) {\ 62 | unsigned _x = _x_, _xh = _x>>4, _xl=_x & 0xf;\ 63 | cdf4e(_rcrange0_,_rclow0_,_rcilow0_,_cdfh_, _xh, _op0_);\ 64 | cdf4e(_rcrange1_,_rclow1_,_rcilow1_,_cdfl_[_xh], _xl, _op1_);\ 65 | } 66 | #else 67 | #define cdf8e2(_rcrange0_,_rclow0_,_rcilow0_,_rcrange1_,_rclow1_,_rcilow1_,_cdfh_, _cdfl_, _x_, _op0_, _op1_) {\ 68 | unsigned _x = _x_, _xh = _x>>4, _xl=_x & 0xf;\ 69 | cdfenc(_rcrange0_,_rclow0_, _cdfh_, _xh, _op0_); cdf16upd(_cdfh_,_xh);\ 70 | cdfenc(_rcrange1_,_rclow1_, _cdfl_[_xh], _xl, _op1_); cdf16upd( _cdfl_[_xh],_xl);\ 71 | } 72 | #endif 73 | #define cdf4d(_rcrange_,_rccode_, _cdf_,_x_,_ip_) { cdflget16(_rcrange_,_rccode_, _cdf_, _x_, _ip_); cdf16upd(_cdf_,_x_); } 74 | 75 | #define cdf8d(_rcrange_,rccode, _cdfh_, _cdfl_, _x_, _ip_) {\ 76 | unsigned _xh,_xl; cdf4d(_rcrange_,rccode,_cdfh_,_xh, _ip_);\ 77 | cdf_t *_cdfl = _cdfl_[_xh]; cdf4d(_rcrange_,rccode,_cdfl, _xl, _ip_);\ 78 | _x_ = _xh << 4| _xl;\ 79 | } 80 | 81 | #if 0 82 | #define cdf8d2(rcrange0,rccode0,rcrange1,rccode1, _cdfh_, _cdfl_, _x_, _ip0_, _ip1_) {\ 83 | unsigned _xh,_xl; cdf4d(rcrange0,rccode0,_cdfh_,_xh, _ip0_);\ 84 | cdf_t *_cdfl = _cdfl_[_xh]; cdf4d(rcrange1,rccode1,_cdfl, _xl, _ip1_);\ 85 | _x_ = _xh << 4| _xl;\ 86 | } 87 | #else 88 | #define cdf8d2(rcrange0,rccode0,rcrange1,rccode1, _cdfh_, _cdfl_, _x_, _ip0_, _ip1_) {\ 89 | unsigned _xh,_xl;\ 90 | 
_rccdfrange(rcrange0);\ 91 | _rccdfrange(rcrange1);\ 92 | _cdflget16(rcrange0,rccode0, _cdfh_, _xh);\ 93 | cdf_t *_cdfl = _cdfl_[_xh]; \ 94 | _cdflget16(rcrange1,rccode1, _cdfl, _xl); \ 95 | _rccdfupdate(rcrange0,rccode0, _cdfh_[_xh], _cdfh_[_xh+1],_ip0_); cdf16upd(_cdfh_,_xh);\ 96 | _rccdfupdate(rcrange1,rccode1, _cdfl[ _xl], _cdfl[ _xl+1],_ip1_); cdf16upd(_cdfl,_xl); /*cdf16upd2(_cdfh_,_xh, _cdfl,_xl);*/\ 97 | _x_ = _xh << 4| _xl; \ 98 | } 99 | #endif 100 | 101 | // Variable Length Coding: Integer 0-299 102 | // 1:0-12 0-12 103 | // 2:13,14 xxxx 13,14...45(13+32) 104 | // 3:15 xxxx+xxxx 46,47..299(255+13+32) 105 | #define cdfe8(rcrange0,rclow0,rcilow0,rcrange1,rclow1,rcilow1, _m0_, _m1_, _m2_, _x_, _op0_, _op1_) { unsigned _x = _x_;\ 106 | if(likely(_x < 13)) { cdf4e(rcrange0,rclow0,rcilow0,_m0_, _x, _op0_); }\ 107 | else if (_x < 13+32) { _x -= 13; unsigned _y = (_x>>4)+13; cdf4e(rcrange0,rclow0,rcilow0,_m0_, _y, _op0_); \ 108 | _y = _x&0xf; cdf4e(rcrange1,rclow1,rcilow1,_m1_, _y, _op1_); }\ 109 | else { _x -= 13+32; unsigned _y = _x>>4; cdf4e(rcrange0,rclow0,rcilow0,_m0_, 15, _op0_); \ 110 | cdf4e(rcrange1,rclow1,rcilow1,_m1_, _y, _op1_);\ 111 | _y = _x&0xf; cdf4e(rcrange0,rclow0,rcilow0,_m2_, _y, _op0_); }\ 112 | } 113 | 114 | #define cdfd8(rcrange0,rccode0,rcrange1,rccode1, _m0_, _m1_, _m2_, _x_, _ip0_, _ip1_) {\ 115 | cdf4d(rcrange0,rccode0,_m0_,_x_,_ip0_);\ 116 | if(_x_ >= 13) { \ 117 | if(_x_ != 15) { unsigned _y; cdf4d(rcrange1,rccode1,_m1_,_y, _ip1_); _x_=((_x_-13)<<4|_y)+13; }\ 118 | else { unsigned _y; cdf4d(rcrange1,rccode1,_m1_,_y, _ip1_); \ 119 | cdf4d(rcrange0,rccode0,_m2_,_x_,_ip0_); _x_=(_y<<4|_x_)+13+32; } \ 120 | }\ 121 | } 122 | // 7bits: 8+127 123 | //1: 0-8 xxxx 124 | //2: 9+3bits: 9,10,11, 12,13,14,15 1xxx xxxx 125 | #define cdfe7(rcrange0,rclow0,rcilow0,rcrange1,rclow1,rcilow1, _m0_, _m1_, _x_, _op0_, _op1_) { unsigned _x = _x_;\ 126 | if(likely(_x < 8)) { cdf4e(rcrange0,rclow0,rcilow0,_m0_, _x, _op0_); }\ 127 | else { _x -= 8; unsigned _y 
= (_x>>4)+8; cdf4e(rcrange0,rclow0,rcilow0,_m0_, _y, _op0_); \ 128 | _y = _x&0xf; cdf4e(rcrange1,rclow1,rcilow1,_m1_, _y, _op1_); }\ 129 | } 130 | 131 | #define cdfd7(rcrange0,rccode0,rcrange1,rccode1, _m0_, _m1_, _x_, _ip0_, _ip1_) {\ 132 | cdf4d(rcrange0,rccode0,_m0_,_x_,_ip0_);\ 133 | if(_x_ >= 8) { unsigned _y; cdf4d(rcrange1,rccode1,_m1_,_y, _ip1_); _x_=((_x_-8)<<4|_y)+8; }\ 134 | } 135 | // 6 bits: 0-76 136 | // 1: 0-12 137 | // 2: 13,14,15: bbxxxx 13 - 13+63 bits 138 | #define cdfe6(rcrange0,rclow0,rcilow0,rcrange1,rclow1,rcilow1, _m0_, _m1_, _x_, _op0_, _op1_) { unsigned _x = _x_;\ 139 | if(likely(_x < 12)) { cdf4e(rcrange0,rclow0,rcilow0,_m0_, _x, _op0_); }\ 140 | else { _x -= 12; unsigned _y = (_x>>4)+12; cdf4e(rcrange0,rclow0,rcilow0,_m0_, _y, _op0_); \ 141 | _y = _x&0xf; cdf4e(rcrange1,rclow1,rcilow1,_m1_, _y, _op1_); }\ 142 | } 143 | 144 | #define cdfd6(rcrange0,rccode0,rcrange1,rccode1, _m0_, _m1_, _x_, _ip0_, _ip1_) {\ 145 | cdf4d(rcrange0,rccode0,_m0_,_x_,_ip0_);\ 146 | if(_x_ >= 12) { unsigned _y; cdf4d(rcrange1,rccode1,_m1_,_y, _ip1_); _x_=((_x_-12)<<4|_y)+12; }\ 147 | } 148 | -------------------------------------------------------------------------------- /include_/rcutil.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 
14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRC Range Coder : include header 25 | #if defined(_MSC_VER) && (_MSC_VER < 1600) 26 | #if !defined(_STDINT) && !defined(_MSC_STDINT_H_) 27 | typedef unsigned char uint8_t; 28 | typedef unsigned short uint16_t; 29 | typedef unsigned int uint32_t; 30 | typedef unsigned long long uint64_t; 31 | #endif 32 | #else 33 | #include <stdint.h> 34 | #endif 35 | #include <stddef.h> 36 | 37 | #if __HAVE_FLOAT16 38 | #define HAVE_FLOAT16 1 39 | #elif !defined(__HAVE_FLOAT16) 40 | #define __STDC_WANT_IEC_60559_TYPES_EXT__ 41 | #include <float.h> 42 | 43 | #if defined(__clang__) && defined(__is_identifier) 44 | #if __is_identifier(_Float16) 45 | #define HAVE_FLOAT16 1 46 | #endif 47 | #elif defined(FLT16_MAX) 48 | #define HAVE_FLOAT16 1 49 | #endif 50 | #endif 51 | 52 | #ifdef __cplusplus 53 | extern "C" { 54 | #endif 55 | size_t lzpenc(unsigned char *__restrict in, size_t inlen, unsigned char *__restrict out, unsigned lenmin, unsigned h_bits); 56 | size_t lzpdec(unsigned char *__restrict in, size_t outlen, unsigned char *__restrict out, unsigned lenmin, unsigned h_bits); 57 | 58 | unsigned char *rcqlfc(unsigned char *__restrict in, size_t n, unsigned char *__restrict out, unsigned char *__restrict r2c); 59 | 60 | size_t utf8enc(unsigned char *__restrict in, size_t inlen, unsigned char *__restrict out, unsigned flag); 61 | size_t utf8dec(unsigned char *__restrict in, size_t outlen, unsigned char *__restrict out); 62 | void memrev(unsigned char a[], unsigned n); 63 | 64 | //----------- 16 = 2x8,1x16 ---------------- 65 | unsigned delta8l16(uint8_t *in, size_t n); 66 | void
delta8e16(uint8_t *in, size_t n, uint8_t *out); 67 | void delta8d16(uint8_t *in, size_t n, uint8_t *out); 68 | 69 | unsigned delta16l16(uint8_t *in, size_t n); 70 | void delta16e16(uint8_t *in, size_t n, uint8_t *out); 71 | void delta16d16(uint8_t *in, size_t n, uint8_t *out); 72 | //----------- 24 = 3x8 ----------------- 73 | unsigned delta8l24(uint8_t *in, size_t n); 74 | void delta8e24(uint8_t *in, size_t n, uint8_t *out); 75 | void delta8d24(uint8_t *in, size_t n, uint8_t *out); 76 | 77 | //----------- 32 = 8x4,4x16,1x32 ---------------- 78 | unsigned delta8l32(uint8_t *in, size_t n); 79 | void delta8e32(uint8_t *in, size_t n, uint8_t *out); 80 | void delta8d32(uint8_t *in, size_t n, uint8_t *out); 81 | 82 | unsigned delta16l32(uint8_t *in, size_t n); 83 | void delta16e32(uint8_t *in, size_t n, uint8_t *out); 84 | void delta16d32(uint8_t *in, size_t n, uint8_t *out); 85 | 86 | unsigned delta32l32(uint8_t *in, size_t n); 87 | void delta32e32(uint8_t *in, size_t n, uint8_t *out); 88 | void delta32d32(uint8_t *in, size_t n, uint8_t *out); 89 | 90 | void xorenc64( unsigned char *in, size_t inlen, unsigned char *out); 91 | void xordec64( unsigned char *in, size_t inlen, unsigned char *out); 92 | void xorenc32( unsigned char *in, size_t inlen, unsigned char *out); 93 | void xordec32( unsigned char *in, size_t inlen, unsigned char *out); 94 | void xorenc16( unsigned char *in, size_t inlen, unsigned char *out); 95 | void xordec16( unsigned char *in, size_t inlen, unsigned char *out); 96 | 97 | void zzagenc16( unsigned char *in, size_t inlen, unsigned char *out); 98 | void zzagdec16( unsigned char *in, size_t inlen, unsigned char *out); 99 | void zzagenc32( unsigned char *in, size_t inlen, unsigned char *out); 100 | void zzagdec32( unsigned char *in, size_t inlen, unsigned char *out); 101 | void zzagenc64( unsigned char *in, size_t inlen, unsigned char *out); 102 | void zzagdec64( unsigned char *in, size_t inlen, unsigned char *out); 103 | 104 | //------- Quantization 
[0..qmax] : qmax maximum quantized value in out ---------------- 105 | #if defined(HAVE_FLOAT16) 106 | size_t fpquant8e16( _Float16 *in, size_t inlen, uint8_t *out, unsigned qmax, _Float16 *pfmin, _Float16 *pfmax, _Float16 zmin); 107 | size_t fpquant16e16(_Float16 *in, size_t inlen, uint16_t *out, unsigned qmax, _Float16 *pfmin, _Float16 *pfmax, _Float16 zmin); 108 | //size_t fpquantv8e16(_Float16 *in, size_t inlen, uint8_t *out, unsigned qmax, _Float16 *pfmin, _Float16 *pfmax, _Float16 zmin); 109 | #endif 110 | 111 | size_t fpquant8e32( float *in, size_t inlen, uint8_t *out, unsigned qmax, float *pfmin, float *pfmax, float zmin); 112 | size_t fpquant16e32( float *in, size_t inlen, uint16_t *out, unsigned qmax, float *pfmin, float *pfmax, float zmin); 113 | size_t fpquant32e32( float *in, size_t inlen, uint32_t *out, unsigned qmax, float *pfmin, float *pfmax, float zmin); 114 | 115 | size_t fpquant8e64( double *in, size_t inlen, uint8_t *out, unsigned qmax, double *pfmin, double *pfmax, double zmin); 116 | size_t fpquant16e64( double *in, size_t inlen, uint16_t *out, unsigned qmax, double *pfmin, double *pfmax, double zmin); 117 | size_t fpquant32e64( double *in, size_t inlen, uint32_t *out, unsigned qmax, double *pfmin, double *pfmax, double zmin); 118 | size_t fpquant64e64( double *in, size_t inlen, uint64_t *out, unsigned qmax, double *pfmin, double *pfmax, double zmin); 119 | 120 | #if defined(HAVE_FLOAT16) 121 | size_t fpquant8d16( uint8_t *in, size_t outlen, _Float16 *out, unsigned qmax, _Float16 fmin, _Float16 fmax, size_t inlen); 122 | size_t fpquant16d16( uint16_t *in, size_t outlen, _Float16 *out, unsigned qmax, _Float16 fmin, _Float16 fmax); 123 | //size_t fpquantv8d16( uint8_t *in, size_t outlen, _Float16 *out, unsigned qmax, _Float16 fmin, _Float16 fmax); 124 | #endif 125 | 126 | size_t fpquant8d32( uint8_t *in, size_t outlen, float *out, unsigned qmax, float fmin, float fmax, size_t inlen); 127 | size_t fpquant16d32( uint16_t *in, size_t outlen, float 
*out, unsigned qmax, float fmin, float fmax); 128 | size_t fpquant32d32( uint32_t *in, size_t outlen, float *out, unsigned qmax, float fmin, float fmax); 129 | 130 | size_t fpquant8d64( uint8_t *in, size_t outlen, double *out, unsigned qmax, double fmin, double fmax, size_t inlen); 131 | size_t fpquant16d64( uint16_t *in, size_t outlen, double *out, unsigned qmax, double fmin, double fmax); 132 | size_t fpquant32d64( uint32_t *in, size_t outlen, double *out, unsigned qmax, double fmin, double fmax); 133 | size_t fpquant64d64( uint64_t *in, size_t outlen, double *out, unsigned qmax, double fmin, double fmax); 134 | 135 | //------- Lossy floating point transform: pad the trailing mantissa bits with zeros according to the error e (ex. e=0.00001) 136 | // must include float.h to use _Float16 (see icapp.c) 137 | #ifdef HAVE_FLOAT16 138 | _Float16 _fprazor16(_Float16 d, float e, int lg2e); 139 | void fprazor16(_Float16 *in, unsigned n, _Float16 *out, float e); 140 | #endif 141 | 142 | float _fprazor32(float d, float e, int lg2e); 143 | double _fprazor64(double d, double e, int lg2e); 144 | void fprazor32( float *in, unsigned n, float *out, float e); 145 | void fprazor64(double *in, unsigned n, double *out, double e); 146 | 147 | #ifdef __cplusplus 148 | } 149 | #endif 150 | -------------------------------------------------------------------------------- /rcbwt.c: -------------------------------------------------------------------------------- 1 | // Turbo Range Coder bwt: templates include 2 | #include "include/turborc.h" 3 | #include "include_/conf.h" 4 | #include "include_/rcutil.h" 5 | #include "include_/bec.h" 6 | #include "include_/vlcbit.h" 7 | 8 | #include "rcutil_.h" 9 | #include "mb_vint.h" 10 | 11 | //------------------------------------------- bwt ---------------------------------------------------------- 12 | #ifdef _BWTDIV 13 | #include "libdivsufsort/include/divsufsort.h" 14 | #include "libdivsufsort/include/unbwt.h" 15 | #else 16 | #include 
"libsais/include/libsais.h" 17 | #include "libsais/include/libsais16.h" 18 | typedef int32_t saidx_t; 19 | #endif 20 | #define LZPREV(a) a//// 21 | #define OUT out //in // 22 | static int bwtx, forcelzp; 23 | static unsigned calcmod(size_t len) { return 1<<__bsr32(len); } 24 | #define SR 16 25 | 26 | #ifndef NCOMP 27 | static unsigned lenmins[64] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 | 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 96, 96, 96, 96, 96, 128, 144, 144, 144 }; 29 | // MB 0 0 0 0 0 0 0 0 1 1 2 3 4 6 8 12 16 24 32 48 64 96 128 192 256 384 512 768 1024 1536 2048 3072 30 | 31 | size_t rcbwtenc(unsigned char *in, size_t inlen, unsigned char *out, unsigned lev, unsigned thnum, unsigned _lenmin) { //char *ipp = malloc(inlen); memcpy(ipp,in,inlen); 32 | size_t iplen = inlen; 33 | unsigned lenmin = _lenmin & 0x3ff, xbwt16 = (_lenmin & BWT_BWT16)?0x80:0, verbose = _lenmin & BWT_VERBOSE, nutf8 = _lenmin & BWT_NUTF8; 34 | unsigned char *op = out, *bwt = vmalloc(inlen+1024), *ip = in; if(!bwt) goto err; // inlen + space for bwt indexes idxns 35 | if(lenmin==1) lenmin = lenmins[vlcexpo(inlen,1)]; 36 | if(verbose) { printf("\nlev=%u MB=%zu expo=%u nutf8=%d ", lev, inlen/(1<<20), vlcexpo(inlen,1), nutf8?1:0);fflush(stdout); } 37 | if(lenmin) { if(verbose) { printf("lenmin=%u ", lenmin);fflush(stdout); } 38 | ip = bwt; 39 | switch(lenmin) { 40 | //case 2 : iplen = fastaenc(in, inlen, ip); if(verbose) { printf("GenTR %u->%u ", inlen, iplen); fflush(stdout); } break; 41 | default : if(!nutf8) { iplen = utf8enc(in, inlen, ip, _lenmin); if(verbose) { if(iplen == inlen) printf("NoUTF8 "); else printf("UTF8:%zu->%zu ", inlen, iplen); fflush(stdout); }} break; // try utf8 preprocessing 42 | } 43 | if(lenmin < 15 || iplen != inlen && iplen != -1) 44 | lenmin = lenmin<15?128-lenmin:127; // lenmin = 127-15 for other prep 45 | else { 46 | lenmin = 
((lenmin>384?384:lenmin)+3)/4; 47 | ip = bwt; LZPREV(if(lev==9) { memcpy(out, in, inlen); memrev(out, inlen); } ); 48 | iplen = lzpenc(lev==9?OUT:in, inlen, ip, lenmin*4, lev > 8?1:0); 49 | if(iplen == inlen || iplen+(inlen>>7)+256 > inlen && !forcelzp) { /*Not enough saving*/ if(verbose) { printf("NoLzp=%.2f%% ", (double)iplen*100.0/inlen);fflush(stdout); } 50 | ip = in; iplen = inlen; lenmin = 0; 51 | } else { if(verbose) { printf("Lzp=%.2f%% ", (double)iplen*100.0/inlen);fflush(stdout); } 52 | LZPREV(if(lev==9) memrev(ip, iplen)); 53 | } 54 | } 55 | } 56 | *op++ = xbwt16|lenmin; /* header byte: bit 7 = 16-bit BWT flag, low 7 bits = preprocessing code (0 = none) */ 57 | if(lenmin) ctou32(op) = iplen, op += 4; /* the preprocessed length is stored only when a preprocessor was applied */ 58 | #ifdef _BWTDIV 59 | *op++ = 0; /* filler byte; the non-_BWTDIV branch stores idxsn-1 at this position */ 60 | saidx_t *sa = (saidx_t *)vmalloc((iplen+2)*sizeof(sa[0])); if(!sa) goto err; 61 | *(saidx_t *)op = divbwt(ip, bwt, sa, iplen); /* the value returned by divbwt is stored for the decoder */ 62 | op += sizeof(sa[0]); 63 | #else 64 | unsigned idxs[256], iplen_ = xbwt16?(iplen/2):iplen, 65 | mod = calcmod(iplen_/SR), idxsn = (iplen_-1)/mod + 1; //printf("bwt idxs=%d ", idxsn); //idxsn = (idxsn/SR)*SR; 66 | *op++ = idxsn - 1; 67 | saidx_t *sa = (saidx_t *)vmalloc((iplen_+2+128)*sizeof(sa[0])); if(!sa) goto err; if(verbose) { printf("bwt16=%u ", xbwt16>0);fflush(stdout); } 68 | #ifdef _LIBSAIS16 69 | if(xbwt16) { if(verbose) { printf("-"); fflush(stdout); } 70 | unsigned rc = libsais16_bwt_aux(ip, bwt, sa, iplen_, 0, 0, mod, idxs); if(verbose) { printf("+"); fflush(stdout); } 71 | if(iplen & 1) bwt[iplen-1] = ip[iplen-1]; 72 | } else 73 | #endif 74 | { 75 | libsais_bwt_aux(ip, bwt, sa, iplen, 0, 0, mod, idxs); //libsais_bwt(ip, bwt, sa, iplen, fs);//if(ip == in) { memcpy(bwt, ip, iplen); ip = bwt; } memrev(ip, iplen); ip[iplen] = 0; 76 | } 77 | memcpy(op, idxs, idxsn*sizeof(idxs[0])); 78 | op += idxsn*sizeof(idxs[0]); 79 | #endif 80 | vfree(sa); 81 | switch(lev) { 82 | case 0: memcpy(op, bwt, iplen); op += iplen; if(op-out == inlen) *op++ = 0; goto e; break; 83 | case 2: op += xbwt16?becenc16(bwt, iplen, op):becenc8(bwt, iplen, op); break; 84 | case 3: op +=
xbwt16?rcrlesenc16( bwt, iplen, op): rcrlesenc(bwt, iplen, op); break; 85 | case 4: op += xbwt16?rcrlessenc16( bwt, iplen, op, 4,7):rcrlessenc( bwt, iplen, op, 4,7); break; 86 | case 5: op += xbwt16?rcrle1senc16( bwt, iplen, op): rcrle1senc( bwt, iplen, op); break; 87 | case 6: op += xbwt16?rcrle1ssenc16(bwt, iplen, op, 3,7):rcrle1ssenc(bwt, iplen, op, 3,7); break; 88 | case 7: op += rcqlfcsenc( bwt, iplen, op); break; 89 | case 9: op += rcmrrssenc( bwt, iplen, op, 0, 0); break; // prm1,prm2 in mbc.h fixed 90 | case 8: 91 | default: op += rcqlfcssenc( bwt, iplen, op, 4, 7); 92 | } OVERFLOW0(in,inlen,out, op, goto err); 93 | goto e; 94 | err: memcpy(out, in, inlen); op = out+inlen; /* error path: the input is copied to out unchanged */ 95 | e:if(bwt) vfree(bwt); if(verbose) { printf("clen=%lld ", (long long)(op-out)); /* %lld requires long long; int64_t may be plain long */ fflush(stdout); } 96 | return op - out; 97 | } 98 | #endif 99 | /* rcbwtdec: reads the header byte written by rcbwtenc (bit 7 = 16-bit BWT flag, low 7 bits = preprocessing code), entropy-decodes according to lev, inverts the BWT and undoes the preprocessing; returns the status value of the inverse-BWT call */ 100 | #ifndef NDECOMP 101 | size_t rcbwtdec(unsigned char *in, size_t outlen, unsigned char *out, unsigned lev, unsigned thnum) { 102 | unsigned char *ip = in; 103 | unsigned lenmin = *ip++, xbwt16 = lenmin&0x80; lenmin &=0x7f; 104 | size_t oplen = outlen, rc; 105 | 106 | if(lenmin) oplen = ctou32(ip),ip += 4; 107 | 108 | #ifdef _BWTDIV 109 | ip++; 110 | saidx_t bwtidx = *(saidx_t *)ip; ip += sizeof(saidx_t); 111 | #else 112 | unsigned idxs[256]; 113 | int oplen_ = xbwt16?oplen/2:oplen, mod = calcmod(oplen_/SR), idxsn = (oplen_-1)/mod + 1; //idxsn = (idxsn/SR)*SR; 114 | ip++; // idxsn 115 | memcpy(idxs, ip, idxsn*sizeof(idxs[0])); ip += idxsn*sizeof(idxs[0]); 116 | #endif 117 | unsigned char *_bwt = vmalloc(oplen+128), *op = out, *bwt = _bwt; if(!_bwt) die("malloc failed\n"); 118 | { if(lenmin) { bwt = out; op = _bwt; } } 119 | switch(lev) { 120 | case 0: memcpy(bwt, ip, oplen+bwtx); break; 121 | case 2: xbwt16?becdec16(ip, oplen+bwtx, bwt):becdec8(ip, oplen+bwtx, bwt); break; 122 | case 3: xbwt16?rcrlesdec16( ip, oplen+bwtx, bwt): rcrlesdec( ip, oplen+bwtx, bwt); break; 123 | case 4: xbwt16?rcrlessdec16( ip, oplen+bwtx, bwt, 4, 7):rcrlessdec(
ip, oplen+bwtx, bwt, 4, 7); break; 124 | case 5: xbwt16?rcrle1sdec16( ip, oplen+bwtx, bwt): rcrle1sdec( ip, oplen+bwtx, bwt); break; 125 | case 6: xbwt16?rcrle1ssdec16(ip, oplen+bwtx, bwt, 3, 7):rcrle1ssdec(ip, oplen+bwtx, bwt, 3, 7); break; 126 | case 7: rcqlfcsdec( ip, oplen+bwtx, bwt); break; 127 | case 9: rcmrrssdec( ip, oplen+bwtx, bwt, 0, 0); break; 128 | case 8: 129 | default: rcqlfcssdec( ip, oplen+bwtx, bwt, 4, 7); 130 | } 131 | saidx_t *sa = (saidx_t *)vmalloc((oplen+2+128)*sizeof(sa[0])); if(!sa) { vfree(bwt); die("malloc failed\n"); } 132 | #ifdef _BWTDIV 133 | rc = obwt_unbwt_biPSIv2(bwt, op, sa, oplen, bwtidx); 134 | #else 135 | #ifdef _LIBSAIS16 136 | if(xbwt16) { rc = libsais16_unbwt_aux(bwt, op, sa, oplen_, 0, mod, idxs); if(oplen & 1) op[oplen-1] = bwt[oplen-1]; } 137 | else 138 | #endif 139 | rc = libsais_unbwt_aux(bwt, op, sa, oplen, 0, mod, idxs); //libsais_unbwt(bwt, op, sa, oplen, idxs[0]); //#bwtinv(bwt, oplen, op, NULL, idxs, idxsn); memrev(op, oplen); //op[256]=0; printf("%s ", op); 140 | #endif 141 | vfree(sa); 142 | 143 | if(lenmin) { 144 | switch(lenmin) { 145 | case 127: utf8dec(op, outlen, out); break; 146 | //case 126: fastadec(op, outlen, out); break; 147 | default: LZPREV(if(lev==9) memrev(op, oplen)); 148 | lzpdec(op, outlen, out, lenmin*4, lev > 8?1:0); LZPREV(if(lev==9) memrev(out, outlen)); 149 | } 150 | } 151 | vfree(_bwt); 152 | return rc; 153 | } 154 | #endif 155 | -------------------------------------------------------------------------------- /cdf_.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | SPDX-License-Identifier: GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // CDF : Cumulative distribution function for range coder + rans 25 | #define CDFRATE 7 26 | #define CDFDEC0(_mb_, _n_) cdf_t _mb_[_n_+1]; { int _j; for(_j = 0; _j <= _n_; _j++) _mb_[_j] = _j << (RC_BITS-4); } 27 | #define CDFDEC1(_mb_,_n1_, _n0_) cdf_t _mb_[_n1_][_n0_+1]; { int _i,_j; for(_i=0; _i < _n1_; _i++) for(_j = 0; _j <= _n0_; _j++) _mb_[_i][_j] = _j << (RC_BITS-4); } 28 | #define CDFDEC2(_mb_, _n2_, _n1_,_n0_) cdf_t _mb_[_n2_][_n1_][_n0_+1]; { int _i,_j,_k;\ 29 | for( _i = 0; _i < _n2_; _i++) \ 30 | for( _j = 0; _j < _n1_; _j++)\ 31 | for(_k = 0; _k <= _n0_; _k++) _mb_[_i][_j][_k] = _k << (RC_BITS-4);\ 32 | } 33 | 34 | #ifndef _CDF2 35 | #define IC 10 36 | #define MIXD ( ((1u<> ANS_BITS) + BZHI32(_st_, ANS_BITS) - (_mb_)[_x_] 38 | //#define mbcheck(_mb_) for(int i=0; i < 16; i++) AC(_mb_[i+1] > _mb_[i], "Fatal probs") 39 | //---- CDF16 ----------------------------------------------------------------- 40 | #define CDF16DEC0(_mb_) CDFDEC0(_mb_, 16) 41 | #define CDF16DEC1(_mb_,_n_) CDFDEC1(_mb_, _n_, 16) 42 | #define CDF16DEC2(_mb_,_n1_,_n0_) CDFDEC2(_mb_, _n1_,_n0_, 16) 43 | 44 | #ifdef __AVX2__ 45 | #define CDF16DEF __m256i _cmv = _mm256_set1_epi16(MIXD), _crv = _mm256_set_epi16(15*IC,14*IC,13*IC,12*IC,11*IC,10*IC, 9*IC, 8*IC, 7*IC, 6*IC, 5*IC, 4*IC, 3*IC, 2*IC, 1*IC, 0) 
46 | #define cdf16upd(_mb_, _x_) {\ 47 | __m256i _mv = _mm256_loadu_si256((const __m256i *)(_mb_));\ 48 | _mv = _mm256_add_epi16(_mv,_mm256_srai_epi16(_mm256_add_epi16(_mm256_sub_epi16(_crv,_mv),_mm256_and_si256(_mm256_cmpgt_epi16(_mv, _mm256_set1_epi16((_mb_)[_x_])),_cmv)), CDFRATE));\ 49 | _mm256_storeu_si256((__m256i *)(_mb_), _mv);\ 50 | } 51 | 52 | #define cdf16ansdec(_mb_, _st_, _x_) {\ 53 | __m256i _mv = _mm256_loadu_si256((const __m256i *)(_mb_)),\ 54 | _gv = _mm256_cmpgt_epi16(_mv, _mm256_set1_epi16(BZHI32(_st_, ANS_BITS)));\ 55 | _x_ = ctz32(_mm256_movemask_epi8(_gv))>>1;\ 56 | _st_ = STATEUPD((_mb_), _st_,_x_);\ 57 | _mv = _mm256_add_epi16(_mv,_mm256_srai_epi16(_mm256_add_epi16(_mm256_sub_epi16(_crv,_mv),_mm256_and_si256(_gv,_cmv)), CDFRATE));\ 58 | _mm256_storeu_si256((__m256i *)(_mb_), _mv);\ 59 | } 60 | 61 | #define cdf16sansdec(_mb_, _st_, _x_) {\ 62 | __m256i _mv = _mm256_loadu_si256((const __m256i *)(_mb_)), \ 63 | _gv = _mm256_cmpgt_epi16(_mv, _mm256_set1_epi16(BZHI32(_st_, ANS_BITS))); \ 64 | _x_ = (ctz32(_mm256_movemask_epi8(_gv))>>1); \ 65 | _st_ = STATEUPD((_mb_), _st_,_x_);\ 66 | } 67 | 68 | #elif defined(__SSE2__) || defined(__powerpc64__) || defined(__ARM_NEON) 69 | #define CDF16DEF __m128i _cmv = _mm_set1_epi16(MIXD), /*adaptive CDF*/\ 70 | _crv0 = _mm_set_epi16( 7*IC, 6*IC, 5*IC, 4*IC, 3*IC, 2*IC, 1*IC, 0 ), \ 71 | _crv1 = _mm_set_epi16(15*IC, 14*IC, 13*IC, 12*IC, 11*IC, 10*IC, 9*IC, 8*IC) 72 | 73 | #define cdf16ansdec(_mb_, _st_, _x_) {\ 74 | __m128i _mv0 = _mm_loadu_si128((const __m128i *)(_mb_)),\ 75 | _mv1 = _mm_loadu_si128((const __m128i *)&(_mb_)[8]),\ 76 | _sv = _mm_set1_epi16(BZHI32(_st_, ANS_BITS)),\ 77 | _gv0 = _mm_cmpgt_epi16(_mv0, _sv),\ 78 | _gv1 = _mm_cmpgt_epi16(_mv1, _sv);\ 79 | _x_ = ctz16(_mm_movemask_epi8(_mm_packs_epi16(_gv0, _gv1))); \ 80 | _st_ = STATEUPD(_mb_,_st_,_x_);\ 81 | _mv0 = _mm_add_epi16(_mv0,_mm_srai_epi16(_mm_add_epi16(_mm_sub_epi16(_crv0,_mv0),_mm_and_si128(_gv0,_cmv)), CDFRATE));\ 82 | _mv1 = 
_mm_add_epi16(_mv1,_mm_srai_epi16(_mm_add_epi16(_mm_sub_epi16(_crv1,_mv1),_mm_and_si128(_gv1,_cmv)), CDFRATE));\ 83 | _mm_storeu_si128((const __m128i *)(_mb_), _mv0);\ 84 | _mm_storeu_si128((const __m128i *)&(_mb_)[8], _mv1);\ 85 | } 86 | 87 | #define cdf16upd(_mb_, _x_) {\ 88 | __m128i _mv0 = _mm_loadu_si128((const __m128i *)(_mb_)),\ 89 | _mv1 = _mm_loadu_si128((const __m128i *)&(_mb_)[8]),\ 90 | _sv = _mm_set1_epi16((_mb_)[_x_]),\ 91 | _gv0 = _mm_cmpgt_epi16(_mv0, _sv),\ 92 | _gv1 = _mm_cmpgt_epi16(_mv1, _sv);\ 93 | _mv0 = _mm_add_epi16(_mv0,_mm_srai_epi16(_mm_add_epi16(_mm_sub_epi16(_crv0,_mv0),_mm_and_si128(_gv0,_cmv)), CDFRATE));\ 94 | _mv1 = _mm_add_epi16(_mv1,_mm_srai_epi16(_mm_add_epi16(_mm_sub_epi16(_crv1,_mv1),_mm_and_si128(_gv1,_cmv)), CDFRATE));\ 95 | _mm_storeu_si128((const __m128i *)(_mb_), _mv0);\ 96 | _mm_storeu_si128((const __m128i *)&(_mb_)[8], _mv1);\ 97 | } 98 | 99 | #define cdf16sansdec(_mb_, _st_, _x_) { /*static CDF*/\ 100 | __m128i _mv0 = _mm_loadu_si128((const __m128i *)(_mb_)),\ 101 | _mv1 = _mm_loadu_si128((const __m128i *)&(_mb_)[8]),\ 102 | _sv = _mm_set1_epi16(BZHI32(_st_, ANS_BITS)),\ 103 | _gv0 = _mm_cmpgt_epi16(_mv0, _sv),\ 104 | _gv1 = _mm_cmpgt_epi16(_mv1, _sv);\ 105 | _x_ = ctz16(_mm_movemask_epi8(_mm_packs_epi16(_gv0, _gv1))); \ 106 | _st_ = STATEUPD(_mb_,_st_,_x_);\ 107 | } 108 | 109 | #elif defined(__ARM_NEON) // TODO: custom arm functions 110 | #else 111 | #define CDF16DEF 112 | #define cdf16upd(_mb_,_x_) { unsigned _i;\ 113 | for(_i = 0; _i < 16; _i++) {\ 114 | int _tmp = 2 - (1< _x_);\ 115 | (_mb_)[_i] -= ((_mb_)[_i] - _tmp) >> CDFRATE;\ 116 | }\ 117 | } 118 | 119 | #define cdf16ansdec(_mb_, _st_, _x_) { _x_=0; while(BZHI32(_st_, ANS_BITS) >= (_mb_)[_x_]) ++_x_;\ 120 | unsigned _s = (_mb_)[_x_--],_t=(_mb_)[_x_]; _st_ = (state_t)(_s - _t) * (_st_ >> ANS_BITS) + BZHI32(_st_, ANS_BITS) - _t; cdf16upd((_mb_),_x_);\ 121 | } 122 | 123 | #define cdf16sansdec(_mb_, _st_, _x_) { _x_=0; while(BZHI32(_st_, ANS_BITS) >= (_mb_)[_x_]) 
++_x_; /*static CDF*/\ 124 | unsigned _s = (_mb_)[_x_--],_t=(_mb_)[_x_]; _st_ = (state_t)(_s - _t) * (_st_ >> ANS_BITS) + BZHI32(_st_, ANS_BITS) - _t;\ 125 | } 126 | #endif 127 | 128 | //----- CDF8 -------------------------------------------------------------------------------------------------- 129 | #define CDF8DEC0(_mb_) CDFDEC0(_mb_, 8) 130 | #define CDF8DEC1(_mb_,_n_) CDFDEC1(_mb_, _n_, 8) 131 | 132 | #if defined(__SSE2__) || defined(__powerpc64__) || defined(__ARM_NEON) 133 | #define CDF8DEF __m128i _cmv = _mm_set1_epi16(MIXD),\ 134 | _crv0 = _mm_set_epi16( 7*IC, 6*IC, 5*IC, 4*IC, 3*IC, 2*IC, 1*IC, 0) 135 | 136 | #define cdf8ansdec(_mb_, _st_, _x_) { /*adaptive CDF: find the lane, update the state, then adapt the 8 entries*/\ 137 | __m128i _mv0 = _mm_loadu_si128((const __m128i *)(_mb_)),\ 138 | _sv = _mm_set1_epi16(BZHI32(_st_, ANS_BITS)),\ 139 | _gv0 = _mm_cmpgt_epi16(_mv0, _sv);\ 140 | _x_ = ctz16(_mm_movemask_epi8(_gv0))>>1; /*byte mask holds 2 bits per 16-bit lane*/\ 141 | _st_ = STATEUPD(_mb_,_st_,_x_);\ 142 | _mv0 = _mm_add_epi16(_mv0,_mm_srai_epi16(_mm_add_epi16(_mm_sub_epi16(_crv0,_mv0),_mm_and_si128(_gv0,_cmv)), CDFRATE));\ 143 | _mm_storeu_si128((__m128i *)(_mb_), _mv0);\ 144 | } 145 | 146 | #define cdf8upd(_mb_, _x_) { /*adapt the 8 entries toward symbol _x_*/\ 147 | __m128i _mv0 = _mm_loadu_si128((const __m128i *)(_mb_)),\ 148 | _sv = _mm_set1_epi16((_mb_)[_x_]),\ 149 | _gv0 = _mm_cmpgt_epi16(_mv0, _sv);\ 150 | _mv0 = _mm_add_epi16(_mv0,_mm_srai_epi16(_mm_add_epi16(_mm_sub_epi16(_crv0,_mv0),_mm_and_si128(_gv0,_cmv)), CDFRATE));\ 151 | _mm_storeu_si128((__m128i *)(_mb_), _mv0);\ 152 | } 153 | #else 154 | #define CDF8DEF 155 | #define cdf8upd(_mb_,_x_) { unsigned _i; \ 156 | for(_i = 0; _i < 8; _i++) { \ 157 | int _tmp = 2 - (1< _x_);\ 158 | _mb_[_i] -= (_mb_[_i] - _tmp) >> CDFRATE; \ 159 | }\ 160 | } 161 | 162 | #define cdf8ansdec(_mb_, _st_, _x_) { _x_=0; while(BZHI32(_st_, ANS_BITS) >= _mb_[_x_]) ++_x_;\ 163 | unsigned _s = _mb_[_x_--],_t=_mb_[_x_]; _st_ = (state_t)(_s - _t) * (_st_ >> ANS_BITS) + BZHI32(_st_, ANS_BITS) - _t; cdf8upd(_mb_,_x_);\ 164 | } 165 | #endif 166 | 167 | #else
//---------------------------------------------------------------------------- 168 | #include "xcdf_.h" 169 | #endif 170 | -------------------------------------------------------------------------------- /rccm_.c: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2022 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // Turbo Range Coder: Context mixing templates include 25 | #include "include/turborc.h" 26 | #include "rcutil_.h" 27 | #include "mb_o0.h" // order 0 28 | 29 | #define mbus unsigned short 30 | 31 | //---------- cm order 1-0 run aware base on bcm - https://github.com/encode84/bcm --------------------- 32 | #define RUNPARM run | 33 | /* rcmr encoder: codes each input byte in 8 binary steps (i = 7..0) using the order-0 table mb0 and the mb1 rows selected by the previous byte (cx1) and the byte before it (cx2). run is 0x100 when the last two bytes are equal and selects the upper half of the sse table. Returns the number of bytes written to out; when OVERFLOW triggers, control jumps to e without calling rceflush. */ size_t T3(rcmr,RC_PRD,enc)(unsigned char *in, size_t inlen, unsigned char *out RCPRM) { 34 | unsigned char *op = out, *ip; 35 | unsigned cx1 = 0, cx2 = 0, run = 0; 36 | rcencdec(rcrange,rclow,rcilow); // range coder 37 | MBU_DEC1(mb0, 1<<8); // predictor 38 | MBU_DEC2(mb1, 1<<8, 1<<8); 39 | unsigned short sse[1<<(1+8)][17]; ssebinit(sse); 40 | 41 | for(ip = in; ip < in+inlen; ip++) { 42 | mbu *m1x = mb1[cx1], *m2x = mb1[cx2]; /* row pointers, same form as rcm/rcm2 */ 43 | unsigned y = 1<<8 | ip[0]; 44 | int i; 45 | for(i = 8-1; i >= 0; --i) { 46 | unsigned x = y>>(i+1); 47 | mbu *m0 = &mb0[x], *m1 = &m1x[x], *m2 = &m2x[x]; mbus *msse = &sse[RUNPARM x]; 48 | mbur_enc(rcrange,rclow,rcilow, m0, RCPRM0,RCPRM1,op, RCB(y,i), m1, m2, msse); 49 | } OVERFLOW(in,inlen,out, op, goto e); 50 | cx2 = cx1; 51 | cx1 = (unsigned char)y; 52 | run = cx1==cx2?0x100:0; // r = cx1 == cx2?r+1:0; run = r > 2?0x100:0; // 53 | } 54 | rceflush(rcrange,rclow,rcilow, op); 55 | e:return op - out; 56 | } 57 | 58 | /* rcmr decoder: mirrors the rcmr encoder's context updates (cx1, cx2, run) and writes outlen bytes to out. Returns outlen. */ size_t T3(rcmr,RC_PRD,dec)(unsigned char *in, size_t outlen, unsigned char *out RCPRM) { 59 | unsigned char *ip = in, *op; 60 | unsigned cx1 = 0, cx2 = 0, run = 0; 61 | rcdecdec(rcrange, rccode, ip); 62 | MBU_DEC1(mb0, 1<<8); 63 | MBU_DEC2(mb1, 1<<8, 1<<8); 64 | unsigned short sse[1<<9][17]; ssebinit(sse); 65 | 66 | for(op = out; op < out+outlen; op++) { 67 | mbu *m1x = mb1[cx1], *m2x = mb1[cx2]; /* row pointers, same form as rcm/rcm2 */ 68 | int i; 69 | unsigned x = 1; 70 | for(i = 8-1; i >= 0; --i) { 71 | mbu *m0 =
&mb0[x], *m1 = &m1x[x], *m2 = &m2x[x]; mbus *msse = &sse[RUNPARM x]; 72 | mbur_dec(rcrange,rccode, m0, RCPRM0,RCPRM1,ip, x, m1, m2, msse); 73 | } 74 | cx2 = cx1; 75 | op[0] = cx1 = (unsigned char)x; 76 | run = cx1==cx2?0x100:0; 77 | } 78 | return outlen; 79 | } 80 | 81 | /* rcmrr encoder: same structure as the rcmr encoder, but run becomes 0x100 only after the counter r of consecutive equal bytes exceeds 2. Returns the number of bytes written to out. */ size_t T3(rcmrr,RC_PRD,enc)(unsigned char *in, size_t inlen, unsigned char *out RCPRM) { 82 | unsigned char *op = out, *ip; 83 | unsigned cx1 = 0, cx2 = 0, run = 0, r = 0; 84 | rcencdec(rcrange,rclow,rcilow); // range coder 85 | MBU_DEC1(mb0, 1<<8); // predictor 86 | MBU_DEC2(mb1, 1<<8, 1<<8); 87 | unsigned short sse[1<<(1+8)][17]; ssebinit(sse); 88 | 89 | for(ip = in; ip < in+inlen; ip++) { 90 | mbu *m1x = mb1[cx1], *m2x = mb1[cx2]; /* row pointers, same form as rcm/rcm2 */ 91 | unsigned y = 1<<8 | ip[0]; 92 | int i; 93 | for(i = 8-1; i >= 0; --i) { 94 | unsigned x = y>>(i+1); 95 | mbu *m0 = &mb0[x], *m1 = &m1x[x], *m2 = &m2x[x]; mbus *msse = &sse[RUNPARM x]; 96 | mbur_enc(rcrange,rclow,rcilow, m0, RCPRM0,RCPRM1,op, RCB(y,i), m1, m2, msse); 97 | } OVERFLOW(in,inlen,out, op, goto e); 98 | cx2 = cx1; 99 | cx1 = (unsigned char)y; 100 | r = cx1 == cx2?r+1:0; run = r > 2?0x100:0; //run = cx1==cx2?0x100:0;// 101 | } 102 | rceflush(rcrange,rclow,rcilow, op); 103 | e:return op - out; 104 | } 105 | 106 | /* rcmrr decoder: mirrors the rcmrr encoder's context and run-counter updates and writes outlen bytes to out. Returns outlen. */ size_t T3(rcmrr,RC_PRD,dec)(unsigned char *in, size_t outlen, unsigned char *out RCPRM) { 107 | unsigned char *ip = in, *op; 108 | unsigned cx1 = 0, cx2 = 0, run = 0, r = 0; 109 | rcdecdec(rcrange, rccode, ip); 110 | MBU_DEC1(mb0, 1<<8); 111 | MBU_DEC2(mb1, 1<<8, 1<<8); 112 | unsigned short sse[1<<9][17]; ssebinit(sse); 113 | 114 | for(op = out; op < out+outlen; op++) { 115 | mbu *m1x = mb1[cx1], *m2x = mb1[cx2]; /* row pointers, same form as rcm/rcm2 */ 116 | int i; 117 | unsigned x = 1; 118 | for(i = 8-1; i >= 0; --i) { 119 | mbu *m0 = &mb0[x], *m1 = &m1x[x], *m2 = &m2x[x]; mbus *msse = &sse[RUNPARM x]; 120 | mbur_dec(rcrange,rccode, m0, RCPRM0,RCPRM1,ip, x, m1, m2, msse); 121 | } 122 | cx2 = cx1; 123 | op[0] = cx1 = (unsigned char)x; 124 | r = cx1 == cx2?r+1:0; run = r >
2?0x100:0;//run = cx1==cx2?0x100:0;// 125 | } 126 | return outlen; 127 | } 128 | 129 | //-------------- Order 1-0 context mixing ------------------------------------- 130 | size_t T3(rcm,RC_PRD,enc)(unsigned char *in, size_t inlen, unsigned char *out RCPRM) { 131 | unsigned char *op = out, *ip; 132 | unsigned cx = 0, run = 0; 133 | rcencdec(rcrange,rclow,rcilow); 134 | MBU_DEC1(mb0, 1<<8); 135 | MBU_DEC2(mb1, 1<<8, 1<<8); 136 | unsigned short sse[1<<8][17]; sseinit(sse); 137 | 138 | for(ip = in; ip < in+inlen; ip++) { 139 | mbu *m1x = mb1[cx]; 140 | int i; 141 | unsigned x = 1<<8 | ip[0]; 142 | for(i = 8-1; i >= 0; --i) { 143 | unsigned y = x>>(i+1); 144 | mbu *m0 = &mb0[y], *m1 = &m1x[y]; mbus *msse2 = &sse[y]; 145 | mbum_enc(rcrange,rclow,rcilow, m0, RCPRM0,RCPRM1,op, RCB(x,i), m1, 0, msse2); 146 | } 147 | cx = ip[0]; OVERFLOW(in,inlen,out, op, goto e); 148 | } 149 | rceflush(rcrange,rclow,rcilow, op); 150 | e:return op - out; 151 | } 152 | 153 | size_t T3(rcm,RC_PRD,dec)(unsigned char *in, size_t outlen, unsigned char *out RCPRM) { 154 | unsigned char *ip = in, *op; 155 | unsigned cx = 0, run = 0; 156 | rcdecdec(rcrange, rccode, ip); 157 | MBU_DEC1(mb0, 1<<8); 158 | MBU_DEC2(mb1, 1<<8, 1<<8); 159 | unsigned short sse[1<<8][17]; sseinit(sse); 160 | 161 | for(op = out; op < out+outlen; op++) { 162 | mbu *m1x = mb1[(uint8_t)cx]; 163 | int i; 164 | unsigned x = 1; 165 | for(i = 8-1; i >= 0; --i) { 166 | mbu *m0 = &mb0[x], *m1 = &m1x[x]; mbus *msse = &sse[x]; 167 | mbum_dec(rcrange,rccode, m0, RCPRM0,RCPRM1,ip, x, m1, 0, msse); 168 | } 169 | op[0] = cx = (unsigned char)x; 170 | } 171 | return outlen; 172 | } 173 | 174 | //-------------- Order 2-1-0 context mixing ------------------------------------- 175 | size_t T3(rcm2,RC_PRD,enc)(unsigned char *in, size_t inlen, unsigned char *out RCPRM) { 176 | unsigned char *op = out, *ip; 177 | unsigned cx = 0, run = 0; 178 | rcencdec(rcrange,rclow,rcilow); 179 | MBU_DEC1( mb0, 1<<8); 180 | MBU_DEC2( mb1, 1<<8, 1<<8); 181 | 
MBU_NEWI2(mb2, 1<<16, 1<<8); //MBU_DEC2(mb2, 1<<16, 1<<8); 182 | unsigned short sse[1<<8][17]; sseinit(sse); 183 | 184 | for(ip = in; ip < in+inlen; ip++) { 185 | mbu *m1x = mb1[(uint8_t)cx], *m2x = &mb2[(uint16_t)cx*(1<<8)]; 186 | unsigned x = 1<<8 | ip[0]; 187 | int i; 188 | for(i = 8-1; i >= 0; --i) { 189 | unsigned y = x>>(i+1); 190 | mbu *m0 = &mb0[y], *m1 = &m1x[y], *m2 = &m2x[y]; mbus *msse = &sse[y]; 191 | mbum2_enc(rcrange,rclow,rcilow, m0, RCPRM0,RCPRM1,op, RCB(x,i), m1, m2, msse); 192 | } 193 | cx = cx << 8 | ip[0]; OVERFLOW(in,inlen,out, op, goto e); 194 | } 195 | rceflush(rcrange,rclow,rcilow, op); 196 | e: free(mb2); 197 | return op - out; 198 | } 199 | 200 | size_t T3(rcm2,RC_PRD,dec)(unsigned char *in, size_t outlen, unsigned char *out RCPRM) { 201 | unsigned char *ip = in, *op; 202 | unsigned cx = 0, run = 0; 203 | rcdecdec(rcrange, rccode, ip); 204 | MBU_DEC1(mb0, 1<<8); 205 | MBU_DEC2(mb1, 1<<8, 1<<8); 206 | MBU_NEWI2(mb2, 1<<16, 1<<8); 207 | unsigned short sse[1<<(1+8)][17]; sseinit(sse); 208 | 209 | for(op = out; op < out+outlen; op++) { 210 | mbu *m1x = mb1[(uint8_t)cx], *m2x = &mb2[(uint16_t)cx*(1<<8)]; 211 | int i; 212 | unsigned x = 1; 213 | for(i = 8-1; i >= 0; --i) { 214 | mbu *m0 = &mb0[x], *m1 = &m1x[x], *m2 = &m2x[x]; mbus *msse = &sse[x]; 215 | mbum2_dec(rcrange,rccode, m0, RCPRM0,RCPRM1,ip, x, m1, m2, msse); 216 | } 217 | op[0] = cx = cx << 8 | (unsigned char)x; 218 | } 219 | free(mb2); 220 | return outlen; 221 | } 222 | -------------------------------------------------------------------------------- /vs/stdint.h: -------------------------------------------------------------------------------- 1 | // ISO C9x compliant stdint.h for Microsoft Visual Studio 2 | // Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 3 | // 4 | // Copyright (c) 2006-2013 Alexander Chemeris 5 | // 6 | // Redistribution and use in source and binary forms, with or without 7 | // modification, are permitted provided that the following 
conditions are met: 8 | // 9 | // 1. Redistributions of source code must retain the above copyright notice, 10 | // this list of conditions and the following disclaimer. 11 | // 12 | // 2. Redistributions in binary form must reproduce the above copyright 13 | // notice, this list of conditions and the following disclaimer in the 14 | // documentation and/or other materials provided with the distribution. 15 | // 16 | // 3. Neither the name of the product nor the names of its contributors may 17 | // be used to endorse or promote products derived from this software 18 | // without specific prior written permission. 19 | // 20 | // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 21 | // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 22 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 23 | // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | // 31 | /////////////////////////////////////////////////////////////////////////////// 32 | 33 | #ifndef _MSC_VER // [ 34 | #error "Use this header only with Microsoft Visual C++ compilers!" 
35 | #endif // _MSC_VER ] 36 | 37 | #ifndef _MSC_STDINT_H_ // [ 38 | #define _MSC_STDINT_H_ 39 | 40 | #if _MSC_VER > 1000 41 | #pragma once 42 | #endif 43 | 44 | #if _MSC_VER >= 1600 // [ 45 | #include 46 | #else // ] _MSC_VER >= 1600 [ 47 | 48 | #include 49 | 50 | // For Visual Studio 6 in C++ mode and for many Visual Studio versions when 51 | // compiling for ARM we should wrap include with 'extern "C++" {}' 52 | // or compiler give many errors like this: 53 | // error C2733: second C linkage of overloaded function 'wmemchr' not allowed 54 | #ifdef __cplusplus 55 | extern "C" { 56 | #endif 57 | # include 58 | #ifdef __cplusplus 59 | } 60 | #endif 61 | 62 | // Define _W64 macros to mark types changing their size, like intptr_t. 63 | #ifndef _W64 64 | # if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 65 | # define _W64 __w64 66 | # else 67 | # define _W64 68 | # endif 69 | #endif 70 | 71 | 72 | // 7.18.1 Integer types 73 | 74 | // 7.18.1.1 Exact-width integer types 75 | 76 | // Visual Studio 6 and Embedded Visual C++ 4 doesn't 77 | // realize that, e.g. char has the same size as __int8 78 | // so we give up on __intX for them. 
79 | #if (_MSC_VER < 1300) 80 | typedef signed char int8_t; 81 | typedef signed short int16_t; 82 | typedef signed int int32_t; 83 | typedef unsigned char uint8_t; 84 | typedef unsigned short uint16_t; 85 | typedef unsigned int uint32_t; 86 | #else 87 | typedef signed __int8 int8_t; 88 | typedef signed __int16 int16_t; 89 | typedef signed __int32 int32_t; 90 | typedef unsigned __int8 uint8_t; 91 | typedef unsigned __int16 uint16_t; 92 | typedef unsigned __int32 uint32_t; 93 | #endif 94 | typedef signed __int64 int64_t; 95 | typedef unsigned __int64 uint64_t; 96 | 97 | 98 | // 7.18.1.2 Minimum-width integer types 99 | typedef int8_t int_least8_t; 100 | typedef int16_t int_least16_t; 101 | typedef int32_t int_least32_t; 102 | typedef int64_t int_least64_t; 103 | typedef uint8_t uint_least8_t; 104 | typedef uint16_t uint_least16_t; 105 | typedef uint32_t uint_least32_t; 106 | typedef uint64_t uint_least64_t; 107 | 108 | // 7.18.1.3 Fastest minimum-width integer types 109 | typedef int8_t int_fast8_t; 110 | typedef int16_t int_fast16_t; 111 | typedef int32_t int_fast32_t; 112 | typedef int64_t int_fast64_t; 113 | typedef uint8_t uint_fast8_t; 114 | typedef uint16_t uint_fast16_t; 115 | typedef uint32_t uint_fast32_t; 116 | typedef uint64_t uint_fast64_t; 117 | 118 | // 7.18.1.4 Integer types capable of holding object pointers 119 | #ifdef _WIN64 // [ 120 | typedef signed __int64 intptr_t; 121 | typedef unsigned __int64 uintptr_t; 122 | #else // _WIN64 ][ 123 | typedef _W64 signed int intptr_t; 124 | typedef _W64 unsigned int uintptr_t; 125 | #endif // _WIN64 ] 126 | 127 | // 7.18.1.5 Greatest-width integer types 128 | typedef int64_t intmax_t; 129 | typedef uint64_t uintmax_t; 130 | 131 | 132 | // 7.18.2 Limits of specified-width integer types 133 | 134 | #if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 135 | 136 | // 7.18.2.1 Limits of exact-width integer types 137 | #define INT8_MIN 
((int8_t)_I8_MIN) 138 | #define INT8_MAX _I8_MAX 139 | #define INT16_MIN ((int16_t)_I16_MIN) 140 | #define INT16_MAX _I16_MAX 141 | #define INT32_MIN ((int32_t)_I32_MIN) 142 | #define INT32_MAX _I32_MAX 143 | #define INT64_MIN ((int64_t)_I64_MIN) 144 | #define INT64_MAX _I64_MAX 145 | #define UINT8_MAX _UI8_MAX 146 | #define UINT16_MAX _UI16_MAX 147 | #define UINT32_MAX _UI32_MAX 148 | #define UINT64_MAX _UI64_MAX 149 | 150 | // 7.18.2.2 Limits of minimum-width integer types 151 | #define INT_LEAST8_MIN INT8_MIN 152 | #define INT_LEAST8_MAX INT8_MAX 153 | #define INT_LEAST16_MIN INT16_MIN 154 | #define INT_LEAST16_MAX INT16_MAX 155 | #define INT_LEAST32_MIN INT32_MIN 156 | #define INT_LEAST32_MAX INT32_MAX 157 | #define INT_LEAST64_MIN INT64_MIN 158 | #define INT_LEAST64_MAX INT64_MAX 159 | #define UINT_LEAST8_MAX UINT8_MAX 160 | #define UINT_LEAST16_MAX UINT16_MAX 161 | #define UINT_LEAST32_MAX UINT32_MAX 162 | #define UINT_LEAST64_MAX UINT64_MAX 163 | 164 | // 7.18.2.3 Limits of fastest minimum-width integer types 165 | #define INT_FAST8_MIN INT8_MIN 166 | #define INT_FAST8_MAX INT8_MAX 167 | #define INT_FAST16_MIN INT16_MIN 168 | #define INT_FAST16_MAX INT16_MAX 169 | #define INT_FAST32_MIN INT32_MIN 170 | #define INT_FAST32_MAX INT32_MAX 171 | #define INT_FAST64_MIN INT64_MIN 172 | #define INT_FAST64_MAX INT64_MAX 173 | #define UINT_FAST8_MAX UINT8_MAX 174 | #define UINT_FAST16_MAX UINT16_MAX 175 | #define UINT_FAST32_MAX UINT32_MAX 176 | #define UINT_FAST64_MAX UINT64_MAX 177 | 178 | // 7.18.2.4 Limits of integer types capable of holding object pointers 179 | #ifdef _WIN64 // [ 180 | # define INTPTR_MIN INT64_MIN 181 | # define INTPTR_MAX INT64_MAX 182 | # define UINTPTR_MAX UINT64_MAX 183 | #else // _WIN64 ][ 184 | # define INTPTR_MIN INT32_MIN 185 | # define INTPTR_MAX INT32_MAX 186 | # define UINTPTR_MAX UINT32_MAX 187 | #endif // _WIN64 ] 188 | 189 | // 7.18.2.5 Limits of greatest-width integer types 190 | #define INTMAX_MIN INT64_MIN 191 | #define 
INTMAX_MAX INT64_MAX 192 | #define UINTMAX_MAX UINT64_MAX 193 | 194 | // 7.18.3 Limits of other integer types 195 | 196 | #ifdef _WIN64 // [ 197 | # define PTRDIFF_MIN _I64_MIN 198 | # define PTRDIFF_MAX _I64_MAX 199 | #else // _WIN64 ][ 200 | # define PTRDIFF_MIN _I32_MIN 201 | # define PTRDIFF_MAX _I32_MAX 202 | #endif // _WIN64 ] 203 | 204 | #define SIG_ATOMIC_MIN INT_MIN 205 | #define SIG_ATOMIC_MAX INT_MAX 206 | 207 | #ifndef SIZE_MAX // [ 208 | # ifdef _WIN64 // [ 209 | # define SIZE_MAX _UI64_MAX 210 | # else // _WIN64 ][ 211 | # define SIZE_MAX _UI32_MAX 212 | # endif // _WIN64 ] 213 | #endif // SIZE_MAX ] 214 | 215 | // WCHAR_MIN and WCHAR_MAX are also defined in 216 | #ifndef WCHAR_MIN // [ 217 | # define WCHAR_MIN 0 218 | #endif // WCHAR_MIN ] 219 | #ifndef WCHAR_MAX // [ 220 | # define WCHAR_MAX _UI16_MAX 221 | #endif // WCHAR_MAX ] 222 | 223 | #define WINT_MIN 0 224 | #define WINT_MAX _UI16_MAX 225 | 226 | #endif // __STDC_LIMIT_MACROS ] 227 | 228 | 229 | // 7.18.4 Limits of other integer types 230 | 231 | #if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 232 | 233 | // 7.18.4.1 Macros for minimum-width integer constants 234 | 235 | #define INT8_C(val) val##i8 236 | #define INT16_C(val) val##i16 237 | #define INT32_C(val) val##i32 238 | #define INT64_C(val) val##i64 239 | 240 | #define UINT8_C(val) val##ui8 241 | #define UINT16_C(val) val##ui16 242 | #define UINT32_C(val) val##ui32 243 | #define UINT64_C(val) val##ui64 244 | 245 | // 7.18.4.2 Macros for greatest-width integer constants 246 | // These #ifndef's are needed to prevent collisions with . 247 | // Check out Issue 9 for the details. 
248 | #ifndef INTMAX_C // [ 249 | # define INTMAX_C INT64_C 250 | #endif // INTMAX_C ] 251 | #ifndef UINTMAX_C // [ 252 | # define UINTMAX_C UINT64_C 253 | #endif // UINTMAX_C ] 254 | 255 | #endif // __STDC_CONSTANT_MACROS ] 256 | 257 | #endif // _MSC_VER >= 1600 ] 258 | 259 | #endif // _MSC_STDINT_H_ ] 260 | -------------------------------------------------------------------------------- /vs/inttypes.h: -------------------------------------------------------------------------------- 1 | // ISO C9x compliant inttypes.h for Microsoft Visual Studio 2 | // Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 3 | // 4 | // Copyright (c) 2006-2013 Alexander Chemeris 5 | // 6 | // Redistribution and use in source and binary forms, with or without 7 | // modification, are permitted provided that the following conditions are met: 8 | // 9 | // 1. Redistributions of source code must retain the above copyright notice, 10 | // this list of conditions and the following disclaimer. 11 | // 12 | // 2. Redistributions in binary form must reproduce the above copyright 13 | // notice, this list of conditions and the following disclaimer in the 14 | // documentation and/or other materials provided with the distribution. 15 | // 16 | // 3. Neither the name of the product nor the names of its contributors may 17 | // be used to endorse or promote products derived from this software 18 | // without specific prior written permission. 19 | // 20 | // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 21 | // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 22 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO 23 | // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | // 31 | /////////////////////////////////////////////////////////////////////////////// 32 | 33 | #ifndef _MSC_VER // [ 34 | #error "Use this header only with Microsoft Visual C++ compilers!" 35 | #endif // _MSC_VER ] 36 | 37 | #ifndef _MSC_INTTYPES_H_ // [ 38 | #define _MSC_INTTYPES_H_ 39 | 40 | #if _MSC_VER > 1000 41 | #pragma once 42 | #endif 43 | 44 | #include "stdint.h" 45 | 46 | // 7.8 Format conversion of integer types 47 | 48 | typedef struct { 49 | intmax_t quot; 50 | intmax_t rem; 51 | } imaxdiv_t; 52 | 53 | // 7.8.1 Macros for format specifiers 54 | 55 | #if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 56 | 57 | // The fprintf macros for signed integers are: 58 | #define PRId8 "d" 59 | #define PRIi8 "i" 60 | #define PRIdLEAST8 "d" 61 | #define PRIiLEAST8 "i" 62 | #define PRIdFAST8 "d" 63 | #define PRIiFAST8 "i" 64 | 65 | #define PRId16 "hd" 66 | #define PRIi16 "hi" 67 | #define PRIdLEAST16 "hd" 68 | #define PRIiLEAST16 "hi" 69 | #define PRIdFAST16 "hd" 70 | #define PRIiFAST16 "hi" 71 | 72 | #define PRId32 "I32d" 73 | #define PRIi32 "I32i" 74 | #define PRIdLEAST32 "I32d" 75 | #define PRIiLEAST32 "I32i" 76 | #define PRIdFAST32 "I32d" 77 | #define PRIiFAST32 "I32i" 78 | 79 | #define PRId64 "I64d" 80 | #define PRIi64 "I64i" 81 | #define PRIdLEAST64 "I64d" 82 | #define PRIiLEAST64 "I64i" 83 | #define PRIdFAST64 "I64d" 84 | #define PRIiFAST64 "I64i" 85 | 86 | #define PRIdMAX 
"I64d" 87 | #define PRIiMAX "I64i" 88 | 89 | #define PRIdPTR "Id" 90 | #define PRIiPTR "Ii" 91 | 92 | // The fprintf macros for unsigned integers are: 93 | #define PRIo8 "o" 94 | #define PRIu8 "u" 95 | #define PRIx8 "x" 96 | #define PRIX8 "X" 97 | #define PRIoLEAST8 "o" 98 | #define PRIuLEAST8 "u" 99 | #define PRIxLEAST8 "x" 100 | #define PRIXLEAST8 "X" 101 | #define PRIoFAST8 "o" 102 | #define PRIuFAST8 "u" 103 | #define PRIxFAST8 "x" 104 | #define PRIXFAST8 "X" 105 | 106 | #define PRIo16 "ho" 107 | #define PRIu16 "hu" 108 | #define PRIx16 "hx" 109 | #define PRIX16 "hX" 110 | #define PRIoLEAST16 "ho" 111 | #define PRIuLEAST16 "hu" 112 | #define PRIxLEAST16 "hx" 113 | #define PRIXLEAST16 "hX" 114 | #define PRIoFAST16 "ho" 115 | #define PRIuFAST16 "hu" 116 | #define PRIxFAST16 "hx" 117 | #define PRIXFAST16 "hX" 118 | 119 | #define PRIo32 "I32o" 120 | #define PRIu32 "I32u" 121 | #define PRIx32 "I32x" 122 | #define PRIX32 "I32X" 123 | #define PRIoLEAST32 "I32o" 124 | #define PRIuLEAST32 "I32u" 125 | #define PRIxLEAST32 "I32x" 126 | #define PRIXLEAST32 "I32X" 127 | #define PRIoFAST32 "I32o" 128 | #define PRIuFAST32 "I32u" 129 | #define PRIxFAST32 "I32x" 130 | #define PRIXFAST32 "I32X" 131 | 132 | #define PRIo64 "I64o" 133 | #define PRIu64 "I64u" 134 | #define PRIx64 "I64x" 135 | #define PRIX64 "I64X" 136 | #define PRIoLEAST64 "I64o" 137 | #define PRIuLEAST64 "I64u" 138 | #define PRIxLEAST64 "I64x" 139 | #define PRIXLEAST64 "I64X" 140 | #define PRIoFAST64 "I64o" 141 | #define PRIuFAST64 "I64u" 142 | #define PRIxFAST64 "I64x" 143 | #define PRIXFAST64 "I64X" 144 | 145 | #define PRIoMAX "I64o" 146 | #define PRIuMAX "I64u" 147 | #define PRIxMAX "I64x" 148 | #define PRIXMAX "I64X" 149 | 150 | #define PRIoPTR "Io" 151 | #define PRIuPTR "Iu" 152 | #define PRIxPTR "Ix" 153 | #define PRIXPTR "IX" 154 | 155 | // The fscanf macros for signed integers are: 156 | #define SCNd8 "d" 157 | #define SCNi8 "i" 158 | #define SCNdLEAST8 "d" 159 | #define SCNiLEAST8 "i" 160 | #define 
SCNdFAST8 "d" 161 | #define SCNiFAST8 "i" 162 | 163 | #define SCNd16 "hd" 164 | #define SCNi16 "hi" 165 | #define SCNdLEAST16 "hd" 166 | #define SCNiLEAST16 "hi" 167 | #define SCNdFAST16 "hd" 168 | #define SCNiFAST16 "hi" 169 | 170 | #define SCNd32 "ld" 171 | #define SCNi32 "li" 172 | #define SCNdLEAST32 "ld" 173 | #define SCNiLEAST32 "li" 174 | #define SCNdFAST32 "ld" 175 | #define SCNiFAST32 "li" 176 | 177 | #define SCNd64 "I64d" 178 | #define SCNi64 "I64i" 179 | #define SCNdLEAST64 "I64d" 180 | #define SCNiLEAST64 "I64i" 181 | #define SCNdFAST64 "I64d" 182 | #define SCNiFAST64 "I64i" 183 | 184 | #define SCNdMAX "I64d" 185 | #define SCNiMAX "I64i" 186 | 187 | #ifdef _WIN64 // [ 188 | # define SCNdPTR "I64d" 189 | # define SCNiPTR "I64i" 190 | #else // _WIN64 ][ 191 | # define SCNdPTR "ld" 192 | # define SCNiPTR "li" 193 | #endif // _WIN64 ] 194 | 195 | // The fscanf macros for unsigned integers are: 196 | #define SCNo8 "o" 197 | #define SCNu8 "u" 198 | #define SCNx8 "x" 199 | #define SCNX8 "X" 200 | #define SCNoLEAST8 "o" 201 | #define SCNuLEAST8 "u" 202 | #define SCNxLEAST8 "x" 203 | #define SCNXLEAST8 "X" 204 | #define SCNoFAST8 "o" 205 | #define SCNuFAST8 "u" 206 | #define SCNxFAST8 "x" 207 | #define SCNXFAST8 "X" 208 | 209 | #define SCNo16 "ho" 210 | #define SCNu16 "hu" 211 | #define SCNx16 "hx" 212 | #define SCNX16 "hX" 213 | #define SCNoLEAST16 "ho" 214 | #define SCNuLEAST16 "hu" 215 | #define SCNxLEAST16 "hx" 216 | #define SCNXLEAST16 "hX" 217 | #define SCNoFAST16 "ho" 218 | #define SCNuFAST16 "hu" 219 | #define SCNxFAST16 "hx" 220 | #define SCNXFAST16 "hX" 221 | 222 | #define SCNo32 "lo" 223 | #define SCNu32 "lu" 224 | #define SCNx32 "lx" 225 | #define SCNX32 "lX" 226 | #define SCNoLEAST32 "lo" 227 | #define SCNuLEAST32 "lu" 228 | #define SCNxLEAST32 "lx" 229 | #define SCNXLEAST32 "lX" 230 | #define SCNoFAST32 "lo" 231 | #define SCNuFAST32 "lu" 232 | #define SCNxFAST32 "lx" 233 | #define SCNXFAST32 "lX" 234 | 235 | #define SCNo64 "I64o" 236 | #define 
SCNu64 "I64u" 237 | #define SCNx64 "I64x" 238 | #define SCNX64 "I64X" 239 | #define SCNoLEAST64 "I64o" 240 | #define SCNuLEAST64 "I64u" 241 | #define SCNxLEAST64 "I64x" 242 | #define SCNXLEAST64 "I64X" 243 | #define SCNoFAST64 "I64o" 244 | #define SCNuFAST64 "I64u" 245 | #define SCNxFAST64 "I64x" 246 | #define SCNXFAST64 "I64X" 247 | 248 | #define SCNoMAX "I64o" 249 | #define SCNuMAX "I64u" 250 | #define SCNxMAX "I64x" 251 | #define SCNXMAX "I64X" 252 | 253 | #ifdef _WIN64 // [ 254 | # define SCNoPTR "I64o" 255 | # define SCNuPTR "I64u" 256 | # define SCNxPTR "I64x" 257 | # define SCNXPTR "I64X" 258 | #else // _WIN64 ][ 259 | # define SCNoPTR "lo" 260 | # define SCNuPTR "lu" 261 | # define SCNxPTR "lx" 262 | # define SCNXPTR "lX" 263 | #endif // _WIN64 ] 264 | 265 | #endif // __STDC_FORMAT_MACROS ] 266 | 267 | // 7.8.2 Functions for greatest-width integer types 268 | 269 | // 7.8.2.1 The imaxabs function 270 | #define imaxabs _abs64 271 | 272 | // 7.8.2.2 The imaxdiv function 273 | 274 | // This is a modified version of the div() function from Microsoft's div.c found 275 | // in %MSVC.NET%\crt\src\div.c. Yields numer / denom in quot and numer % denom in rem; denom is not checked for zero. 276 | #ifdef STATIC_IMAXDIV // [ 277 | static 278 | #else // STATIC_IMAXDIV ][ 279 | _inline 280 | #endif // STATIC_IMAXDIV ] 281 | imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) 282 | { 283 | imaxdiv_t result; 284 | 285 | result.quot = numer / denom; 286 | result.rem = numer % denom; 287 | 288 | if (numer < 0 && result.rem > 0) { 289 | // a negative numerator produced a positive remainder: bump the quotient and take denom off the remainder, which keeps numer == quot * denom + rem 290 | ++result.quot; 291 | result.rem -= denom; 292 | } 293 | 294 | return result; 295 | } 296 | 297 | // 7.8.2.3 The strtoimax and strtoumax functions 298 | #define strtoimax _strtoi64 299 | #define strtoumax _strtoui64 300 | 301 | // 7.8.2.4 The wcstoimax and wcstoumax functions 302 | #define wcstoimax _wcstoi64 303 | #define wcstoumax _wcstoui64 304 | 305 | 306 | #endif // _MSC_INTTYPES_H_ ] 307 | -------------------------------------------------------------------------------- 
/libdivsufsort/include/sort/detail/misc.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Christoph Diegelmann 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom the 8 | // Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | // IN THE SOFTWARE. 
20 | 21 | #ifndef SORT_DETAIL_MISC_H 22 | #define SORT_DETAIL_MISC_H 23 | 24 | #ifdef __INTEL_COMPILER 25 | #pragma warning disable 3373 26 | #endif 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | namespace sort { 37 | namespace detail { 38 | namespace misc { 39 | 40 | constexpr const int NOCB = 2; // Don't use the callback on equal ranges (implies LR) 41 | constexpr const int LR = 1; // Direction: left to right 42 | constexpr const int RL = 0; // Direction: right to left 43 | 44 | constexpr const int INSERTION_MAX = 32; // When to switch to insertion sort 45 | constexpr const int MEDIAN21 = 64; // When to switch to pseudo median of 21 46 | constexpr const int MEDIAN65 = 8192; // When to switch to pseudo median of 65 47 | constexpr const int BLOCK_SIZE = 128; // Block Size for block partition ~2 cache lines 48 | constexpr const int COPY_MIN = 1024; // Minimum number of elements to use copy 49 | // probably around number of cache lines in L1 cache * 2 50 | /* Two-field aggregate with an element-wise operator^=, so the xor-based cswap below can also be applied to pairs. */ 51 | template 52 | struct pair { 53 | constexpr pair() : first(), second() {}; 54 | 55 | template 56 | constexpr pair(A&& a, B&& b) : 57 | first(std::forward(a)), 58 | second(std::forward(b)) {} 59 | 60 | pair& operator^=(const pair& rhs) { 61 | this->first ^= rhs.first; 62 | this->second ^= rhs.second; 63 | return *this; 64 | } 65 | 66 | T1 first; 67 | T2 second; 68 | }; 69 | 70 | template 71 | inline pair operator^(pair lhs, const pair& rhs) { 72 | lhs ^= rhs; 73 | return lhs; 74 | } 75 | 76 | template 77 | constexpr auto make_pair(T1&& a, T2&& b)->detail::misc::pair< 78 | std::remove_reference_t, 79 | std::remove_reference_t> { 80 | return detail::misc::pair< 81 | std::remove_reference_t, 82 | std::remove_reference_t 83 | >(std::forward(a), std::forward(b)); 84 | } 85 | /* Trait: false_type by default; the specialization yields true_type when the declval ^ declval expression is well-formed. */ 86 | template 87 | struct has_xor_operator 88 | : std::false_type {}; 89 | 90 | template 91 | struct has_xor_operator() ^ std::declval()))> 92 | : std::true_type {}; 93 | 94 | 
/* Integer floor(log2(v)) for v > 0. NOTE(review): __builtin_clz takes an unsigned int and is undefined for 0, so the GCC path presumably expects 0 < v < 2^32, while the fallback loop returns 0 for v == 0 - confirm callers never pass 0 or a wider value. */ template int ilogb(T v) { 95 | #if defined(__GNUC__) 96 | return (31 - __builtin_clz(v)); 97 | #else 98 | int r = 0; 99 | while (v >>= 1) 100 | ++r; 101 | return r; 102 | #endif 103 | } 104 | /* Compare-and-swap: afterwards a holds the smaller and b the larger value. The xor path rebuilds b as db ^ da ^ min(da, db) instead of branching. */ 105 | template inline void cswap(V &a, V &b) { 106 | #if defined(__GNUC__) || defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER) 107 | if (has_xor_operator::value) { 108 | std::remove_reference_t da = a, db = b, tmp; 109 | tmp = a = da < db ? da : db; 110 | b ^= da ^ tmp; 111 | } else { 112 | if (b < a) std::swap(a, b); 113 | } 114 | #else 115 | // clang+icc nicely optimize this into cmoves (NOTE(review): && binds tighter than || in the #if above, so any compiler that defines __GNUC__ takes the xor path regardless of __clang__/__INTEL_COMPILER - confirm that grouping is intended) 116 | if (b < a) std::swap(a, b); 117 | #endif 118 | } 119 | /* Conditional move: a becomes the smaller of a and b. */ 120 | template inline void cmovl(V &a, V b) { 121 | if (b < a) a = b; 122 | } 123 | /* Conditional move: b becomes the larger of a and b (a is taken by value and left alone). */ 124 | template inline void cmovg(V a, V &b) { 125 | if (b < a) b = a; 126 | } 127 | 128 | template inline V median3(V a, V b, V c) { 129 | // Get median by sorting 3 elements and return the middle element 130 | // using a sorting network 131 | 132 | cswap(a, c); cmovg(a, b); 133 | cmovl(b, c); 134 | 135 | return b; 136 | } 137 | 138 | template inline V median5(V a, V b, V c, V d, V e) { 139 | // Get median by sorting 5 elements and return the middle element 140 | // using a sorting network 141 | 142 | cswap(a, b); cswap(d, e); 143 | cswap(c, e); cswap(c, d); 144 | cswap(a, d); cmovg(a, c); 145 | cmovl(b, e); cmovl(b, d); 146 | cmovg(b, c); 147 | 148 | return c; 149 | } 150 | 151 | template 152 | inline std::tuple median7(T first, I index) { 153 | // Get 3 pivots by sorting 7 elements and returning element 1, 3 and 5 154 | // using a sorting network 155 | 156 | V a = index(first[0]); 157 | V c = index(first[2]); 158 | V e = index(first[4]); 159 | V g = index(first[6]); 160 | 161 | cswap(c, g); cswap(a, e); 162 | V b = index(first[1]); 163 | V f = index(first[5]); 164 | cswap(b, f); cswap(e, g); 165 | cswap(a, c); 166 | V d = index(first[3]); 167 | cswap(b, d); cswap(c, e); 168 | cswap(d, f); cmovg(a, b); 169 | cswap(e, f); 
cswap(c, d); 170 | cswap(b, e); cswap(d, g); 171 | cmovl(b, c); cmovl(d, e); 172 | cmovl(f, g); 173 | 174 | return std::make_tuple(b, d, f); 175 | } 176 | 177 | template 178 | inline std::tuple median7_copy(T first, I index) { 179 | // Get the median (element 3, held in d) by sorting 7 elements 180 | // using a sorting network and return it together with the number of equal neighbouring pairs 181 | 182 | V a = index(first[0]); 183 | V c = index(first[2]); 184 | V e = index(first[4]); 185 | V g = index(first[6]); 186 | 187 | cswap(c, g); cswap(a, e); 188 | V b = index(first[1]); 189 | V f = index(first[5]); 190 | cswap(b, f); cswap(e, g); 191 | cswap(a, c); 192 | V d = index(first[3]); 193 | cswap(b, d); cswap(c, e); 194 | cswap(d, f); cmovg(a, b); 195 | cswap(e, f); cswap(c, d); 196 | cswap(b, e); cswap(d, g); 197 | cswap(b, c); cswap(d, e); 198 | cswap(f, g); 199 | 200 | return std::make_tuple(d, (a == b) + (b == c) + (c == d) + (d == e) + (e == f) + (f == g)); 201 | } 202 | 203 | template 204 | static std::tuple median15(T first, I index) { 205 | // Get 3 pivots by sorting 15 elements and returning element 4, 8 and 12 (counted from 1, i.e. the values left in d, h and l) 206 | 207 | V a = index(first[0]); 208 | V b = index(first[1]); 209 | V c = index(first[2]); 210 | V d = index(first[3]); 211 | cswap(a, b); cswap(c, d); 212 | V e = index(first[4]); 213 | V f = index(first[5]); 214 | V g = index(first[6]); 215 | V h = index(first[7]); 216 | cswap(e, f); cswap(g, h); 217 | V i = index(first[8]); 218 | V j = index(first[9]); 219 | V k = index(first[10]); 220 | V l = index(first[11]); 221 | cswap(i, j); cswap(k, l); 222 | V m = index(first[12]); 223 | V n = index(first[13]); 224 | cswap(m, n); cswap(a, c); 225 | cswap(e, g); cswap(i, k); 226 | V o = index(first[14]); 227 | cswap(m, o); cswap(b, d); 228 | cswap(f, h); cswap(j, l); 229 | cswap(a, e); cswap(i, m); 230 | cswap(b, f); cswap(j, n); 231 | cswap(c, g); cswap(k, o); 232 | cswap(d, h); cmovg(a, i); 233 | cswap(b, j); cswap(c, k); 234 | cswap(d, l); cswap(e, m); 235 | cswap(f, n); 
cswap(g, o); 236 | cswap(f, k); cswap(g, j); 237 | cswap(d, m); cswap(n, o); 238 | cswap(h, l); cswap(b, c); 239 | cswap(e, i); cmovg(b, e); 240 | cswap(h, n); cswap(c, i); 241 | cmovl(l, o); cmovg(c, e); 242 | cswap(f, g); cswap(j, k); 243 | cmovl(l, n); cswap(d, i); 244 | cswap(h, m); cswap(g, i); 245 | cmovg(k, m); cswap(d, f); 246 | cmovl(h, j); cmovl(d, e); 247 | cmovg(f, g); cmovl(h, i); 248 | cmovl(l, m); cmovg(g, h); 249 | 250 | return std::make_tuple(d, h, l); 251 | } 252 | 253 | // I refactored this out to always give them the same name; returns a comparator ordering a before b when index(a) < index(b) 254 | template inline auto compare(I index) { 255 | return [index](auto a, auto b) { return index(a) < index(b); }; 256 | } 257 | 258 | template 259 | inline void call_range(T first, T last, I index, C cb) { 260 | // Invoke cb(begin, end) once per maximal run of equal index() values: left to right when LR is non-zero, right to left when it is zero, and not at all when LR == NOCB 261 | if (LR != detail::misc::NOCB) { 262 | if (LR) for (auto itf = first; itf < last;) { 263 | auto itl = itf + 1; 264 | while (itl < last && index(*itf) == index(*itl)) ++itl; 265 | cb(itf, itl); 266 | itf = itl; 267 | } else for (auto itl = last; itl > first;) { 268 | auto itf = itl - 1; 269 | while (itf > first && index(itf[-1]) == index(itl[-1])) --itf; 270 | cb(itf, itl); 271 | itl = itf; 272 | } 273 | } 274 | } 275 | 276 | // I refactored this out to always give it the same name 277 | // This may result in better code sharing 278 | template 279 | inline auto index(U ISA, D depth) { 280 | // We don't have to check if a + depth > n because 281 | // all groups leading up to n are always already unique 282 | return [ISAd = ISA + depth](auto a) { return ISAd[a]; }; 283 | } 284 | /* Returns a lambda that static_casts its argument; NOTE(review): the cast's target type is not visible in this copy of the file - confirm against upstream. */ 285 | template 286 | auto castTo() { 287 | return [](auto val) { return static_cast>(val); }; 288 | } 289 | 290 | } // misc 291 | } // detail 292 | } // sort 293 | 294 | #endif // SORT_DETAIL_MISC_H 295 | -------------------------------------------------------------------------------- /vs/vs2022/TurboRCapp.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 
| Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | {a162f37f-183f-4250-88ab-9b9fbde30b04} 28 | 29 | 30 | 31 | 15.0 32 | {6876BEB8-2B45-48B9-8381-1D4094FE8868} 33 | Win32Proj 34 | TurboRCApp 35 | 10.0 36 | 37 | 38 | 39 | Application 40 | true 41 | v143 42 | 43 | 44 | Application 45 | true 46 | v143 47 | 48 | 49 | Application 50 | false 51 | v143 52 | 53 | 54 | Application 55 | false 56 | v143 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | true 78 | $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ 79 | $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ 80 | 81 | 82 | true 83 | $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ 84 | $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ 85 | 86 | 87 | false 88 | $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ 89 | $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ 90 | 91 | 92 | false 93 | $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ 94 | $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ 95 | 96 | 97 | 98 | Disabled 99 | true 100 | CODEC2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;_DEBUG;%(PreprocessorDefinitions) 101 | true 102 | /w24146 /w24133 /w24996 103 | MultiThreadedDebug 104 | AdvancedVectorExtensions 105 | ..\..\ext 106 | 107 | 108 | Console 109 | true 110 | 111 | 112 | 113 | 114 | Disabled 115 | true 116 | CODEC2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;_DEBUG;%(PreprocessorDefinitions) 117 | true 118 | /w24146 /w24133 /w24996 119 | MultiThreadedDebug 120 | AdvancedVectorExtensions2 121 | ..\..\ext 122 | 123 | 124 | Console 125 | true 126 | 127 | 128 | 129 | 130 | MaxSpeed 131 | true 132 | true 133 | true 134 | CODEC2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;NDEBUG;%(PreprocessorDefinitions) 135 | true 136 | /w24146 /w24133 /w24996 137 | MultiThreaded 
138 | AdvancedVectorExtensions 139 | ..\..\ext 140 | 141 | 142 | Console 143 | true 144 | true 145 | true 146 | 147 | 148 | 149 | 150 | MaxSpeed 151 | true 152 | true 153 | true 154 | CODEC2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;NDEBUG;%(PreprocessorDefinitions) 155 | true 156 | /w24146 /w24133 /w24996 157 | MultiThreaded 158 | AdvancedVectorExtensions2 159 | ..\..\ext 160 | 161 | 162 | Console 163 | true 164 | true 165 | true 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /include/anscdf.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | GPL v3 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // TurboRANS Range Asymmetric Systems : include header 25 | #define LIBAPI 26 | 27 | typedef LIBAPI size_t (*fanscdfenc)( unsigned char *in, size_t inlen, unsigned char *out); 28 | typedef LIBAPI size_t (*fanscdfdec)( unsigned char *in, size_t inlen, unsigned char *out); 29 | typedef LIBAPI size_t (*fanscdf4senc)(unsigned char *in, size_t inlen, unsigned char *out, cdf_t *cdf); 30 | typedef LIBAPI size_t (*fanscdf4sdec)(unsigned char *in, size_t inlen, unsigned char *out, cdf_t *cdf); 31 | 32 | extern fanscdfenc _anscdfenc; 33 | extern fanscdfdec _anscdfdec; 34 | extern fanscdfenc _anscdf4enc; 35 | extern fanscdfdec _anscdf4dec; 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | void anscdfini(unsigned id); 41 | 42 | LIBAPI size_t anscdf4senc( unsigned char *in, size_t inlen, unsigned char *out, cdf_t *cdf); 43 | LIBAPI size_t anscdf4sdec( unsigned char *in, size_t outlen, unsigned char *out, cdf_t *cdf); 44 | 45 | LIBAPI size_t anscdf4senc( unsigned char *in, size_t inlen, unsigned char *out, cdf_t *cdf); 46 | LIBAPI size_t anscdf4sdec( unsigned char *in, size_t outlen, unsigned char *out, cdf_t *cdf); 47 | LIBAPI size_t anscdf4enc( unsigned char *in, size_t inlen, unsigned char *out); 48 | LIBAPI size_t anscdf4dec( unsigned char *in, size_t outlen, unsigned char *out); 49 | LIBAPI size_t anscdfenc( unsigned char *in, size_t inlen, unsigned char *out); 50 | LIBAPI size_t anscdfdec( unsigned char *in, size_t outlen, unsigned char *out); 51 | LIBAPI size_t anscdf1enc( unsigned char *in, size_t inlen, unsigned char *out); // o1 52 | LIBAPI size_t anscdf1dec( unsigned char *in, size_t outlen, unsigned char *out); 53 | 54 | LIBAPI size_t anscdfuenc16( unsigned char *in, size_t inlen, unsigned char *out); // vlc6 16 bits 55 | LIBAPI size_t 
anscdfudec16( unsigned char *in, size_t outlen, unsigned char *out); 56 | LIBAPI size_t anscdfuzenc16( unsigned char *in, size_t inlen, unsigned char *out); // vlc6 16 bits zigzag 57 | LIBAPI size_t anscdfuzdec16( unsigned char *in, size_t outlen, unsigned char *out); 58 | 59 | LIBAPI size_t anscdfvenc16( unsigned char *in, size_t inlen, unsigned char *out); // vlc7 16 bits 60 | LIBAPI size_t anscdfvdec16( unsigned char *in, size_t outlen, unsigned char *out); 61 | LIBAPI size_t anscdfvzenc16( unsigned char *in, size_t inlen, unsigned char *out); // vlc7 16 bits zigzag 62 | LIBAPI size_t anscdfvzdec16( unsigned char *in, size_t outlen, unsigned char *out); 63 | 64 | LIBAPI size_t anscdfvenc32( unsigned char *in, size_t inlen, unsigned char *out); // vlc7 32 bits 65 | LIBAPI size_t anscdfvdec32( unsigned char *in, size_t outlen, unsigned char *out); 66 | LIBAPI size_t anscdfvzenc32( unsigned char *in, size_t inlen, unsigned char *out); // vlc7 32 bits zigzag 67 | LIBAPI size_t anscdfvzdec32( unsigned char *in, size_t outlen, unsigned char *out); 68 | 69 | //-------------- direct function calls -------------------------------------------------------- 70 | LIBAPI size_t anscdf4senc0( unsigned char *in, size_t inlen, unsigned char *out, cdf_t *cdf); 71 | LIBAPI size_t anscdf4sdec0( unsigned char *in, size_t outlen, unsigned char *out, cdf_t *cdf); 72 | LIBAPI size_t anscdf4sencs( unsigned char *in, size_t inlen, unsigned char *out, cdf_t *cdf); 73 | LIBAPI size_t anscdf4sdecs( unsigned char *in, size_t outlen, unsigned char *out, cdf_t *cdf); 74 | LIBAPI size_t anscdf4sencx( unsigned char *in, size_t inlen, unsigned char *out, cdf_t *cdf); 75 | LIBAPI size_t anscdf4sdecx( unsigned char *in, size_t outlen, unsigned char *out, cdf_t *cdf); 76 | 77 | LIBAPI size_t anscdfenc0( unsigned char *in, size_t inlen, unsigned char *out); 78 | LIBAPI size_t anscdfencs( unsigned char *in, size_t inlen, unsigned char *out); 79 | LIBAPI size_t anscdfencx( unsigned char *in, size_t 
inlen, unsigned char *out); 80 | LIBAPI size_t anscdfdec0( unsigned char *in, size_t outlen, unsigned char *out); 81 | LIBAPI size_t anscdfdecs( unsigned char *in, size_t outlen, unsigned char *out); 82 | LIBAPI size_t anscdfdecx( unsigned char *in, size_t outlen, unsigned char *out); 83 | 84 | LIBAPI size_t anscdf1enc0( unsigned char *in, size_t inlen, unsigned char *out); 85 | LIBAPI size_t anscdf1encs( unsigned char *in, size_t inlen, unsigned char *out); 86 | LIBAPI size_t anscdf1encx( unsigned char *in, size_t inlen, unsigned char *out); 87 | LIBAPI size_t anscdf1dec0( unsigned char *in, size_t outlen, unsigned char *out); 88 | LIBAPI size_t anscdf1decs( unsigned char *in, size_t outlen, unsigned char *out); 89 | LIBAPI size_t anscdf1decx( unsigned char *in, size_t outlen, unsigned char *out); 90 | 91 | LIBAPI size_t anscdf4enc0( unsigned char *in, size_t inlen, unsigned char *out); 92 | LIBAPI size_t anscdf4encs( unsigned char *in, size_t inlen, unsigned char *out); 93 | LIBAPI size_t anscdf4encx( unsigned char *in, size_t inlen, unsigned char *out); 94 | LIBAPI size_t anscdf4dec0( unsigned char *in, size_t outlen, unsigned char *out); 95 | LIBAPI size_t anscdf4decs( unsigned char *in, size_t outlen, unsigned char *out); 96 | LIBAPI size_t anscdf4decx( unsigned char *in, size_t outlen, unsigned char *out); 97 | 98 | LIBAPI size_t anscdfuenc160( unsigned char *in, size_t inlen, unsigned char *out); 99 | LIBAPI size_t anscdfudec160( unsigned char *in, size_t outlen, unsigned char *out); 100 | LIBAPI size_t anscdfuenc16s( unsigned char *in, size_t inlen, unsigned char *out); 101 | LIBAPI size_t anscdfudec16s( unsigned char *in, size_t outlen, unsigned char *out); 102 | LIBAPI size_t anscdfuenc16x( unsigned char *in, size_t inlen, unsigned char *out); 103 | LIBAPI size_t anscdfudec16x( unsigned char *in, size_t outlen, unsigned char *out); 104 | 105 | LIBAPI size_t anscdfuzenc160(unsigned char *in, size_t inlen, unsigned char *out); 106 | LIBAPI size_t 
anscdfuzdec160(unsigned char *in, size_t outlen, unsigned char *out); 107 | LIBAPI size_t anscdfuzenc16s(unsigned char *in, size_t inlen, unsigned char *out); 108 | LIBAPI size_t anscdfuzdec16s(unsigned char *in, size_t outlen, unsigned char *out); 109 | LIBAPI size_t anscdfuzenc16x(unsigned char *in, size_t inlen, unsigned char *out); 110 | LIBAPI size_t anscdfuzdec16x(unsigned char *in, size_t outlen, unsigned char *out); 111 | 112 | LIBAPI size_t anscdfvenc160( unsigned char *in, size_t inlen, unsigned char *out); // vlc7 16 bits 113 | LIBAPI size_t anscdfvdec160( unsigned char *in, size_t outlen, unsigned char *out); 114 | LIBAPI size_t anscdfvenc16s( unsigned char *in, size_t inlen, unsigned char *out); 115 | LIBAPI size_t anscdfvdec16s( unsigned char *in, size_t outlen, unsigned char *out); 116 | LIBAPI size_t anscdfvenc16x( unsigned char *in, size_t inlen, unsigned char *out); 117 | LIBAPI size_t anscdfvdec16x( unsigned char *in, size_t outlen, unsigned char *out); 118 | 119 | LIBAPI size_t anscdfvzenc160(unsigned char *in, size_t inlen, unsigned char *out); // vlc7 16 bits zigzag 120 | LIBAPI size_t anscdfvzdec160(unsigned char *in, size_t outlen, unsigned char *out); 121 | LIBAPI size_t anscdfvzenc16s(unsigned char *in, size_t inlen, unsigned char *out); 122 | LIBAPI size_t anscdfvzdec16s(unsigned char *in, size_t outlen, unsigned char *out); 123 | LIBAPI size_t anscdfvzenc16x(unsigned char *in, size_t inlen, unsigned char *out); 124 | LIBAPI size_t anscdfvzdec16x(unsigned char *in, size_t outlen, unsigned char *out); 125 | 126 | LIBAPI size_t anscdfvenc320( unsigned char *in, size_t inlen, unsigned char *out); // vlc7 32 bits 127 | LIBAPI size_t anscdfvdec320( unsigned char *in, size_t outlen, unsigned char *out); 128 | LIBAPI size_t anscdfvenc32s( unsigned char *in, size_t inlen, unsigned char *out); 129 | LIBAPI size_t anscdfvdec32s( unsigned char *in, size_t outlen, unsigned char *out); 130 | LIBAPI size_t anscdfvenc32x( unsigned char *in, size_t 
inlen, unsigned char *out); 131 | LIBAPI size_t anscdfvdec32x( unsigned char *in, size_t outlen, unsigned char *out); 132 | 133 | LIBAPI size_t anscdfvzenc320(unsigned char *in, size_t inlen, unsigned char *out); // vlc7 32 bits zigzag 134 | LIBAPI size_t anscdfvzdec320(unsigned char *in, size_t outlen, unsigned char *out); 135 | LIBAPI size_t anscdfvzenc32s(unsigned char *in, size_t inlen, unsigned char *out); 136 | LIBAPI size_t anscdfvzdec32s(unsigned char *in, size_t outlen, unsigned char *out); 137 | LIBAPI size_t anscdfvzenc32x(unsigned char *in, size_t inlen, unsigned char *out); 138 | LIBAPI size_t anscdfvzdec32x(unsigned char *in, size_t outlen, unsigned char *out); 139 | 140 | //extern LIBAPI size_t ansbc(unsigned char *in, size_t inlen, unsigned char *out, unsigned outsize); 141 | //extern LIBAPI size_t ansbd(unsigned char *in, size_t inlen, unsigned char *out); 142 | #ifdef __cplusplus 143 | } 144 | #endif 145 | -------------------------------------------------------------------------------- /libdivsufsort/include/sort/suffix.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Christoph Diegelmann 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom the 8 | // Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | // IN THE SOFTWARE. 20 | 21 | // Linear Time Suffix Sorting By Depth Awareness 22 | 23 | // Notes from the author: 24 | // This is a reference implementation currently NOT running in O(n) 25 | // It uses multi pivot introsort rather than a linear time sorting stage 26 | // I would be happy if you file a pull request on github when you change 27 | // something so we can improve the software for everyone. Of course it's 28 | // your right not to do so. 29 | // If you have questions on this feel free to report an issue. 30 | // http://github.com/akamiru 31 | // I'd really like to thank all authors who make their papers available online 32 | // many related papers are cited in the description most of which can be 33 | // found on the internet. 
34 | 35 | #ifndef SORT_SUFFIX_H 36 | #define SORT_SUFFIX_H 37 | 38 | #ifdef __INTEL_COMPILER 39 | #pragma warning disable 3373 40 | #endif 41 | 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | 50 | #include "detail/suffix.h" 51 | #include "inplace.h" 52 | #include "copy.h" 53 | 54 | // define if additional space may be used 55 | #ifndef NO_USE_COPY 56 | #define USE_COPY 57 | #endif 58 | 59 | namespace sort { 60 | namespace suffix { 61 | 62 | // Sort the suffix array depth aware in (theoretical) linear time 63 | // expects the SA and ISA to be grouped by a single char 64 | // that implies ISA[n-1] == 0 && SA[0] == n-1 65 | // moreover the name of each group should equal the position of 66 | // the beginning in SA (e.g. generated by an EXclusive scan) 67 | // [Af Al) is additional space available (viewed below as [Sf Sl)) 68 | #ifdef USE_COPY 69 | template void daware(T SAf, T SAl, U ISAf, V Af, V Al) { 70 | using X = std::remove_reference_t; 71 | using Y = std::remove_reference_t; 72 | auto* Sf = reinterpret_cast*>(&*Af); 73 | auto* Sl = Sf + (Al - Af) / sizeof(decltype(*Sf)) * sizeof(decltype(*Af)); 74 | #else 75 | template void daware(T SAf, T SAl, U ISAf) { 76 | #endif 77 | // This is a "pulling" or "lazy" rather than a "pushing" version of GSACA 78 | // while GSACA sorts previous elements using info of the current group 79 | // this implementation sorts groups using info of subsequent elements 80 | // in a way related to prefix doubling. 
For GSACA see 81 | // "Linear-time Suffix Sorting - A New Approach for Suffix Array Construction" - Baier 82 | 83 | // More interesting papers on the topic include 84 | // - "Faster suffix sorting" - Larsson, Sadakane (Prefix Doubling) 85 | // - "A Taxonomy of Suffix Array Construction Algorithms" - PUGLISI, SMYTH, TURPIN (General Overview) 86 | // - probably also see "An Efficient Algorithm for Suffix Sorting" - Peng, Wang, Xue, Wei 87 | // because it incorporates depth info into prefix doubling but sadly I 88 | // don't have access to it 89 | 90 | // A "pulling" version allows us to drop all but the additional data 91 | // for ISA and sorting depth (correspond to Previous Pointers in GSACA). 92 | // see the comment at the naming stage in sort::detail::suffix::name() (line 185). 93 | // This is more cache friendly and results in less book keeping work. 94 | // The depth array can be further emplaced into the ISA to achieve 8 * n 95 | // memory usage 96 | 97 | // This is enough to get a O(n) time, O(1) working space SACA using the 98 | // Improved Two Stage (ITS) approach 99 | // see "Short description of improved two-stage suffix sorting algorithm" - Mori 100 | // http://homepage3.nifty.com/wpage/software/itssort.txt 101 | // basically it's a single stage of SA-IS 102 | // see "Linear Suffix Array Construction by Almost Pure Induced-Sorting" - Nong, Zhang, Chan 103 | // [currently the actual worst case working space is O(log(n)) due to 104 | // introsort] 105 | 106 | // The current implementation has a worst and average case running time of 107 | // O(n * log(n)) due to the fact that it uses multi pivot introsort rather 108 | // than a linear time sort (which probably won't change) 109 | 110 | // Additional ideas: 111 | 112 | // "BlockQuicksort: How Branch Mispredictions don't affect Quicksort" - Edelkamp, Weiss 113 | // for multi pivot quicksort. 
But without it the reduced branch mispredictions 114 | // probably won't make up for the additional cache misses when not using three 115 | // pivot quicksort. 116 | // We use this when values are fast to access. 117 | 118 | // "The Spreadsort High-performance General-case Sorting Algorithm" - Ross 119 | // Spreadsort might improve performance over pure quicksort 120 | // at least runtime guarantee improves somewhat. 121 | 122 | // "Improving multikey Quicksort for sorting strings with many equal elements" - Kim, Park 123 | // Split-end partitioning for the three way quicksort might not be optimal 124 | // because it is only used when we have a big amount of elements equal to 125 | // the pivot which results in a lot of copying. "collect-center" might be 126 | // faster. Tests showed that collect-center is slower. 127 | 128 | // We might be able to further reduce the cache misses if we could 129 | // classify the ISA values to S/L type while building the SA in the 130 | // previous stage and only sort the L type in the first stage. 131 | 132 | // Using sorting networks instead of insertion sort for small lists 133 | // might increase performance by reducing the number of comparisons. 134 | // Maybe compile time construction of Batcher's Odd-Even Mergesort 135 | // to keep the code clean. 136 | // Hard to add branchless therefore pretty slow. 137 | 138 | // Prefetching ISA when sorting 139 | // This should be tested on real data with valgrind - first tests are pretty 140 | // mixed 141 | 142 | // If we have additional memory available (with ITS we have an average of n left) 143 | // we could first copy SA[i], ISA[SA[i]] together, then sort this list and 144 | // then copy back the results. That should reduce the number of cache misses 145 | // to near linear (quicksort itself has O(n * log(n)) cache misses with a very 146 | // small const factor) 147 | // This greatly improved the scaling of daware. 
148 | 149 | // Actual implementation: 150 | // Start by iterating over all groups right to left 151 | // sorting all suffixes until their next smaller suffix 152 | // Basically this equals sorting the lyndon words starting at each SA[i] 153 | // So this has something to do with Lyndon Trees 154 | // which themselves have to do with maximal repetitions 155 | // maybe this leads to an even faster approach in those areas 156 | // "A new characterization of maximal repetitions by Lyndon trees" - Bannai, I, Inenaga 157 | for (auto gl = SAl; gl > SAf + 1;) { 158 | // Name of the group equals the start of the group 159 | auto gf = SAf + ISAf[gl[-1]]; 160 | if (1 < std::distance(gf, gl)) { 161 | // after this call everything left is type F everything right is type S 162 | T gc = detail::suffix::partition(SAf, ISAf, gf, gl, 1); 163 | 164 | // handle only the type S subgroups 165 | for (T sgf = gc, sgl = gl; gc < sgl;) { 166 | if (sgl[-1] < 0) { // is type F? 167 | sgl = SAf + ISAf[~sgl[-1]]; // skip over 168 | } else { // type S 169 | sgf = SAf + ISAf[+sgl[-1]]; // get the start of the group 170 | 171 | if (std::distance(sgf, sgl) < 2) { 172 | *--sgl = ~*sgf; // we can directly skip over it 173 | continue; 174 | } 175 | 176 | // calculate the sorting depth: 1 + ~n when n is negative, otherwise 1 (assumes n is signed and >> is an arithmetic shift - TODO confirm) 177 | auto n = ISAf[sgl[-1] + 1]; // Get the element following the current 178 | auto depth = 1 + ((n >> (sizeof(decltype(n)) * CHAR_BIT - 1)) & ~n); 179 | 180 | // get the index function 181 | auto index = detail::misc::index(ISAf, depth); 182 | 183 | // sort all type S 184 | constexpr auto RL = detail::misc::RL; 185 | #ifdef USE_COPY 186 | sort::copy::quick(sgf, sgl, Sf, Sl, index, detail::suffix::name(SAf, ISAf, depth)); 187 | #else 188 | sort::inplace::quick(sgf, sgl, index, detail::suffix::name(SAf, ISAf, depth)); 189 | #endif 190 | } 191 | } 192 | } else 193 | *gf = ~*gf; // Group is unique - just flag it as type F 194 | gl = gf; 195 | } 196 | 197 | // Induce the order of all suffixes from left to right 198 | for 
(auto gf = SAf + 1; gf < SAl;) { 199 | auto gl = gf; 200 | while (0 <= *gl++); // End of the group is flagged 201 | gl[-1] = ~gl[-1]; // Flip the flag 202 | 203 | auto n = ISAf[*gf + 1]; // Get the element following the current 204 | auto depth = 1 + ((n >> (sizeof(decltype(n)) * CHAR_BIT - 1)) & ~n); 205 | auto index = detail::misc::index(ISAf, depth); 206 | 207 | // All elements of the left are already unique so we simply need to sort 208 | constexpr auto NOCB = detail::misc::NOCB; 209 | #ifdef USE_COPY 210 | sort::copy::quick(gf, gl, Sf, Sl, index); 211 | #else 212 | sort::inplace::quick(gf, gl, index); 213 | #endif 214 | // And give each of them a unique name 215 | auto castToIndex = detail::misc::castTo(); 216 | std::for_each(gf, gl, [ISAf, SAf, castToIndex](auto &a) { ISAf[a] = castToIndex(&a - &*SAf); }); 217 | 218 | gf = gl; 219 | // Scan over all unique groups 220 | for (; gf < SAl && *gf < 0; ++gf) { 221 | *gf = ~*gf; 222 | // Maybe we could somehow overwrite the embedded depth info in the 223 | // sorting stage to make this unnecessary 224 | ISAf[*gf] = castToIndex(gf - SAf); 225 | } 226 | } 227 | 228 | // Now the SA is completely sorted and ISA is completely reconstructed 229 | } 230 | 231 | } // suffix 232 | } // sort 233 | 234 | #endif // SORT_SUFFIX_H 235 | -------------------------------------------------------------------------------- /include_/time_.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (C) powturbo 2013-2023 3 | GPL v2 License 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 18 | 19 | - homepage : https://sites.google.com/site/powturbo/ 20 | - github : https://github.com/powturbo 21 | - twitter : https://twitter.com/powturbo 22 | - email : powturbo [_AT_] gmail [_DOT_] com 23 | **/ 24 | // time_.h : parameter free high precision time/benchmark functions 25 | #include 26 | #include 27 | #ifdef _WIN32 28 | #include 29 | #ifndef sleep 30 | #define sleep(n) Sleep((n) * 1000) 31 | #endif 32 | #define uint64_t unsigned __int64 33 | 34 | #else 35 | #include 36 | #include 37 | #define Sleep(ms) usleep((ms) * 1000) 38 | #endif 39 | 40 | #if defined (__i386__) || defined( __x86_64__ ) // ------------------ rdtsc -------------------------- 41 | #ifdef _MSC_VER 42 | #include // __rdtsc 43 | #else 44 | #include 45 | #endif 46 | 47 | #ifdef __corei7__ 48 | #define RDTSC_INI(_c_) do { unsigned _cl, _ch; \ 49 | __asm volatile ("cpuid\n\t" \ 50 | "rdtsc\n\t" \ 51 | "mov %%edx, %0\n" \ 52 | "mov %%eax, %1\n": "=r" (_ch), "=r" (_cl):: \ 53 | "%rax", "%rbx", "%rcx", "%rdx"); \ 54 | _c_ = (uint64_t)_ch << 32 | _cl; \ 55 | } while(0) 56 | 57 | #define RDTSC(_c_) do { unsigned _cl, _ch; \ 58 | __asm volatile("rdtscp\n" \ 59 | "mov %%edx, %0\n" \ 60 | "mov %%eax, %1\n" \ 61 | "cpuid\n\t": "=r" (_ch), "=r" (_cl):: "%rax",\ 62 | "%rbx", "%rcx", "%rdx");\ 63 | _c_ = (uint64_t)_ch << 32 | _cl;\ 64 | } while(0) 65 | #else 66 | /*#define RDTSC(_c_) do { unsigned _cl, _ch;\ 67 | __asm volatile ("cpuid \n"\ 68 | "rdtsc"\ 69 | : "=a"(_cl), "=d"(_ch)\ 70 | : "a"(0)\ 71 | : "%ebx", "%ecx");\ 72 | _c_ = (uint64_t)_ch << 32 | _cl;\ 73 | } while(0)*/ 74 | #define RDTSC(_c_) do { unsigned _cl, _ch;\ 75 | __asm volatile("rdtsc" : "=a"(_cl), "=d"(_ch) );\ 76 | _c_ = (uint64_t)_ch << 32 | _cl;\ 77 | 
} while(0) 78 | #endif 79 | 80 | #define RDTSC_INI(_c_) RDTSC(_c_) 81 | #else // ------------------ time -------------------------- 82 | #define RDTSC_INI(_c_) 83 | #define RDTSC(_c_) 84 | #endif 85 | 86 | #ifndef TM_F 87 | #define TM_F 1.0 // TM_F=4 -> MI/s 88 | #endif 89 | 90 | #ifdef _RDTSC //---------------------- rdtsc -------------------------------- 91 | #define TM_M (CLOCKS_PER_SEC*1000000ull) 92 | #define TM_PRE 4 93 | #define TM_MBS "cycle/byte" 94 | static double TMBS(unsigned l, double t) { return (double)t/(double)l; } 95 | 96 | typedef uint64_t tm_t; 97 | static tm_t tmtime() { uint64_t c; RDTSC(c); return c; } 98 | static tm_t tminit() { uint64_t c; __asm volatile("" ::: "memory"); RDTSC_INI(c); return c; } 99 | static double tmdiff(tm_t start, tm_t stop) { return (double)(stop - start); } 100 | static int tmiszero(tm_t t) { return !t; } 101 | #else //---------------------- time ----------------------------------- 102 | #define TM_M 1 103 | #define TM_PRE 2 104 | #define TM_MBS "MB/s" 105 | static double TMBS(unsigned l, double t) { return (l/t)/1000000.0; } 106 | 107 | #ifdef _WIN32 //-------- windows 108 | static LARGE_INTEGER tps; 109 | 110 | typedef unsigned __int64 tm_t; 111 | static tm_t tmtime() { LARGE_INTEGER tm; tm_t t; QueryPerformanceCounter(&tm); return tm.QuadPart; } 112 | static tm_t tminit() { tm_t t0,ts; QueryPerformanceFrequency(&tps); t0 = tmtime(); while((ts = tmtime())==t0) {}; return ts; } 113 | static double tmdiff(tm_t start, tm_t stop) { return (double)(stop - start)/tps.QuadPart; } 114 | static int tmiszero(tm_t t) { return !t; } 115 | #else // Linux & compatible / MacOS 116 | #ifdef __APPLE__ 117 | #include 118 | #ifndef MAC_OS_X_VERSION_10_12 119 | #define MAC_OS_X_VERSION_10_12 101200 120 | #endif 121 | #define CIVETWEB_APPLE_HAVE_CLOCK_GETTIME (defined(__APPLE__) && defined(MAC_OS_X_VERSION_MIN_REQUIRED) && MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_12) 122 | #if !(CIVETWEB_APPLE_HAVE_CLOCK_GETTIME) 123 | 
#include 124 | #define CLOCK_REALTIME 0 125 | #define CLOCK_MONOTONIC 0 126 | int clock_gettime(int /*clk_id*/, struct timespec* t) { 127 | struct timeval now; 128 | int rv = gettimeofday(&now, NULL); 129 | if (rv) return rv; 130 | t->tv_sec = now.tv_sec; 131 | t->tv_nsec = now.tv_usec * 1000; 132 | return 0; 133 | } 134 | #endif 135 | #endif 136 | 137 | typedef struct timespec tm_t; 138 | static tm_t tmtime() { struct timespec tm; clock_gettime(CLOCK_MONOTONIC, &tm); return tm; } 139 | static double tmdiff(tm_t start, tm_t stop) { return (stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec)/1e9f; } 140 | static tm_t tminit() { tm_t t0 = tmtime(),t; while(!tmdiff(t = tmtime(),t0)) {}; return t; } 141 | static int tmiszero(tm_t t) { return !(t.tv_sec|t.tv_nsec); } 142 | #endif 143 | #endif 144 | 145 | //---------------------------------------- bench ---------------------------------------------------------------------- 146 | // for each a function call is repeated until exceeding tm_tx seconds. 147 | // A run duration is always tm_tx seconds 148 | // The number of runs can be set with the program options -I and -J (specify -I15 -J15 for more precision) 149 | 150 | // sleep after each 8 runs to avoid cpu throttling. 
151 | #define TMSLEEP do { tm_T = tmtime(); if(tmiszero(tm_0)) tm_0 = tm_T; else if(tmdiff(tm_0, tm_T) > tm_TX) { if(tm_verbose>2) { printf("S \b\b");fflush(stdout); } sleep(tm_slp); tm_0=tmtime();} } while(0) 152 | 153 | // benchmark loop 154 | #define TMBEG(_tm_Reps_) { unsigned _tm_r,_tm_c = 0,_tm_R,_tm_Rx = _tm_Reps_,_tm_Rn = _tm_Reps_; double _tm_t;\ 155 | for(tm_rm = tm_rep, tm_tm = DBL_MAX, _tm_R = 0; _tm_R < _tm_Rn; _tm_R++) { tm_t _tm_t0 = tminit(); /*for each run*/\ 156 | for(_tm_r = 0;_tm_r < tm_rm;) { /*repeat tm_rm times */ 157 | 158 | #define TMEND(_size_) ;\ 159 | _tm_r++; if(tm_tm == DBL_MAX && (_tm_t = tmdiff(_tm_t0, tmtime())) > tm_tx) break;\ 160 | }\ 161 | /*1st run: break the loop after tm_tx=1 sec, calculate a new repeats 'tm_rm' to avoid calling time() after each function call*/\ 162 | /*other runs: break the loop only after 'tm_rm' repeats */ \ 163 | _tm_t = tmdiff(_tm_t0, tmtime());\ 164 | /*set min time, recalculate repeats tm_rm based on tm_tx, recalculate number of runs based on tm_TX*/\ 165 | if(_tm_t < tm_tm) { if(tm_tm == DBL_MAX) { tm_rm = _tm_r; _tm_Rn = tm_TX/_tm_t; _tm_Rn = _tm_Rn<_tm_Rx?_tm_Rn:_tm_Rx; /*printf("repeats=%u,%u,%.4f ", _tm_Rn, _tm_Rx, _tm_t);*/ } \ 166 | tm_tm = _tm_t; _tm_c++;\ 167 | } else if(_tm_t > tm_tm*1.15) TMSLEEP;/*force sleep at 15% divergence*/\ 168 | if(tm_verbose>2) { printf("%8.*f %2d_%.2d\b\b\b\b\b\b\b\b\b\b\b\b\b\b",TM_PRE, TMBS(_size_, tm_tm/tm_rm),_tm_R+1,_tm_c),fflush(stdout); }\ 169 | if((_tm_R & 7)==7) sleep(tm_slp); /*pause 20 secs after each 8 runs to avoid cpu throttling*/\ 170 | }\ 171 | } 172 | 173 | static unsigned tm_rep = 1u<<30, tm_Rep = 3, tm_Rep2 = 3, tm_rm, tm_RepMin = 1, tm_slp = 20, tm_verbose = 3; 174 | static tm_t tm_0, tm_T; 175 | static double tm_tm, tm_tx = 1.0*TM_M, tm_TX = 60.0*TM_M; 176 | 177 | static void tm_init(int _tm_Rep, int _tm_verbose) { tm_verbose = _tm_verbose; if(_tm_Rep) tm_Rep = _tm_Rep; } 178 | 179 | #define TM(_name_, _efunc_, _size_, _len_, _dfunc_) do {\ 
180 | TMBEG(tm_Rep) _efunc_; if(_size_ && !(_tm_R|_tm_r) ) pr(_len_,_size_); TMEND(_size_);\ 181 | double dm = tm_tm, dr = tm_rm; \ 182 | if(tm_verbose>2) printf("%8.*f \b\b\b\b\b", TM_PRE, TMBS(_size_, dm/dr) );\ 183 | else if(tm_verbose) printf("%8.*f ", TM_PRE, TMBS(_size_, dm/dr) );\ 184 | \ 185 | TMBEG(tm_Rep2) _dfunc_; TMEND(_size_);\ 186 | dm = tm_tm; dr = tm_rm; if(tm_verbose>2) printf("%8.*f \b\b\b\b\b", TM_PRE,TMBS(_size_, dm/dr) );else if(tm_verbose) printf("%8.*f ", TM_PRE,TMBS(_size_, dm/dr) );\ 187 | if(tm_verbose>3) printf("%s ", _name_?_name_:#_efunc_);\ 188 | } while(0) 189 | 190 | #define TM0(_name_, _efunc_, _size_, _len_) do {\ 191 | TMBEG(tm_Rep) _efunc_; if(_size_ && !(_tm_R|_tm_r) ) pr(_len_,_size_); TMEND(_size_);\ 192 | double dm = tm_tm, dr = tm_rm; \ 193 | if(tm_verbose>2) printf("%8.*f \b\b\b\b\b", TM_PRE, TMBS(_size_, dm/dr) );\ 194 | else if(tm_verbose) printf("%8.*f ", TM_PRE, TMBS(_size_, dm/dr) );\ 195 | \ 196 | if(tm_verbose>3) printf("%s ", _name_?_name_:#_efunc_);\ 197 | } while(0) 198 | 199 | static void pr(unsigned l, unsigned n) { 200 | double r = (double)l*100.0/n; 201 | if(r>0.1) printf("%10u %6.2f%% ", l, r); 202 | else if(r>0.01) printf("%10u %7.3f%% ", l, r); 203 | else printf("%10u %8.4f%% ", l, r); fflush(stdout); 204 | } 205 | 206 | //---------------------------------------------------------------------------------------------------------------------------------- 207 | #define Kb (1u<<10) 208 | #define Mb (1u<<20) 209 | #define Gb (1u<<30) 210 | #define KB 1000 211 | #define MB 1000000 212 | #define GB 1000000000 213 | 214 | static unsigned argtoi(char *s, unsigned def) { 215 | char *p; 216 | unsigned n = strtol(s, &p, 10),f = 1; 217 | switch(*p) { 218 | case 'K': f = KB; break; 219 | case 'M': f = MB; break; 220 | case 'G': f = GB; break; 221 | case 'k': f = Kb; break; 222 | case 'm': f = Mb; break; 223 | case 'g': f = Gb; break; 224 | case 'B': return n; break; 225 | case 'b': def = 0; 226 | default: if(!def) return 
n>=32?0xffffffffu:(1u << n); f = def; 227 | } 228 | return n*f; 229 | } 230 | static uint64_t argtol(char *s) { 231 | char *p; 232 | uint64_t n = strtol(s, &p, 10),f=1; 233 | switch(*p) { 234 | case 'K': f = KB; break; 235 | case 'M': f = MB; break; 236 | case 'G': f = GB; break; 237 | case 'k': f = Kb; break; 238 | case 'm': f = Mb; break; 239 | case 'g': f = Gb; break; 240 | case 'B': return n; break; 241 | case 'b': return 1u << n; 242 | default: f = Mb; 243 | } 244 | return n*f; 245 | } 246 | 247 | static uint64_t argtot(char *s) { 248 | char *p; 249 | uint64_t n = strtol(s, &p, 10),f=1; 250 | switch(*p) { 251 | case 'h': f = 3600000; break; 252 | case 'm': f = 60000; break; 253 | case 's': f = 1000; break; 254 | case 'M': f = 1; break; 255 | default: f = 1000; 256 | } 257 | return n*f; 258 | } 259 | 260 | static void memrcpy(unsigned char *out, unsigned char *in, unsigned n) { int i; for(i = 0; i < n; i++) out[i] = ~in[i]; } 261 | 262 | --------------------------------------------------------------------------------