├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
└── src
    ├── cmdline.cpp
    ├── cmdline.h
    ├── common
        ├── alignbuf.h
        ├── bufio.h
        ├── histbuf.h
        ├── math.h
        ├── md5.cpp
        ├── md5.h
        ├── rand.h
        ├── timer.h
        ├── utils.cpp
        └── utils.h
    ├── file
        ├── file.cpp
        ├── file.h
        ├── sac.cpp
        ├── sac.h
        ├── wav.cpp
        └── wav.h
    ├── global.h
    ├── libsac
        ├── cost.h
        ├── libsac.cpp
        ├── libsac.h
        ├── map.cpp
        ├── map.h
        ├── pred.cpp
        ├── pred.h
        ├── profile.cpp
        ├── profile.h
        ├── sparse.h
        ├── vle.cpp
        └── vle.h
    ├── main.cpp
    ├── model
        ├── counter.h
        ├── domain.h
        ├── mixer.h
        ├── model.h
        ├── range.cpp
        ├── range.h
        └── sse.h
    ├── opt
        ├── cma.cpp
        ├── cma.h
        ├── dds.cpp
        ├── dds.h
        ├── de.cpp
        ├── de.h
        ├── opt.cpp
        ├── opt.h
        └── ssc.h
    └── pred
        ├── bias.h
        ├── blend.h
        ├── lms.h
        ├── lms_cascade.h
        ├── lpc.h
        ├── rls.cpp
        └── rls.h


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows thumbnail cache files
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | ehthumbs_vista.db
 5 | 
 6 | # Folder config file
 7 | Desktop.ini
 8 | 
 9 | # Recycle Bin used on file shares
10 | $RECYCLE.BIN/
11 | 
12 | # Windows Installer files
13 | *.cab
14 | *.msi
15 | *.msm
16 | *.msp
17 | 
18 | # Windows shortcuts
19 | *.lnk
20 | 
21 | # =========================
22 | # Operating System Files
23 | # =========================
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Sebastian Lehmann
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![Version](https://img.shields.io/github/release/slmdev/sac)
 2 | ![Repo size](https://img.shields.io/github/repo-size/slmdev/sac)
 3 | ![Loc](https://sloc.xyz/github/slmdev/sac)
 4 | ![License](https://img.shields.io/github/license/slmdev/sac)
 5 | 
 6 | # Sac
 7 | Sac is a state-of-the-art lossless audio compression model
 8 | 
 9 | Lossless audio compression is a complex problem, because PCM data is highly non-stationary and uses a high sample resolution (typically >=16 bit). That's why classic context modelling suffers from context dilution problems. Sac employs a simple OLS-NLMS predictor per frame including bias correction. Prediction residuals are encoded using a sophisticated bitplane coder including SSE and various forms of probability estimation. Meta-parameters of the predictor are optimized with [DDS](https://agupubs.onlinelibrary.wiley.com/doi/10.1029/2005WR004723) on a per-frame basis. This results in a highly asymmetric codec design.
10 | 
11 | This program wouldn't exist without the help from the following people (in no particular order):
12 | 
13 | Matt Mahoney, Dmitry Shkarin, Eugene D. Shelwien, Florin Ghido, Grzegorz Ulacha
14 | 
15 | ## Technical features
16 | * Input: wav file with 1-16 bit sample size, mono/stereo, pcm
17 | * Output: sac file including all input metadata
18 | * Decoded wav file is bit for bit identical to input wav file
19 | * MD5 of raw pcm values
20 | 
21 | ## Technical limitations
22 | Sac uses fp64 for many internal calculations. Changing compiler options or the (CPU) platform might affect the output. Use at your own risk and for testing purposes only.
23 |  
24 | ## Benchmarks
25 | **Sac v0.7.18**
26 | 
27 | 16 files (44.1Khz, stereo) - 51.014.742 bytes - parallel on i7-13700H
28 | 
29 | **Asymmetric encoding profiles** - bits per sample (bps) is mean bps over all files
30 | |Profile||Size|Enc-time|Dec-time|bps|
31 | |:-|-:|:-|:-|:-|:-|
32 | |FLAC -8|100.0%|29.793.060|00:00:00|00:00:00|9.385|
33 | |--normal|89.6%|26.691.217|00:00:22|00:00:20|8.401|
34 | |--high|89.1%|26.546.317|00:03:42|00:00:50|8.352|
35 | |--veryhigh|88.9%|26.480.793|00:17:38|00:00:53|8.330|
36 | |--extrahigh|88.8%|26.453.344|00:48:46|00:00:52|8.322|
37 | |--best|88.6%|26.397.759|04:37:00|00:01:00|8.303|
38 | 
39 | &nbsp;
40 | 
41 | **Comparison with other lossless audio codecs**
42 | |Program|Parameters|Source|
43 | |:-|:-|:-|
44 | |Sac v0.7.18|--best|open|
45 | |OFR v5.100|--preset max|closed|
46 | |paq8px_v208fix1|-6|open|
47 | |MP4ALS RM23|-b -p -z3|open|
48 | |MAC v10.44|-c5000|open|
49 | |FLAC v1.4.3|-8|open|
50 | 
51 | Numbers are bits per sample (bps)
52 | | Name  | Sac | OFR | paq8px | MP4ALS | MAC | FLAC |
53 | |:---|---:|---:|---:|---:|---:|---:|
54 | |ATrain|6.991|7.156|7.353|7.232|7.269|7.962|
55 | |BeautySlept|7.089|7.790|7.826|8.305|8.464|10.134|
56 | |chanchan|9.658|9.778|9.723|9.886|9.951|10.206|
57 | |death2|5.036|5.465|5.215|6.660|6.213|6.092|
58 | |experiencia|10.825|10.915|10.963|10.992|11.005|11.428|
59 | |female_speech|4.356|4.498|4.708|4.711|5.190|5.364|
60 | |FloorEssence|9.034|9.409|9.488|9.509|9.537|10.201|
61 | |ItCouldBeSweet|8.182|8.310|8.330|8.396|8.531|9.064|
62 | |Layla|9.454|9.571|9.725|9.691|9.783|10.415|
63 | |LifeShatters|10.748|10.808|10.868|10.836|10.838|11.194|
64 | |macabre|8.968|9.026|9.249|9.076|9.172|10.043|
65 | |male_speech|4.229|4.256|4.509|4.813|5.255|5.674|
66 | |SinceAlways|10.329|10.409|10.455|10.473|10.522|11.254|
67 | |thear1|11.362|11.400|11.474|11.425|11.451|11.783|
68 | |TomsDiner|6.991|7.108|7.057|7.268|7.432|8.572|
69 | |velvet|9.595|9.990|10.030|10.212|10.461|10.770|
70 | |*Mean*|**8.303**|8.493|8.561|8.718|8.817|9.385|
71 | |||||||
72 | |Enc-time|04:37:00|00:00:09|00:05:37|00:00:15|00:00:01|00:00:00|
73 | |Dec-time|00:01:00|00:00:02|00:05:40|00:00:13|00:00:01|00:00:00|
74 | 
75 | 


--------------------------------------------------------------------------------
/src/cmdline.cpp:
--------------------------------------------------------------------------------
  1 | #include <format>
  2 | #include <cstring>
  3 | #include "cmdline.h"
  4 | #include "common/utils.h"
  5 | #include "common/timer.h"
  6 | #include "file/sac.h"
  7 | 
  8 | 
  9 | CmdLine::CmdLine()
 10 | :mode(ENCODE)
 11 | {
 12 | }
 13 | 
 14 | void CmdLine::PrintWav(const AudioFile &myWav)
 15 | {
 16 |   std::cout << "  WAVE  Codec: PCM (" << myWav.getKBPS() << " kbps)\n";
 17 |   std::cout << "  " << myWav.getSampleRate() << "Hz " << myWav.getBitsPerSample() << " Bit  ";
 18 |   if (myWav.getNumChannels()==1) std::cout << "Mono";
 19 |   else if (myWav.getNumChannels()==2) std::cout << "Stereo";
 20 |   else std::cout << myWav.getNumChannels() << " Channels";
 21 |   std::cout << "\n";
 22 |   std::cout << "  " << myWav.getNumSamples() << " Samples [" << miscUtils::getTimeStrFromSamples(myWav.getNumSamples(),myWav.getSampleRate()) << "]\n";
 23 | }
 24 | 
 25 | std::string CmdLine::CostStr(const FrameCoder::SearchCost cost_func)
 26 | {
 27 |   using enum FrameCoder::SearchCost;
 28 |   std::string rstr;
 29 |   switch (cost_func) {
 30 |     case L1:rstr="L1";break;
 31 |     case RMS:rstr="rms";break;
 32 |     case Golomb:rstr="glb";break;
 33 |     case Entropy:rstr="ent";break;
 34 |     case Bitplane:rstr="bpn";break;
 35 |     default:break;
 36 |   }
 37 |   return rstr;
 38 | }
 39 | 
 40 | std::string CmdLine::SearchStr(const FrameCoder::SearchMethod search_func)
 41 | {
 42 |   using enum FrameCoder::SearchMethod;
 43 |   std::string rstr;
 44 |   switch (search_func) {
 45 |     case DDS:rstr="DDS";break;
 46 |     case DE:rstr="DE";break;
 47 |     default: break;
 48 |   }
 49 |   return rstr;
 50 | }
 51 | 
 52 | void CmdLine::PrintMode()
 53 | {
 54 |   const FrameCoder::toptim_cfg &ocfg = cfg.ocfg;
 55 |   std::cout << "  Profile: ";
 56 |   std::cout << "mt" << cfg.mt_mode;
 57 |   std::cout << " " << cfg.max_framelen << "s";
 58 |   if (cfg.adapt_block) std::cout << " ab";
 59 |   if (cfg.zero_mean) std::cout << " zero-mean";
 60 |   if (cfg.sparse_pcm) std::cout << " sparse-pcm";
 61 |   std::cout << '\n';
 62 |   if (cfg.optimize) {
 63 |       std::cout << "  Optimize: ";
 64 |       std::cout << SearchStr(ocfg.optimize_search);
 65 |       std::cout << " " << std::format("{:.1f}%", ocfg.fraction*100.0);
 66 |       std::cout << ",n=" << ocfg.maxnfunc;
 67 |       std::cout << "," << CostStr(ocfg.optimize_cost);
 68 |       std::cout << ",k=" << ocfg.optk;
 69 |       std::cout << '\n';
 70 |   }
 71 |   std::cout << std::endl;
 72 | }
 73 | 
 74 | void CmdLine::Split(const std::string &str,std::string &key,std::string &val,const char splitval)
 75 | {
 76 |   key=val="";
 77 |   std::size_t found=str.find(splitval);
 78 |   if (found!=std::string::npos) {
 79 |     key=str.substr(0,found);
 80 |     val=str.substr(found+1);
 81 |   } else {
 82 |     key=str;
 83 |   }
 84 | }
 85 | 
 86 | double CmdLine::stod_safe(const std::string& str)
 87 | {
 88 |     double d;
 89 |     try {
 90 |         d = std::stod(str);
 91 |     } catch (const std::invalid_argument&) {
 92 |         std::cerr << "stod: argument is invalid\n";
 93 |         throw;
 94 |     } catch (const std::out_of_range&) {
 95 |         std::cerr << "stod: argument is out of range for a double\n";
 96 |         throw;
 97 |     }
 98 |     return d;
 99 | }
100 | 
101 | int CmdLine::Parse(int argc,const char *argv[])
102 | {
103 |   if (argc < 2) {
104 |     std::cout << SACHelp;
105 |     return 1;
106 |   }
107 | 
108 |   bool first=true;
109 |   std::string param;
110 |   int k=1;
111 |   while (k<argc) {
112 |     param=argv[k];
113 |     std::string uparam=StrUtils::str_up(param);
114 |     std::string key,val;
115 |     Split(uparam,key,val);
116 | 
117 |     if (param.length()>1 && (param[0]=='-' && param[1]=='-'))
118 |     {
119 |        if (key=="--ENCODE") mode=ENCODE;
120 |        else if (key=="--DECODE") mode=DECODE;
121 |        else if (key=="--LIST") mode=LIST;
122 |        else if (key=="--LISTFULL") mode=LISTFULL;
123 |        else if (key=="--VERBOSE") {
124 |           if (val.length()) cfg.verbose_level=std::max(0,stoi(val));
125 |           else cfg.verbose_level=1;
126 |        }
127 |        else if (key=="--NORMAL") {
128 |          cfg.optimize=0;
129 |        } else if (key=="--HIGH") {
130 |          cfg.optimize=1;
131 |          cfg.ocfg.fraction=0.075;
132 |          cfg.ocfg.maxnfunc=100;
133 |          cfg.ocfg.sigma=0.2;
134 |          cfg.ocfg.dds_cfg.c_fail_max=30;
135 |        } else if (key=="--VERYHIGH") {
136 |          cfg.optimize=1;
137 |          cfg.ocfg.fraction=0.2;
138 |          cfg.ocfg.maxnfunc=250;
139 |          cfg.ocfg.sigma=0.25;
140 |        } else if (key=="--EXTRAHIGH") {
141 |          cfg.optimize=1;
142 |          cfg.ocfg.fraction=0.25;
143 |          cfg.ocfg.maxnfunc=500;
144 |          cfg.ocfg.sigma=0.25;
145 |        } else if (key=="--BEST") {
146 |          cfg.optimize=1;
147 |          cfg.ocfg.fraction=0.50;
148 |          cfg.ocfg.maxnfunc=1000;
149 |          cfg.ocfg.sigma=0.25;
150 |          cfg.ocfg.optimize_cost=FrameCoder::SearchCost::Bitplane;
151 |        } else if (key=="--INSANE") {
152 |          cfg.optimize=1;
153 |          cfg.ocfg.fraction=0.5;
154 |          cfg.ocfg.maxnfunc=1500;
155 |          cfg.ocfg.sigma=0.25;
156 |          cfg.ocfg.optimize_cost=FrameCoder::SearchCost::Bitplane;
157 |        } else if (key=="--OPTIMIZE") {
158 |          if (val=="NO" || val=="0") cfg.optimize=0;
159 |          else {
160 |           std::vector<std::string>vs;
161 |           StrUtils::SplitToken(val,vs,",");
162 |           if (vs.size()>=2)  {
163 |             cfg.ocfg.fraction=std::clamp(stod_safe(vs[0]),0.,1.);
164 |             cfg.ocfg.maxnfunc=std::clamp(std::stoi(vs[1]),0,50000);
165 |             if (vs.size()>=3) {
166 |               std::string cf=StrUtils::str_up(vs[2]);
167 |               if (cf=="L1") cfg.ocfg.optimize_cost = FrameCoder::SearchCost::L1;
168 |               else if (cf=="RMS") cfg.ocfg.optimize_cost = FrameCoder::SearchCost::RMS;
169 |               else if (cf=="GLB") cfg.ocfg.optimize_cost = FrameCoder::SearchCost::Golomb;
170 |               else if (cf=="ENT") cfg.ocfg.optimize_cost = FrameCoder::SearchCost::Entropy; //default
171 |               else if (cf=="BPN") cfg.ocfg.optimize_cost = FrameCoder::SearchCost::Bitplane;
172 |               else std::cerr << "warning: unknown cost function '" << vs[2] << "'\n";
173 |             }
174 |             if (vs.size()>=4) {
175 |               cfg.ocfg.optk=std::clamp(stoi(vs[3]),1,32);
176 |             }
177 |             if (cfg.ocfg.fraction>0. && cfg.ocfg.maxnfunc>0) cfg.optimize=1;
178 |             else cfg.optimize=0;
179 |           } else std::cerr << "unknown option: " << val << '\n';
180 |          }
181 |        }
182 |        else if (key=="--FRAMELEN") {
183 |          if (val.length()) cfg.max_framelen=std::max(0,stoi(val));
184 |        }
185 |        else if (key=="--MT-MODE") {
186 |          if (val.length()) cfg.mt_mode=std::max(0,stoi(val));
187 |        }
188 |        else if (key=="--SPARSE-PCM") {
189 |           if (val=="NO" || val=="0") cfg.sparse_pcm=0;
190 |           else cfg.sparse_pcm=1;
191 |        } else if (key=="--STEREO-MS") {
192 |          cfg.stereo_ms=1;
193 |        } else if (key=="--OPT-RESET") {
194 |          cfg.ocfg.reset=1;
195 |        } else if (key=="--OPT-CFG") {
196 |          std::vector<std::string>vs;
197 |          StrUtils::SplitToken(val,vs,",");
198 |          if (vs.size()>=1) {
199 |             std::string cval=StrUtils::str_up(vs[0]);
200 |             if (cval=="DDS") cfg.ocfg.optimize_search=FrameCoder::SearchMethod::DDS;
201 |             else if (cval=="DE") cfg.ocfg.optimize_search=FrameCoder::SearchMethod::DE;
202 |             else if (cval=="CMA") cfg.ocfg.optimize_search=FrameCoder::SearchMethod::CMA;
203 |             else std::cerr << "  warning: invalid opt='"<<cval<<"'\n";
204 |          }
205 |          if (vs.size()>=2) cfg.ocfg.num_threads = std::clamp(std::stoi(vs[1]),0,256);
206 |          if (vs.size()>=3) cfg.ocfg.sigma=std::clamp(stod_safe(vs[2]),0.,1.);
207 |        } else if (key=="--ADAPT-BLOCK") {
208 |          if (val=="NO" || val=="0") cfg.adapt_block=0;
209 |          else cfg.adapt_block=1;
210 |        } else if (key=="--ZERO-MEAN") {
211 |          if (val=="NO" || val=="0") cfg.zero_mean=0;
212 |          else cfg.zero_mean=1;
213 |        }
214 |        else std::cerr << "warning: unknown option '" << param << "'\n";
215 |     } else {
216 |        if (first) {sinputfile=param;first=false;}
217 |        else soutputfile=param;
218 |     }
219 |     k++;
220 |   }
221 |   // configure opt method
222 |   if (cfg.ocfg.optimize_search==FrameCoder::SearchMethod::DDS)
223 |   {
224 |     cfg.ocfg.dds_cfg.nfunc_max=cfg.ocfg.maxnfunc;
225 |     cfg.ocfg.dds_cfg.num_threads=cfg.ocfg.num_threads; // also accepts zero
226 |     cfg.ocfg.dds_cfg.sigma_init=cfg.ocfg.sigma;
227 |   } else if (cfg.ocfg.optimize_search==FrameCoder::SearchMethod::DE)
228 |   {
229 |     cfg.ocfg.de_cfg.nfunc_max=cfg.ocfg.maxnfunc;
230 |     cfg.ocfg.de_cfg.num_threads=std::max(cfg.ocfg.num_threads,1);
231 |     cfg.ocfg.de_cfg.sigma_init=cfg.ocfg.sigma;
232 |   } else if (cfg.ocfg.optimize_search==FrameCoder::SearchMethod::CMA)
233 |   {
234 |     cfg.ocfg.cma_cfg.nfunc_max=cfg.ocfg.maxnfunc;
235 |     cfg.ocfg.cma_cfg.num_threads=std::max(cfg.ocfg.num_threads,1);
236 |     cfg.ocfg.cma_cfg.sigma_init=cfg.ocfg.sigma;
237 |   }
238 | 
239 |   return 0;
240 | }
241 | 
// Run the operation selected by Parse().
//
// ENCODE:   read the input wav, compress it into the output sac file and
//           report size ratio, bits per sample and speed.
// LIST:     print header information and the stored audio MD5 of a sac file.
// LISTFULL: as LIST, then let the codec scan the frames.
// DECODE:   as LIST, then decode into the output wav and compare the MD5 of
//           the decoded audio with the stored one.
//
// Returns 1 when the input wav format is unsupported, otherwise 0 (also when
// a file could not be opened or created; that is only reported on stdout).
int CmdLine::Process()
{
  // Print a digest as hex with exactly two digits per byte. Without the zero
  // padding every byte below 0x10 would shorten the printed digest.
  const auto print_digest = [](const auto &digest) {
    for (const auto byte : digest)
      std::cout << std::format("{:02x}", static_cast<unsigned>(static_cast<unsigned char>(byte)));
  };

  Timer myTimer;
  myTimer.start();

  if (mode==ENCODE) {
    Wav myWav(cfg.verbose_level>0);
    std::cout << "Open: '" << sinputfile << "': ";
    if (myWav.OpenRead(sinputfile)==0) {
      std::cout << "ok (" << myWav.getFileSize() << " Bytes)\n";
      if (myWav.ReadHeader()==0) {
         PrintWav(myWav);

         // NOTE(review): the check accepts up to 24 bit while the message
         // below says 1-16 bit - confirm which limit is intended
         bool fsupp=(myWav.getBitsPerSample()<=24) && ( (myWav.getNumChannels()==2) || (myWav.getNumChannels()==1));
         if (!fsupp)
         {
            std::cerr << "unsupported input format" << std::endl;
            std::cerr << "must be 1-16 bit, mono/stereo, pcm" << std::endl;
            myWav.Close();
            return 1;
         }
         Sac mySac(myWav);
         std::cout << "Create: '" << soutputfile << "': ";
         if (mySac.OpenWrite(soutputfile)==0) {
           std::cout << "ok\n";
           PrintMode();
           Codec myCodec(cfg);

           Timer time;
           time.start();
           myCodec.EncodeFile(myWav,mySac);
           time.stop();

           uint64_t infilesize=myWav.getFileSize();
           uint64_t outfilesize=mySac.readFileSize();
           double r=0.,bps=0.;
           if (outfilesize) {
             r=outfilesize*100.0/infilesize;
             bps=(outfilesize*8.)/static_cast<double>(myWav.getNumSamples()*myWav.getNumChannels());
           }
           // speed relative to the playback duration of the audio
           double xrate=0.0;
           if (time.elapsedS() > 0.0)
             xrate=(myWav.getNumSamples()/double(myWav.getSampleRate()))/time.elapsedS();

           std::cout << "\n  " << infilesize << "->" << outfilesize<< "=";
           std::cout << std::format("{:.1f}",r) << "% (" << std::format("{:.3f}",bps) <<" bps)";
           std::cout << "  " << std::format("{:.3f}x",xrate) << '\n';
           mySac.Close();
         } else std::cout << "could not create\n";
      } else std::cout << "warning: input is not a valid .wav file\n";
      myWav.Close();
    } else std::cout << "could not open\n";
  } else if (mode==LIST || mode==LISTFULL || mode==DECODE) {
    Sac mySac;
    std::cout << "Open: '" << sinputfile << "': ";
    if (mySac.OpenRead(sinputfile)==0) {
      std::streampos FileSizeSAC = mySac.getFileSize();
      std::cout << "ok (" << FileSizeSAC << " Bytes)\n";
      if (mySac.ReadSACHeader()==0) {
        uint8_t md5digest[16];
        mySac.ReadMD5(md5digest);
        // bitrate of the compressed stream, derived from the file size
        double bps=(static_cast<double>(FileSizeSAC)*8.0)/static_cast<double>(mySac.getNumSamples()*mySac.getNumChannels());
        int kbps=round((mySac.getSampleRate()*mySac.getNumChannels()*bps)/1000);
        mySac.setKBPS(kbps);
        PrintWav(mySac);
        std::cout << "  Profile: ";
        std::cout << "mt" << cfg.mt_mode;
        std::cout << " " << static_cast<int>(mySac.mcfg.max_framelen) << "s";
        std::cout << std::endl;
        std::cout << "  Ratio:   " << std::fixed << std::setprecision(3) << bps << " bps\n\n";
        std::cout << "  Audio MD5: ";
        print_digest(md5digest);
        std::cout << '\n';

        if (mode==LISTFULL) {
          Codec myCodec;
          myCodec.ScanFrames(mySac);
        } else if (mode==DECODE) {
          Wav myWav(mySac);
          std::cout << "Create: '" << soutputfile << "': ";
          if (myWav.OpenWrite(soutputfile)==0) {
            std::cout << "ok\n";

            Timer time;
            time.start();

            Codec myCodec(cfg);
            myCodec.DecodeFile(mySac,myWav);
            MD5::Finalize(&myWav.md5ctx);
            time.stop();

            double xrate=0.0;
            if (time.elapsedS() > 0.0)
              xrate=(myWav.getNumSamples()/double(myWav.getSampleRate()))/time.elapsedS();
            std::cout << "\n  Speed " << std::format("{:.3f}x",xrate) << '\n';

            // the decode is only trustworthy when the MD5 of the decoded
            // audio matches the digest stored in the sac file
            std::cout << "  Audio MD5: ";
            bool md5diff=std::memcmp(myWav.md5ctx.digest, md5digest, 16);
            if (!md5diff) std::cout << "ok\n";
            else {
              std::cout << "Error (";
              print_digest(myWav.md5ctx.digest);
              std::cout << ")\n";
            }
            myWav.Close();
          } else std::cout << "could not create\n";
        }
      } else std::cout << "warning: input is not a valid .sac file\n";
    } else std::cout << "could not open\n"; // belongs to OpenRead; it used to hang off the mode chain and never fired
  }

  myTimer.stop();
  std::cout << "\n  Time:    [" << miscUtils::getTimeStrFromSeconds(round(myTimer.elapsedS())) << "]" << std::endl;
  return 0;
}
359 | 


--------------------------------------------------------------------------------
/src/cmdline.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMDLINE_H
 2 | #define CMDLINE_H
 3 | 
 4 | #include <string>
 5 | #include "file/wav.h"
 6 | #include "libsac/libsac.h"
 7 | 
 8 | const char SACHelp[] =
 9 | "usage: sac [--options] input output\n\n"
10 | "  --encode            encode input.wav to output.sac (def)\n"
11 | "    --normal|high|veryhigh|extrahigh compression (def=normal)\n"
12 | "    --best            you asked for it\n\n"
13 | "  --decode            decode input.sac to output.wav\n"
14 | "  --list              list info about input.sac\n"
15 | "  --listfull          verbose info about input\n"
16 | "  --verbose           verbose output\n\n"
17 | "  supported types: 1-16 bit, mono/stereo pcm\n"
18 | "  advanced options    (automatically set)\n"
19 | "   --optimize=#       frame-based optimization\n"
20 | "     no|s,n,c,k       s=[0,1.0],n=[0,10000]\n"
21 | "                      c=[l1,rms,glb,ent,bpn] k=[1,32]\n"
22 | "   --opt-cfg=#        configure optimization method\n"
23 | "     de|dds,nt,s      nt=num threads,s=search radius (def=0.2)\n"
24 | "   --opt-reset        reset opt params at frame boundaries\n"
25 | "   --mt-mode=n        multi-threading level n=[0-2]\n"
26 | "   --zero-mean        zero-mean input\n"
27 | "   --adapt-block      adaptive frame splitting\n"
28 | "   --framelen=n       def=20 seconds\n"
29 | "   --sparse-pcm       enable pcm modelling\n";
30 | 
31 | class CmdLine {
32 |   enum CMODE {ENCODE,DECODE,LIST,LISTFULL};
33 |   public:
34 |     CmdLine();
35 |     int Parse(int argc,const char *argv[]);
36 |     int Process();
37 |   private:
38 |     double stod_safe(const std::string& str);
39 |     std::string CostStr(const FrameCoder::SearchCost cost_func);
40 |     std::string SearchStr(const FrameCoder::SearchMethod search_func);
41 |     void PrintMode();
42 |     void PrintWav(const AudioFile &myWav);
43 |     void Split(const std::string &str,std::string &key,std::string &val,const char splitval='=');
44 |     std::string sinputfile,soutputfile;
45 |     CMODE mode;
46 |     FrameCoder::tsac_cfg cfg;
47 | };
48 | 
49 | 
50 | #endif // CMDLINE_H
51 | 


--------------------------------------------------------------------------------
/src/common/alignbuf.h:
--------------------------------------------------------------------------------
 1 | #ifndef ALIGNBUF_H
 2 | #define ALIGNBUF_H
 3 | 
 4 | template <typename T, std::size_t align_t=64>
 5 | struct align_alloc {
 6 |     using value_type = T;
 7 | 
 8 |     template <typename U>
 9 |     struct rebind {
10 |         using other = align_alloc<U, align_t>;
11 |     };
12 | 
13 |     constexpr align_alloc() noexcept = default;
14 |     constexpr align_alloc(const align_alloc &) noexcept = default;
15 | 
16 | 
17 |     template <typename U>
18 |     constexpr align_alloc(const align_alloc<U, align_t> &) noexcept {}
19 | 
20 |     T* allocate(std::size_t n) {
21 |         auto ptr = static_cast<T*>(::operator new(n * sizeof(T), std::align_val_t(align_t)));
22 |         return ptr;
23 |     }
24 | 
25 |     void deallocate(T* p, std::size_t n) noexcept {
26 |         ::operator delete(p, n * sizeof(T), std::align_val_t(align_t));
27 |     }
28 | };
29 | 
30 | template <typename T, typename U, std::size_t align_t>
31 | bool operator==(const align_alloc<T, align_t>&, const align_alloc<U, align_t>&) noexcept {
32 |     return true;
33 | }
34 | 
35 | template <typename T, typename U, std::size_t align_t>
36 | bool operator!=(const align_alloc<T, align_t>&, const align_alloc<U, align_t>&) noexcept {
37 |     return false;
38 | }
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/src/common/bufio.h:
--------------------------------------------------------------------------------
 1 | #ifndef BUFIO_H
 2 | #define BUFIO_H
 3 | 
 4 | #include <vector>
 5 | #include <cstdint>
 6 | 
 7 | class BufIO {
 8 |   public:
 9 |       BufIO():buf(1024){Reset();};
10 |       explicit BufIO(int initsize):buf(initsize){Reset();};
11 |       void Reset(){bufpos=0;};
12 |       void PutByte(int val)
13 |       {
14 |         if (bufpos>=buf.size()) buf.resize(buf.size()*2);
15 |         buf[bufpos++]=val;
16 |       }
17 |       int GetByte() {
18 |         if (bufpos>=buf.size()) return -1;
19 |         else return buf[bufpos++];
20 |       }
21 |       size_t GetBufPos(){return bufpos;};
22 |       std::vector <uint8_t> &GetBuf(){return buf;};
23 |   private:
24 |      size_t bufpos;
25 |      std::vector <uint8_t>buf;
26 | };
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/src/common/histbuf.h:
--------------------------------------------------------------------------------
 1 | #ifndef HISTBUF_H
 2 | #define HISTBUF_H
 3 | 
 4 | #include "../global.h"
 5 | #include "alignbuf.h"
 6 | 
 7 | // rolling buffer, n must be power of two
 8 | template <typename T>
 9 | class HistBuffer {
10 |   public:
11 |       HistBuffer(int n)
12 |       :n(n),mask(n-1),pos(0),buf(n)
13 |       {
14 |       }
15 |       T& operator[](int idx) { return buf[(pos-idx)&mask];};
16 |       const T operator[](int idx) const { return buf[(pos-idx)&mask];};
17 |       void PushBack(T val)
18 |       {
19 |          buf[pos]=val;
20 |          pos=(pos+1)&mask;
21 |       }
22 |   private:
23 |     int n,mask,pos;
24 |     std::vector <T> buf;
25 | };
26 | 
// Circular buffer holding the last n pushed values.
// operator[] counts backwards in time: [0] is the newest value, [n-1] the oldest.
template <typename T>
class RollBuffer {
  public:
    explicit RollBuffer(int n)
    :n(n),pos(-1),buf(n)
    {
    }

    // value pushed idx steps ago (0 = most recent)
    const T& operator[](int idx) const { return buf[clamp_idx(pos-idx)]; }

    // advance the write position (wrapping at n) and store val there
    void push(T val)
    {
      pos=(pos+1>=n)?0:pos+1;
      buf[pos]=val;
    }

    // fold an index that left [0,n) by at most one period back into range
    int clamp_idx(int idx) const
    {
      if (idx<0) return idx+n;
      if (idx>=n) return idx-n;
      return idx;
    }

    // underlying storage in physical (not chronological) order
    const std::vector<T> &getbuf() {return buf;}
  private:
    int n,pos;
    std::vector <T> buf;
};
57 | 
58 | //circulating buffer, bi-partit
59 | //operator [] starts from 0=newest to oldest
60 | template <typename T>
61 | class RollBuffer2 {
62 |   public:
63 |     explicit RollBuffer2(std::size_t capacity)
64 |     :n(capacity),pos(0),buf(2*capacity)
65 |     {
66 |     }
67 |     void push(T val)
68 |     {
69 |       pos = (pos + n - 1) % n;
70 |       buf[pos] = val;
71 |       buf[pos+n] = val;
72 |     }
73 | 
74 |     const T& operator[](int index) const
75 |     {
76 |       return buf[pos + index];
77 |     }
78 | 
79 |     const std::span<const T> get_span() const {
80 |       return std::span<const T>{buf.data() + pos,n};
81 |     }
82 |     const T* data() const {
83 |       return buf.data() + pos;
84 |     }
85 |   private:
86 |     std::size_t n,pos;
87 |     std::vector<T, align_alloc<T> > buf;
88 | };
89 | 
90 | 
91 | #endif // HISTBUF_H
92 | 


--------------------------------------------------------------------------------
/src/common/math.h:
--------------------------------------------------------------------------------
  1 | #ifndef MATH_H
  2 | #define MATH_H
  3 | 
  4 | #include "../global.h"
  5 | #include <cassert>
  6 | #include <cmath>
  7 | #include <numeric>
  8 | #include <immintrin.h>
  9 | 
 10 | namespace slmath
 11 | {
 12 | 
 13 |   // inplace cholesky
 14 |   // matrix must be positive definite and symmetric
 15 |   class Cholesky
 16 |   {
 17 |     public:
 18 |       const double ftol=1E-8;
 19 |       Cholesky(int n)
 20 |       :n(n),G(n,vec1D(n))
 21 |       {
 22 | 
 23 |       }
 24 |       int Factor(const vec2D &matrix,const double nu)
 25 |       {
 26 |         for (int i=0;i<n;i++) //copy lower triangular matrix
 27 |           std::copy_n(begin(matrix[i]),i+1,begin(G[i]));
 28 | 
 29 |         for (int i=0;i<n;i++) {
 30 | 
 31 |           // off-diagonal
 32 |           for (int j=0;j<i;j++) {
 33 |             double sum=G[i][j];
 34 |             for (int k=0;k<j;k++) sum-=(G[i][k]*G[j][k]);
 35 |             G[i][j]=sum/G[j][j];
 36 |           }
 37 | 
 38 |           // diagonal
 39 |           double sum=G[i][i]+nu; //add regularization
 40 |           for (int k=0;k<i;k++) sum-=(G[i][k]*G[i][k]);
 41 |           if (sum>ftol) G[i][i]=std::sqrt(sum);
 42 |           else return 1;
 43 |         }
 44 |         return 0;
 45 |       }
 46 |       void Solve(const vec1D &b,vec1D &x)
 47 |       {
 48 |         for (int i=0;i<n;i++) {
 49 |           double sum=b[i];
 50 |           for (int j=0;j<i;j++) sum-=(G[i][j]*x[j]);
 51 |           x[i]=sum/G[i][i];
 52 |         }
 53 |         for (int i=n-1;i>=0;i--) {
 54 |           double sum=x[i];
 55 |           for (int j=i+1;j<n;j++) sum-=(G[j][i]*x[j]);
 56 |           x[i]=sum/G[i][i];
 57 |         }
 58 |       }
 59 |       int n;
 60 |       vec2D G;
 61 |   };
 62 | 
  // inner-product of x and y
  // Both spans must have the same length (checked by assert only).
  // When SACGlobalCfg::USE_AVX2 is set and n>=SACGlobalCfg::AVX2_MINN the bulk is
  // summed with AVX2/FMA intrinsics, the remaining elements with a scalar reduction.
  inline double dot(span_cf64 x,span_cf64 y)
  {
    assert(x.size()==y.size());
    const std::size_t n=x.size();
    double total=0.0;
    std::size_t i=0; // first index not yet covered by the vector loop

    if constexpr(SACGlobalCfg::USE_AVX2) {
      if (n>=SACGlobalCfg::AVX2_MINN)
      {
        // two independent accumulators, 8 doubles per iteration
        __m256d sum1 = _mm256_setzero_pd();
        __m256d sum2 = _mm256_setzero_pd();
        for (;i + 8 <= n;i += 8)
        {
          __m256d vx1 = _mm256_loadu_pd(&x[i]);
          __m256d vy1 = _mm256_loadu_pd(&y[i]);
          sum1 = _mm256_fmadd_pd(vx1, vy1, sum1);
          __m256d vx2 = _mm256_loadu_pd(&x[i + 4]);
          __m256d vy2 = _mm256_loadu_pd(&y[i + 4]);
          sum2 = _mm256_fmadd_pd(vx2, vy2, sum2);
        }
        // horizontal sum of the four lanes
        sum1 = _mm256_add_pd(sum1, sum2);
        alignas(32) double buffer[4];
        _mm256_store_pd(buffer, sum1);
        total = buffer[0] + buffer[1] + buffer[2] + buffer[3];
      }
    }

    // scalar tail: everything from i onwards (the whole range if the AVX2 path was skipped)
    total += std::transform_reduce(cbegin(x)+i,cend(x),cbegin(y)+i,0.0,
                                   std::plus<>(),std::multiplies<>());
    return total;
  }
 96 | 
  // calculate powersum for NLMS
  // Returns sum_i powtab[i]*x[i]^2 over the n=x.size() elements of x.
  // NOTE(review): unlike dot() there is no size check; powtab is indexed up to n-1.
  inline double calc_s2pow(span_cf64 x,span_cf64 powtab)
  {
    const std::size_t n=x.size();
    double spow=0.0;
    std::size_t i=0; // first index not yet covered by the vector loop

    if constexpr(SACGlobalCfg::USE_AVX2) {
      //powtab is assumed to be correctly aligned
      //(it is read with the aligned load _mm256_load_pd, x with the unaligned one)
      if (n>=SACGlobalCfg::AVX2_MINN) {
        __m256d sum_vec = _mm256_setzero_pd();
        for (; i + 4 <= n; i += 4) {
          __m256d x_vec = _mm256_loadu_pd(&x[i]);
          __m256d pow_vec = _mm256_load_pd(&powtab[i]);
          __m256d x_squared = _mm256_mul_pd(x_vec, x_vec);
          sum_vec = _mm256_fmadd_pd(pow_vec, x_squared, sum_vec);
        }

        // horizontal sum of the four lanes
        alignas(32) double buffer[4];
        _mm256_store_pd(buffer, sum_vec);
        spow = buffer[0] + buffer[1] + buffer[2] + buffer[3];
      }
    }

    // scalar tail: everything from i onwards (the whole range if the AVX2 path was skipped)
    spow += std::transform_reduce(cbegin(x)+i,cend(x),cbegin(powtab)+i,0.0,
                                  std::plus<>(),[](double xi, double pi) { return pi * (xi * xi);});

    return spow;
  }
126 | 
127 | 
128 |   // vector = matrix * vector
129 |   inline vec1D mul(const vec2D &m,const vec1D &v)
130 |   {
131 |     vec1D v_out(m.size());
132 |     for (std::size_t i=0;i<m.size();i++)
133 |       v_out[i]=slmath::dot(m[i],v);
134 |     return v_out;
135 |   }
136 | 
137 |   // vector = scalar * vector
138 |   inline vec1D mul(const double s,const vec1D &v)
139 |   {
140 |     vec1D v_out(v.size());
141 |     for (std::size_t i=0;i<v.size();i++)
142 |       v_out[i]=s*v[i];
143 |     return v_out;
144 |   }
145 | 
146 |   // matrix = matrix  * matrix
147 |   inline vec2D mul(const vec2D &m1, const vec2D &m2)
148 |   {
149 |     vec2D m_out(m1.size(), vec1D(m2[0].size()));
150 |     for (int j=0;j<(int)m_out.size();j++)
151 |       for (int i=0;i<(int)m_out[0].size();i++)
152 |       {
153 |         double sum=0;
154 |         for (int k=0;k<(int)m2.size();k++)
155 |           sum += m1[j][k]*m2[k][i];
156 |         m_out[j][i] = sum;
157 |       }
158 |     return m_out;
159 |   }
160 | 
161 |   //vector s1*v1 + s2*v2
162 |   inline vec1D mul_add(double s1,const vec1D &v1,double s2,const vec1D &v2)
163 |   {
164 |     assert(v1.size()==v2.size());
165 |     vec1D v_out(v1.size());
166 |     for (std::size_t i=0;i<v1.size();i++) {
167 |       v_out[i] = s1*v1[i] + s2*v2[i];
168 |     }
169 |     return v_out;
170 |   }
171 | 
172 |   // matrix s1*m1 + s2*m2
173 |   inline vec2D mul_add(double s1,const vec2D &m1,double s2,const vec2D &m2)
174 |   {
175 |    assert(m1.size()==m2.size());
176 |    vec2D m_out(m1.size());
177 |    for (std::size_t j=0;j<m1.size();j++)
178 |      m_out[j] = mul_add(s1,m1[j],s2,m2[j]);
179 | 
180 |    return m_out;
181 |   }
182 | 
183 |   // outer product of u*v^T
184 |   inline vec2D outer(const vec1D &u,const vec1D &v)
185 |   {
186 |     int nrows=u.size();
187 |     int ncols=v.size();
188 |     vec2D m_out(nrows, vec1D(ncols));
189 |     for (int j=0;j<nrows;j++)
190 |       for (int i=0;i<ncols;i++)
191 |         m_out[j][i]=u[j]*v[i];
192 |     return m_out;
193 |   }
194 | 
195 | };
196 | 
197 | #endif
198 | 
199 | 


--------------------------------------------------------------------------------
/src/common/md5.cpp:
--------------------------------------------------------------------------------
  1 | #include "md5.h"
  2 | 
// Initial values of the four 32-bit state words; Init() copies them to ctx->buffer[0..3].
#define A 0x67452301
#define B 0xefcdab89
#define C 0x98badcfe
#define D 0x10325476


/*
 * Bit-manipulation functions defined by the MD5 algorithm
 *
 * Step() selects one of them per group of 16 iterations (F, G, H, then I).
 * The macro arguments are not parenthesised, so only pass plain identifiers.
 */
#define F(X, Y, Z) ((X & Y) | (~X & Z))
#define G(X, Y, Z) ((X & Z) | (Y & ~Z))
#define H(X, Y, Z) (X ^ Y ^ Z)
#define I(X, Y, Z) (Y ^ (X | ~Z))
 16 | 
 17 | 
// Rotate amounts, one per iteration of Step() (passed to rotateLeft as S[i]).
static uint32_t S[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
                       5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20,
                       4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
                       6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};

// Additive constants, one per iteration of Step() (added as K[i]).
static uint32_t K[] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
                       0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
                       0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
                       0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
                       0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
                       0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
                       0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
                       0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
                       0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
                       0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
                       0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
                       0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
                       0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
                       0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
                       0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
                       0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};

/*
 * Padding used to make the size (in bits) of the input congruent to 448 mod 512
 *
 * 64 bytes: a single 0x80 followed by zeros. Finalize() feeds a prefix of this
 * array to Update(), which takes a non-const pointer.
 */
static uint8_t PADDING[] = {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
 51 | 
 52 | uint32_t MD5::rotateLeft(uint32_t x, uint32_t n)
 53 | {
 54 |     return (x << n) | (x >> (32 - n));
 55 | }
 56 | 
 57 | void MD5::Init(MD5Context *ctx){
 58 |     ctx->size = (uint64_t)0;
 59 | 
 60 |     ctx->buffer[0] = (uint32_t)A;
 61 |     ctx->buffer[1] = (uint32_t)B;
 62 |     ctx->buffer[2] = (uint32_t)C;
 63 |     ctx->buffer[3] = (uint32_t)D;
 64 | }
 65 | 
 66 | /*
 67 |  * Add some amount of input to the context
 68 |  *
 69 |  * If the input fills out a block of 512 bits, apply the algorithm (md5Step)
 70 |  * and save the result in the buffer. Also updates the overall size.
 71 |  */
 72 | void MD5::Update(MD5Context *ctx, uint8_t *input_buffer, size_t input_len){
 73 |     uint32_t input[16];
 74 |     unsigned int offset = ctx->size % 64;
 75 |     ctx->size += (uint64_t)input_len;
 76 | 
 77 |     // Copy each byte in input_buffer into the next space in our context input
 78 |     for(unsigned int i = 0; i < input_len; ++i){
 79 |         ctx->input[offset++] = (uint8_t)*(input_buffer + i);
 80 | 
 81 |         // If we've filled our context input, copy it into our local array input
 82 |         // then reset the offset to 0 and fill in a new buffer.
 83 |         // Every time we fill out a chunk, we run it through the algorithm
 84 |         // to enable some back and forth between cpu and i/o
 85 |         if(offset % 64 == 0){
 86 |             for(unsigned int j = 0; j < 16; ++j){
 87 |                 // Convert to little-endian
 88 |                 // The local variable `input` our 512-bit chunk separated into 32-bit words
 89 |                 // we can use in calculations
 90 |                 input[j] = (uint32_t)(ctx->input[(j * 4) + 3]) << 24 |
 91 |                            (uint32_t)(ctx->input[(j * 4) + 2]) << 16 |
 92 |                            (uint32_t)(ctx->input[(j * 4) + 1]) <<  8 |
 93 |                            (uint32_t)(ctx->input[(j * 4)]);
 94 |             }
 95 |             Step(ctx->buffer, input);
 96 |             offset = 0;
 97 |         }
 98 |     }
 99 | }
100 | 
101 | /*
102 |  * Pad the current input to get to 448 bytes, append the size in bits to the very end,
103 |  * and save the result of the final iteration into digest.
104 |  */
105 | void MD5::Finalize(MD5Context *ctx){
106 |     uint32_t input[16];
107 |     unsigned int offset = ctx->size % 64;
108 |     unsigned int padding_length = offset < 56 ? 56 - offset : (56 + 64) - offset;
109 | 
110 |     // Fill in the padding and undo the changes to size that resulted from the update
111 |     Update(ctx, PADDING, padding_length);
112 |     ctx->size -= (uint64_t)padding_length;
113 | 
114 |     // Do a final update (internal to this function)
115 |     // Last two 32-bit words are the two halves of the size (converted from bytes to bits)
116 |     for(unsigned int j = 0; j < 14; ++j){
117 |         input[j] = (uint32_t)(ctx->input[(j * 4) + 3]) << 24 |
118 |                    (uint32_t)(ctx->input[(j * 4) + 2]) << 16 |
119 |                    (uint32_t)(ctx->input[(j * 4) + 1]) <<  8 |
120 |                    (uint32_t)(ctx->input[(j * 4)]);
121 |     }
122 |     input[14] = (uint32_t)(ctx->size * 8);
123 |     input[15] = (uint32_t)((ctx->size * 8) >> 32);
124 | 
125 |     Step(ctx->buffer, input);
126 | 
127 |     // Move the result into digest (convert from little-endian)
128 |     for(unsigned int i = 0; i < 4; ++i){
129 |         ctx->digest[(i * 4) + 0] = (uint8_t)((ctx->buffer[i] & 0x000000FF));
130 |         ctx->digest[(i * 4) + 1] = (uint8_t)((ctx->buffer[i] & 0x0000FF00) >>  8);
131 |         ctx->digest[(i * 4) + 2] = (uint8_t)((ctx->buffer[i] & 0x00FF0000) >> 16);
132 |         ctx->digest[(i * 4) + 3] = (uint8_t)((ctx->buffer[i] & 0xFF000000) >> 24);
133 |     }
134 | }
135 | 
136 | /*
137 |  * Step on 512 bits of input with the main MD5 algorithm.
138 |  */
139 | void MD5::Step(uint32_t *buffer, uint32_t *input){
140 |     uint32_t AA = buffer[0];
141 |     uint32_t BB = buffer[1];
142 |     uint32_t CC = buffer[2];
143 |     uint32_t DD = buffer[3];
144 | 
145 |     uint32_t E;
146 | 
147 |     unsigned int j;
148 | 
149 |     for(unsigned int i = 0; i < 64; ++i){
150 |         switch(i / 16){
151 |             case 0:
152 |                 E = F(BB, CC, DD);
153 |                 j = i;
154 |                 break;
155 |             case 1:
156 |                 E = G(BB, CC, DD);
157 |                 j = ((i * 5) + 1) % 16;
158 |                 break;
159 |             case 2:
160 |                 E = H(BB, CC, DD);
161 |                 j = ((i * 3) + 5) % 16;
162 |                 break;
163 |             default:
164 |                 E = I(BB, CC, DD);
165 |                 j = (i * 7) % 16;
166 |                 break;
167 |         }
168 | 
169 |         uint32_t temp = DD;
170 |         DD = CC;
171 |         CC = BB;
172 |         BB = BB + rotateLeft(AA + E + K[i] + input[j], S[i]);
173 |         AA = temp;
174 |     }
175 | 
176 |     buffer[0] += AA;
177 |     buffer[1] += BB;
178 |     buffer[2] += CC;
179 |     buffer[3] += DD;
180 | }
181 | 


--------------------------------------------------------------------------------
/src/common/md5.h:
--------------------------------------------------------------------------------
 1 | #ifndef MD5_H
 2 | #define MD5_H
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | namespace MD5 {
 7 |   typedef struct{
 8 |     uint64_t size;        // Size of input in bytes
 9 |     uint32_t buffer[4];   // Current accumulation of hash
10 |     uint8_t input[64];    // Input to be used in the next step
11 |     uint8_t digest[16];   // Result of algorithm
12 |   } MD5Context;
13 | 
14 | /*
15 |  * Rotates a 32-bit word left by n bits
16 |  */
17 |   uint32_t rotateLeft(uint32_t x, uint32_t n);
18 |   void Init(MD5Context *ctx);
19 |   void Update(MD5Context *ctx, uint8_t *input, size_t input_len);
20 |   void Finalize(MD5Context *ctx);
21 |   void Step(uint32_t *buffer, uint32_t *input);
22 | };
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/common/rand.h:
--------------------------------------------------------------------------------
 1 | #ifndef RAND_H
 2 | #define RAND_H
 3 | 
 4 | #include <random>
 5 | 
 6 | class Random {
 7 |   public:
 8 |     Random():engine(time(0)){};
 9 |     Random(uint32_t seed):engine(seed){};
10 |     double r_01() { // [0,1)
11 |       return std::uniform_real_distribution<double>{0,1}(engine);
12 |     };
13 |     double r_01open() { // (0,1)
14 |       return std::uniform_real_distribution<double>{std::nextafter(0.0, std::numeric_limits<double>::max()),1.0}(engine);
15 |     };
16 |     double r_01closed() { // [0,1]
17 |       return std::uniform_real_distribution<double>{0,std::nextafter(1.0, std::numeric_limits<double>::max())}(engine);
18 |     };
19 |     double r_int(double imin,double imax) { //double in [imin,imax]
20 |       return std::uniform_real_distribution<double>{imin,std::nextafter(imax, std::numeric_limits<double>::max())}(engine);
21 |     };
22 |     uint32_t ru_int(uint32_t imin,uint32_t imax) { //int in [imin,imax]
23 |       return std::uniform_int_distribution<uint32_t>{imin, imax}(engine);
24 |     };
25 |     double r_norm(double mu=0.0,double sigma=1.0) { // normal
26 |       return std::normal_distribution<double>{mu,sigma}(engine);
27 |     }
28 |     double r_cauchy(double mu,double sigma) { // normal
29 |       return std::cauchy_distribution<double>{mu,sigma}(engine);
30 |     }
31 |     double r_lognorm(double mu,double sigma) { // log-normal
32 |       return exp(std::normal_distribution<double>{mu,sigma}(engine));
33 |     }
34 |     uint32_t ru_geo(double p) { // geometric
35 |       return std::geometric_distribution<uint32_t>{p}(engine);
36 |     }
37 |     uint32_t ru_poi(double lambda) { // poisson
38 |       return std::poisson_distribution<uint32_t>{lambda}(engine);
39 |     }
40 |     bool event(double p) {
41 |       if (r_01()<p) return true;
42 |       else return false;
43 |     };
44 |   private:
45 |     std::mt19937 engine;
46 | };
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/src/common/timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef TIMER_H
 2 | #define TIMER_H
 3 | 
 4 | #include "../global.h"
 5 | 
 6 | // time measuring via C++ 11 chrono
 7 | class Timer {
 8 |   using Clock=std::chrono::high_resolution_clock;
 9 |   using durationS=std::chrono::duration<double, std::ratio<1>>;
10 |   using durationMS=std::chrono::duration<double, std::milli>;
11 |   using Timepoint=Clock::time_point;
12 |   public:
13 |     void start() {tstart=Clock::now();};
14 |     void stop() {tstop=Clock::now();};
15 |     double elapsedMS() {
16 |       durationMS elapsed=tstop-tstart;
17 |       return elapsed.count();
18 |       //cout << std::chrono::duration_cast<double,TimeT>(tstop - tstart).count() << endl;
19 |       //return std::chrono::duration_cast<TimeT>(tstop - tstart).count();
20 |     };
21 |     double elapsedS() {
22 |       durationS elapsed=tstop-tstart;
23 |       return elapsed.count();
24 |     };
25 |   private:
26 |     Timepoint tstart,tstop;
27 | };
28 | #endif // TIMER_H
29 | 


--------------------------------------------------------------------------------
/src/common/utils.cpp:
--------------------------------------------------------------------------------
 1 | #include "utils.h"
 2 | 
 3 | 
 4 | namespace BitUtils
 5 | {
 6 |   uint32_t get32HL(const uint8_t *buf)
 7 |   {
 8 |     return((uint32_t)buf[3] + ((uint32_t)buf[2] << 8) +((uint32_t)buf[1] << 16) + ((uint32_t)buf[0] << 24));
 9 |   }
10 |   uint16_t get16LH(const uint8_t *buf)
11 |   {
12 |     return((uint16_t)buf[0] + ((uint16_t)buf[1] << 8));
13 |   }
14 |   uint32_t get32LH(const uint8_t *buf)
15 |   {
16 |     return((uint32_t)buf[0] + ((uint32_t)buf[1] << 8) +((uint32_t)buf[2] << 16) + ((uint32_t)buf[3] << 24));
17 |   }
18 |   void put16LH(uint8_t *buf,uint16_t val)
19 |   {
20 |     buf[0] = val & 0xff;
21 |     buf[1] = (val>>8) & 0xff;
22 |   }
23 |   void put32LH(uint8_t *buf,uint32_t val)
24 |   {
25 |     buf[0] = val & 0xff;
26 |     buf[1] = (val>>8) & 0xff;
27 |     buf[2] = (val>>16) & 0xff;
28 |     buf[3] = (val>>24) & 0xff;
29 |   }
30 |   std::string U322Str(uint32_t val)
31 |   {
32 |     std::string s;
33 |     for (int i=0;i<4;i++) {s+=(char)(val & 0xff);val>>=8;};
34 |     return s;
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/common/utils.h:
--------------------------------------------------------------------------------
  1 | #ifndef UTILS_H
  2 | #define UTILS_H
  3 | 
  4 | #include "../global.h"
  5 | #include "math.h"
  6 | 
  7 | #include <algorithm>
  8 | #include <numeric>
  9 | #include <cstring>
 10 | #include <cmath>
 11 | #include <immintrin.h>
 12 | 
 13 | // running exponential smoothing
 14 | // sum=alpha*sum+(1.0-alpha)*val, where 1/(1-alpha) is the mean number of samples considered
 15 | class RunExp {
 16 |   public:
 17 |       RunExp(double alpha):sum(0.0),alpha(alpha){};
 18 |       RunExp(double alpha,double sum):sum(sum),alpha(alpha){};
 19 |       inline void Update(double val) {
 20 |         sum=alpha*sum+(1.-alpha)*val;
 21 |       }
 22 |     double sum;
 23 |   private:
 24 |     double alpha;
 25 | };
 26 | 
 27 | // running weighted sum: sum_{i=0}^n alpha^(n-i) val
 28 | class RunWeight {
 29 |   public:
 30 |       RunWeight(double alpha):sum(0.0),alpha(alpha){};
 31 |       inline void Update(double val) {
 32 |         sum=alpha*sum+val;
 33 |       }
 34 |     double sum;
 35 |   private:
 36 |     double alpha;
 37 | };
 38 | 
 39 | template <int tEMA=1,int tbias_corr=0>
 40 | class RunSum {
 41 |   public:
 42 |     RunSum(double alpha)
 43 |     :alpha(alpha),power_alpha(1.0),sum(0.0)
 44 |     {
 45 |     }
 46 |     inline void Update(double val)
 47 |     {
 48 |       if constexpr(tEMA)
 49 |         sum = alpha*sum + (1.0-alpha)*val;
 50 |       else
 51 |         sum = alpha*sum + val;
 52 | 
 53 |       if constexpr(tbias_corr)
 54 |         power_alpha*=alpha;
 55 |     }
 56 |     inline double Get() const
 57 |     {
 58 |       if constexpr(tbias_corr)
 59 |       {
 60 |         const double denom=1.0-power_alpha;
 61 |         assert(denom>0); // must call Update() before first Get()
 62 |         return sum/denom;
 63 |       } else
 64 |         return sum;
 65 |     }
 66 |   protected:
 67 |     double alpha,power_alpha,sum;
 68 | };
 69 | 
 70 | using RunSumEMA = RunSum<1,0>;
 71 | using RunSumEMA_BC = RunSum<1,1>;
 72 | using RunSumGEO = RunSum<0,0>;
 73 | using RunSumGEO_BC = RunSum<0,1>;
 74 | 
 75 | class RunMeanVar {
 76 |   public:
 77 |     RunMeanVar(double alpha)
 78 |     :alpha_(alpha),first_(true)
 79 |     {
 80 |       mean_ = var_ = 0.0;
 81 |     }
 82 |     void Update(double val)
 83 |     {
 84 |       /*if (first_) {
 85 |         mean_ = val;
 86 |         var_ = 0.0;
 87 |         first_ = false;
 88 |       } else*/
 89 |       {
 90 |         #if 0
 91 |           double delta = val - mean_;
 92 |           mean_ += (1 - alpha_) * delta;
 93 |           var_ = alpha_ * var_ + (1 - alpha_) * delta * delta;
 94 |         #else // slightly more stable
 95 |           // Welford
 96 |           double old_mean = mean_;
 97 |           mean_=alpha_*mean_+(1.0-alpha_)*val;
 98 |           var_=alpha_*var_+(1.0-alpha_)*((val-old_mean)*(val-mean_));
 99 |         #endif
100 |       }
101 |     }
102 |     auto get()
103 |     {
104 |       return std::pair{mean_,std::max(0.0,var_)};
105 |     }
106 |   protected:
107 |     double alpha_;
108 |     bool first_;
109 |     double mean_,var_;
110 | };
111 | 
namespace StrUtils {
      // Upper-cases str in place. Characters go through unsigned char first,
      // because toupper is undefined for negative values other than EOF.
      inline void StrUpper(std::string &str)
      {
        std::transform(str.begin(), str.end(),str.begin(),
                       [](unsigned char c) { return static_cast<char>(::toupper(c)); });
      }
      // Returns an upper-cased copy of str (same conversion as StrUpper).
      inline std::string str_up(const std::string &str)
      {
        std::string ts=str;
        for (auto &c:ts) c=static_cast<char>(::toupper(static_cast<unsigned char>(c)));
        return ts;
      }
      // Splits str at any character of delimiters and appends the non-empty
      // pieces to tokens. Runs of delimiters count as one separator.
      inline void SplitToken(const std::string& str,std::vector<std::string>& tokens,const std::string& delimiters)
      {
        auto lastPos = str.find_first_not_of(delimiters, 0); // start of the first token
        auto pos     = str.find_first_of(delimiters, lastPos); // delimiter that ends it

        while (std::string::npos != pos || std::string::npos != lastPos)  {
          tokens.push_back(str.substr(lastPos, pos - lastPos));
          lastPos = str.find_first_not_of(delimiters, pos); // start of the next token
          pos = str.find_first_of(delimiters, lastPos); // delimiter that ends it
        }
      }
      // Strips leading and trailing characters contained in whites;
      // str becomes empty if it consists of such characters only.
      inline void RemoveWhite(std::string &str,const std::string &whites)
      {
        auto firstPos = str.find_first_not_of(whites);

        if (firstPos!=std::string::npos) {
          auto lastPos = str.find_last_not_of(whites);
          str=str.substr(firstPos,lastPos-firstPos+1);
        } else str="";
      }
      // Parses a comma separated list of numbers and appends them to x.
      // Uses std::stof, which throws on tokens that are not numbers.
      inline void SplitFloat(const std::string &str,std::vector<float>&x)
      {
        std::vector <std::string> tokens;
        SplitToken(str,tokens,",");
        for (auto &token:tokens) x.push_back(std::stof(token));
      }
};
150 | 
151 | namespace MathUtils {
152 | 
153 |   inline double calc_loglik_L1(double abs_e,double b)
154 |   {
155 |     return -std::log(2*b) - abs_e / b;
156 |   }
157 |   inline double calc_loglik_L2(double sq_e,double sigma2)
158 |   {
159 |     return -0.5*std::log(2*M_PI*sigma2) - 0.5 * sq_e / sigma2;
160 |   }
161 | 
162 |   // inverse of pos. def. symmetric matrix
163 |   class InverseSym
164 |   {
165 |     public:
166 |       InverseSym(int n)
167 |       :chol(n),n(n),b(n)
168 |       {
169 | 
170 |       }
171 |       void Solve(const vec2D &matrix,vec2D &sol,const double nu=0.0)
172 |       {
173 |         if (!chol.Factor(matrix,nu)) {
174 |           for (int i=0;i<n;i++) {
175 |              std::fill(std::begin(b),std::end(b),0.0);
176 |              b[i]=1.0;
177 |              chol.Solve(b,sol[i]);
178 |           }
179 |         };
180 |       }
181 |     protected:
182 |       slmath::Cholesky chol;
183 |       int n;
184 |       vec1D b;
185 |   };
186 | 
187 |   // estimate running covariance of vectors of len n
188 |   class EstCov {
189 |     public:
190 |       EstCov(int n,double alpha=0.998,double init_val=1.0)
191 |       :mcov(n,vec1D(n)),
192 |       n(n),alpha(alpha)
193 |       {
194 |         for (int i=0;i<n;i++)
195 |           mcov[i][i]=init_val;
196 |       }
197 |       void Update(const vec1D &x)
198 |       {
199 |         for (int j=0;j<n;j++) {
200 |           for (int i=0;i<n;i++) {
201 |             mcov[j][i] = alpha*mcov[j][i] + (1.0-alpha)*x[i]*x[j];
202 |           }
203 |         }
204 |       }
205 |       vec2D mcov;
206 |       int n;
207 |       double alpha;
208 |   };
209 | 
210 | 
211 |   template <typename T>
212 |   T med3(T a,T b,T c)
213 |   {
214 |     if ((a<b && b<c) || (c<b && b<a)) {
215 |       return b;
216 |     } else if ((b < a && a < c) || (c < a && a < b)) {
217 |       return a;
218 |     } else
219 |       return c;
220 |   }
221 | 
222 |   inline int iLog2(int val) {
223 |     int nbits=0;
224 |     while (val>>=1) nbits++;
225 |     return nbits;
226 |   }
227 |   inline double SumDiff(const std::vector<double> &v1,const std::vector<double> &v2)
228 |   {
229 |      if (v1.size()!=v2.size()) return -1;
230 |      else {
231 |        double sum=0.;
232 |        for (size_t i=0;i<v1.size();i++) sum+=fabs(v1[i]-v2[i]);
233 |        return sum;
234 |      }
235 |   }
236 |   inline int32_t S2U(int32_t val)
237 |   {
238 |     if (val<0) val=2*(-val);
239 |     else if (val>0) val=(2*val)-1;
240 |     return val;
241 |   }
242 |   inline int32_t U2S(int32_t val)
243 |   {
244 |     if (val&1) val=((val+1)>>1);
245 |     else val=-(val>>1);
246 |     return val;
247 |   }
248 |   inline double norm2(const std::vector<double> &vec1,const std::vector<double> &vec2)
249 |   {
250 |      if (vec1.size()!=vec2.size()) return 0;
251 |      else {
252 |        double sum=0.;
253 |        for (size_t i=0;i<vec1.size();i++) {double t=vec1[i]-vec2[i];sum+=t*t;};
254 |        return sqrt(sum);
255 |      }
256 |   }
257 |   inline double mean(const std::vector<double> &vec)
258 |   {
259 |     if (vec.size()) {
260 |       double sum=std::accumulate(begin(vec),end(vec),0.0);
261 |       return sum/static_cast<double>(vec.size());
262 |     }
263 |     return 0;
264 |   }
265 |   //contraharmonic mean
266 |   inline double meanL(const std::vector<double> &vec)
267 |   {
268 |     if (vec.size()) {
269 |       double sum0=0.0;
270 |       double sum1=0.0;
271 |       for (size_t i=0;i<vec.size();++i) {
272 |         sum0+=(vec[i]*vec[i]);
273 |         sum1+=vec[i];
274 |       }
275 |       if (sum1>0.0) return sum0 / sum1;
276 |       else return 0.;
277 |     }
278 |     return 0.;
279 |   }
280 |   inline double linear_map_n(int n0,int n1,double y0,double y1,int idx)
281 |   {
282 |     double dx = static_cast<double>(n1-n0);
283 |     double dy = y1-y0;
284 |     return idx*(dy/dx)+y0;
285 |   }
286 |   template <typename T>
287 |   T sgn(T x) {
288 |     return (x > 0) - (x < 0);
289 |     /*if (x>0) return 1;
290 |     if (x<0) return -1;
291 |     return 0;*/
292 |   }
293 | };
294 | 
295 | namespace miscUtils {
296 | 
297 |   enum class MapMode {rec,exp,tanh,power,sigmoid};
298 | 
299 |   template <MapMode mode>
300 |   double decay_map(double gamma, double val)
301 |   {
302 |     if constexpr (mode == MapMode::rec) return 1.0 / (1.0 + gamma * val);
303 |     else if constexpr (mode == MapMode::exp) return std::exp(-gamma * val);
304 |     else if constexpr (mode == MapMode::tanh) return 1.0-std::tanh(gamma * val);
305 |     else if constexpr (mode == MapMode::power) return std::pow(gamma, val);
306 |     else if constexpr (mode == MapMode::sigmoid) return 1.0 / (1.0 + std::exp(gamma*(val-1.0)));
307 |     return 0;
308 |   }
309 | 
310 | /*static float rsqrt(float __x)
311 | {
312 |     float reciprocal;
313 |     __asm__ __volatile__ (
314 |         "movss %1, %%xmm0\n"
315 |         "rsqrtss %%xmm0, %%xmm1\n"
316 |         "movss %%xmm1, %0\n"
317 |         :"=m"(reciprocal)
318 |         :"m"(__x)
319 |         :"xmm0", "xmm1"
320 |     );
321 |   return reciprocal;
322 | }*/
323 |   template <typename T>
324 |   void swap_erase(std::vector<T>& e, std::size_t idx)
325 |   {
326 |     if (idx < e.size()) {
327 |       std::swap(e[idx], e.back());
328 |       e.pop_back();
329 |     }
330 |   }
331 | 
332 |   inline void RollBack(vec1D &data,double input)
333 |   {
334 |     if (data.size()) {
335 |       std::memmove(&data[1],&data[0],(data.size()-1)*sizeof(double));
336 |       data[0]=input;
337 |     }
338 |   }
339 |   inline std::string getTimeStrFromSamples(int64_t numsamples,int64_t samplerate)
340 |   {
341 |    std::ostringstream ss;
342 |    int h,m,s,ms;
343 |    h=m=s=ms=0;
344 |    if (numsamples>0 && samplerate>0) {
345 |      while (numsamples >= 3600*samplerate) {++h;numsamples-=3600*samplerate;};
346 |      while (numsamples >= 60*samplerate) {++m;numsamples-=60*samplerate;};
347 |      while (numsamples >= samplerate) {++s;numsamples-=samplerate;};
348 |      ms=round((numsamples*1000.)/samplerate);
349 |    }
350 |    ss << std::setfill('0') << std::setw(2) << h << ":" << std::setw(2) << m << ":" << std::setw(2) << s << "." << ms;
351 |    return ss.str();
352 |  }
353 |  inline std::string getTimeStrFromSeconds(int seconds)
354 |  {
355 |    std::ostringstream ss;
356 |    int h,m,s;
357 |    h=m=s=0;
358 |    if (seconds>0) {
359 |       while (seconds >= 3600) {++h;seconds-=3600;};
360 |       while (seconds >= 60) {++m;seconds-=60;};
361 |       s=seconds;
362 |    }
363 |    ss << std::setfill('0') << std::setw(2) << h << ":" << std::setw(2) << m << ":" << std::setw(2) << s;
364 |    return ss.str();
365 |  }
366 |  inline std::string ConvertFixed(double val,int digits)
367 |  {
368 |    std::ostringstream ss;
369 |    ss << std::fixed << std::setprecision(digits) << val;
370 |    return ss.str();
371 |  }
372 | };
373 | 
namespace BitUtils {
  // byte-order helpers (HL: most significant byte first, LH: least significant byte first)
  uint32_t get32HL(const uint8_t *buf);
  uint32_t get32LH(const uint8_t *buf);
  uint16_t get16LH(const uint8_t *buf);
  void put16LH(uint8_t *buf,uint16_t val);
  void put32LH(uint8_t *buf,uint32_t val);
  // 4 bytes of val as a 4-character string
  std::string U322Str(uint32_t val);

  // number of bits needed to represent m (0 for m==0)
  inline int32_t count_bits32(uint32_t m)
  {
    #ifdef __GNUC__
      // __builtin_clz is not defined for 0, hence the explicit case
      if (m == 0) return 0;
      return 32 - __builtin_clz(m);
    #else
      return std::bit_width(m);
    #endif
  }
}
391 | 
392 | #endif // UTILS_H
393 | 


--------------------------------------------------------------------------------
/src/file/file.cpp:
--------------------------------------------------------------------------------
 1 | #include "file.h"
 2 | 
 3 | std::streampos AudioFile::readFileSize()
 4 | {
 5 |     std::streampos oldpos=file.tellg();
 6 |     file.seekg(0,std::ios_base::end);
 7 |     std::streampos fsize = file.tellg();
 8 |     file.seekg(oldpos);
 9 |     return fsize;
10 | }
11 | 
12 | int AudioFile::OpenRead(const std::string &fname)
13 | {
14 |     file.open(fname,std::ios_base::in|std::ios_base::binary);
15 |     if (file.is_open()) {filesize=readFileSize();return 0;}
16 |     else return 1;
17 | }
18 | 
19 | int AudioFile::OpenWrite(const std::string &fname)
20 | {
21 |   file.open(fname,std::ios_base::out|std::ios_base::binary);
22 |   if (file.is_open()) return 0;
23 |   else return 1;
24 | }
25 | 
26 | void AudioFile::ReadData(std::vector <uint8_t>&data,size_t len)
27 | {
28 |   if (data.size()<len) data.resize(len);
29 |   file.read(reinterpret_cast<char*>(&data[0]),len);
30 | }
31 | 
32 | void AudioFile::WriteData(const std::vector <uint8_t>&data,size_t len)
33 | {
34 |   file.write(reinterpret_cast<const char*>(&data[0]),len);
35 | }
36 | 


--------------------------------------------------------------------------------
/src/file/file.h:
--------------------------------------------------------------------------------
 1 | #ifndef FILE_H
 2 | #define FILE_H
 3 | 
 4 | //#include "../global.h"
 5 | #include <cstdint>
 6 | #include <string>
 7 | #include <vector>
 8 | #include <fstream>
 9 | 
// Common base of the audio containers: owns the file stream together with
// the basic stream parameters (rate, width, channel and sample counts).
class AudioFile
{
  public:
    // Unopened file, all stream parameters zero.
    AudioFile() = default;

    // Copies the stream parameters only: the stream itself is not copied,
    // the file size is not carried over and kbps restarts at 0.
    AudioFile(const AudioFile &other)
    : samplerate(other.getSampleRate()),
      bitspersample(other.getBitsPerSample()),
      numchannels(other.getNumChannels()),
      numsamples(other.getNumSamples()),
      kbps(0)
    {
    }

    // Both return 0 on success and 1 on failure.
    int OpenRead(const std::string &fname);
    int OpenWrite(const std::string &fname);

    // Size cached by OpenRead().
    std::streampos getFileSize() const { return filesize; }
    int getNumChannels() const { return numchannels; }
    int getSampleRate() const { return samplerate; }
    int getBitsPerSample() const { return bitspersample; }
    int getKBPS() const { return kbps; }
    void setKBPS(int kbps) { this->kbps = kbps; }
    int getNumSamples() const { return numsamples; }

    // Queries the size of the open stream, preserving the read position.
    std::streampos readFileSize();

    void Close()
    {
      if (file.is_open())
        file.close();
    }

    // Raw byte transfer of len bytes from/to the stream.
    void ReadData(std::vector <uint8_t>&data,size_t len);
    void WriteData(const std::vector <uint8_t>&data,size_t len);

    std::fstream file;

  protected:
    std::streampos filesize;
    int samplerate = 0;
    int bitspersample = 0;
    int numchannels = 0;
    int numsamples = 0;
    int kbps = 0;
};
36 | #endif // FILE_H
37 | 


--------------------------------------------------------------------------------
/src/file/sac.cpp:
--------------------------------------------------------------------------------
 1 | #include "sac.h"
 2 | #include "../common/utils.h"
 3 | #include <iostream>
 4 | 
 5 | void Sac::WriteMD5(uint8_t digest[16])
 6 | {
 7 |   file.write(reinterpret_cast<char*>(digest),16);
 8 | }
 9 | 
10 | void Sac::ReadMD5(uint8_t digest[16])
11 | {
12 |   file.read(reinterpret_cast<char*>(digest), 16);
13 | }
14 | 
15 | int Sac::WriteSACHeader(Wav &myWav)
16 | {
17 |   Chunks &myChunks=myWav.GetChunks();
18 |   uint8_t buf[32];
19 |   std::vector <uint8_t>metadata;
20 |   buf[0]='S';
21 |   buf[1]='A';
22 |   buf[2]='C';
23 |   buf[3]='2';
24 |   BitUtils::put16LH(buf+4,numchannels);
25 |   BitUtils::put32LH(buf+6,samplerate);
26 |   BitUtils::put16LH(buf+10,bitspersample);
27 |   BitUtils::put32LH(buf+12,numsamples);
28 |   buf[16] = mcfg.max_framelen;
29 |   buf[17] = 0;
30 | 
31 |   // write wav meta data
32 |   const uint32_t metadatasize=myChunks.GetMetaDataSize();
33 |   BitUtils::put32LH(buf+18,metadatasize);
34 |   file.write((char*)buf,22);
35 |   if (myChunks.PackMetaData(metadata)!=metadatasize) std::cerr << "  warning: metadatasize mismatch\n";
36 |   WriteData(metadata,metadatasize);
37 |   return 0;
38 | }
39 | 
40 | int Sac::UnpackMetaData(Wav &myWav)
41 | {
42 |   size_t unpackedbytes=myWav.GetChunks().UnpackMetaData(metadata);
43 |   if (mcfg.metadatasize!=unpackedbytes) {std::cerr << "  warning: unpackmetadata mismatch\n";return 1;}
44 |   return 0;
45 | }
46 | 
47 | int Sac::ReadSACHeader()
48 | {
49 |   uint8_t buf[32];
50 |   file.read((char*)buf,22);
51 |   if (buf[0]=='S' && buf[1]=='A' && buf[2]=='C' && buf[3]=='2') {
52 |     numchannels=BitUtils::get16LH(buf+4);
53 |     samplerate=BitUtils::get32LH(buf+6);
54 |     bitspersample=BitUtils::get16LH(buf+10);
55 |     numsamples=BitUtils::get32LH(buf+12);
56 |     mcfg.max_framelen=buf[16];
57 |     mcfg.metadatasize=BitUtils::get32LH(buf+18);
58 |     ReadData(metadata,mcfg.metadatasize);
59 |     mcfg.max_framesize=samplerate*static_cast<uint32_t>(mcfg.max_framelen);
60 |     return 0;
61 |   } else return 1;
62 | }
63 | 


--------------------------------------------------------------------------------
/src/file/sac.h:
--------------------------------------------------------------------------------
 1 | #ifndef SAC_H
 2 | #define SAC_H
 3 | 
 4 | #include "file.h"
 5 | #include "wav.h"
 6 | 
 7 | struct tFrameHeader {
 8 | 
 9 | };
10 | 
11 | class Sac : public AudioFile
12 | {
13 |   public:
14 |     struct sac_cfg
15 |     {
16 |       uint8_t max_framelen=0;
17 | 
18 |       uint32_t max_framesize=0;
19 |       uint32_t metadatasize=0;
20 |     } mcfg;
21 |     Sac():AudioFile(){};
22 |     Sac(Wav &file)
23 |     :AudioFile(file)
24 |     {
25 | 
26 |     }
27 |     void WriteFrameHeader(tFrameHeader &hdr);
28 |     int WriteSACHeader(Wav &myWav);
29 |     void WriteMD5(uint8_t digest[16]);
30 |     void ReadMD5(uint8_t digest[16]);
31 |     int ReadSACHeader();
32 |     int UnpackMetaData(Wav &myWav);
33 |     std::vector <uint8_t>metadata;
34 | };
35 | 
36 | 
37 | #endif // SAC_H
38 | 


--------------------------------------------------------------------------------
/src/file/wav.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <format>
  3 | #include "../common/utils.h"
  4 | #include "wav.h"
  5 | 
  6 | int word_align(int numbytes)
  7 | {
  8 |   return (numbytes&1)?(numbytes+1):numbytes;
  9 | }
 10 | 
 11 | void Chunks::Append(uint32_t chunkid,uint32_t chunksize,const uint8_t *data,uint32_t len)
 12 | {
 13 |   tChunk chunk;
 14 |   chunk.id=chunkid;
 15 |   chunk.csize=chunksize;
 16 |   if (len) {
 17 |     chunk.data.resize(len);
 18 |     copy(data,data+len,chunk.data.begin());
 19 |   }
 20 |   wavchunks.push_back(chunk);
 21 |   metadatasize+=(8+len);
 22 | }
 23 | 
 24 | size_t Chunks::PackMetaData(std::vector <uint8_t>&data)
 25 | {
 26 |   data.resize(metadatasize);
 27 |   size_t ofs=0;
 28 |   for (size_t i=0;i<GetNumChunks();i++) {
 29 |     const tChunk &wavchunk=wavchunks[i];
 30 |     BitUtils::put32LH(&data[ofs],wavchunk.id);ofs+=4;
 31 |     BitUtils::put32LH(&data[ofs],wavchunk.csize);ofs+=4;
 32 |     std::copy(begin(wavchunk.data),end(wavchunk.data),begin(data)+ofs);
 33 |     ofs+=wavchunk.data.size();
 34 |   }
 35 |   return ofs;
 36 | }
 37 | 
 38 | size_t Chunks::UnpackMetaData(const std::vector <uint8_t>&data)
 39 | {
 40 |   size_t ofs=0;
 41 |   while (ofs<data.size()) {
 42 |     uint32_t chunkid,chunksize;
 43 |     chunkid=BitUtils::get32LH(&data[ofs]);ofs+=4;
 44 |     chunksize=BitUtils::get32LH(&data[ofs]);ofs+=4;
 45 |     if (chunkid==0x46464952) {Append(chunkid,chunksize,&data[ofs],4);ofs+=4;}
 46 |     else if (chunkid==0x61746164) {Append(chunkid,chunksize,NULL,0);}
 47 |     else {
 48 |         const uint32_t writesize=word_align(chunksize);
 49 |         Append(chunkid,chunksize,&data[ofs],writesize);ofs+=writesize;
 50 |     };
 51 |   }
 52 |   return ofs;
 53 | }
 54 | 
 55 | Wav::Wav(bool verbose)
 56 | :chunkpos(0),datapos(0),endofdata(0),byterate(0),blockalign(0),samplesleft(0),verbose(verbose)
 57 | {
 58 |   MD5::Init(&md5ctx);
 59 | };
 60 | 
 61 | Wav::Wav(AudioFile &file,bool verbose)
 62 | :AudioFile(file),chunkpos(0),samplesleft(0),verbose(verbose)
 63 | {
 64 |   kbps=(samplerate*numchannels*bitspersample)/1000;
 65 |   int csize=static_cast<int>(ceil(static_cast<double>(bitspersample)/8.));
 66 |   byterate=samplerate*numchannels*csize;
 67 |   blockalign=numchannels*csize;
 68 |   MD5::Init(&md5ctx);
 69 | };
 70 | 
 71 | 
 72 | void Wav::InitFileBuf(int maxframesize)
 73 | {
 74 |   filebuffer.resize(maxframesize*blockalign);
 75 | }
 76 | 
 77 | int Wav::ReadSamples(std::vector <std::vector <int32_t>>&data,int samplestoread)
 78 | {
 79 |   // read samples
 80 |   if (samplestoread>samplesleft) samplestoread=samplesleft;
 81 |   int bytestoread=samplestoread*blockalign;
 82 |   file.read(reinterpret_cast<char*>(&filebuffer[0]),bytestoread);
 83 |   int bytesread=file.gcount();
 84 |   int samplesread=bytesread/blockalign;
 85 | 
 86 |   samplesleft-=samplesread;
 87 |   if (samplesread!=samplestoread) std::cerr << "warning: read over eof\n";
 88 | 
 89 |   MD5::Update(&md5ctx, &filebuffer[0], bytestoread);
 90 | 
 91 |   const int csize=blockalign/numchannels;
 92 |   // decode samples
 93 |   if (csize==1) {
 94 |     int bufptr=0;
 95 |     for (int i=0;i<samplesread;i++) { // unpack samples
 96 |       for (int k=0;k<numchannels;k++) {
 97 |         uint8_t sample=filebuffer[bufptr];bufptr+=1;
 98 |         data[k][i]=static_cast<int32_t>(sample)-128;
 99 |       }
100 |     }
101 |   } else if (csize==2) {
102 |     int bufptr=0;
103 |     for (int i=0;i<samplesread;i++) { // unpack samples
104 |       for (int k=0;k<numchannels;k++) {
105 |         int16_t sample=((filebuffer[bufptr+1]<<8)|filebuffer[bufptr]);bufptr+=2;
106 |         data[k][i]=static_cast<int32_t>(sample);
107 |       }
108 |     }
109 |   } else if (csize==3) {
110 |     int bufptr=0;
111 |     for (int i=0;i<samplesread;i++) { // unpack samples
112 |       for (int k=0;k<numchannels;k++) {
113 |         int32_t sample=0;
114 |         sample = static_cast<int32_t>(filebuffer[bufptr+2])<<24;
115 |         sample |= static_cast<int32_t>(filebuffer[bufptr+1])<<16;
116 |         sample |= static_cast<int32_t>(filebuffer[bufptr])<<8;
117 |         bufptr+=3;
118 |         data[k][i]=sample>>8;
119 |       }
120 |     }
121 |   } else std::cerr << "error: unknown csize=" << csize << '\n';
122 | 
123 |   return samplesread;
124 | }
125 | 
126 | int Wav::WriteSamples(const std::vector <std::vector <int32_t>>&data,int samplestowrite)
127 | {
128 |   const int csize=blockalign/numchannels;
129 |   if (csize==1) {
130 |     int bufptr=0;
131 |       for (int i=0;i<samplestowrite;i++) { // pack samples
132 |         for (int k=0;k<numchannels;k++) {
133 |           filebuffer[bufptr]=static_cast<uint8_t>( (data[k][i]+128) &0xff);
134 |           bufptr+=1;
135 |       }
136 |     }
137 |   } else if (csize==2) {
138 |     int bufptr=0;
139 |       for (int i=0;i<samplestowrite;i++) { // pack samples
140 |         for (int k=0;k<numchannels;k++) {
141 |           int16_t sample=data[k][i];
142 |           filebuffer[bufptr]=sample&0xff;
143 |           filebuffer[bufptr+1]=(sample>>8)&0xff;
144 |           bufptr+=2;
145 |       }
146 |     }
147 |   } else if (csize==3) {
148 |     int bufptr=0;
149 |       for (int i=0;i<samplestowrite;i++) { // pack samples
150 |         for (int k=0;k<numchannels;k++) {
151 |           int32_t sample=data[k][i];
152 |           filebuffer[bufptr]=sample&0xff;
153 |           filebuffer[bufptr+1]=(sample>>8)&0xff;
154 |           filebuffer[bufptr+2]=(sample>>16)&0xff;
155 |           bufptr+=3;
156 |       }
157 |     }
158 |   }
159 |   int bytestowrite=samplestowrite*blockalign;
160 |   file.write(reinterpret_cast<char*>(&filebuffer[0]),bytestowrite);
161 | 
162 |   MD5::Update(&md5ctx, &filebuffer[0], bytestowrite);
163 |   return bytestowrite;
164 | }
165 | 
166 | 
167 | int Wav::ReadHeader()
168 | {
169 |   bool seektodatapos=true;
170 |   uint8_t buf[40];
171 |   std::vector <uint8_t> vbuf;
172 |   uint32_t chunkid,chunksize;
173 | 
174 |   file.read(reinterpret_cast<char*>(buf),12); // read 'RIFF' chunk descriptor
175 |   chunkid=BitUtils::get32LH(buf);
176 |   chunksize=BitUtils::get32LH(buf+4);
177 | 
178 |   // do we have a wave file?
179 |   if (chunkid==0x46464952 && BitUtils::get32LH(buf+8)==0x45564157) {
180 | 
181 |     myChunks.Append(chunkid,chunksize,buf+8,4);
182 |     while (1) {
183 |       file.read(reinterpret_cast<char*>(buf),8);
184 |       if (!file) {std::cout << "could not read!\n";return 1;};
185 | 
186 |       chunkid   = BitUtils::get32LH(buf);
187 |       chunksize = BitUtils::get32LH(buf+4);
188 |       if (chunkid==0x020746d66) { // read 'fmt ' chunk
189 |         if (chunksize!=16 && chunksize!=18 && chunksize!=40)
190 |         {
191 |           std::cerr << "warning: invalid fmt-chunk size (" << chunksize << ")\n";
192 |           return 1;
193 |         } else {
194 |           file.read(reinterpret_cast<char*>(buf),chunksize);
195 |           myChunks.Append(chunkid,chunksize,buf,chunksize);
196 | 
197 |           int audioformat=BitUtils::get16LH(buf);
198 |           numchannels=BitUtils::get16LH(buf+2);
199 |           samplerate=BitUtils::get32LH(buf+4);
200 |           byterate=BitUtils::get32LH(buf+8);
201 |           blockalign=BitUtils::get16LH(buf+12);
202 |           bitspersample=BitUtils::get16LH(buf+14);
203 |           if (chunksize>=18) {
204 |              int cbsize=BitUtils::get16LH(buf+16);
205 |              if (verbose) std::cout << "  WAVE-Ext (" << cbsize << " Bytes)"<<std::endl;
206 |              if (cbsize>=22) {
207 |                int valid_bitspersample=BitUtils::get16LH(buf+18);
208 |                int channel_mask=BitUtils::get32LH(buf+20);
209 |                int data_fmt=BitUtils::get16LH(buf+24);
210 |                if (verbose) {
211 |                  std::cout << "  audio format=" << std::format("{:#x}",audioformat);
212 |                  std::cout << ",channel mask=" << std::format("{:#x}",channel_mask);
213 |                  std::cout << ",valid bps=" << valid_bitspersample;
214 |                  std::cout << ",data format=" << data_fmt << '\n';
215 |                }
216 |                bitspersample=valid_bitspersample;
217 |                audioformat = data_fmt;
218 |              }
219 | 
220 |           }
221 |           kbps=(samplerate*numchannels*bitspersample)/1000;
222 |           if (audioformat!=1) {std::cerr << "warning: only PCM Format supported\n";return 1;};
223 |         }
224 |       } else if (chunkid==0x61746164) { // 'data' chunk
225 |         myChunks.Append(chunkid,chunksize,NULL,0);
226 |         datapos=file.tellg();
227 | 
228 |         numsamples=chunksize/blockalign;
229 |         samplesleft=numsamples;
230 | 
231 |         endofdata=datapos+(std::streampos)(word_align(chunksize));
232 |         //std::cout << endofdata << ' ' << filesize << '\n';
233 |         if (endofdata>=filesize) { // if data chunk is last chunk, break
234 |             if (endofdata>filesize) {
235 |               numsamples = (filesize-datapos) / blockalign;
236 |               samplesleft = numsamples;
237 |               std::cerr << "  warning: endofdata>filesize\n";
238 |               std::cerr << "  numsamples: " << numsamples << '\n';
239 |             }
240 |             seektodatapos=false;
241 |             break;
242 |         } else {
243 |           int64_t pos=file.tellg();
244 |           file.seekg(pos+chunksize);
245 |         }
246 |       } else { // read remaining chunks
247 |         const uint32_t readsize=word_align(chunksize);
248 |         ReadData(vbuf,readsize);
249 |         myChunks.Append(chunkid,chunksize,&vbuf[0],readsize);
250 |       }
251 |       if (file.tellg()==getFileSize()) break;
252 |     }
253 |   } else return 1;
254 | 
255 |   if (verbose) {
256 |     std::cout << " Number of chunks: " << myChunks.GetNumChunks() << std::endl;
257 |     for (size_t i=0;i<myChunks.GetNumChunks();i++)
258 |       std::cout << "  Chunk" << std::setw(2) << (i+1) << ": '" << BitUtils::U322Str(myChunks.GetChunkID(i)) << "' " << myChunks.GetChunkSize(i) << " Bytes\n";
259 |     std::cout << " Metadatasize: " << myChunks.GetMetaDataSize() << " Bytes\n";
260 |   }
261 |   if (seektodatapos) {file.seekg(datapos);seektodatapos=false;};
262 |   return 0;
263 | }
264 | 
265 | int Wav::WriteHeader()
266 | {
267 |   uint8_t buf[8];
268 |   while (chunkpos<myChunks.GetNumChunks())
269 |   {
270 |     const Chunks::tChunk &chunk=myChunks.wavchunks[chunkpos++];
271 |     BitUtils::put32LH(buf+0,chunk.id);
272 |     BitUtils::put32LH(buf+4,chunk.csize);
273 |     file.write((char*)buf,8);
274 |     if (chunk.id==0x61746164) break;
275 |     else {
276 |        if (verbose) std::cout << " chunk size " << chunk.data.size() << std::endl;
277 |        WriteData(chunk.data,chunk.data.size());
278 |     }
279 |   }
280 |   return 0;
281 | }
282 | 
283 | 


--------------------------------------------------------------------------------
/src/file/wav.h:
--------------------------------------------------------------------------------
 1 | #ifndef WAV_H
 2 | #define WAV_H
 3 | 
 4 | #include "file.h"
 5 | #include "../common/md5.h"
 6 | 
 7 | class Chunks {
 8 |   public:
 9 |     struct tChunk {
10 |       uint32_t id,csize;
11 |       std::vector <uint8_t>data;
12 |     };
13 |     Chunks():metadatasize(0){};
14 |     void Append(uint32_t chunkid,uint32_t chunksize,const uint8_t *data,uint32_t len);
15 |     size_t GetNumChunks() const {return wavchunks.size();};
16 |     uint32_t GetChunkID(int chunk) const {return wavchunks[chunk].id;};
17 |     uint32_t GetChunkSize(int chunk) const {return wavchunks[chunk].csize;};
18 |     size_t GetChunkDataSize(int chunk) const {return wavchunks[chunk].data.size();};
19 |     uint32_t GetMetaDataSize() const {return metadatasize;};
20 |     size_t PackMetaData(std::vector <uint8_t>&data);
21 |     size_t UnpackMetaData(const std::vector <uint8_t>&data);
22 |     std::vector <tChunk> wavchunks;
23 |   private:
24 |     uint32_t metadatasize;
25 | };
26 | 
27 | class Wav : public AudioFile {
28 |   public:
29 |     Wav(bool verbose=false);
30 |     Wav(AudioFile &file,bool verbose=false);
31 |     int ReadHeader();
32 |     int WriteHeader();
33 |     void InitFileBuf(int maxframesize);
34 |     int ReadSamples(std::vector <std::vector <int32_t>>&data,int samplestoread);
35 |     int WriteSamples(const std::vector <std::vector <int32_t>>&data,int samplestowrite);
36 |     Chunks &GetChunks(){return myChunks;};
37 |     MD5::MD5Context md5ctx;
38 |   private:
39 |     Chunks myChunks;
40 |     size_t chunkpos;
41 |     std::vector <uint8_t>filebuffer;
42 |     std::streampos datapos,endofdata;
43 |     int byterate,blockalign,samplesleft;
44 |     bool verbose;
45 | };
46 | #endif // WAV_H
47 | 


--------------------------------------------------------------------------------
/src/global.h:
--------------------------------------------------------------------------------
 1 | #ifndef GLOBAL_H
 2 | #define GLOBAL_H
 3 | 
 4 | //#include "windows.h"
 5 | #include <iostream>
 6 | #include <algorithm>
 7 | #include <fstream>
 8 | #include <sstream>
 9 | #include <iomanip>
10 | #include <vector>
11 | #include <span>
12 | 
// M_PI is not guaranteed by the C++ standard; provide it when missing.
#ifndef M_PI
#define M_PI (3.14159265358979323846)
#endif

// Disables assert() in any <cassert> inclusion that follows this header.
#define NDEBUG

// Short names for the container and view types used throughout the codec.
using vec1D=std::vector<double>;
using vec2D=std::vector<std::vector<double>>;
using span_i32=std::span<int32_t>;
using span_ci32=std::span<const int32_t>;
using span_cf64=std::span<const double>;
24 | 
// Compile-time switches and constants shared by the predictor stages.
struct SACGlobalCfg {
  static constexpr bool   USE_AVX2       = true;
  static constexpr int    AVX2_MINN      = 8;
  static constexpr double NLMS_POW_EPS   = 1.0;
  static constexpr double LMS_ADA_EPS    = 1E-5;
  static constexpr bool   LMS_MIX_INIT   = true; // increase stability
  static constexpr bool   LMS_MIX_CLAMPW = true;
  static constexpr bool   RLS_ALC        = true; // adaptive lambda control
};
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/src/libsac/cost.h:
--------------------------------------------------------------------------------
  1 | #ifndef COST_H
  2 | #define COST_H
  3 | 
  4 | #include "vle.h"
  5 | #include "../common/utils.h"
  6 | #include <cmath>
  7 | 
  8 | class CostFunction {
  9 |   public:
 10 |     CostFunction() {};
 11 |     virtual double Calc(span_ci32 buf) const =0;
 12 |     virtual ~CostFunction(){};
 13 | };
 14 | 
 15 | class CostL1 : public CostFunction {
 16 |   public:
 17 |     double Calc(span_ci32 buf) const override
 18 |     {
 19 |       if (buf.size()) {
 20 |         int64_t sum=0;
 21 |         for (const auto val:buf)
 22 |           sum+=std::fabs(val);
 23 |         return sum/static_cast<double>(buf.size());
 24 |       } else return 0.;
 25 |     }
 26 | };
 27 | 
 28 | class CostRMS : public CostFunction {
 29 |   public:
 30 |     double Calc(span_ci32 buf) const override
 31 |     {
 32 |       if (buf.size()) {
 33 |         int64_t sum=0.0;
 34 |         for (const auto val:buf)
 35 |           sum+=val*val;
 36 |         return sqrt(sum/static_cast<double>(buf.size()));
 37 |       } else return 0.;
 38 |     }
 39 | };
 40 | 
 41 | 
 42 | // estimate bytes per frame with a simple golomb model
 43 | class CostGolomb : public CostFunction {
 44 |   const double alpha=0.97; // critical
 45 |   public:
 46 |     CostGolomb(){};
 47 |     double Calc(span_ci32 buf) const override
 48 |     {
 49 |       RunWeight rm(alpha);
 50 |       if (buf.size()) {
 51 |         int64_t nbits=0;
 52 |         for (const auto sval:buf) {
 53 |           const auto m=std::max(static_cast<int32_t>(rm.sum),1);
 54 |           const auto uval=MathUtils::S2U(sval);
 55 |           int q=uval/m;
 56 |           //int r=val-q*m;
 57 |           nbits+=(q+1);
 58 |           if (m>1) {
 59 |             nbits+=BitUtils::count_bits32(m);
 60 |           }
 61 |           rm.Update(uval);
 62 |         }
 63 |         return nbits/(8.);
 64 |       } else return 0;
 65 |     }
 66 | };
 67 | 
 68 | //#define TOTAL_SELF_INFORMATION
 69 | // entropy using order-0 markov model
 70 | class CostEntropy : public CostFunction {
 71 |   public:
 72 |     CostEntropy(){};
 73 |     double Calc(span_ci32 buf) const override
 74 |     {
 75 |       double entropy=0.0;
 76 |       if (buf.size())
 77 |       {
 78 |         int32_t minval = std::numeric_limits<int32_t>::max();
 79 |         int32_t maxval = std::numeric_limits<int32_t>::min();
 80 |         for (const auto val:buf) {
 81 |           if (val>maxval) maxval=val;
 82 |           if (val<minval) minval=val;
 83 |         }
 84 |         std::vector<int> counts(maxval-minval+1);
 85 | 
 86 |         const auto cmap=[&](int32_t val) -> int& {
 87 |           return counts[val-minval];};
 88 | 
 89 |         for (const auto val:buf)
 90 |           ++cmap(val);
 91 | 
 92 |         const double invs=1.0/static_cast<double>(buf.size());
 93 |         #ifdef TOTAL_SELF_INFORMATION
 94 |           for (const auto val:buf) {
 95 |             const double p=cmap(val)*invs;
 96 |             entropy+=p*log(p);
 97 |           }
 98 |         #else
 99 |           if (counts.size() < buf.size()) { // over alphabet
100 |             for (const auto c:counts) {
101 |               if (c==0) continue;
102 |               const double p=c*invs;
103 |               entropy += c*log2(p);
104 |             }
105 |           } else { // over input
106 |             for (const auto val:buf) {
107 |               const double p=cmap(val)*invs;
108 |               entropy+=log2(p);
109 |             }
110 |           }
111 |           entropy = -entropy / 8.0;
112 |         #endif
113 |       }
114 |       return entropy;
115 |     }
116 | };
117 | 
118 | /*class StaticBitModel {
119 |   public:
120 |     StaticBitModel()
121 |     :pr(2,vec1D(PSCALE))
122 |     {
123 |       for (std::size_t i=1;i<PSCALE;i++)
124 |       {
125 |         double p1 = static_cast<double>(i)/static_cast<double>(PSCALE);
126 |         double t0 = -std::log2(1.0-p1);
127 |         double t1 = -std::log2(p1);
128 |         pr[0][i]= t0;
129 |         pr[1][i]= t1;
130 |       }
131 |       ResetCount();
132 |     }
133 |     void ResetCount(){nbits=0;};
134 |     void EncodeBitOne(uint32_t p1,int bit)
135 |     {
136 |       nbits += pr[bit][p1];
137 |     }
138 |     auto EncodeP1_Func() {return [&](uint32_t p1,int bit) {return EncodeBitOne(p1,bit);};}; // stupid C++
139 | 
140 |   double nbits;
141 |   vec2D pr;
142 | };*/
143 | 
144 | class CostBitplane : public CostFunction {
145 |  public:
146 |   CostBitplane() {
147 |   }
148 |   double Calc(span_ci32 buf) const override
149 |   {
150 |     int numsamples=buf.size();
151 |     std::vector<int32_t> ubuf(numsamples);
152 |     int vmax=0;
153 |     for (int i=0;i<numsamples;i++) {
154 |        int val=MathUtils::S2U(buf[i]);
155 |        if (val>vmax) vmax=val;
156 |        ubuf[i]=val;
157 |     }
158 |     #if 1
159 |     BufIO iobuf;
160 |     RangeCoderSH rc(iobuf);
161 |     rc.Init();
162 |     BitplaneCoder bc_rc(MathUtils::iLog2(vmax),numsamples);
163 |     bc_rc.Encode(rc.encode_p1,&ubuf[0]);
164 |     rc.Stop();
165 |     double c0=iobuf.GetBufPos();
166 |     #else
167 | 
168 |     StaticBitModel bm;
169 |     BitplaneCoder bc_bit(MathUtils::iLog2(vmax),numsamples);
170 |     bc_bit.Encode(bm.EncodeP1_Func(),&ubuf[0]);
171 | 
172 |     double c0=bm.nbits/8.0;
173 |     #endif
174 |     return c0;
175 |   }
176 | };
177 | 
178 | #endif
179 | 


--------------------------------------------------------------------------------
/src/libsac/libsac.cpp:
--------------------------------------------------------------------------------
  1 | #include <algorithm>
  2 | #include <thread>
  3 | #include <future>
  4 | 
  5 | #include "libsac.h"
  6 | #include "pred.h"
  7 | #include "sparse.h"
  8 | #include "../common/timer.h"
  9 | #include "../opt/dds.h"
 10 | #include "../opt/de.h"
 11 | #include "../opt/cma.h"
 12 | 
 13 | FrameCoder::FrameCoder(int numchannels,int framesize,const tsac_cfg &cfg)
 14 | :numchannels_(numchannels),framesize_(framesize),cfg(cfg)
 15 | {
 16 |   profile_size_bytes_=base_profile.LoadBaseProfile()*4;
 17 | 
 18 |   framestats.resize(numchannels);
 19 |   samples.resize(numchannels);
 20 |   error.resize(numchannels);
 21 |   s2u_error.resize(numchannels);
 22 |   s2u_error_map.resize(numchannels);
 23 |   pred.resize(numchannels);
 24 |   for (int i=0;i<numchannels;i++) {
 25 |     samples[i].resize(framesize);
 26 |     error[i].resize(framesize);
 27 |     pred[i].resize(framesize);
 28 |     s2u_error[i].resize(framesize);
 29 |     s2u_error_map[i].resize(framesize);
 30 |   }
 31 |   encoded.resize(numchannels);
 32 |   enc_temp1.resize(numchannels);
 33 |   enc_temp2.resize(numchannels);
 34 |   numsamples_=0;
 35 | }
 36 | 
 37 | //#define BIAS_SCALE1
 38 | 
// Translates a profile (flat list of values addressed by slot index) into
// the predictor parameter set.
//   param    : receives the parameters
//   profile  : source of the values, read via profile.Get(slot)
//   optimize : selects k=cfg.ocfg.optk instead of k=1
// The "0"/"1" suffix of a field presumably refers to the first/second
// predictor stage (channel) - confirm against Predictor::tparam.
void FrameCoder::SetParam(Predictor::tparam &param,const SacProfile &profile,bool optimize)
{
  if (optimize) param.k=cfg.ocfg.optk;
  else param.k=1;

  // lambda1/ols_nu1 are only preset here, they are overwritten from slots 12/13 below
  param.lambda0=param.lambda1=profile.Get(0);
  param.ols_nu0=param.ols_nu1=profile.Get(1);

  // integer-valued settings are stored as doubles in the profile and rounded
  param.vn0={(int)round(profile.Get(28)),(int)round(profile.Get(29)),(int)round(profile.Get(30)),(int)round(profile.Get(37))};
  param.vn1={(int)round(profile.Get(31)),(int)round(profile.Get(32)),(int)round(profile.Get(33)),(int)round(profile.Get(38))};

  // each vmu entry is the profile value divided by the matching vn entry
  param.vmu0={profile.Get(2)/double(param.vn0[0]),profile.Get(3)/double(param.vn0[1]),profile.Get(4)/double(param.vn0[2]),profile.Get(5)/double(param.vn0[3])};
  param.vmudecay0={profile.Get(6),profile.Get(39),profile.Get(46),profile.Get(47)};
  param.vpowdecay0={profile.Get(7),profile.Get(8),profile.Get(50),profile.Get(51)};
  param.mu_mix0=profile.Get(10);
  param.mu_mix_beta0=profile.Get(11);

  param.lambda1=profile.Get(12);
  param.ols_nu1=profile.Get(13);
  param.vmu1={profile.Get(14)/double(param.vn1[0]),profile.Get(15)/double(param.vn1[1]),profile.Get(16)/double(param.vn1[2]),profile.Get(17)/double(param.vn1[3])};
  param.vmudecay1={profile.Get(18),profile.Get(40),profile.Get(48),profile.Get(49)};
  param.vpowdecay1={profile.Get(19),profile.Get(20),profile.Get(21),profile.Get(52)};
  param.mu_mix1=profile.Get(22);
  param.mu_mix_beta1=profile.Get(23);

  param.nA=round(profile.Get(24));
  param.nB=round(profile.Get(25));
  param.nS0=round(profile.Get(26));
  param.nS1=round(profile.Get(27));
  param.nM0=round(profile.Get(9));

  // the beta_* settings of both stages share slots 34-36
  param.beta_sum0=profile.Get(34);
  param.beta_pow0=profile.Get(35);
  param.beta_add0=profile.Get(36);

  param.beta_sum1=profile.Get(34);
  param.beta_pow1=profile.Get(35);
  param.beta_add1=profile.Get(36);

  param.lm_n=std::round(profile.Get(41));
  param.lm_alpha=profile.Get(42);

  param.bias_mu0=profile.Get(43);
  param.bias_mu1=profile.Get(44);

  param.bias_scale0=param.bias_scale1=std::round(profile.Get(45));

  // the sign of nS1 encodes the reference channel: a negative value selects
  // channel 1 and is stored as its magnitude
  param.ch_ref=0;
  if (param.nS1 < 0) {
    param.nS1 = -param.nS1;
    param.ch_ref=1;
  } //else if (param.nS1==0) param.nS1=1;
}
 92 | 
 93 | void FrameCoder::PredictFrame(const SacProfile &profile,tch_samples &error,int from,int numsamples,bool optimize)
 94 | {
 95 | 
 96 |   Predictor::tparam param;
 97 |   SetParam(param,profile,optimize);
 98 |   Predictor pr(param);
 99 | 
100 |   auto eprocess=[&](int ch_p,int ch,int32_t val,int idx) {
101 |       double pd=pr.predict(ch_p);
102 |       int32_t pi=std::clamp((int32_t)std::round(pd),framestats[ch].minval,framestats[ch].maxval);
103 |       if (!optimize) pred[ch][idx]=pi+framestats[ch].mean;
104 |       error[ch][idx]=val-pi; // needed for cost-function within optimize
105 |       pr.update(ch_p,val);
106 |   };
107 | 
108 | 
109 |   if (numchannels_==1) {
110 |     const auto *src=&samples[0][from];
111 |     for (int idx=0;idx<numsamples;idx++)
112 |     {
113 |       pr.fillbuf_ch0(src,idx,src,idx);
114 |       eprocess(0,0,src[idx],idx);
115 |     }
116 |   } else if (numchannels_==2) {
117 |     int ch0=param.ch_ref;
118 |     int ch1=1-ch0;
119 | 
120 |     const auto *src0=&samples[ch0][from];
121 |     const auto *src1=&samples[ch1][from];
122 | 
123 |     int idx0=0,idx1=0;
124 |     while (idx0<numsamples || idx1<numsamples)
125 |     {
126 |       if (idx0<numsamples) {
127 |         pr.fillbuf_ch0(src0,idx0,src1,idx1);
128 |         eprocess(0,ch0,src0[idx0],idx0);
129 |         idx0++;
130 |       }
131 |       if (idx0>=param.nS1) {
132 |         pr.fillbuf_ch1(src0,src1,idx1,numsamples);
133 |         eprocess(1,ch1,src1[idx1],idx1);
134 |         idx1++;
135 |       }
136 |     }
137 |   }
138 | }
139 | 
140 | void FrameCoder::UnpredictFrame(const SacProfile &profile,int numsamples)
141 | {
142 |   Predictor::tparam param;
143 |   SetParam(param,profile,false);
144 |   Predictor pr(param);
145 | 
146 |   auto dprocess=[&](int ch_p,int ch,int32_t *dst,int idx) {
147 |     const double pd=pr.predict(ch_p);
148 |     const int32_t pi=std::clamp((int32_t)round(pd),framestats[ch].minval,framestats[ch].maxval);
149 | 
150 | 
151 |     if (framestats[ch].enc_mapped)
152 |       dst[idx]=pi+framestats[ch].mymap.Unmap(pi+framestats[ch].mean,error[ch][idx]);
153 |     else
154 |       dst[idx]=pi+error[ch][idx];
155 | 
156 |     pr.update(ch_p,dst[idx]);
157 |   };
158 | 
159 |   if (numchannels_==1) {
160 |     auto *dst=&samples[0][0];
161 |     for (int idx=0;idx<numsamples;idx++)
162 |     {
163 |       pr.fillbuf_ch0(dst,idx,dst,idx);
164 |       dprocess(0,0,dst,idx);
165 |     }
166 |   } else if (numchannels_==2) {
167 |     int ch0=param.ch_ref;
168 |     int ch1=1-ch0;
169 | 
170 |     auto *dst0=&samples[ch0][0];
171 |     auto *dst1=&samples[ch1][0];
172 |     int idx0=0,idx1=0;
173 |     while (idx0<numsamples || idx1<numsamples)
174 |     {
175 |       if (idx0<numsamples) {
176 |         pr.fillbuf_ch0(dst0,idx0,dst1,idx1);
177 |         dprocess(0,ch0,dst0,idx0);
178 |         idx0++;
179 |       }
180 |       if (idx0>=param.nS1) {
181 |         pr.fillbuf_ch1(dst0,dst1,idx1,numsamples);
182 |         dprocess(1,ch1,dst1,idx1);
183 |         idx1++;
184 |       }
185 |     }
186 |   }
187 | 
188 |   // add mean
189 |   for (int ch=0;ch<numchannels_;ch++) {
190 |     if (framestats[ch].mean!=0)
191 |       for (int i=0;i<numsamples;i++) samples[ch][i]+=framestats[ch].mean;
192 |   }
193 | }
194 | 
195 | int FrameCoder::EncodeMonoFrame_Normal(int ch,int numsamples,BufIO &buf)
196 | {
197 |   buf.Reset();
198 |   RangeCoderSH rc(buf);
199 |   rc.Init();
200 | 
201 |   BitplaneCoder bc(framestats[ch].maxbpn,numsamples);
202 |   int32_t *psrc=&(s2u_error[ch][0]);
203 |   bc.Encode(rc.encode_p1,psrc);
204 |   rc.Stop();
205 |   return buf.GetBufPos();
206 | }
207 | 
208 | int FrameCoder::EncodeMonoFrame_Mapped(int ch,int numsamples,BufIO &buf)
209 | {
210 |   buf.Reset();
211 | 
212 |   RangeCoderSH rc(buf);
213 |   rc.Init();
214 | 
215 |   BitplaneCoder bc(framestats[ch].maxbpn_map,numsamples);
216 | 
217 |   MapEncoder me(rc,framestats[ch].mymap.usedl,framestats[ch].mymap.usedh);
218 |   me.Encode();
219 |   bc.Encode(rc.encode_p1,&(s2u_error_map[ch][0]));
220 |   rc.Stop();
221 |   return buf.GetBufPos();
222 | }
223 | 
224 | double FrameCoder::CalcRemapError(int ch, int numsamples)
225 | {
226 |     std::vector<int32_t>emap(numsamples);
227 |     int32_t emax_map=0;
228 |     for (int i=0;i<numsamples;i++) {
229 |       int32_t map_e=framestats[ch].mymap.Map(pred[ch][i],error[ch][i]);
230 |       int32_t map_ue=MathUtils::S2U(map_e);
231 |       emap[i]=map_e;
232 |       s2u_error_map[ch][i]=map_ue;
233 |       if (map_ue>emax_map) emax_map=map_ue;
234 |     }
235 |     framestats[ch].maxbpn_map=MathUtils::iLog2(emax_map);
236 | 
237 |     CostL1 cost;
238 | 
239 |     double ent1 = cost.Calc(std::span{&error[ch][0],static_cast<unsigned>(numsamples)});
240 |     double ent2 = cost.Calc(std::span{&emap[0],static_cast<unsigned>(numsamples)});
241 |     double r=1.0;
242 |     if (ent2!=0.0) r=ent1/ent2;
243 |     if (cfg.verbose_level>0) std::cout << "  cost pcm-model: " << ent1 << ' ' << ent2 << ' ' << r << '\n';
244 |     return r;
245 | }
246 | 
247 | void FrameCoder::EncodeMonoFrame(int ch,int numsamples)
248 | {
249 |   if (cfg.sparse_pcm==0) {
250 |     EncodeMonoFrame_Normal(ch,numsamples,enc_temp1[ch]);
251 |     framestats[ch].enc_mapped=false;
252 |     encoded[ch]=enc_temp1[ch];
253 |   } else {
254 |     double r = CalcRemapError(ch,numsamples);
255 |     int size_normal=EncodeMonoFrame_Normal(ch,numsamples,enc_temp1[ch]);
256 |     framestats[ch].enc_mapped=false;
257 |     encoded[ch]=enc_temp1[ch];
258 | 
259 |     if (r > 1.05)
260 |     {
261 |       int size_mapped=EncodeMonoFrame_Mapped(ch,numsamples,enc_temp2[ch]);
262 |       if (size_mapped<size_normal)
263 |       {
264 |         if (cfg.verbose_level>0) {
265 |           std::cout << "  sparse frame " << size_normal << " -> " << size_mapped << " (" << (size_mapped-size_normal) << ")\n";
266 |         }
267 |         framestats[ch].enc_mapped=true;
268 |         encoded[ch]=enc_temp2[ch];
269 |       }
270 |     }
271 |   }
272 | }
273 | 
274 | void FrameCoder::DecodeMonoFrame(int ch,int numsamples)
275 | {
276 |   int32_t *dst=&(error[ch][0]);
277 |   BufIO &buf=encoded[ch];
278 |   buf.Reset();
279 | 
280 |   RangeCoderSH rc(buf,1);
281 |   rc.Init();
282 |   if (framestats[ch].enc_mapped) {
283 |     framestats[ch].mymap.Reset();
284 |     MapEncoder me(rc,framestats[ch].mymap.usedl,framestats[ch].mymap.usedh);
285 |     me.Decode();
286 |     //std::cout << buf.GetBufPos() << std::endl;
287 |   }
288 | 
289 |   BitplaneCoder bc(framestats[ch].maxbpn,numsamples);
290 |   bc.Decode(rc.decode_p1,dst);
291 |   rc.Stop();
292 | }
293 | 
294 | 
295 | void FrameCoder::PrintProfile(SacProfile &profile)
296 | {
297 |     Predictor::tparam param;
298 |     SetParam(param,profile);
299 | 
300 |     std::cout << '\n';
301 |     std::cout << "lpc (nA " << std::round(profile.Get(24)) << " nM0 " << std::round(profile.Get(9));
302 |     std::cout << ") (nB " << std::round(profile.Get(25)) << " nS0 " << std::round(profile.Get(26)) << " nS1 " << std::round(profile.Get(27)) << ")\n";
303 |     std::cout << "lpc nu " << param.ols_nu0 << ' ' << param.ols_nu1 << '\n';
304 |     std::cout << "lpc cov0 " << param.beta_sum0 << ' ' << param.beta_pow0 << ' ' << param.beta_add0 << "\n";
305 |     std::cout << "lms0 ";
306 |     for (int i=28;i<=30;i++) std::cout << std::round(profile.Get(i)) << ' ';
307 |     std::cout << std::round(profile.Get(37));
308 |     std::cout << '\n';
309 |     std::cout << "lms1 ";
310 |     for (int i=31;i<=33;i++) std::cout << std::round(profile.Get(i)) << ' ';
311 |     std::cout << std::round(profile.Get(38));
312 |     std::cout << '\n';
313 |     std::cout << "mu ";
314 |     for (std::size_t i=0;i<std::size(param.vmu0);i++)
315 |       std::cout << (param.vmu0[i]*param.vn0[i]) << ' ';
316 |     std::cout << '\n';
317 |     std::cout << "mu_decay ";
318 |     for (const auto &x : param.vmudecay0)
319 |       std::cout << x << ' ';
320 |     std::cout << '\n';
321 |     std::cout << "pow_decay ";
322 |     for (const auto &x : param.vpowdecay0)
323 |       std::cout << x << ' ';
324 |     std::cout << '\n';
325 | 
326 |     std::cout << "mu mix mu " << param.mu_mix0 << " " << param.mu_mix1 << '\n';
327 |     std::cout << "mu mix beta " << param.mu_mix_beta0 << " " << param.mu_mix_beta1 << '\n';
328 |     std::cout << "ch-ref " << param.ch_ref << "\n";
329 |     std::cout << "bias mu " << param.bias_mu0 << ", " << param.bias_mu1 << " scale " << (1<<param.bias_scale0) << ' ' << (1<<param.bias_scale1) << '\n';
330 |     std::cout << "lm " << param.lm_n << " gamma " << param.lm_alpha << '\n';
331 | }
332 | 
333 | double FrameCoder::GetCost(const CostFunction *func,const tch_samples &samples,std::size_t samples_to_optimize) const
334 | {
335 |   // return a span over samples
336 |   const auto span_ch = [=](int ch){
337 |     return std::span{&samples[ch][0],samples_to_optimize};
338 |   };
339 | 
340 |   double cost=0.0;
341 |   if (cfg.mt_mode>1 && numchannels_>1) {
342 | 
343 |     std::vector <std::future<double>> threads;
344 |     for (int ch=0;ch<numchannels_;ch++)
345 |         threads.emplace_back(std::async([=]{return func->Calc(span_ch(ch));}));
346 | 
347 |     for (auto &thread : threads)
348 |       cost += thread.get();
349 | 
350 |   } else {
351 |     for (int ch=0;ch<numchannels_;ch++)
352 |       cost += func->Calc(span_ch(ch));
353 |   }
354 |   return cost;
355 | }
356 | 
357 | void FrameCoder::Optimize(const FrameCoder::toptim_cfg &ocfg,SacProfile &profile,const std::vector<int>&params_to_optimize)
358 | {
359 |   int samples_to_optimize=std::min(numsamples_,static_cast<int>(std::ceil(framesize_*ocfg.fraction)));
360 |   const int start_pos=(numsamples_-samples_to_optimize)/2;
361 | 
362 |   CostFunction *CostFunc=nullptr;
363 |   switch (ocfg.optimize_cost)  {
364 |     case FrameCoder::SearchCost::L1:CostFunc=new CostL1();break;
365 |     case FrameCoder::SearchCost::RMS:CostFunc=new CostRMS();break;
366 |     case FrameCoder::SearchCost::Golomb:CostFunc=new CostGolomb();break;
367 |     case FrameCoder::SearchCost::Entropy:CostFunc=new CostEntropy();break;
368 |     case FrameCoder::SearchCost::Bitplane:CostFunc=new CostBitplane();break;
369 |     default:std::cerr << "  error: unknown FramerCoder::CostFunction\n";return;
370 |   }
371 | 
372 |   const int ndim=params_to_optimize.size();
373 |   vec1D xstart(ndim); // starting vector
374 |   Opt::box_const pb(ndim); // set constraints
375 |   for (int i=0;i<ndim;i++) {
376 |     pb[i].xmin=profile.coefs[params_to_optimize[i]].vmin;
377 |     pb[i].xmax=profile.coefs[params_to_optimize[i]].vmax;
378 |     xstart[i]=profile.coefs[params_to_optimize[i]].vdef;
379 |   }
380 | 
381 |   auto cost_func=[&](const vec1D &x) {
382 |     // create thread safe copies for error and profile
383 |     tch_samples tmp_error(numchannels_,std::vector<int32_t>(samples_to_optimize));
384 |     SacProfile tmp_profile=profile;
385 | 
386 |     for (int i=0;i<ndim;i++) tmp_profile.coefs[params_to_optimize[i]].vdef=x[i];
387 | 
388 |     PredictFrame(tmp_profile,tmp_error,start_pos,samples_to_optimize,true);
389 |     return GetCost(CostFunc,tmp_error,samples_to_optimize);
390 |   };
391 | 
392 |   if (cfg.verbose_level>0) {
393 |     std::string opt_str="";
394 |     if (ocfg.optimize_search==FrameCoder::SearchMethod::DDS) opt_str="DDS";
395 |     if (ocfg.optimize_search==FrameCoder::SearchMethod::DE) opt_str="DE";
396 |     else if (ocfg.optimize_search==FrameCoder::SearchMethod::CMA) opt_str="CMA";
397 |     std::cout << "\n " << opt_str << " " << ocfg.maxnfunc << "= ";
398 |   }
399 | 
400 |   std::unique_ptr<Opt> myOpt;
401 | 
402 |   if (ocfg.optimize_search==FrameCoder::SearchMethod::DDS)
403 |     myOpt = std::make_unique<OptDDS>(ocfg.dds_cfg,pb,cfg.verbose_level);
404 |   else if (ocfg.optimize_search==FrameCoder::SearchMethod::DE)
405 |     myOpt = std::make_unique<OptDE>(ocfg.de_cfg,pb,cfg.verbose_level);
406 |   else if (ocfg.optimize_search==FrameCoder::SearchMethod::CMA)
407 |     myOpt = std::make_unique<OptCMA>(ocfg.cma_cfg,pb,cfg.verbose_level);
408 | 
409 |   Opt::ppoint ret = myOpt->run(cost_func,xstart);
410 | 
411 |   // save optimal vector to baseprofile
412 |   for (int i=0;i<ndim;i++)
413 |     profile.coefs[params_to_optimize[i]].vdef=ret.second[i];
414 | 
415 |   if (cfg.verbose_level>0) {
416 |     PrintProfile(profile);
417 |   }
418 | 
419 |   delete CostFunc;
420 | }
421 | 
422 | void FrameCoder::CnvError_S2U(const tch_samples &error,int numsamples)
423 | {
424 |   for (int ch=0;ch<numchannels_;ch++)
425 |   {
426 |     int32_t emax=0;
427 |     for (int i=0;i<numsamples;i++) {
428 |       const int32_t e_s2u=MathUtils::S2U(error[ch][i]);
429 |       if (e_s2u>emax) emax=e_s2u;
430 |       s2u_error[ch][i]=e_s2u;
431 |     }
432 |     framestats[ch].maxbpn=MathUtils::iLog2(emax);
433 |   }
434 | }
435 | 
436 | void FrameCoder::Predict()
437 | {
438 |   for (int ch=0;ch<numchannels_;ch++)
439 |   {
440 |     AnalyseMonoChannel(ch,numsamples_);
441 |     if (cfg.sparse_pcm) {
442 |       framestats[ch].mymap.Reset();
443 |       framestats[ch].mymap.Analyse(&(samples[ch][0]),numsamples_);
444 |     }
445 |     if (cfg.zero_mean==0) {
446 |       framestats[ch].mean = 0;
447 |     } else if (framestats[ch].mean!=0) {
448 |       for (int i=0;i<numsamples_;i++) samples[ch][i] -= framestats[ch].mean;
449 |       framestats[ch].minval -= framestats[ch].mean;
450 |       framestats[ch].maxval -= framestats[ch].mean;
451 |     }
452 |   }
453 | 
454 |   if (cfg.optimize)
455 |   {
456 |     // reset profile params
457 |     // otherwise: starting point for optimization is the best point from the last frame
458 |     if (cfg.ocfg.reset)
459 |       base_profile.LoadBaseProfile();
460 | 
461 |     // optimize all params
462 |     std::vector<int>lparam_base(base_profile.coefs.size());
463 |     std::iota(std::begin(lparam_base),std::end(lparam_base),0);
464 | 
465 |     Optimize(cfg.ocfg,base_profile,lparam_base);
466 |   }
467 |   PredictFrame(base_profile,error,0,numsamples_,false);
468 |   CnvError_S2U(error,numsamples_);
469 | }
470 | 
471 | void FrameCoder::Unpredict()
472 | {
473 |   UnpredictFrame(base_profile,numsamples_);
474 | }
475 | 
476 | void FrameCoder::Encode()
477 | {
478 |   if (cfg.mt_mode && numchannels_>1)  {
479 |     std::vector <std::jthread> threads;
480 |     for (int ch=0;ch<numchannels_;ch++)
481 |       threads.emplace_back(std::jthread(&FrameCoder::EncodeMonoFrame,this,ch,numsamples_));
482 |   } else
483 |     for (int ch=0;ch<numchannels_;ch++) EncodeMonoFrame(ch,numsamples_);
484 | }
485 | 
486 | void FrameCoder::Decode()
487 | {
488 |   if (cfg.mt_mode && numchannels_>1) {
489 |     std::vector <std::jthread> threads;
490 |     for (int ch=0;ch<numchannels_;ch++)
491 |       threads.emplace_back(std::jthread(&FrameCoder::DecodeMonoFrame,this,ch,numsamples_));
492 |   } else
493 |     for (int ch=0;ch<numchannels_;ch++)
494 |       DecodeMonoFrame(ch,numsamples_);
495 | }
496 | 
497 | void FrameCoder::EncodeProfile(const SacProfile &profile,std::vector <uint8_t>&buf)
498 | {
499 |   //assert(sizeof(float)==4);
500 |   //std::cout << "number of coefs: " << profile.coefs.size() << " (" << profile_size_bytes_ << ")" << std::endl;
501 | 
502 |   uint32_t ix;
503 |   for (int i=0;i<(int)profile.coefs.size();i++) {
504 |      memcpy(&ix,&profile.coefs[i].vdef,4);
505 |      //ix=*((uint32_t*)&profile.coefs[i].vdef);
506 |      BitUtils::put32LH(&buf[4*i],ix);
507 |   }
508 | }
509 | 
510 | void FrameCoder::DecodeProfile(SacProfile &profile,const std::vector <uint8_t>&buf)
511 | {
512 |   uint32_t ix;
513 |   for (int i=0;i<(int)profile.coefs.size();i++) {
514 |      ix=BitUtils::get32LH(&buf[4*i]);
515 |      memcpy(&profile.coefs[i].vdef,&ix,4);
516 |      //profile.coefs[i].vdef=*((float*)&ix);
517 |   }
518 | }
519 | 
520 | int FrameCoder::WriteBlockHeader(std::fstream &file, const std::vector<SacProfile::FrameStats> &framestats,int ch)
521 | {
522 |   uint8_t buf[32];
523 |   BitUtils::put32LH(buf,framestats[ch].blocksize);
524 |   BitUtils::put32LH(buf+4,static_cast<uint32_t>(framestats[ch].mean));
525 |   BitUtils::put32LH(buf+8,static_cast<uint32_t>(framestats[ch].minval));
526 |   BitUtils::put32LH(buf+12,static_cast<uint32_t>(framestats[ch].maxval));
527 |   uint16_t flag=0;
528 |   if (framestats[ch].enc_mapped) {
529 |     flag|=(1<<9);
530 |     flag|=framestats[ch].maxbpn_map;
531 |   } else {
532 |     flag|=framestats[ch].maxbpn;
533 |   }
534 |   BitUtils::put16LH(buf+16,flag);
535 |   file.write(reinterpret_cast<char*>(buf),18);
536 |   return 18;
537 | }
538 | 
539 | int FrameCoder::ReadBlockHeader(std::fstream &file, std::vector<SacProfile::FrameStats> &framestats,int ch)
540 | {
541 |   uint8_t buf[32];
542 |   file.read(reinterpret_cast<char*>(buf),18);
543 | 
544 |   framestats[ch].blocksize=BitUtils::get32LH(buf);
545 |   framestats[ch].mean=static_cast<int32_t>(BitUtils::get32LH(buf+4));
546 |   framestats[ch].minval=static_cast<int32_t>(BitUtils::get32LH(buf+8));
547 |   framestats[ch].maxval=static_cast<int32_t>(BitUtils::get32LH(buf+12));
548 |   uint16_t flag=BitUtils::get16LH(buf+16);
549 |   if (flag>>9) framestats[ch].enc_mapped=true;
550 |   else framestats[ch].enc_mapped=false;
551 |   framestats[ch].maxbpn=flag&0xff;
552 |   return 18;
553 | }
554 | 
555 | void FrameCoder::WriteEncoded(AudioFile &fout)
556 | {
557 |   uint8_t buf[12];
558 |   BitUtils::put32LH(buf,numsamples_);
559 |   fout.file.write(reinterpret_cast<char*>(buf),4);
560 |   std::vector <uint8_t>profile_buf(profile_size_bytes_);
561 |   EncodeProfile(base_profile,profile_buf);
562 |   fout.file.write(reinterpret_cast<char*>(&profile_buf[0]),profile_size_bytes_);
563 |   for (int ch=0;ch<numchannels_;ch++) {
564 |     framestats[ch].blocksize = encoded[ch].GetBufPos();
565 |     WriteBlockHeader(fout.file, framestats, ch);
566 |     fout.WriteData(encoded[ch].GetBuf(),framestats[ch].blocksize);
567 |   }
568 | }
569 | 
570 | void FrameCoder::ReadEncoded(AudioFile &fin)
571 | {
572 |   uint8_t buf[8];
573 |   fin.file.read(reinterpret_cast<char*>(buf),4);
574 |   numsamples_=BitUtils::get32LH(buf);
575 |   std::vector <uint8_t>profile_buf(profile_size_bytes_);
576 |   fin.file.read(reinterpret_cast<char*>(&profile_buf[0]),profile_size_bytes_);
577 |   DecodeProfile(base_profile,profile_buf);
578 | 
579 |   for (int ch=0;ch<numchannels_;ch++) {
580 |     ReadBlockHeader(fin.file, framestats, ch);
581 |     fin.ReadData(encoded[ch].GetBuf(),framestats[ch].blocksize);
582 |   }
583 | }
584 | 
585 | double FrameCoder::AnalyseStereoChannel(int ch0, int ch1, int numsamples)
586 | {
587 |   int32_t *src0=&(samples[ch0][0]);
588 |   int32_t *src1=&(samples[ch1][0]);
589 |   int64_t sum0=0,sum1=0,sum_m=0,sum_s=0;
590 |   for (int i=0;i<numsamples;i++) {
591 |     sum0+=fabs(src0[i]);
592 |     sum1+=fabs(src1[i]);
593 |     int32_t m=(src0[i]+src1[i]) / 2;
594 |     int32_t s=(src0[i]-src1[i]);
595 | 
596 |     sum_m+=fabs(m);
597 |     sum_s+=fabs(s);
598 |   }
599 |   int64_t c0 = sum0+sum1;
600 |   int64_t c1 = sum_m+sum_s;
601 |   return double(c0) / double(c1);
602 | }
603 | 
604 | void FrameCoder::ApplyMs(int ch0, int ch1, int numsamples)
605 | {
606 |   int32_t *src0=&(samples[ch0][0]);
607 |   int32_t *src1=&(samples[ch1][0]);
608 |   for (int i=0;i<numsamples;i++) {
609 |     int32_t m=(src0[i]+src1[i]) / 2;
610 |     int32_t s=(src0[i]-src1[i]);
611 |     src0[i]=m;
612 |     src1[i]=s;
613 |   }
614 | }
615 | 
616 | void FrameCoder::AnalyseMonoChannel(int ch, int numsamples)
617 | {
618 |   int32_t *src=&(samples[ch][0]);
619 | 
620 |   if (numsamples) {
621 |     int64_t sum=0;
622 |     for (int i=0;i<numsamples;i++) {
623 |         sum += src[i];
624 |     }
625 |     framestats[ch].mean = (int)std::floor(sum / (double)numsamples);
626 | 
627 |     int32_t minval = std::numeric_limits<int32_t>::max();
628 |     int32_t maxval = std::numeric_limits<int32_t>::min();
629 |     for (int i=0;i<numsamples;i++) {
630 |       const int32_t val=src[i];
631 |       if (val>maxval) maxval=val;
632 |       if (val<minval) minval=val;
633 |     }
634 |     framestats[ch].minval = minval;
635 |     framestats[ch].maxval = maxval;
636 |     if (cfg.verbose_level>0) {
637 |       std::cout << "  ch" << ch << " samples=" << numsamples;
638 |       std::cout << ",mean=" << framestats[ch].mean << ",min=" << framestats[ch].minval << ",max=" << framestats[ch].maxval << "\n";
639 |     }
640 |   }
641 | }
642 | 
643 | void Codec::PrintProgress(int samplesprocessed,int totalsamples)
644 | {
645 |   double r=samplesprocessed*100.0/(double)totalsamples;
646 |   std::cout << "  " << samplesprocessed << "/" << totalsamples << ":" << std::setw(6) << miscUtils::ConvertFixed(r,1) << "%\r";
647 | }
648 | 
649 | void Codec::ScanFrames(Sac &mySac)
650 | {
651 |   std::vector<SacProfile::FrameStats> framestats(mySac.getNumChannels());
652 |   std::streampos fsize=mySac.getFileSize();
653 | 
654 |   SacProfile profile_tmp; //create dummy profile
655 |   profile_tmp.LoadBaseProfile();
656 |   const int size_profile_bytes=profile_tmp.coefs.size()*4;
657 | 
658 |   int frame_num=1;
659 |   int coef_hdr_size=0;
660 |   int block_hdr_size=0;
661 |   while (mySac.file.tellg()<fsize) {
662 |     uint8_t buf[12];
663 |     mySac.file.read(reinterpret_cast<char*>(buf),4);
664 |     int numsamples=BitUtils::get32LH(buf);
665 |     std::cout << "Frame " << frame_num << ": " << numsamples << " samples "<< std::endl;
666 | 
667 |     mySac.file.seekg(size_profile_bytes,std::ios_base::cur); // skip profile coefs
668 |     coef_hdr_size += size_profile_bytes;
669 | 
670 | 
671 |     for (int ch=0;ch<mySac.getNumChannels();ch++) {
672 |       int num_bytes=FrameCoder::ReadBlockHeader(mySac.file, framestats, ch);
673 |       block_hdr_size += num_bytes;
674 |       std::cout << "  Channel " << ch << ": " << framestats[ch].blocksize << " bytes\n";
675 |       std::cout << "    Bpn: " << framestats[ch].maxbpn << ", sparse_pcm: " << (framestats[ch].enc_mapped) << std::endl;
676 |       std::cout << "    mean: " << framestats[ch].mean << ", min: " << framestats[ch].minval << ", max: " << framestats[ch].maxval << std::endl;
677 |       mySac.file.seekg(framestats[ch].blocksize, std::ios_base::cur);
678 |     }
679 |     frame_num++;
680 |   }
681 |   std::cout << "Frames   " << (frame_num-1) << '\n';
682 |   std::cout << "Hdr_size " << (coef_hdr_size+block_hdr_size) << " (coefs " << coef_hdr_size << ",block " << block_hdr_size << ")\n";
683 | }
684 | 
685 | 
686 | std::pair<double,double> Codec::AnalyseSparse(std::span<const int32_t> buf)
687 | {
688 |   SparsePCM spcm;
689 |   spcm.Analyse(buf);
690 | 
691 |   return {spcm.fraction_used,spcm.fraction_cost};
692 | }
693 | 
694 | void Codec::PushState(std::vector<Codec::tsub_frame> &sub_frames,Codec::tsub_frame &curframe,int min_frame_length,int block_state=-1,int samples_block=0)
695 | {
696 |   if (block_state==curframe.state)
697 |     curframe.length+=samples_block;
698 |   else {
699 |     if (curframe.length < min_frame_length && sub_frames.size()) // extend
700 |     {
701 |       sub_frames.back().length+=curframe.length;
702 |     } else {
703 |       if (opt_.verbose_level>1)
704 |         std::cout << "push subframe of length " << curframe.length << " samples\n";
705 |       sub_frames.push_back(curframe);
706 | 
707 |       if (samples_block) {
708 |         curframe.state=block_state; // set new blockstate
709 |         curframe.start+=curframe.length;
710 |         curframe.length=samples_block;
711 |       }
712 |     }
713 |   }
714 | }
715 | 
716 | std::vector<Codec::tsub_frame> Codec::Analyse(const std::vector <std::vector<int32_t>>&samples,int blocksamples,int min_frame_length,int samples_read)
717 | {
718 |   std::vector<Codec::tsub_frame> sub_frames;
719 | 
720 |   int samples_processed=0;
721 |   int nblock=0;
722 | 
723 |   Codec::tsub_frame curframe;
724 | 
725 |   while (samples_processed < samples_read)
726 |   {
727 |     int samples_left = samples_read-samples_processed;
728 |     int samples_block = std::min(blocksamples,samples_left);
729 |     double avg_cost=0,avg_used=0;
730 |     for (unsigned ch=0;ch<samples.size();ch++)
731 |     {
732 |       auto [fused,fcost]=AnalyseSparse(std::span{&samples[ch][samples_processed],static_cast<unsigned>(samples_block)});
733 |       avg_cost+=fcost;
734 |       avg_used+=fused;
735 |     }
736 |     avg_cost /= (double)samples.size();
737 |     avg_used /= (double)samples.size();
738 |     int block_state=(avg_cost>1.35); // high threshold
739 |     if (opt_.verbose_level>1) {
740 |       std::cout << "  analyse block " << nblock << ' ' << samples_block << " sparse " << block_state << " (" << avg_cost << "," << avg_used << ")\n";
741 |     }
742 | 
743 |     if (nblock==0)
744 |     {
745 |       curframe.state = block_state;
746 |       curframe.length=samples_block;
747 |       curframe.start=0;
748 |     } else
749 |       PushState(sub_frames,curframe,min_frame_length,block_state,samples_block);
750 | 
751 |     samples_processed += samples_block;
752 |     nblock++;
753 |   }
754 | 
755 |   if (curframe.length)
756 |     PushState(sub_frames,curframe,min_frame_length);
757 | 
758 |   if (samples_processed != samples_read)
759 |     std::cerr << "  warning: samples_processed != samples_read (" << samples_processed << "," << samples_read << ")\n";
760 | 
761 |   if (opt_.verbose_level>1) std::cout << "sub_frames\n";
762 |   int64_t nlen=0;
763 |   for (const auto &frame : sub_frames) {
764 |     if (opt_.verbose_level>1) std::cout << "  " << frame.start << ' ' << frame.length << ' ' << frame.state << '\n';
765 |     nlen+=frame.length;
766 |   }
767 |   if (nlen!=samples_read)
768 |     std::cerr << "  warning: nlen != samples_read\n";
769 |   return sub_frames;
770 | }
771 | 
772 | int Codec::EncodeFile(Wav &myWav,Sac &mySac)
773 | {
774 |   uint32_t max_framesize=static_cast<uint32_t>(opt_.max_framelen)*myWav.getSampleRate();
775 | 
776 |   const int numchannels=myWav.getNumChannels();
777 | 
778 |   FrameCoder myFrame(numchannels,max_framesize,opt_);
779 | 
780 |   mySac.mcfg.max_framelen = opt_.max_framelen;
781 | 
782 |   mySac.WriteSACHeader(myWav);
783 |   std::streampos hdrpos = mySac.file.tellg();
784 |   mySac.WriteMD5(myWav.md5ctx.digest);
785 |   myWav.InitFileBuf(max_framesize);
786 | 
787 |   Timer gtimer,ltimer;
788 |   double time_prd=0,time_enc=0;
789 | 
790 |   gtimer.start();
791 |   int samplescoded=0;
792 |   int samplestocode=myWav.getNumSamples();
793 |   std::vector<std::vector<int32_t>> csamples(myWav.getNumChannels(),std::vector<int32_t>(max_framesize));
794 | 
795 |   while (samplestocode>0) {
796 |       int samplesread=myWav.ReadSamples(csamples,max_framesize);
797 | 
798 |       std::vector<Codec::tsub_frame> sub_frames;
799 |       if (opt_.adapt_block) {
800 |         int block_len=myWav.getSampleRate()*3;
801 |         int min_frame_len=myWav.getSampleRate()*3;
802 |         sub_frames=Analyse(csamples,block_len,min_frame_len,samplesread);
803 |       } else {
804 |         sub_frames.push_back({0,0,samplesread});
805 |       }
806 | 
807 |       for (auto &subframe:sub_frames)
808 |       {
809 |         if (opt_.verbose_level)
810 |           std::cout << "frame " << subframe.start << " state " << subframe.state << " len " << subframe.length << '\n';
811 | 
812 |         for (int ch=0;ch<myWav.getNumChannels();ch++)
813 |           std::copy_n(&csamples[ch][subframe.start],subframe.length,&myFrame.samples[ch][0]);
814 | 
815 |         myFrame.SetNumSamples(subframe.length);
816 | 
817 |         ltimer.start();myFrame.Predict();ltimer.stop();time_prd+=ltimer.elapsedS();
818 |         ltimer.start();myFrame.Encode();ltimer.stop();time_enc+=ltimer.elapsedS();
819 |         myFrame.WriteEncoded(mySac);
820 | 
821 |         samplescoded+=subframe.length;
822 |         PrintProgress(samplescoded,myWav.getNumSamples());
823 |         samplestocode-=subframe.length;
824 |       }
825 |   }
826 |   MD5::Finalize(&myWav.md5ctx);
827 |   gtimer.stop();
828 |   double time_total=gtimer.elapsedS();
829 |   if (time_total>0.)   {
830 |      double rprd=time_prd*100./time_total;
831 |      double renc=time_enc*100./time_total;
832 |      std::cout << "\n  Timing:  pred " << miscUtils::ConvertFixed(rprd,2) << "%, ";
833 |      std::cout << "enc " << miscUtils::ConvertFixed(renc,2) << "%, ";
834 |      std::cout << "misc " << miscUtils::ConvertFixed(100.-rprd-renc,2) << "%" << std::endl;
835 |   }
836 |   std::cout << "  MD5:     ";
837 |   for (auto x : myWav.md5ctx.digest) std::cout << std::hex << (int)x;
838 |   std::cout << std::dec << '\n';
839 | 
840 |   std::streampos eofpos = mySac.file.tellg();
841 |   mySac.file.seekg(hdrpos);
842 |   mySac.WriteMD5(myWav.md5ctx.digest);
843 |   mySac.file.seekg(eofpos);
844 |   return 0;
845 | }
846 | 
847 | void Codec::DecodeFile(Sac &mySac,Wav &myWav)
848 | {
849 |   const Sac::sac_cfg &file_cfg=mySac.mcfg;
850 |   myWav.InitFileBuf(file_cfg.max_framesize);
851 |   mySac.UnpackMetaData(myWav);
852 |   myWav.WriteHeader();
853 | 
854 |   opt_.max_framelen=file_cfg.max_framelen;
855 |   FrameCoder myFrame(mySac.getNumChannels(),file_cfg.max_framesize,opt_);
856 | 
857 |   int64_t data_nbytes=0;
858 |   int samplestodecode=mySac.getNumSamples();
859 |   int samplesdecoded=0;
860 |   while (samplestodecode>0) {
861 |     myFrame.ReadEncoded(mySac);
862 |     myFrame.Decode();
863 |     myFrame.Unpredict();
864 |     data_nbytes += myWav.WriteSamples(myFrame.samples,myFrame.GetNumSamples());
865 | 
866 |     samplesdecoded+=myFrame.GetNumSamples();
867 |     PrintProgress(samplesdecoded,myWav.getNumSamples());
868 |     samplestodecode-=myFrame.GetNumSamples();
869 |   }
870 |   // pad odd sized data chunk
871 |   if (data_nbytes&1) myWav.WriteData(std::vector<uint8_t>{0},1);
872 |   myWav.WriteHeader();
873 | }
874 | 


--------------------------------------------------------------------------------
/src/libsac/libsac.h:
--------------------------------------------------------------------------------
  1 | #ifndef CODEC_H
  2 | #define CODEC_H
  3 | 
  4 | #include "../file/wav.h"
  5 | #include "../file/sac.h"
  6 | #include "cost.h"
  7 | #include "profile.h"
  8 | #include "../opt/dds.h"
  9 | #include "../opt/de.h"
 10 | #include "../opt/cma.h"
 11 | 
 12 | class FrameCoder {
 13 |   public:
 14 |     enum SearchCost {L1,RMS,Entropy,Golomb,Bitplane};
 15 |     enum SearchMethod {DDS,DE,CMA};
 16 | 
 17 |     typedef std::vector <std::vector<int32_t>> tch_samples;
 18 | 
 19 |     struct toptim_cfg {
 20 |       OptDDS::DDSCfg dds_cfg;
 21 |       OptDE::DECfg de_cfg;
 22 |       OptCMA::CMACfg cma_cfg;
 23 |       int reset=0;
 24 |       double fraction=0;
 25 |       int maxnfunc=0;
 26 |       int num_threads=0;
 27 |       double sigma=0.2;
 28 |       int optk=4;
 29 |       SearchMethod optimize_search=SearchMethod::DDS;
 30 |       SearchCost optimize_cost=SearchCost::Entropy;
 31 |     };
 32 |     struct tsac_cfg {
 33 |       int optimize=0;
 34 |       int sparse_pcm=1;
 35 |       int zero_mean=1;
 36 |       int max_framelen=20;
 37 |       int verbose_level=0;
 38 |       int stereo_ms=0;
 39 |       int mt_mode=2;
 40 |       int adapt_block=1;
 41 | 
 42 |       toptim_cfg ocfg;
 43 |       SacProfile profiledata;
 44 |     };
 45 |     FrameCoder(int numchannels,int framesize,const tsac_cfg &sac_cfg);
 46 |     void SetNumSamples(int nsamples){numsamples_=nsamples;};
 47 |     int GetNumSamples(){return numsamples_;};
 48 |     void Predict();
 49 |     void Unpredict();
 50 |     void Encode();
 51 |     void Decode();
 52 |     void WriteEncoded(AudioFile &fout);
 53 |     void ReadEncoded(AudioFile &fin);
 54 |     std::vector <std::vector<int32_t>>samples,error,s2u_error,s2u_error_map,pred;
 55 |     std::vector <BufIO> encoded,enc_temp1,enc_temp2;
 56 |     std::vector <SacProfile::FrameStats> framestats;
 57 | 
 58 |     static int WriteBlockHeader(std::fstream &file, const std::vector<SacProfile::FrameStats> &framestats, int ch);
 59 |     static int ReadBlockHeader(std::fstream &file, std::vector<SacProfile::FrameStats> &framestats, int ch);
 60 |   private:
 61 |     void CnvError_S2U(const tch_samples &error,int numsamples);
 62 |     void SetParam(Predictor::tparam &param,const SacProfile &profile,bool optimize=false);
 63 |     void PrintProfile(SacProfile &profile);
 64 |     void EncodeProfile(const SacProfile &profile,std::vector <uint8_t>&buf);
 65 |     void DecodeProfile(SacProfile &profile,const std::vector <uint8_t>&buf);
 66 |     void AnalyseMonoChannel(int ch, int numsamples);
 67 |     double AnalyseStereoChannel(int ch0, int ch1, int numsamples);
 68 |     void ApplyMs(int ch0, int ch1, int numsamples);
 69 |     //void InterChannel(int ch0,int ch1,int numsamples);
 70 |     int EncodeMonoFrame_Normal(int ch,int numsamples,BufIO &buf);
 71 |     int EncodeMonoFrame_Mapped(int ch,int numsamples,BufIO &buf);
 72 |     void Optimize(const FrameCoder::toptim_cfg &ocfg,SacProfile &profile,const std::vector<int>&params_to_optimize);
 73 |     double GetCost(const CostFunction *func,const tch_samples &samples,std::size_t samples_to_optimize) const;
 74 |     void PredictFrame(const SacProfile &profile,tch_samples &error,int from,int numsamples,bool optimize);
 75 |     void UnpredictFrame(const SacProfile &profile,int numsamples);
 76 |     double CalcRemapError(int ch, int numsamples);
 77 |     void EncodeMonoFrame(int ch,int numsamples);
 78 |     void DecodeMonoFrame(int ch,int numsamples);
 79 |     int numchannels_,framesize_,numsamples_;
 80 |     int profile_size_bytes_;
 81 |     SacProfile base_profile;
 82 |     tsac_cfg cfg;
 83 | };
 84 | 
 85 | class Codec {
 86 |   enum ErrorCode {COULD_NOT_WRITE};
 87 |   struct tsub_frame {
 88 |     int state=-1;
 89 |     int start=0;
 90 |     int length=0;
 91 |   };
 92 |   public:
 93 |     Codec(){};
 94 |     Codec(FrameCoder::tsac_cfg &cfg):opt_(cfg) {};
 95 |     int EncodeFile(Wav &myWav,Sac &mySac);
 96 |     //void EncodeFile(Wav &myWav,Sac &mySac,int profile,int optimize,int sparse_pcm);
 97 |     void DecodeFile(Sac &mySac,Wav &myWav);
 98 |     void ScanFrames(Sac &mySac);
 99 |   private:
100 |     std::vector<Codec::tsub_frame> Analyse(const std::vector <std::vector<int32_t>>&samples,int blocksamples,int min_frame_length,int samples_read);
101 |     void PushState(std::vector<Codec::tsub_frame> &sub_frames,Codec::tsub_frame &curframe,int min_frame_length,int block_state,int samples_block);
102 |     std::pair<double,double> AnalyseSparse(std::span<const int32_t> buf);
103 |     void PrintProgress(int samplesprocessed,int totalsamples);
104 |     FrameCoder::tsac_cfg opt_;
105 |     //int framesize;
106 | };
107 | 
108 | #endif
109 | 


--------------------------------------------------------------------------------
/src/libsac/map.cpp:
--------------------------------------------------------------------------------
  1 | #include "map.h"
  2 | 
  3 | MapEncoder::MapEncoder(RangeCoderSH &rc,std::vector <bool>&usedl,std::vector <bool>&usedh)
  4 | :rc(rc),mixl(4,NMixLogistic(5)),mixh(4,NMixLogistic(5)),finalmix(2),ul(usedl),uh(usedh)
  5 | {
  6 | }
  7 | 
  8 | int MapEncoder::PredictLow(int i)
  9 | {
 10 |   int ctx1=ul[i-1];
 11 |   int ctx2=uh[i-1];
 12 |   int ctx3=i>1?ul[i-2]:0;
 13 | 
 14 |   pc1=&cnt[ctx1];
 15 |   pc2=&cnt[2+ctx2];
 16 |   pc3=&cnt[4+(ctx1<<1)+ctx3];
 17 |   pc4=&cnt[8+(ctx1<<1)+ctx2];
 18 | 
 19 |   int sctx=ul[i-1];
 20 |   if (i>1) sctx+=(ul[i-2]<<1);
 21 |   if (i>2) sctx+=(ul[i-3]<<2);
 22 |   if (i>3) sctx+=(ul[i-4]<<3);
 23 |   px=&cctx[sctx];
 24 | 
 25 |   mix=&mixl[ctx1+(ctx3<<1)];
 26 |   std::vector <int>p={pc1->p1,pc2->p1,pc3->p1,pc4->p1,px->p1};
 27 |   return mix->Predict(p);
 28 | }
 29 | 
 30 | int MapEncoder::PredictHigh(int i)
 31 | {
 32 |   int ctx1=uh[i-1];
 33 |   int ctx2=ul[i];
 34 |   int ctx3=i>1?uh[i-2]:0;
 35 |   //int n=0;
 36 |   pc1=&cnt[12+ctx1];
 37 |   pc2=&cnt[12+2+ctx2];
 38 |   pc3=&cnt[12+4+(ctx1<<1)+ctx3];
 39 |   pc4=&cnt[12+8+(ctx1<<1)+ctx2];
 40 | 
 41 |   int sctx=uh[i-1];
 42 |   if (i>1) sctx+=(uh[i-2]<<1);
 43 |   if (i>2) sctx+=(uh[i-3]<<2);
 44 |   if (i>3) sctx+=(uh[i-4]<<3);
 45 |   px=&cctx[32+sctx];
 46 |   mix=&mixh[ctx1+(ctx3<<1)];
 47 |   std::vector <int>p={pc1->p1,pc2->p1,pc3->p1,pc4->p1,px->p1};
 48 |   return mix->Predict(p);
 49 | }
 50 | 
 51 | void MapEncoder::Update(int bit)
 52 | {
 53 |   pc1->update(bit,cnt_upd_rate);
 54 |   pc2->update(bit,cnt_upd_rate);
 55 |   pc3->update(bit,cnt_upd_rate);
 56 |   pc4->update(bit,cnt_upd_rate);
 57 |   px->update(bit,cnt_upd_rate);
 58 |   mix->Update(bit,mix_upd_rate);
 59 | }
 60 | 
 61 | int MapEncoder::PredictSSE(int p1,int ctx)
 62 | {
 63 |   std::vector <int>vp={sse[ctx].Predict(p1),p1};
 64 |   return finalmix.Predict(vp);
 65 | }
 66 | 
 67 | void MapEncoder::UpdateSSE(int bit,int ctx)
 68 | {
 69 |   sse[ctx].Update(bit,cntsse_upd_rate);
 70 |   finalmix.Update(bit,mixsse_upd_rate);
 71 | }
 72 | 
 73 | void MapEncoder::Encode()
 74 | {
 75 |   for (int i=1;i<=1<<15;i++) {
 76 |     int bit=ul[i];
 77 | 
 78 |     rc.EncodeBitOne(PredictSSE(PredictLow(i),0),bit);
 79 |     Update(bit);
 80 |     UpdateSSE(bit,0);
 81 | 
 82 |     bit=uh[i];
 83 |     rc.EncodeBitOne(PredictSSE(PredictHigh(i),0),bit);
 84 |     Update(bit);
 85 |     UpdateSSE(bit,0);
 86 |   }
 87 | }
 88 | 
 89 | void MapEncoder::Decode()
 90 | {
 91 |   for (int i=1;i<=1<<15;i++) {
 92 |     int bit=rc.DecodeBitOne(PredictSSE(PredictLow(i),0));
 93 |     Update(bit);
 94 |     ul[i]=bit;
 95 |     UpdateSSE(bit,0);
 96 | 
 97 |     bit=rc.DecodeBitOne(PredictSSE(PredictHigh(i),0));
 98 |     Update(bit);
 99 |     uh[i]=bit;
100 |     UpdateSSE(bit,0);
101 |   }
102 | }
103 | 
104 | Remap::Remap()
105 | :scale(1<<15),usedl(scale+1),usedh(scale+1)
106 | {
107 | }
108 | 
109 | void Remap::Reset()
110 | {
111 |   std::fill(begin(usedl),end(usedl),0);
112 |   std::fill(begin(usedh),end(usedh),0);
113 |   vmin=vmax=0;
114 | }
115 | 
116 | double Remap::Compare(const Remap &cmap)
117 | {
118 |   int diff=0;
119 |   for (int i=1;i<=scale;i++) {
120 |     if (usedl[i]!=cmap.usedl[i]) diff++;
121 |     if (usedh[i]!=cmap.usedh[i]) diff++;
122 |   }
123 |   return diff*100./double(2*scale);
124 | }
125 | 
126 | void Remap::Analyse(int32_t *src,int numsamples)
127 | {
128 |   for (int i=0;i<numsamples;i++) {
129 |     int val=src[i];
130 |     if (val>0) {
131 |       if (val>scale) std::cout << "val too large: " << val << std::endl;
132 |       else {
133 |         if (val>vmax) vmax=val;
134 |         usedh[val]=true;
135 |       }
136 |     } else if (val<0) {
137 |       val=(-val);
138 |       if (val>scale) std::cout << "val too large: " << val << std::endl;
139 |       else {
140 |         if (val>vmin) vmin=val;
141 |         usedl[val]=true;
142 |       }
143 |     }
144 |   }
145 |   mapl.resize((1<<15)+1);
146 |   maph.resize((1<<15)+1);
147 |   int j=1;
148 |   for (int i=1;i<=(1<<15);i++) {
149 |     mapl[i]=j;
150 |     if (usedl[i]) {j++;};
151 |   }
152 |   j=1;
153 |   for (int i=1;i<=(1<<15);i++) {
154 |     maph[i]=j;
155 |     if (usedh[i]) {j++;};
156 |   }
157 | }
158 | 
159 | bool Remap::isUsed(int val)
160 | {
161 |   if (val>scale) return false;
162 |   if (val<-scale) return false;
163 |   if (val>0) return usedh[val];
164 |   if (val<0) return usedl[-val];
165 |   return true;
166 | }
167 | 
168 | int32_t Remap::Map2(int32_t pred)
169 | {
170 |   if (pred>0) return maph[pred];
171 |   else if (pred<0) return -mapl[-pred];
172 |   else return 0;
173 | }
174 | 
175 | int32_t Remap::Map(int32_t pred,int32_t err)
176 | {
177 |   int sgn=1;
178 |   if (err==0) return 0;
179 |   if (err<0) {err=-err;sgn=-1;};
180 | 
181 |   int merr=0;
182 |   for (int i=1;i<=err;i++) {
183 |     if (isUsed(pred+(sgn*i))) merr++;
184 |   }
185 |   return sgn*merr;
186 | }
187 | 
188 | int32_t Remap::Unmap(int32_t pred,int32_t merr)
189 | {
190 |   int sgn=1;
191 |   if (merr==0) return 0;
192 |   if (merr<0) {merr=-merr;sgn=-1;};
193 | 
194 |   int err=1;
195 |   int terr=0;
196 |   while (1) {
197 |     if (isUsed(pred+(sgn*err))) terr++;
198 |     if (terr==merr) break;
199 |     err++;
200 |   }
201 |   return sgn*err;
202 | }
203 | 


--------------------------------------------------------------------------------
/src/libsac/map.h:
--------------------------------------------------------------------------------
 1 | #ifndef MAP_H
 2 | #define MAP_H
 3 | 
 4 | #include "../model/range.h"
 5 | #include "../model/counter.h"
 6 | #include "../model/mixer.h"
 7 | #include "../model/sse.h"
 8 | #include <vector>
 9 | 
// MapEncoder: codes two caller-owned bit vectors (ul, uh) through a
// RangeCoderSH using counters, logistic mixers and an SSE stage.
// The object only holds references to the coder and the vectors; they must
// outlive it.
class MapEncoder {
  // adaptation rates passed to the counter / SSE / mixer update calls
  const int cnt_upd_rate=500;
  const int cntsse_upd_rate=300;
  const int mix_upd_rate=1000;
  const int mixsse_upd_rate=500;
  public:
    MapEncoder(RangeCoderSH &rc,std::vector <bool>&usedl,std::vector <bool>&usedh);
    void Encode();
    void Decode();
  private:
    int PredictLow(int i);
    int PredictHigh(int i);
    void Update(int bit);
    int PredictSSE(int p1,int ctx);
    void UpdateSSE(int bit,int ctx);
    RangeCoderSH &rc;
    LinearCounter16 cnt[24];
    LinearCounter16 cctx[256];
    // models selected by the most recent PredictLow/PredictHigh call;
    // not initialised until one of those calls has run
    LinearCounter16 *pc1,*pc2,*pc3,*pc4,*px;
    std::vector <NMixLogistic> mixl,mixh;
    NMixLogistic finalmix;
    // mixer selected by the most recent PredictLow/PredictHigh call
    NMixLogistic *mix;
    SSENL<32> sse[32];
    std::vector <bool>&ul,&uh;
};
35 | 
// Remap: records which sample magnitudes occur (separately for negative and
// positive values) and offers conversions between plain prediction errors and
// errors counted in "used values only".
class Remap {
  public:
    Remap();
    void Reset();
    double Compare(const Remap &cmap);
    void Analyse(int32_t *src,int numsamples);
    bool isUsed(int val);
    int32_t Map2(int32_t pred);
    int32_t Map(int32_t pred,int32_t err);
    int32_t Unmap(int32_t pred,int32_t merr);
    // scale: largest magnitude tracked; vmin/vmax: largest negative/positive
    // magnitude seen by Analyse (both stored as non-negative numbers)
    int scale,vmin,vmax;
    // usage flags indexed by magnitude: usedl for negative, usedh for positive values
    std::vector <bool>usedl,usedh;
    // rank tables filled by Analyse
    std::vector<int32_t> mapl,maph;
};
50 | 
51 | 
52 | #endif // MAP_H
53 | 


--------------------------------------------------------------------------------
/src/libsac/pred.cpp:
--------------------------------------------------------------------------------
 1 | #include "pred.h"
 2 | #include <cassert>
 3 | 
 4 | Predictor::Predictor(const tparam &p)
 5 | :p(p),nA(p.nA),nB(p.nB),nM0(p.nM0),nS0(p.nS0),nS1(p.nS1),
 6 | ols{OLS(nA+nM0,p.k,p.lambda0,p.ols_nu0,p.beta_sum0,p.beta_pow0,p.beta_add0),
 7 | OLS(nB+nS0+nS1,p.k,p.lambda1,p.ols_nu1,p.beta_sum1,p.beta_pow1,p.beta_add1)},
 8 | lms{Cascade(p.vn0,p.vmu0,p.vmudecay0,p.vpowdecay0,p.mu_mix0,p.mu_mix_beta0,p.lm_n,p.lm_alpha),
 9 | Cascade(p.vn1,p.vmu1,p.vmudecay1,p.vpowdecay1,p.mu_mix1,p.mu_mix_beta1,p.lm_n,p.lm_alpha)},
10 | be{BiasEstimator(p.bias_mu0,p.bias_scale0),
11 |    BiasEstimator(p.bias_mu1,p.bias_scale1)}
12 | {
13 |   for (int i=0;i<2;i++)
14 |     p_lpc[i] = p_lms[i] = 0.0;
15 | }
16 | 
17 | void Predictor::fillbuf_ch0(const int32_t *src0,int idx0,const int32_t *src1,int idx1)
18 | {
19 |   vec1D &buf=ols[0].x;
20 |   int bp=0;
21 |   for (int i=idx0-nA;i<idx0;i++) buf[bp++]=(i>=0)?src0[i]:0.0;
22 |   for (int i=idx1-nM0;i<idx1;i++) buf[bp++]=(i>=0)?src1[i]:0.0;
23 | }
24 | 
25 | void Predictor::fillbuf_ch1(const int32_t *src0,const int32_t *src1,int idx1,int numsamples)
26 | {
27 |   vec1D &buf=ols[1].x;
28 |   int bp=0;
29 |   for (int i=idx1-nB;i<idx1;i++) buf[bp++]=(i>=0)?src1[i]:0.0;;
30 |   for (int i=idx1-nS0;i<idx1+nS1;i++) buf[bp++]=(i>=0 && i<numsamples)?src0[i]:0.0;
31 | }
32 | 
33 | double Predictor::predict(int ch)
34 | {
35 |   p_lpc[ch]=ols[ch].Predict();
36 |   p_lms[ch]=lms[ch].Predict();
37 |   return be[ch].Predict(p_lpc[ch]+p_lms[ch]);
38 | }
39 | 
40 | void Predictor::update(int ch,double val)
41 | {
42 |   ols[ch].Update(val);
43 |   lms[ch].Update(val-p_lpc[ch]);
44 |   be[ch].Update(val);
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/src/libsac/pred.h:
--------------------------------------------------------------------------------
 1 | #ifndef PRED_H
 2 | #define PRED_H
 3 | 
 4 | #include "../pred/lms.h"
 5 | #include "../pred/lms_cascade.h"
 6 | #include "../pred/lpc.h"
 7 | #include "../pred/bias.h"
 8 | 
// Predictor: two-channel sample predictor. Each channel owns an OLS stage,
// an LMS cascade and a bias estimator; predict()/update() drive one channel.
class Predictor {
  public:
    // Full parameter set; fields ending in 0/1 belong to channel 0/1.
    struct tparam {
      // input counts used to size the OLS stages (nA+nM0 and nB+nS0+nS1); k is passed to both
      int nA,nB,nM0,nS0,nS1,k;
      // per-channel vectors handed to the LMS cascades
      std::vector <int>vn0,vn1;
      std::vector <double>vmu0,vmu1;
      std::vector <double>vmudecay0,vmudecay1;
      std::vector <double>vpowdecay0,vpowdecay1;
      double lambda0,lambda1,ols_nu0,ols_nu1,mu_mix0,mu_mix1,mu_mix_beta0,mu_mix_beta1;
      double beta_sum0,beta_pow0,beta_add0;
      double beta_sum1,beta_pow1,beta_add1;
      int ch_ref;
      // bias estimator settings
      double bias_mu0,bias_mu1;
      int bias_scale0,bias_scale1;
      // shared by both LMS cascades
      int lm_n;
      double lm_alpha;
    };
    explicit Predictor(const tparam &p);

    double predict(int ch);
    void update(int ch,double val);

    void fillbuf_ch0(const int32_t *src0,int idx0,const int32_t *src1,int idx1);
    void fillbuf_ch1(const int32_t *src0,const int32_t *src1,int idx1,int numsamples);

    // copy of the construction parameters; the ints below are initialised before
    // ols, whose constructor arguments read them
    tparam p;
    int nA,nB,nM0,nS0,nS1;

    OLS ols[2];
    Cascade lms[2];
    BiasEstimator be[2];
    // stage outputs cached by predict() for use in update()
    double p_lpc[2],p_lms[2];
};
42 | 
43 | #endif // PRED_H
44 | 


--------------------------------------------------------------------------------
/src/libsac/profile.cpp:
--------------------------------------------------------------------------------
 1 | #include "profile.h"
 2 | 
 3 | int SacProfile::LoadBaseProfile()
 4 | {
 5 |   const int mo_lpc=32; // maximum ols order
 6 |   const int wbits_lms=13; // maximum nlms order 2^wbits_lms
 7 |   SacProfile &profile=*this;
 8 | 
 9 |   profile.Init(53);
10 | 
11 |   profile.Set(0,0.99,0.9999,0.998);
12 |   profile.Set(1,0.001,10.0,0.1);
13 | 
14 |   profile.Set(2,0.001,1.0,0.1);//mu0
15 |   profile.Set(3,0.001,1.0,0.12);//mu1
16 |   profile.Set(4,0.001,1.0,0.06);//mu2
17 |   profile.Set(5,0.001,1.0,0.04);//mu3
18 | 
19 |   profile.Set(6,0.98,1,1.0); // mu-decay
20 |   profile.Set(7,0.0,1.0,0.8);   // pow-decay
21 |   profile.Set(8,0.0,1.0,0.8);   // pow-decay
22 | 
23 |   profile.Set(10,0.0001,0.008,0.002);//mu-mix
24 |   profile.Set(11,0.8,0.9999,0.95);//mu-mix-beta
25 | 
26 |   profile.Set(12,0.99,0.9999,0.998);
27 |   profile.Set(13,0.001,10.0,0.1);
28 | 
29 |   profile.Set(14,0.001,1.0,0.1);//mu0
30 |   profile.Set(15,0.001,1.0,0.12);//mu1
31 |   profile.Set(16,0.001,1.0,0.06);//mu2
32 |   profile.Set(17,0.001,1.0,0.04);//mu3
33 | 
34 |   profile.Set(18,0.98,1,1.0); // mu-decay
35 |   profile.Set(19,0.0,1.0,0.8);   // pow-decay
36 |   profile.Set(20,0.0,1.0,0.8);   // pow-decay
37 |   profile.Set(21,0.0,1.0,0.8);   // pow-decay
38 |   profile.Set(22,0.0001,0.008,0.002);//mu-mix
39 |   profile.Set(23,0.8,0.9999,0.95);//mu-mix-beta*/
40 | 
41 |   profile.Set(24,4,mo_lpc,16);//nA
42 |   profile.Set(25,4,mo_lpc,16);//nB
43 |   profile.Set(26,0,mo_lpc,8);//nS0
44 |   profile.Set(27,-mo_lpc,mo_lpc,8);//nS1
45 |   profile.Set(9,0,mo_lpc,0); //nM0
46 | 
47 |   profile.Set(28,256,1<<wbits_lms,1280);
48 |   profile.Set(29,32,1<<(wbits_lms-1),256);
49 |   profile.Set(30,4,1<<(wbits_lms-2),32);
50 | 
51 |   profile.Set(31,256,1<<wbits_lms,1280);
52 |   profile.Set(32,32,1<<(wbits_lms-1),256);
53 |   profile.Set(33,4,1<<(wbits_lms-2),32);
54 | 
55 |   profile.Set(34,0,1,0.6);
56 |   profile.Set(35,0.1,2,0.8);
57 |   profile.Set(36,0,10,2);
58 | 
59 |   profile.Set(37,2,1<<(wbits_lms-3),4); //stage 4
60 |   profile.Set(38,2,1<<(wbits_lms-3),4);
61 | 
62 |   profile.Set(39,0.98,1,1.0); // mu-decay
63 |   profile.Set(40,0.98,1,1.0); // mu-decay
64 | 
65 |   profile.Set(41,1,10,4); //stage-5 lm
66 |   profile.Set(42,0.1,10.0,5); // shape parameter gamma
67 | 
68 | 
69 |   profile.Set(43,0.001,0.005,0.0015);//bc-mu0
70 |   profile.Set(44,0.001,0.005,0.0015);//bc-mu1
71 | 
72 |   profile.Set(45,4,10,5);//bias scale in bits
73 | 
74 |   profile.Set(46,0.98,1,1.0); // mu_decay
75 |   profile.Set(48,0.98,1,1.0); // mu_decay
76 |   profile.Set(50,0.0,1.0,0.8); //pow_decay
77 |   profile.Set(47,0.98,1,1.0); // mu_decay
78 |   profile.Set(49,0.98,1,1.0); // mu_decay
79 |   profile.Set(51,0.0,1.0,0.8); //pow_decay
80 |   profile.Set(52,0.0,1.0,0.8); //pow_decay
81 | 
82 |   return profile.coefs.size();
83 | }
84 | 


--------------------------------------------------------------------------------
/src/libsac/profile.h:
--------------------------------------------------------------------------------
  1 | #ifndef PROFILE_H
  2 | #define PROFILE_H
  3 | 
  4 | #include <vector>
  5 | #include <variant>
  6 | #include "map.h"
  7 | #include "pred.h"
  8 | 
  9 | class SACProfile {
 10 |   public:
 11 |     struct FrameStats {
 12 |       int maxbpn,maxbpn_map;
 13 |       bool enc_mapped;
 14 |       int32_t blocksize,minval,maxval,mean;
 15 |       Remap mymap;
 16 |     };
 17 |     struct elem {
 18 |       float vmin,vmax;
 19 |       std::variant<float,uint16_t>val;
 20 |     };
 21 |     void add_float(float vmin,float vmax,float val) {
 22 |       vparam.push_back(elem{vmin,vmax,val});
 23 |     }
 24 |     float get_float()
 25 |     {
 26 |       float val = get<float>(vparam[index].val);
 27 |       index++;
 28 |       return val;
 29 |     }
 30 |     void add_ols() {
 31 |       add_float(0.99,0.9999,0.998); // lambda
 32 |       add_float(0.001,10.0,0.001); // ols-nu
 33 |       add_float(4,32,16); //
 34 |     }
 35 |     void get_ols(Predictor::tparam &param)
 36 |     {
 37 |       param.lambda0 = get_float();
 38 |       param.ols_nu0 = get_float();
 39 |       param.nA = get_float();
 40 |     }
 41 | 
 42 |     void set_profile()
 43 |     {
 44 |       add_ols();
 45 |     }
 46 |     void get_profile(Predictor::tparam &param,bool optimize=false)
 47 |     {
 48 |       if (optimize) param.k=4;
 49 |       else param.k=1;
 50 |       index = 0;
 51 |       get_ols(param);
 52 |     }
 53 |     SACProfile()
 54 |     {
 55 |     }
 56 |     std::vector<elem> vparam;
 57 |   protected:
 58 |     int index;
 59 | };
 60 | 
 61 | class SacProfile {
 62 |   public:
 63 |     struct FrameStats {
 64 |       int maxbpn,maxbpn_map;
 65 |       bool enc_mapped;
 66 |       int32_t blocksize,minval,maxval,mean;
 67 |       Remap mymap;
 68 |     };
 69 | 
 70 |     struct coef {
 71 |       float vmin,vmax,vdef;
 72 |     };
 73 | 
 74 |       SacProfile(){};
 75 |       void Init(int numcoefs)
 76 |       {
 77 |          coefs.resize(numcoefs);
 78 |       }
 79 |       SacProfile(int numcoefs)
 80 |       :coefs(numcoefs)
 81 |       {
 82 | 
 83 |       }
 84 |       int LoadBaseProfile();
 85 |       std::size_t get_size() {return coefs.size();};
 86 |       void Set(int num,double vmin,double vmax,double vdef)
 87 |       {
 88 |         if (num>=0 && num< static_cast<int>(coefs.size())) {
 89 |           coefs[num].vmin=vmin;
 90 |           coefs[num].vmax=vmax;
 91 |           coefs[num].vdef=vdef;
 92 |         }
 93 |       }
 94 |       void Set(int num,const std::vector<float>&x)
 95 |       {
 96 |         if (num>=0 && num<static_cast<int>(coefs.size()) && (x.size()>=3)) {
 97 |           coefs[num].vmin=x[0];
 98 |           coefs[num].vmax=x[1];
 99 |           coefs[num].vdef=x[2];
100 |         }
101 |       }
102 |       float Get(std::size_t num) const {
103 |         if (num<coefs.size()) {
104 |             return coefs[num].vdef;
105 |         } else return 0.;
106 |       }
107 |       std::vector <coef> coefs;
108 | };
109 | 
110 | #endif // PROFILE_H
111 | 


--------------------------------------------------------------------------------
/src/libsac/sparse.h:
--------------------------------------------------------------------------------
 1 | #ifndef SPARSEPCM_H
 2 | #define SPARSEPCM_H
 3 | 
 4 | class SimplePred {
 5 |   public:
 6 |     SimplePred()
 7 |     :lb(0)
 8 |     {
 9 | 
10 |     }
11 |     double Predict()
12 |     {
13 |       return lb;
14 |     }
15 |     void Update(int32_t val)
16 |     {
17 |       lb = val;
18 |     }
19 |   protected:
20 |     int32_t lb;
21 | };
22 | 
23 | class SparsePCM {
24 |   const double cost_pow=1;
25 |   public:
26 |     SparsePCM()
27 |     :minval(0),maxval(0),fraction_used(0.),fraction_cost(0.)
28 |     {
29 |     };
30 |     void Analyse(std::span<const int32_t> buf)
31 |     {
32 |       minval = std::numeric_limits<int32_t>::max();
33 |       maxval = std::numeric_limits<int32_t>::min();
34 |       for (auto val : buf) {
35 |         if (val>maxval) maxval=val;
36 |         if (val<minval) minval=val;
37 |       }
38 |       used.resize(maxval-minval+1);
39 | 
40 |       for (auto val : buf) used[val-minval] = 1;
41 |       int sum=std::accumulate(begin(used),end(used),0);
42 |       fraction_used = used.size()>0?(sum/static_cast<double>(used.size()))*100.:0.0;
43 | 
44 | 
45 |       // calc cost
46 |       //SimplePred pred;
47 |       double sum0=0,sum1=0;
48 |       for (auto val : buf) {
49 |         //int32_t p=std::clamp((int)std::round(pred.Predict()),minval,maxval);
50 |         int32_t e0=val;
51 |         int32_t e1=map_val(e0);
52 | 
53 |         sum0+=pow(std::fabs(e0),cost_pow);
54 |         sum1+=pow(std::fabs(e1),cost_pow);
55 | 
56 |         //pred.Update(val);
57 |       }
58 |       fraction_cost=sum1>0?sum0/static_cast<double>(sum1):0;
59 |     }
60 |     int map_val(const int32_t val,const int32_t p=0)
61 |     {
62 |       if (val==0) return 0;
63 |       const int sgn=MathUtils::sgn(val);
64 | 
65 |       const int pidx=p-minval;
66 |       int mres=0;
67 |       if (val>0) {
68 |         for (int i=pidx+1;i<=pidx+val;i++)
69 |           mres+=used[i];
70 |           //if (used[i]) ++mres;
71 |       } else {
72 |         for (int i=pidx-1;i>=pidx+val;i--)
73 |           mres+=used[i];
74 |          //if (used[i])  ++mres;
75 |       }
76 |       return sgn*mres;
77 |     }
78 |     int32_t minval,maxval;
79 |     double fraction_used,fraction_cost;
80 |   protected:
81 |     std::vector<int> used;
82 | };
83 | 
84 | 
85 | #endif // SPARSEPCM_H
86 | 


--------------------------------------------------------------------------------
/src/libsac/vle.cpp:
--------------------------------------------------------------------------------
  1 | #include "vle.h"
  2 | 
  3 | BitplaneCoder::BitplaneCoder(int maxbpn,int numsamples)
  4 | :csig0(1<<20),csig1(1<<20),csig2(1<<20),csig3(1<<20),
  5 | cref0(1<<20),cref1(1<<20),cref2(1<<20),cref3(1<<20),
  6 | p_laplace(32),
  7 | lmixref(256,NMixLogistic(5)),lmixsig(256,NMixLogistic(3)),
  8 | ssemix(2),
  9 | msb(numsamples),
 10 | maxbpn(maxbpn),numsamples(numsamples),lm(maxbpn)
 11 | //n_laplace(32),weights_laplace(2*n_laplace+1),
 12 | {
 13 |   state=0;
 14 |   bpn=0;
 15 |   nrun=0;
 16 |   double theta=0.99;
 17 |   for (int i=0;i<32;i++) {
 18 |     int p=(std::min)((std::max)((int)round((1.0-1.0/(1+pow(theta,1<<i)))*PSCALE),1),PSCALEm);
 19 |     //std::cout << p << ' ';
 20 |     p_laplace[i].p1=p;
 21 |   }
 22 |   pestimate=0;
 23 |   for (int i=0;i<32;i++) {
 24 |     bmask[i]=~((1<<i)-1);
 25 |   }
 26 |   /*double s=35;
 27 |   for (int i=0;i<2*n_laplace+1;i++) {
 28 |     int idx=i-n_laplace;
 29 |     weights_laplace[i]=1.0; //exp(-(idx*idx)/(s*s));
 30 |   }*/
 31 | }
 32 | 
 33 | void BitplaneCoder::GetSigState(int i)
 34 | {
 35 |   sigst[0]=msb[i];
 36 |   sigst[1]=i>0?msb[i-1]:0;
 37 |   sigst[2]=i<numsamples-1?msb[i+1]:0;
 38 |   sigst[3]=i>1?msb[i-2]:0;
 39 |   sigst[4]=i<numsamples-2?msb[i+2]:0;
 40 |   sigst[5]=i>2?msb[i-3]:0;
 41 |   sigst[6]=i<numsamples-3?msb[i+3]:0;
 42 |   sigst[7]=i>3?msb[i-4]:0;
 43 |   sigst[8]=i<numsamples-4?msb[i+4]:0;
 44 |   sigst[9]=i>4?msb[i-5]:0;
 45 |   sigst[10]=i<numsamples-5?msb[i+5]:0;
 46 |   sigst[11]=i>5?msb[i-6]:0;
 47 |   sigst[12]=i<numsamples-6?msb[i+6]:0;
 48 |   sigst[13]=i>6?msb[i-7]:0;
 49 |   sigst[14]=i<numsamples-7?msb[i+7]:0;
 50 |   sigst[15]=i>7?msb[i-8]:0;
 51 |   sigst[16]=i<numsamples-8?msb[i+8]:0;
 52 | }
 53 | 
 54 | uint32_t BitplaneCoder::GetAvgSum(int n)
 55 | {
 56 |   uint64_t nsum=0;
 57 |   int nidx=0;
 58 | 
 59 |   for (int k=sample-n;k<=sample+n;k++) {
 60 |     if (k>=0 && k<numsamples) {
 61 |       int val=pabuf[k];
 62 |       val&=k<sample?bmask[bpn]:bmask[bpn+1];
 63 |       nsum+=val;
 64 |       nidx++;
 65 |     }
 66 |   }
 67 |   return nidx>0?(nsum+(nidx-1))/nidx:0;
 68 | }
 69 | 
 70 | int BitplaneCoder::PredictLaplace(uint32_t avg_sum)
 71 | {
 72 |   double p_l=0.0;
 73 |   if (avg_sum>0) {
 74 |     double theta=exp(-1.0/avg_sum);
 75 |     p_l=1.0-1.0/(1+pow(theta,1<<bpn));
 76 |   };
 77 |   int p1=std::min(std::max((int)round(p_l*PSCALE),1),PSCALEm);
 78 |   return p1;
 79 | }
 80 | 
 81 | int BitplaneCoder::PredictRef()
 82 | {
 83 |   int val=pabuf[sample];
 84 | 
 85 |   int lval=sample>0?pabuf[sample-1]:0;
 86 |   int lval2=sample>1?pabuf[sample-2]:0;
 87 |   int nval=sample<(numsamples-1)?pabuf[sample+1]:0;
 88 |   int nval2=sample<(numsamples-2)?pabuf[sample+2]:0;
 89 | 
 90 |   int b0=(val>>(bpn+1));
 91 |   int b1=(lval>>(bpn));
 92 |   int b2=(nval>>(bpn+1));
 93 |   int b3=(lval2>>(bpn));
 94 |   int b4=(nval2>>(bpn+1));
 95 | 
 96 | 
 97 |   int c0=(b0<<1)<b1?1:0;
 98 |   int c1=(b0)<b2?1:0;
 99 |   int c2=(b0<<1)<b3?1:0;
100 |   int c3=(b0)<(b4)?1:0;
101 | 
102 | 
103 |   int x0=(val>>(bpn+1))<<1;
104 |   int x1=(lval>>bpn);
105 |   int x2=(nval>>(bpn+1))<<1;
106 |   int x3=(lval2>>(bpn));
107 |   int x4=(nval2>>(bpn+1))<<1;
108 |   int xm=(x0+x1+x2+x3+x4)/5;
109 | 
110 |   int d0=x0>xm;
111 |   int d1=x1>xm;
112 |   //int d2=x2>xm;
113 | 
114 |   int ctx1=(b0&15)+((b1&15)<<4)+((b2&15)<<8);
115 |   int ctx2=(c0+(c1<<1)+(c2<<2)+(c3<<3))+(d0<<4)+(d1<<5);
116 |   int ctx3=(sigst[1]+sigst[2]+sigst[3]+sigst[4]+sigst[5]+sigst[6]+sigst[7]+sigst[8]);
117 | 
118 |   pl=&p_laplace[bpn];
119 |   pc1=&cref0[msb[sample]];
120 |   pc2=&cref1[ctx1&255];
121 |   pc3=&cref2[ctx2];
122 |   pc4=&cref3[ctx3];
123 | 
124 |   int pctx=((((pestimate>>12)<<1)+d0)<<1)+(b0&1);
125 |   plmix=&lmixref[pctx];
126 | 
127 |   int px=plmix->Predict({pestimate,pl->p1,pc1->p1,pc2->p1,pc3->p1});
128 | 
129 |   return px;
130 | }
131 | 
132 | void BitplaneCoder::UpdateRef(int bit)
133 | {
134 |   pl->update(bit,cnt_upd_rate_p);
135 |   pc1->update(bit,cnt_upd_rate_ref);
136 |   pc2->update(bit,cnt_upd_rate_ref);
137 |   pc3->update(bit,cnt_upd_rate_ref);
138 |   pc4->update(bit,cnt_upd_rate_ref);
139 |   plmix->Update(bit,mix_upd_rate_ref);
140 |   state=(state<<1)+0;
141 | }
142 | 
143 | // count number of significant samples in neighborhood
144 | void BitplaneCoder::CountSig(int n,int &n1,int &n2)
145 | {
146 |   n1=n2=0;
147 |   for (int i=1;i<=n;i++) {
148 |     if (sample-i>=0) {
149 |        if (msb[sample-i]) n1+=1;
150 |        if (msb[sample-i]>bpn) n2+=1;
151 |     }
152 |     if (sample+i<numsamples-1) {
153 |        if (msb[sample+i]) n1+=1;
154 |        if (msb[sample+i]>bpn) n2+=1;
155 |     }
156 |   }
157 | }
158 | 
159 | int BitplaneCoder::PredictSig()
160 | {
161 |   int ctx1=0;
162 |   for (int i=0;i<16;i++)
163 |     if (sigst[i+1]) ctx1+=1<<i;
164 | 
165 |   int n1,n2;
166 |   CountSig(32,n1,n2);
167 |   int ctx2=n2;
168 | 
169 |   pl=&p_laplace[bpn];
170 |   pc1=&csig0[ctx1];
171 |   pc2=&csig1[ctx2];
172 | 
173 |   int mixctx=((state&15)<<3)+((n1>=3?3:n1)<<1)+(n2>0?1:0);
174 |   plmix=&lmixsig[mixctx];
175 |   int p_mix=plmix->Predict({pl->p1,pc1->p1,pc2->p1});
176 |   return p_mix;
177 | }
178 | 
179 | void BitplaneCoder::UpdateSig(int bit)
180 | {
181 |   pl->update(bit,cnt_upd_rate_p);
182 |   pc1->update(bit,cnt_upd_rate_sig);
183 |   pc2->update(bit,cnt_upd_rate_sig);
184 |   plmix->Update(bit,mix_upd_rate_sig);
185 |   state=(state<<1)+1;
186 | }
187 | 
188 | int BitplaneCoder::PredictSSE(int p1)
189 | {
190 |   int ctx1=((pestimate>>11)<<1)+(sigst[0]?1:0);
191 |   int ctx2=32+(sigst[0]?1:0)+((sigst[1]?1:0)<<1)+((sigst[2]?1:0)<<2)+((sigst[3]?1:0)<<3)+((sigst[4]?1:0)<<4)+((sigst[5]?1:0)<<5)+((sigst[6]?1:0)<<6);
192 |   psse1=&sse[ctx1];
193 |   psse2=&sse[ctx2];
194 |   int pr1=psse1->Predict(p1);
195 |   int pr2=psse2->Predict(pr1);
196 |   return ssemix.Predict({(pr1+pr2+1)>>1,p1});
197 | }
198 | 
199 | void BitplaneCoder::UpdateSSE(int bit)
200 | {
201 |   psse1->Update(bit,cntsse_upd_rate);
202 |   psse2->Update(bit,cntsse_upd_rate);
203 |   ssemix.Update(bit,mixsse_upd_rate);
204 | }
205 | 
206 | void BitplaneCoder::Encode(EncodeP1 encode_p1,int32_t *abuf)
207 | {
208 |   pabuf=abuf;
209 |   for (bpn=maxbpn;bpn>=0;bpn--)  {
210 |     state=0;
211 |     for (sample=0;sample<numsamples;sample++) {
212 |       uint32_t avg_sum = GetAvgSum(32);
213 |       pestimate=PredictLaplace(avg_sum);//lm.Predict(avg_sum,bpn);
214 |       GetSigState(sample);
215 |       int bit=(pabuf[sample]>>bpn)&1;
216 |       int p=0;
217 |       if (sigst[0]) { // coef is significant, refine
218 |         p=PredictSSE(PredictRef());
219 |         encode_p1(p,bit);
220 |         UpdateRef(bit);
221 |         UpdateSSE(bit);
222 |       } else { // coef is insignificant
223 |         p=PredictSSE(PredictSig());
224 |         encode_p1(p,bit);
225 |         UpdateSig(bit);
226 |         UpdateSSE(bit);
227 |         if (bit) msb[sample]=bpn;
228 |       }
229 |     }
230 |   }
231 | }
232 | 
233 | void BitplaneCoder::Decode(DecodeP1 decode_p1,int32_t *buf)
234 | {
235 |   int bit;
236 |   pabuf=buf;
237 |   for (int i=0;i<numsamples;i++) buf[i]=0;
238 |   for (bpn=maxbpn;bpn>=0;bpn--)  {
239 |     state=0;
240 |     for (sample=0;sample<numsamples;sample++) {
241 |       uint32_t avg_sum=GetAvgSum(32);
242 |       pestimate=PredictLaplace(avg_sum);//lm.Predict(avg_sum,bpn);
243 |       GetSigState(sample);
244 |       if (sigst[0]) { // coef is significant, refine
245 |         bit=decode_p1(PredictSSE(PredictRef()));
246 |         UpdateRef(bit);
247 |         UpdateSSE(bit);
248 |         if (bit) buf[sample]+=(1<<bpn);
249 |        } else { // coef is insignificant
250 |          bit=decode_p1(PredictSSE(PredictSig()));
251 |          UpdateSig(bit);
252 |          UpdateSSE(bit);
253 |          if (bit) {
254 |            buf[sample]+=(1<<bpn);
255 |            msb[sample]=bpn;
256 |           }
257 |         }
258 |     }
259 |   }
260 |   for (int i=0;i<numsamples;i++) buf[i]=MathUtils::U2S(buf[i]);
261 | }
262 | 
263 | 


--------------------------------------------------------------------------------
/src/libsac/vle.h:
--------------------------------------------------------------------------------
  1 | #ifndef VLE_H
  2 | #define VLE_H
  3 | 
  4 | #include "../model/range.h"
  5 | #include "../model/counter.h"
  6 | #include "../model/sse.h"
  7 | #include "../model/mixer.h"
  8 | #include "../common/utils.h"
  9 | #include <functional>
 10 | 
 11 | //#define h1y(v,k) (((v)>>k)^(v))
 12 | //#define h2y(v,k) (((v)*2654435761UL)>>(k))
 13 | 
 14 | class StaticLaplaceModel {
 15 |   public:
 16 |     StaticLaplaceModel(int maxbpn)
 17 |     :pr((1<<maxbpn),std::vector<int>(32))
 18 |     {
 19 |       for (int sum=0;sum<(1<<maxbpn);sum++) {
 20 |         for (int bpn=0;bpn<32;bpn++) {
 21 |           double pd=0.;
 22 |           if (sum>0) {
 23 |             double theta=exp(-1.0/static_cast<double>(sum));
 24 |             pd=1.0-1.0/(1+pow(theta,1<<bpn));
 25 |           }
 26 |           int pi=std::clamp((int)round(pd*PSCALE),1,PSCALEm);
 27 |           pr[sum][bpn]=pi;
 28 |         }
 29 |       }
 30 |     }
 31 |     int Predict(int avg,int bpn)
 32 |     {
 33 |       return pr[avg][bpn];
 34 |     }
 35 |   private:
 36 |     std::vector<std::vector<int>> pr;
 37 | };
 38 | 
// Callback types used by BitplaneCoder: EncodeP1 is called with
// (probability, bit), DecodeP1 is called with a probability and returns the bit.
using EncodeP1 = std::function<void(uint32_t,int)>;
using DecodeP1 = std::function<int(uint32_t)>;
 41 | 
// BitplaneCoder: codes a block of integers bit plane by bit plane. Each bit
// is modelled either as a significance bit or as a refinement bit; the
// probability goes to a caller-supplied callback (EncodeP1 / DecodeP1).
class BitplaneCoder {
  // adaptation rates passed to the counter / mixer / SSE update calls
  const int cnt_upd_rate_p=150;
  const int cnt_upd_rate_sig=300;
  const int cnt_upd_rate_ref=150;
  const int mix_upd_rate_ref=800;
  const int mix_upd_rate_sig=700;
  const int cntsse_upd_rate=250;
  const int mixsse_upd_rate=250;
  public:
    BitplaneCoder(int maxbpn,int numsamples);
    void Encode(EncodeP1 encode_p1,int32_t *abuf);
    void Decode(DecodeP1 decode_p1,int32_t *buf);
  private:
    void CountSig(int n,int &n1,int &n2);
    void GetSigState(int i); // get actual significance state
    int PredictLaplace(uint32_t avg_sum);
    int PredictRef();
    void UpdateRef(int bit);
    int PredictSig();
    void UpdateSig(int bit);
    int PredictSSE(int p1);
    void UpdateSSE(int bit);
    uint32_t GetAvgSum(int n);

    // context counter tables for significance (csig*) and refinement (cref*) bits
    std::vector<LinearCounterLimit> csig0,csig1,csig2,csig3,cref0,cref1,cref2,cref3;
    // one counter per bit plane
    std::vector<LinearCounterLimit>p_laplace;
    std::vector <NMixLogistic>lmixref,lmixsig;
    NMixLogistic ssemix;

    SSENL<15> sse[1<<12];
    // models selected by the most recent Predict* call, used by the matching Update* call
    SSENL<15> *psse1,*psse2;
    LinearCounterLimit *pc1,*pc2,*pc3,*pc4;
    LinearCounterLimit *pl;
    NMixLogistic *plmix;
    // buffer being coded and index of the current sample
    int *pabuf,sample;
    // per sample: bit plane at which it became significant (0 until then)
    std::vector <int>msb;
    //int n_laplace;
    //std::vector <double>weights_laplace;
    // neighbourhood snapshot filled by GetSigState
    int sigst[17];
    // bmask[i] keeps bit i and all higher bits
    uint32_t bmask[32];
    int maxbpn,bpn,numsamples,nrun,pestimate;
    // history of recent coding decisions (0 = refinement, 1 = significance)
    uint32_t state;
    StaticLaplaceModel lm;
};
 86 | 
 87 | class Golomb {
 88 |   public:
 89 |     Golomb (RangeCoderSH &rc)
 90 |     :msum(0.98,1<<15),rc(rc)
 91 |     {
 92 |       lastl=0;
 93 |     }
 94 |     void Encode(int val)
 95 |     {
 96 |       if (val<0) val=2*(-val);
 97 |       else if (val>0) val=(2*val)-1;
 98 | 
 99 |       int m=(std::max)(static_cast<int>(msum.sum),1);
100 |       int q=val/m;
101 |       int r=val-q*m;
102 | 
103 |       //for (int i=0;i<q;i++) rc.EncodeBitOne(PSCALEh,1); // encode exponent unary
104 |       //rc.EncodeBitOne(PSCALEh,0);
105 | 
106 |       int ctx=1;
107 |       for (int i=7;i>=0;i--) {
108 |         int bit=(q>>i)&1;
109 |         rc.EncodeBitOne(cnt[ctx].p1,bit);
110 |         cnt[ctx].update(bit,250);
111 | 
112 |         ctx+=ctx+bit;
113 |       }
114 | 
115 |       /*int ctx=0;
116 |       for (int i=0;i<q;i++) {
117 |         int pctx=lastl+(ctx<<1);
118 |         rc.EncodeBitOne(cnt[pctx].p1,1);
119 |         cnt[pctx].update(1,128);
120 |         ctx++;
121 |         if (ctx>1) ctx=1;
122 |       }
123 |       int pctx=lastl+(ctx<<1);
124 |       rc.EncodeBitOne(cnt[pctx].p1,0);
125 |       cnt[pctx].update(0,128);
126 | 
127 |       if (q>0) lastl=1;
128 |       else lastl=0;*/
129 | 
130 |       if (m>1)
131 |       {
132 |         int b=ceil(log(m)/log(2));
133 |         int t=(1<<b)-m;
134 |         if (r < t) {
135 |           for (int i=b-2;i>=0;i--) rc.EncodeBitOne(PSCALEh,((r>>i)&1));
136 |         } else {
137 |           for (int i=b-1;i>=0;i--) rc.EncodeBitOne(PSCALEh,(((r+t)>>i)&1));
138 |         }
139 |       }
140 | 
141 |       msum.Update(val);
142 |     }
143 |     int Decode() {
144 |       int q=0;
145 |       while (rc.DecodeBitOne(PSCALEh)!=0) q++;
146 | 
147 |       int m=(std::max)(static_cast<int>(msum.sum),1);
148 |       int r=0;
149 | 
150 |       if (m>1)
151 |       {
152 |         int b=ceil(log(m)/log(2));
153 |         int t=(1<<b)-m;
154 |         for (int i=b-2;i>=0;i--) r=(r<<1)+rc.DecodeBitOne(PSCALEh);
155 |         if (r>=t) r=((r<<1)+rc.DecodeBitOne(PSCALEh))-t;
156 |       }
157 | 
158 |       int val=m*q+r;
159 |       msum.Update(val);
160 | 
161 |       if (val) {
162 |         if (val&1) val=((val+1)>>1);
163 |         else val=-(val>>1);
164 |       }
165 |       return val;
166 |     }
167 |     RunExp msum;
168 |   private:
169 |     RangeCoderSH &rc;
170 |     LinearCounter16 cnt[512];
171 |     int lastl;
172 | };
173 | 
174 | class GolombRC {
175 |   public:
176 |     GolombRC (RangeCoder &rc)
177 |     :msum(0.8,1<<15),rc(rc)
178 |     {
179 |     }
180 |     void Encode(int val)
181 |     {
182 |       if (val<0) val=2*(-val);
183 |       else if (val>0) val=(2*val)-1;
184 | 
185 |       int m=(std::max)(static_cast<int>(msum.sum),1);
186 |       int q=val/m;
187 |       int r=val-q*m;
188 | 
189 |       for (int i=0;i<q;i++) rc.EncodeBitOne(PSCALEh,1); // encode exponent unary
190 |       rc.EncodeBitOne(PSCALEh,0);
191 | 
192 |       rc.EncodeSymbol(r,1,m);
193 | 
194 |       msum.Update(val);
195 |     }
196 |     int Decode() {
197 |       int q=0;
198 |       while (rc.DecodeBitOne(PSCALEh)!=0) q++;
199 | 
200 |       int m=(std::max)(static_cast<int>(msum.sum),1);
201 | 
202 |       int r=rc.DecProb(m);
203 |       rc.DecodeSymbol(r,1);
204 | 
205 |       int val=m*q+r;
206 |       msum.Update(val);
207 | 
208 |       if (val) {
209 |         if (val&1) val=((val+1)>>1);
210 |         else val=-(val>>1);
211 |       }
212 |       return val;
213 |     }
214 |     RunExp msum;
215 |   private:
216 |     RangeCoder &rc;
217 | };
218 | 
219 | 
220 | #endif
221 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
 1 | #include "cmdline.h"
 2 | #include "opt/opt.h"
 3 | 
 4 | #define SAC_VERSION "0.7.22"
 5 | 
 6 | int main(int argc,const char *argv[])
 7 | {
 8 | 
 9 |   std::cout << "Sac v" << SAC_VERSION << " - Lossless Audio Coder (c) Sebastian Lehmann\n";
10 |   std::cout << "compiled on " << __DATE__ << " ";
11 |   #ifdef __x86_64
12 |     std::cout << "(64-bit";
13 |   #else
14 |     std::cout << "(32-bit";
15 |   #endif
16 |   if constexpr(SACGlobalCfg::USE_AVX2)
17 |     std::cout << ",AVX2";
18 |   std::cout << ")";
19 |   #ifdef __clang__
20 |     std::cout << " clang " << __clang_major__ << "." << __clang_minor__ << "." << __clang_patchlevel__ << "\n";
21 |   #elif __GNUC__ // __clang__
22 |     std::cout << " gcc " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__;
23 |   #endif
24 |   std::cout << "\n\n";
25 | 
26 |   CmdLine cmdline;
27 |   int error=cmdline.Parse(argc,argv);
28 |   if (error==0) error=cmdline.Process();
29 |   return error;
30 | }
31 | 


--------------------------------------------------------------------------------
/src/model/counter.h:
--------------------------------------------------------------------------------
  1 | #ifndef COUNTER_H
  2 | #define COUNTER_H
  3 | 
  4 | #include "model.h"
  5 | 
  6 | class Prob16Counter
  7 | {
  8 |   public:
  9 |     uint16_t p1;
 10 |     Prob16Counter():p1(PSCALEh){};
 11 |   protected:
 12 |     int idiv(int val,int s) {return (val+(1<<(s-1)))>>s;};
 13 |     int idiv_signed(int val,int s){return val<0?-(((-val)+(1<<(s-1)))>>s):(val+(1<<(s-1)))>>s;};
 14 | };
 15 | 
 16 | // Linear Counter, p=16 bit
 17 | class LinearCounter16 : public Prob16Counter
 18 | {
 19 |   public:
 20 |     using Prob16Counter::Prob16Counter;
 21 |     //p'=(1-w0)*p+w0*((1-w1)*bit+w1*0.5)
 22 |     #define wh(w) ((w*PSCALEh+PSCALEh)>>PBITS)
 23 |     void update(int bit,const int w0,const int w1)
 24 |     {
 25 |       int h=(w0*wh(w1))>>PBITS;
 26 |       int p=idiv((PSCALE-w0)*p1,PBITS);
 27 |       p+=bit?w0-h:h;
 28 |       p1=std::clamp(p,1,PSCALEm);
 29 |     };
 30 |     //p'+=L*(bit-p)
 31 |     void update(int bit,int L)
 32 |     {
 33 |       int err=(bit<<PBITS)-p1;
 34 |       // p1 should be converted to "int" implicit anyway?
 35 |       int px = int(p1) + idiv_signed(L*err,PBITS);
 36 |       p1=std::clamp(px,1,PSCALEm);
 37 |     }
 38 | };
 39 | 
 40 | static struct tdiv_tbl
 41 | {
 42 |   tdiv_tbl()
 43 |   {
 44 |     for (int i=0;i<PSCALE;i++)
 45 |     {
 46 |       tbl[i]=PSCALE/(i+3);
 47 |     }
 48 |   }
 49 |   int& operator[](int i)  {return tbl[i];};
 50 |   int tbl[PSCALE];
 51 | } div_tbl;
 52 | 
 53 | class LinearCounterLimit: public Prob16Counter
 54 | {
 55 |   uint16_t counter;
 56 |   public:
 57 |     LinearCounterLimit():Prob16Counter(){counter=0;};
 58 |     void update(int bit,int limit)
 59 |     {
 60 |       if (counter<limit) counter++;
 61 |       #if 0
 62 |         int dp=((bit<<PBITS)-p1)/int(counter+3);
 63 |       #else
 64 |         int dp=bit?((PSCALE-p1)*div_tbl[counter])>>PBITS:-((p1*div_tbl[counter])>>PBITS);
 65 |         //int dp=(((bit<<PBITS)-p1)*div_tbl[counter]+PSCALEh)>>PBITS;
 66 |       #endif
 67 |       p1=std::clamp(p1+dp,1,PSCALEm);
 68 |     };
 69 | };
 70 | 
// Paq8-state table
// One row per state. Columns, as they are used in this file:
//   [0],[1]  successor states, read through nex(state,sel)
//            (presumably sel is the coded bit -- TODO confirm)
//   [2],[3]  the n0/n1 counts that StateProb::GetP1() turns into a probability
// Only rows 0..252 are listed; the remaining rows up to 255 are zero-initialized.
static const uint8_t State_table[256][4]={
  {  1,  2, 0, 0},{  3,  5, 1, 0},{  4,  6, 0, 1},{  7, 10, 2, 0}, // 0-3
  {  8, 12, 1, 1},{  9, 13, 1, 1},{ 11, 14, 0, 2},{ 15, 19, 3, 0}, // 4-7
  { 16, 23, 2, 1},{ 17, 24, 2, 1},{ 18, 25, 2, 1},{ 20, 27, 1, 2}, // 8-11
  { 21, 28, 1, 2},{ 22, 29, 1, 2},{ 26, 30, 0, 3},{ 31, 33, 4, 0}, // 12-15
  { 32, 35, 3, 1},{ 32, 35, 3, 1},{ 32, 35, 3, 1},{ 32, 35, 3, 1}, // 16-19
  { 34, 37, 2, 2},{ 34, 37, 2, 2},{ 34, 37, 2, 2},{ 34, 37, 2, 2}, // 20-23
  { 34, 37, 2, 2},{ 34, 37, 2, 2},{ 36, 39, 1, 3},{ 36, 39, 1, 3}, // 24-27
  { 36, 39, 1, 3},{ 36, 39, 1, 3},{ 38, 40, 0, 4},{ 41, 43, 5, 0}, // 28-31
  { 42, 45, 4, 1},{ 42, 45, 4, 1},{ 44, 47, 3, 2},{ 44, 47, 3, 2}, // 32-35
  { 46, 49, 2, 3},{ 46, 49, 2, 3},{ 48, 51, 1, 4},{ 48, 51, 1, 4}, // 36-39
  { 50, 52, 0, 5},{ 53, 43, 6, 0},{ 54, 57, 5, 1},{ 54, 57, 5, 1}, // 40-43
  { 56, 59, 4, 2},{ 56, 59, 4, 2},{ 58, 61, 3, 3},{ 58, 61, 3, 3}, // 44-47
  { 60, 63, 2, 4},{ 60, 63, 2, 4},{ 62, 65, 1, 5},{ 62, 65, 1, 5}, // 48-51
  { 50, 66, 0, 6},{ 67, 55, 7, 0},{ 68, 57, 6, 1},{ 68, 57, 6, 1}, // 52-55
  { 70, 73, 5, 2},{ 70, 73, 5, 2},{ 72, 75, 4, 3},{ 72, 75, 4, 3}, // 56-59
  { 74, 77, 3, 4},{ 74, 77, 3, 4},{ 76, 79, 2, 5},{ 76, 79, 2, 5}, // 60-63
  { 62, 81, 1, 6},{ 62, 81, 1, 6},{ 64, 82, 0, 7},{ 83, 69, 8, 0}, // 64-67
  { 84, 71, 7, 1},{ 84, 71, 7, 1},{ 86, 73, 6, 2},{ 86, 73, 6, 2}, // 68-71
  { 44, 59, 5, 3},{ 44, 59, 5, 3},{ 58, 61, 4, 4},{ 58, 61, 4, 4}, // 72-75
  { 60, 49, 3, 5},{ 60, 49, 3, 5},{ 76, 89, 2, 6},{ 76, 89, 2, 6}, // 76-79
  { 78, 91, 1, 7},{ 78, 91, 1, 7},{ 80, 92, 0, 8},{ 93, 69, 9, 0}, // 80-83
  { 94, 87, 8, 1},{ 94, 87, 8, 1},{ 96, 45, 7, 2},{ 96, 45, 7, 2}, // 84-87
  { 48, 99, 2, 7},{ 48, 99, 2, 7},{ 88,101, 1, 8},{ 88,101, 1, 8}, // 88-91
  { 80,102, 0, 9},{103, 69,10, 0},{104, 87, 9, 1},{104, 87, 9, 1}, // 92-95
  {106, 57, 8, 2},{106, 57, 8, 2},{ 62,109, 2, 8},{ 62,109, 2, 8}, // 96-99
  { 88,111, 1, 9},{ 88,111, 1, 9},{ 80,112, 0,10},{113, 85,11, 0}, // 100-103
  {114, 87,10, 1},{114, 87,10, 1},{116, 57, 9, 2},{116, 57, 9, 2}, // 104-107
  { 62,119, 2, 9},{ 62,119, 2, 9},{ 88,121, 1,10},{ 88,121, 1,10}, // 108-111
  { 90,122, 0,11},{123, 85,12, 0},{124, 97,11, 1},{124, 97,11, 1}, // 112-115
  {126, 57,10, 2},{126, 57,10, 2},{ 62,129, 2,10},{ 62,129, 2,10}, // 116-119
  { 98,131, 1,11},{ 98,131, 1,11},{ 90,132, 0,12},{133, 85,13, 0}, // 120-123
  {134, 97,12, 1},{134, 97,12, 1},{136, 57,11, 2},{136, 57,11, 2}, // 124-127
  { 62,139, 2,11},{ 62,139, 2,11},{ 98,141, 1,12},{ 98,141, 1,12}, // 128-131
  { 90,142, 0,13},{143, 95,14, 0},{144, 97,13, 1},{144, 97,13, 1}, // 132-135
  { 68, 57,12, 2},{ 68, 57,12, 2},{ 62, 81, 2,12},{ 62, 81, 2,12}, // 136-139
  { 98,147, 1,13},{ 98,147, 1,13},{100,148, 0,14},{149, 95,15, 0}, // 140-143
  {150,107,14, 1},{150,107,14, 1},{108,151, 1,14},{108,151, 1,14}, // 144-147
  {100,152, 0,15},{153, 95,16, 0},{154,107,15, 1},{108,155, 1,15}, // 148-151
  {100,156, 0,16},{157, 95,17, 0},{158,107,16, 1},{108,159, 1,16}, // 152-155
  {100,160, 0,17},{161,105,18, 0},{162,107,17, 1},{108,163, 1,17}, // 156-159
  {110,164, 0,18},{165,105,19, 0},{166,117,18, 1},{118,167, 1,18}, // 160-163
  {110,168, 0,19},{169,105,20, 0},{170,117,19, 1},{118,171, 1,19}, // 164-167
  {110,172, 0,20},{173,105,21, 0},{174,117,20, 1},{118,175, 1,20}, // 168-171
  {110,176, 0,21},{177,105,22, 0},{178,117,21, 1},{118,179, 1,21}, // 172-175
  {110,180, 0,22},{181,115,23, 0},{182,117,22, 1},{118,183, 1,22}, // 176-179
  {120,184, 0,23},{185,115,24, 0},{186,127,23, 1},{128,187, 1,23}, // 180-183
  {120,188, 0,24},{189,115,25, 0},{190,127,24, 1},{128,191, 1,24}, // 184-187
  {120,192, 0,25},{193,115,26, 0},{194,127,25, 1},{128,195, 1,25}, // 188-191
  {120,196, 0,26},{197,115,27, 0},{198,127,26, 1},{128,199, 1,26}, // 192-195
  {120,200, 0,27},{201,115,28, 0},{202,127,27, 1},{128,203, 1,27}, // 196-199
  {120,204, 0,28},{205,115,29, 0},{206,127,28, 1},{128,207, 1,28}, // 200-203
  {120,208, 0,29},{209,125,30, 0},{210,127,29, 1},{128,211, 1,29}, // 204-207
  {130,212, 0,30},{213,125,31, 0},{214,137,30, 1},{138,215, 1,30}, // 208-211
  {130,216, 0,31},{217,125,32, 0},{218,137,31, 1},{138,219, 1,31}, // 212-215
  {130,220, 0,32},{221,125,33, 0},{222,137,32, 1},{138,223, 1,32}, // 216-219
  {130,224, 0,33},{225,125,34, 0},{226,137,33, 1},{138,227, 1,33}, // 220-223
  {130,228, 0,34},{229,125,35, 0},{230,137,34, 1},{138,231, 1,34}, // 224-227
  {130,232, 0,35},{233,125,36, 0},{234,137,35, 1},{138,235, 1,35}, // 228-231
  {130,236, 0,36},{237,125,37, 0},{238,137,36, 1},{138,239, 1,36}, // 232-235
  {130,240, 0,37},{241,125,38, 0},{242,137,37, 1},{138,243, 1,37}, // 236-239
  {130,244, 0,38},{245,135,39, 0},{246,137,38, 1},{138,247, 1,38}, // 240-243
  {140,248, 0,39},{249,135,40, 0},{250, 69,39, 1},{ 80,251, 1,39}, // 244-247
  {140,252, 0,40},{249,135,41, 0},{250, 69,40, 1},{ 80,251, 1,40}, // 248-251
  {140,252, 0,41}};
// nex(state,sel): column sel of the row for state
#define nex(state,sel) State_table[state][sel]
138 | 
139 | class StateProb {
140 | public:
141 |   static int GetP1(int state)
142 |   {
143 |     int n0=State_table[state][2];
144 |     int n1=State_table[state][3];
145 |     //if (n0==0) n1*=64;
146 |     //if (n1==0) n0*=64;
147 |     return ((n1+1)*PSCALE)/(n0+n1+2);
148 |   };
149 | };
150 | 
151 | #endif // COUNTER_H
152 | 


--------------------------------------------------------------------------------
/src/model/domain.h:
--------------------------------------------------------------------------------
 1 | #ifndef _DOMAIN_H
 2 | #define _DOMAIN_H
 3 | 
 4 | #include "../global.h"
 5 | #include <cmath>
 6 | 
 7 | static class LogDomain {
 8 |   public:
 9 |     int min,max;
10 |     const int scale,dbits,dscale,dmin,dmax;
11 |     LogDomain():scale(256),dbits(12),dscale(1<<dbits),dmin(-(dscale>>1)),dmax((dscale>>1)-1)
12 |     {
13 |       for (int i=0;i<PSCALE;i++)
14 |       {
15 |         FwdTbl[i]=floor(log((i+0.5)/(PSCALE-i-0.5))*double(scale)+0.5);
16 |       };
17 |       min=FwdTbl[0];
18 |       max=FwdTbl[PSCALE-1];
19 |       //printf("%i %i\n",min,max);
20 |       // 12-Bit
21 |       InvTbl=new int[dscale];
22 |       for (int i=dmin;i<=dmax;i++)
23 |       {
24 |          double p=double(PSCALE)/(1.0+exp(-double(i)/double(scale)));
25 |          InvTbl[i-dmin]=floor(p);
26 |       };
27 |     }
28 |     ~LogDomain()
29 |     {
30 |       delete []InvTbl;
31 |     }
32 |     inline int Fwd(int p)
33 |     {
34 |        return FwdTbl[p];
35 |     }
36 |     inline int Inv(int x)
37 |     {
38 |        if (x<dmin) return 0;
39 |        else if (x>dmax) return PSCALE-1;
40 |        else return InvTbl[x-dmin];
41 |     }
42 |     void Check()
43 |     {
44 |       int sum=0;
45 |       printf("%i %i\n",min,max);
46 |       printf("%i  [%i %i]\n",dscale,dmin,dmax);
47 |       printf("%i %i\n",Inv(0),Fwd(PSCALEh));
48 |       for (int i=0;i<PSCALE;i++)
49 |       {
50 |         int p=Inv(Fwd(i));
51 |         sum+=(p-i)*(p-i);
52 |       }
53 |       printf(" mse: %0.2f\n",double(sum)/double(PSCALE));
54 |     }
55 |   protected:
56 |     int FwdTbl[PSCALE];
57 |     int *InvTbl;
58 | } myDomain;
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/src/model/mixer.h:
--------------------------------------------------------------------------------
  1 | #ifndef MIXER_H
  2 | #define MIXER_H
  3 | 
  4 | #include "model.h"
  5 | #include "domain.h"
  6 | 
  7 | // adaptive linear 2-input mix
  8 | // maximum weight precision 16-Bit
  9 | class Mix2
 10 | {
 11 |   public:
 12 |     virtual int Predict(int _p1,int _p2)=0;
 13 |     virtual void Update(int bit,int rate)=0;
 14 | };
 15 | 
 16 | class Mix2Linear : public Mix2
 17 | {
 18 |   public:
 19 |     Mix2Linear(){Init(WSCALEh);};
 20 |     void Init(int iw){w=iw;};
 21 |     //pm=(1-w)*p1+w*p2
 22 |     int Predict(int _p1,int _p2)
 23 |     {
 24 |       p1=_p1;p2=_p2;
 25 |       pm = p1+idiv_signed32((p2-p1)*w,WBITS);
 26 |       pm = std::clamp(pm,1,PSCALEm);
 27 |       return pm;
 28 |     }
 29 |     int w,p1,p2,pm;
 30 |   protected:
 31 |     inline int idiv_signed32(int val,int s){return val<0?-(((-val)+(1<<(s-1)))>>s):(val+(1<<(s-1)))>>s;};
 32 |     inline void upd_w(int d,int rate) {int wd=idiv_signed32(rate*d,PBITS);w=std::clamp(w+wd,0,int(WSCALE));};
 33 | };
 34 | 
 35 | class Mix2LeastSquares : public Mix2Linear  {
 36 |   public:
 37 |     // w_(i+1)=w_i + rate*(p2-p1)*e
 38 |     void Update(int bit,int rate)
 39 |     {
 40 |       int e=(bit<<PBITS)-pm;
 41 |       int d=idiv_signed32((p2-p1)*e,PBITS);
 42 |       upd_w(d,rate);
 43 |     }
 44 | };
 45 | 
 46 | class Mix2LeastCost : public Mix2Linear {
 47 |   public:
 48 |     void Update(int bit,int rate)
 49 |     {
 50 |       int d;
 51 |       //if (bit) d=(((p2-p1)<<PBITS)*uint64_t(div32tbl[pm]))>>32;
 52 |       //else d=(((p1-p2)<<PBITS)*uint64_t(div32tbl[PSCALE-pm]))>>32;
 53 |       if (bit) d=((p2-p1)<<PBITS)/pm;
 54 |       else d=((p1-p2)<<PBITS)/(PSCALE-pm);
 55 |       upd_w(d,rate);
 56 |     }
 57 | };
 58 | 
 59 | class NMixLogistic
 60 | {
 61 |   enum {WRANGE=1<<19};
 62 |   std::vector <int16_t> x;
 63 |   std::vector <int>w;
 64 | 
 65 |   int16_t pd;
 66 |   uint8_t n;
 67 |   public:
 68 |     NMixLogistic(int n)
 69 |     :x(n),w(n),pd(0),n(n)
 70 |      {
 71 |        Init(0);
 72 |      };
 73 |     void Init(int iw){
 74 |       for (int i=0;i<n;i++) w[i]=iw;
 75 |     };
 76 |     int Predict(const std::vector <int>&p)
 77 |     {
 78 |       int64_t sum=0;
 79 |       for (int i=0;i<n;i++)
 80 |       {
 81 |          x[i]=myDomain.Fwd(p[i]);
 82 |          sum+=int64_t(w[i]*x[i]);
 83 |       }
 84 |       sum=idiv_signed64(sum,WBITS);
 85 |       pd=std::clamp(myDomain.Inv(sum),1,PSCALEm);
 86 |       return pd;
 87 |     }
 88 |     void Update(int bit,int rate)
 89 |     {
 90 |       int err=(bit<<PBITS)-pd;
 91 |       for (int i=0;i<n;i++)
 92 |       {
 93 |          int de=idiv_signed32(x[i]*err,myDomain.dbits);
 94 |          upd_w(i,idiv_signed32(de*rate,myDomain.dbits));
 95 |       }
 96 |     };
 97 |   protected:
 98 |     inline int idiv_signed32(int val,int s){return val<0?-(((-val)+(1<<(s-1)))>>s):(val+(1<<(s-1)))>>s;};
 99 |     inline int idiv_signed64(int64_t val,int64_t s){return val<0?-(((-val)+(1<<(s-1)))>>s):(val+(1<<(s-1)))>>s;};
100 |     inline void upd_w(int i,int wd){w[i]=std::clamp(w[i]+wd,-WRANGE,WRANGE-1);}
101 | };
102 | 
103 | #endif // MIXER_H
104 | 


--------------------------------------------------------------------------------
/src/model/model.h:
--------------------------------------------------------------------------------
 1 | #ifndef MODEL_H
 2 | #define MODEL_H
 3 | 
 4 | #include <algorithm>
 5 | 
// probability precision
#define PBITS   (15)            // number of fractional bits of a probability
#define PSCALE  (1<<PBITS)      // fixed-point value of 1.0
#define PSCALEh (PSCALE>>1)     // fixed-point value of 0.5
#define PSCALEm (PSCALE-1)      // largest value a probability is clamped to

// weight precision
#define WBITS   (16)            // number of fractional bits of a mixer weight
#define WSCALE  (1<<WBITS)      // fixed-point value of 1.0
#define WSCALEh (WSCALE>>1)     // fixed-point value of 0.5
16 | 
17 | #endif // MODEL_H
18 | 


--------------------------------------------------------------------------------
/src/model/range.cpp:
--------------------------------------------------------------------------------
 1 | #include "range.h"
 2 | 
// DO(n) stmt: runs stmt n times; the loop counter is a uint32_t named `_`
#define DO(n) for (uint32_t _=0;_<n;_++)
 4 | 
 5 | void RangeCoder::Init()
 6 | {
 7 |   low     = code  = 0;
 8 |   range   = uint32_t(-1);
 9 |   if (decode==1) DO(NUM) (code <<=8) += buf.GetByte();
10 | }
11 | 
12 | void RangeCoder::Stop()
13 | {
14 |   if (decode==0) DO(NUM) buf.PutByte(low>>24),low<<=8;
15 | }
16 | 
17 | void RangeCoder::EncodeSymbol(uint32_t cumfreq,uint32_t freq,uint32_t totfreq)
18 | {
19 |   low   += cumfreq * (range /= totfreq);
20 |   range *= freq;
21 |   RANGE_ENC_NORMALIZE
22 | }
23 | 
24 | void RangeCoder::DecodeSymbol(uint32_t cumfreq,uint32_t freq)
25 | {
26 |   low   += cumfreq*range;
27 |   range *= freq;
28 |   RANGE_DEC_NORMALIZE
29 | }
30 | 
31 | uint32_t RangeCoder::DecProb(uint32_t totfreq)
32 | {
33 |   uint32_t tmp=(code-low) / (range /= totfreq);
34 |   return tmp;
35 | }
36 | 
37 | void RangeCoder::EncodeBitOne(uint32_t p1,const int bit)
38 | {
39 |   const uint32_t rnew=SCALE_RANGE;
40 |   bit ? low += rnew,range-=rnew:range=rnew;
41 |   RANGE_ENC_NORMALIZE
42 | }
43 | 
44 | int RangeCoder::DecodeBitOne(uint32_t p1)
45 | {
46 |   const uint32_t rnew=SCALE_RANGE;
47 |   int bit=(code-low>=rnew);
48 |   bit ? low += rnew,range-=rnew:range=rnew;
49 |   RANGE_DEC_NORMALIZE
50 |   return bit;
51 | }
52 | 
53 | // binary rangecoder
54 | void RangeCoderSH::Init()
55 | {
56 |   range = 0xFFFFFFFF;
57 |   lowc = FFNum = Cache = code = 0;
58 |   if(decode==1) DO(NUM+1) (code <<=8) += buf.GetByte();
59 | }
60 | 
61 | void RangeCoderSH::Stop()
62 | {
63 |   if (decode==0) DO(NUM+1) ShiftLow();
64 | }
65 | 
66 | void RangeCoderSH::EncodeBitOne(uint32_t p1,int bit)
67 | {
68 |   const uint32_t rnew=SCALE_RANGE;
69 |   bit ? range-=rnew, lowc+=rnew : range=rnew;
70 |   while(range<TOP) range<<=8,ShiftLow();
71 | }
72 | 
73 | int RangeCoderSH::DecodeBitOne(uint32_t p1)
74 | {
75 |   const uint32_t rnew=SCALE_RANGE;
76 |   int bit = (code>=rnew);
77 |   bit ? range-=rnew, code-=rnew : range=rnew;
78 |   while(range<TOP) range<<=8,(code<<=8)+=buf.GetByte();
79 |   return bit;
80 | }
81 | 
82 | void RangeCoderSH::ShiftLow()
83 | {
84 |   uint32_t Carry = uint32_t(lowc>>32), low = uint32_t(lowc);
85 |   if( low<Thres || Carry )
86 |   {
87 |      buf.PutByte(Cache+Carry);
88 |      for (;FFNum != 0;FFNum--) buf.PutByte(Carry-1);
89 |      Cache = low>>24;
90 |    } else FFNum++;
91 |   lowc = (low<<8);
92 | }
93 | 


--------------------------------------------------------------------------------
/src/model/range.h:
--------------------------------------------------------------------------------
 1 | #ifndef RANGE_H
 2 | #define RANGE_H
 3 | 
 4 | #include "../common/bufio.h"
 5 | #include "model.h"
 6 | #include <functional>
 7 | 
 8 | class RangeCoderBase {
 9 |   public:
10 |     explicit RangeCoderBase(BufIO &buf,int dec=0):buf(buf),decode(dec){};
11 |     void SetDecode(){decode=1;};
12 |     void SetEncode(){decode=0;};
13 |   protected:
14 |     BufIO &buf;
15 |     int decode;
16 | };
17 | 
// SCALE_RANGE: range*(PSCALE-p1)/PSCALE, i.e. the part of the current range
// that belongs to a 0-bit. Expects `range` and `p1` to be in scope.
//#define SCALE_RANGE (((PSCALE-p1)*uint64_t(range)) >> PBITS) // 64 bit shift
#define SCALE_RANGE ((uint64_t(range)*((PSCALE-p1)<<(32-PBITS)))>>32)

// Renormalization loops of the carryless coder: while the top byte of low is
// settled (or range dropped below BOT, in which case range is cut back first)
// one byte is written / read and low and range are shifted up by 8 bits.
// They expect `low`, `range`, `code`, `buf`, TOP and BOT to be in scope.
#define RANGE_ENC_NORMALIZE  while ((low ^ (low+range))<TOP || (range<BOT && ((range= -(int)low & (BOT-1)),1))) buf.PutByte(low>>24),range<<=8,low<<=8;
#define RANGE_DEC_NORMALIZE  while ((low ^ (low+range))<TOP || (range<BOT && ((range= -(int)low & (BOT-1)),1))) (code<<=8)+=buf.GetByte(),range<<=8,low<<=8;
23 | 
24 | // Carryless RangeCoder
25 | // derived from Dimitry Subbotin (public domain)
26 | class RangeCoder : public RangeCoderBase
27 | {
28 |   enum : uint32_t {NUM=4,TOP=0x01000000U,BOT=0x00010000U};
29 |   public:
30 |     using RangeCoderBase::RangeCoderBase;
31 |     void Init();
32 |     void Stop();
33 |     void EncodeSymbol(uint32_t low,uint32_t freq,uint32_t tot);
34 |     void DecodeSymbol(uint32_t low,uint32_t freq);
35 |     void EncodeBitOne(uint32_t p1,int bit);
36 |     int  DecodeBitOne(uint32_t p1);
37 |     uint32_t DecProb(uint32_t totfreq);
38 |   protected:
39 |     uint32_t low,range,code;
40 | };
41 | 
42 | 
43 | // Binary RangeCoder with Carry and 64-bit low
44 | // derived from rc_v3 by Eugene Shelwien
45 | class RangeCoderSH : public RangeCoderBase {
46 |   enum : uint32_t { NUM=4,TOP=0x01000000U,Thres=0xFF000000U};
47 |   public:
48 |     using RangeCoderBase::RangeCoderBase;
49 |     void Init();
50 |     void Stop();
51 |     void EncodeBitOne(uint32_t p1,int bit);
52 |     int  DecodeBitOne(uint32_t p1);
53 | 
54 |     std::function<void(uint32_t,int)> encode_p1 = [this](uint32_t p1,int bit) {return EncodeBitOne(p1,bit);}; // stupid C++
55 |     std::function<int(uint32_t)> decode_p1 = [this](uint32_t p1) {return DecodeBitOne(p1);};
56 |   protected:
57 |     void ShiftLow();
58 |     uint32_t range,code,FFNum,Cache;
59 |     uint64_t lowc;
60 | };
61 | 
62 | #endif // RANGE_H
63 | 


--------------------------------------------------------------------------------
/src/model/sse.h:
--------------------------------------------------------------------------------
  1 | #ifndef SSE_H
  2 | #define SSE_H
  3 | 
  4 | #include "counter.h"
  5 | #include "domain.h"
  6 | 
  7 | /*
  8 |  SSE: functions of context history
  9 |  maps a probability via (linear-)quantization to a new probability
 10 | */
 11 | template <int NB>
 12 | class SSE {
 13 |   uint16_t p_quant,px;
 14 |   public:
 15 |     enum {mapsize=1<<NB};
 16 |     SSE ()
 17 |     {
 18 |       for (int i=0;i<=mapsize;i++) // init prob-map that SSE.p1(p)~p
 19 |       {
 20 |         int v=((i*PSCALE)>>NB);
 21 |         v = std::clamp(v,1,PSCALEm);
 22 |         Map[i].p1=v;
 23 |       }
 24 |     }
 25 |     int Predict(int p1) // linear interpolate beetween bins
 26 |     {
 27 |       p_quant=p1>>(PBITS-NB);
 28 |       int p_mod=p1&(mapsize-1); //int p_mod=p1%map_size;
 29 |       int pl=Map[p_quant].p1;
 30 |       int ph=Map[p_quant+1].p1;
 31 |       px=pl+((p_mod*(ph-pl))>>NB);
 32 |       return px;
 33 |     }
 34 |     void Update(int bit,int rate) // update both bins
 35 |     {
 36 |       Map[p_quant].update(bit,rate);
 37 |       Map[p_quant+1].update(bit,rate);
 38 |     }
 39 |     /*void update4(int bit,int rate) // update four nearest bins
 40 |     {
 41 |       if (p_quant>0) Map[p_quant-1].update(bit,rate>>1);
 42 |       Map[p_quant].update(bit,rate);
 43 |       Map[p_quant+1].update(bit,rate);
 44 |       if (p_quant<mapsize-1) Map[p_quant+2].update(bit,rate>>1);
 45 |     }
 46 |     void update1(int bit,int rate) // update artifical bin
 47 |     {
 48 |       LinearCounter16 tmp;
 49 |       tmp.p1=px;
 50 |       tmp.update(bit,rate);
 51 |       int pm=tmp.p1-px;
 52 |       int pt1=Map[p_quant].p1+pm;
 53 |       int pt2=Map[p_quant+1].p1+pm;
 54 |       Map[p_quant].p1=clamp(pt1,1,PSCALEm);
 55 |       Map[p_quant+1].p1=clamp(pt2,1,PSCALEm);
 56 |     }*/
 57 |   protected:
 58 |     LinearCounter16 Map[(1<<NB)+1];
 59 | };
 60 | 
 61 | // Maps a state to a probability
 62 | class HistProbMapping
 63 | {
 64 |   enum {NUMSTATES=256};
 65 |   public:
 66 |     HistProbMapping()
 67 |     {
 68 |       for (int i=0;i<NUMSTATES;i++) Map[i].p1=StateProb::GetP1(i);
 69 |     };
 70 |     inline int p1(uint8_t state)
 71 |     {
 72 |        st=state;
 73 |        return Map[state].p1;
 74 |     }
 75 |     void Update(int bit,int rate)
 76 |     {
 77 |        Map[st].update(bit,rate);
 78 |     }
 79 |   protected:
 80 |     uint8_t st;
 81 |     LinearCounterLimit Map[NUMSTATES];
 82 | };
 83 | 
 84 | template <int N>
 85 | class SSENL
 86 | {
 87 |   //enum {szmap=1<<NB};
 88 |   public:
 89 |     int tscale,xscale;
 90 |     uint16_t p_quant;
 91 |     SSENL(int scale=myDomain.max)
 92 |     :tscale(scale),xscale((2*tscale)/(N-1))
 93 |     {
 94 |       if (xscale==0) xscale=1;
 95 |       for (int i=0;i<=N;i++)
 96 |       {
 97 |          int x=myDomain.Inv(i*xscale-tscale);
 98 |          Map[0][i].p1=x;
 99 |          Map[1][i].p1=x;
100 |       }
101 |       lb=0;
102 |     };
103 |     int Predict(int p1)
104 |     {
105 |        int pq=(std::min)(2*tscale,(std::max)(0,myDomain.Fwd(p1)+tscale));
106 | 
107 |        p_quant=pq/xscale;
108 |        int p_mod=pq-(p_quant*xscale); //%xscale;
109 | 
110 |        int pl=Map[lb][p_quant].p1;
111 |        int ph=Map[lb][p_quant+1].p1;
112 | 
113 |        int px=(pl*(xscale-p_mod)+ph*p_mod)/xscale;
114 |        return std::clamp(px,1,PSCALEm);
115 |     };
116 |     void Update(int bit,int rate,bool updlb=true)
117 |     {
118 |        Map[lb][p_quant].update(bit,rate);
119 |        Map[lb][p_quant+1].update(bit,rate);
120 |        if (updlb) lb=bit;
121 |     };
122 |   protected:
123 |     LinearCounter16 Map[2][N+1];
124 |     int lb;
125 | };
126 | 
127 | #endif
128 | 


--------------------------------------------------------------------------------
/src/opt/cma.cpp:
--------------------------------------------------------------------------------
 1 | #include <format>
 2 | #include "../common/math.h"
 3 | #include "cma.h"
 4 | #include "ssc.h"
 5 | 
 6 | OptCMA::OptCMA(const CMACfg &cfg,const box_const &parambox,bool verbose)
 7 | :Opt(parambox),cfg(cfg),p(ndim),
 8 | chol(ndim),
 9 | verbose(verbose)
10 | {
11 |   p.sigma = cfg.sigma_init;
12 |   p.psucc = p.p_target_succ;
13 | }
14 | 
15 | 
16 | // generate mvn-distributed variable
17 | // xgen=x + sigma*(G*z)
18 | auto OptCMA::generate_candidate(const vec1D &x,double sigma)
19 | {
20 |   vec1D xnorm=scale(x);
21 | 
22 |   vec1D z(ndim);
23 |   for (auto &r:z)
24 |     r = rand.r_norm();
25 | 
26 |   vec1D az=slmath::mul(chol.G,z);
27 | 
28 |   vec1D xgen(ndim);
29 |   for (int i=0;i<ndim;i++)
30 |   {
31 |     #if 1
32 |       double scale=(pb[i].xmax-pb[i].xmin)*sigma;
33 |       double xnew=x[i] + scale*az[i];
34 |       xgen[i]=reflect(xnew,pb[i].xmin,pb[i].xmax);
35 |     #else
36 |       double xnew=xnorm[i] + sigma*az[i];
37 |       xgen[i] = reflect(xnew,0,1);
38 |     #endif
39 |   }
40 |   return std::tuple{xgen,az};
41 | }
42 | 
43 | void OptCMA::update_cov(vec2D &mcov, vec1D &pc,const vec1D &az)
44 | {
45 |   pc = slmath::mul_add(1.0-p.cc, pc, std::sqrt(p.cc*(2.0-p.cc)), az);
46 |   mcov = slmath::mul_add(1.0-p.ccov, mcov, p.ccov, slmath::outer(pc,pc));
47 | }
48 | 
// Minimizes func starting at xstart: each iteration draws one candidate
// around the best point, adapts the step size from the outcome and, on
// success, moves to the candidate and updates the covariance.
// Stops after cfg.nfunc_max evaluations and returns the best point found.
Opt::ppoint OptCMA::run(opt_func func,const vec1D &xstart)
{
  vec1D pc(ndim); // evolution path
  vec2D mcov(ndim,vec1D(ndim)); //covariance matrix
  for (int i=0;i<ndim;i++) mcov[i][i]=1.0; // start from the identity

  //SSC1 ssc(0.05,0.10,0.05);
  //std::cout << p.p_target_succ << ' '  << p.cp << ' ' << 1.0/p.d << '\n';
  SSC1 ssc(p.p_target_succ,p.cp,1.0/p.d);

  int nfunc=1;
  ppoint xb{func(xstart),xstart};
  if (verbose) std::cout << xb.first << '\n';

  for (;nfunc < cfg.nfunc_max;)
  {
    chol.Factor(mcov,0.1);

    auto [xgen,az]=generate_candidate(xb.second,p.sigma);
    const double fn = func(xgen);
    const bool improved = (fn < xb.first);

    p.sigma = ssc.update(p.sigma,improved?1.0:0.0);

    if (improved)
    {
      xb.first = fn;
      xb.second = xgen;

      update_cov(mcov,pc,az);
    }

    nfunc++;
    if (verbose) {
      std::cout << "CMA " << std::format("{:5}",nfunc) << ": " << std::format("{:0.2f}",xb.first) << " s=" << std::format("{:0.4f}",p.sigma)
                << "\r";
    }
  }

  return xb;
}
93 | 


--------------------------------------------------------------------------------
/src/opt/cma.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMA_H
 2 | #define CMA_H
 3 | 
 4 | #include "opt.h"
 5 | #include "../common/utils.h"
 6 | #include "../common/math.h"
 7 | 
 8 | class OptCMA : public Opt {
 9 |   public:
10 |     struct CMACfg
11 |     {
12 |       int num_threads=1;
13 |       int nfunc_max=0;
14 |       double sigma_init=0.;
15 |     };
16 |     struct CMAParams
17 |     {
18 |       double d,p_target_succ,cp;
19 |       double cc,ccov,ccovm,pthres;
20 |       double psucc,sigma;
21 | 
22 |       CMAParams(int n)
23 |       {
24 |         d=1.0+n/2.0; // damping parameter
25 |         p_target_succ=2.0/11.0;
26 |         cp=1.0/12.0; // learning rate
27 | 
28 |         // covariance matrix adaption
29 |         cc=2.0/(n+2.0); // learning rate evolution path
30 |         ccov=2.0/(n*n + 6.0); // learning rate covariance matrix
31 |         ccovm=0.4/(std::pow(n,1.6)+1.0); // learning rate active covariance update
32 |         pthres=0.44;
33 | 
34 |         psucc=sigma=0.0;
35 |       }
36 |     };
37 |     OptCMA(const CMACfg &cfg,const box_const &parambox,bool verbose=false);
38 |     ppoint run(opt_func func,const vec1D &xstart) override;
39 |   protected:
40 |     auto generate_candidate(const vec1D &x,double sigma);
41 |     void update_cov(vec2D &mcov, vec1D &pc,const vec1D &az);
42 |     const CMACfg &cfg;
43 |     CMAParams p;
44 |     slmath::Cholesky chol;
45 |     bool verbose;
46 | 
47 | };
48 | 
49 | #endif
50 | 
51 | 


--------------------------------------------------------------------------------
/src/opt/dds.cpp:
--------------------------------------------------------------------------------
  1 | #include <format>
  2 | #include "dds.h"
  3 | #include "ssc.h"
  4 | 
  5 | 
  6 | OptDDS::OptDDS(const DDSCfg &cfg,const box_const &parambox,bool verbose)
  7 | :Opt(parambox),cfg(cfg),
  8 | verbose(verbose)
  9 | {
 10 | }
 11 | 
 12 | vec1D OptDDS::generate_candidate(const vec1D &x,int nfunc,double sigma)
 13 | {
 14 |   std::vector <int>J; // select J of D variables
 15 |   double p=1.0-log(nfunc)/log(cfg.nfunc_max);
 16 | 
 17 |   for (int i=0;i<ndim;i++) {
 18 |     if (rand.event(p)) J.push_back(i);
 19 |   }
 20 |   // set empty? select random element
 21 |   if (!J.size()) J.push_back(rand.ru_int(0,ndim-1));
 22 | 
 23 |   // perturb decision variables
 24 |   vec1D xtest=x;
 25 |   for (auto k:J) {
 26 |     xtest[k]=gen_norm(x[k],pb[k],sigma);
 27 |     assert(xtest[k]>=pb[k].xmin && xtest[k]<=pb[k].xmax);
 28 |   }
 29 |   return xtest;
 30 | }
 31 | 
 32 | // sequential single threaded
 33 | Opt::ppoint OptDDS::run_single(opt_func func,const vec1D &xstart)
 34 | {
 35 |   int nfunc=1;
 36 |   ppoint xb{func(xstart),xstart};
 37 |   if (verbose) std::cout << xb.first << '\n';
 38 | 
 39 |   double sigma=cfg.sigma_init;
 40 | 
 41 |   // step size control
 42 |   SSC0 ssc(cfg.c_succ_max,cfg.c_fail_max);
 43 | 
 44 |   while (nfunc<cfg.nfunc_max) {
 45 |     ppoint x_gen;
 46 |     x_gen.second=generate_candidate(xb.second,nfunc,sigma);
 47 |     x_gen.first=func(x_gen.second);
 48 |     nfunc++;
 49 | 
 50 |     double lambda=0.0;
 51 |     if (x_gen.first<xb.first) {
 52 |       xb = x_gen;
 53 |       lambda=1.0;
 54 |     }
 55 |     sigma = ssc.update(sigma,lambda);
 56 | 
 57 |     if (verbose) std::cout << " DDS " << std::format("{:5}",nfunc) << ": " << std::format("{:0.4f}",xb.first) << " s=" << sigma << "\r";
 58 |   }
 59 |   return xb;
 60 | }
 61 | 
 62 | // multi-threaded variant
 63 | Opt::ppoint OptDDS::run_mt(opt_func func,const vec1D &xstart)
 64 | {
 65 | 
 66 |   ppoint xb{func(xstart),xstart}; // eval at initial solution
 67 | 
 68 |   if (verbose) std::cout << xb.first << '\n';
 69 | 
 70 |   double sigma=cfg.sigma_init;
 71 | 
 72 |   // step size control
 73 |   SSC1 ssc(0.05,0.10,0.05);
 74 | 
 75 |   int nfunc=1;
 76 |   while (nfunc<cfg.nfunc_max) {
 77 |     const int nthreads = std::min(cfg.nfunc_max-nfunc,cfg.num_threads);
 78 | 
 79 |     // generate candidates around current xbest
 80 |     opt_points x_gen(nthreads);
 81 |     for (int i=0;i<nthreads;i++) {
 82 |       x_gen[i].second=generate_candidate(xb.second,nfunc,sigma);
 83 |       nfunc++;
 84 |     };
 85 | 
 86 |     eval_points_mt(func,x_gen);
 87 | 
 88 |     // select
 89 |     ppoint xb_old=xb;
 90 |     int nsucc=0;
 91 |     for (const auto &xg : x_gen)
 92 |       if (xg.first<xb_old.first)  {
 93 |         nsucc++; // count as success, if better than parent
 94 |         if (xg.first < xb.first) // replace overall best
 95 |           xb = xg;
 96 |       }
 97 |     double lambda=nsucc/static_cast<double>(nthreads);
 98 |     sigma=ssc.update(sigma,lambda);
 99 | 
100 |     if (verbose) {
101 |         std::cout << " DDS mt=" << nthreads << ": " << std::format("{:5}",nfunc) << ": " << std::format("{:0.2f}",xb.first);
102 |         std::cout << " s=" << std::format("{:0.3f}",sigma) << ", p_succ=" << std::format("{:0.4f}",ssc.p_succ) << "\r";
103 |     }
104 |   }
105 |   return xb;
106 | }
107 | 
108 | OptDDS::ppoint OptDDS::run(opt_func func,const vec1D &xstart)
109 | {
110 |   assert(pb.size()==xstart.size());
111 | 
112 |   ppoint pbest;
113 |   if (cfg.num_threads<=0) pbest=run_single(func,xstart);
114 |   else pbest=run_mt(func,xstart);
115 | 
116 |   if (verbose) std::cout << '\n';
117 |   return pbest;
118 | }
119 | 


--------------------------------------------------------------------------------
/src/opt/dds.h:
--------------------------------------------------------------------------------
 1 | #ifndef DDS_H
 2 | #define DDS_H
 3 | 
 4 | #include <future>
 5 | #include <cassert>
 6 | #include "opt.h"
 7 | 
// Dynamically dimensioned search algorithm for computationally efficient watershed model calibration
// Tolson, Shoemaker 2007
//
// Greedy single-solution search: each step perturbs a random subset of the
// coordinates of the current best point and keeps the trial only if it is better.
class OptDDS : public Opt {
  public:
    struct DDSCfg
    {
      double sigma_init=0.2; // initial step size, relative to the box width of each parameter
      int c_succ_max=3;      // run_single: passed to SSC0 as success-count threshold
      int c_fail_max=50;     // run_single: passed to SSC0 as failure-count threshold
      int num_threads=1;     // <=0: run_single, otherwise run_mt with up to num_threads candidates per step
      int nfunc_max=0;       // evaluation budget; the evaluation of xstart counts as the first one
    };
    // cfg is stored by reference (see member below): it must outlive the optimizer
    OptDDS(const DDSCfg &cfg,const box_const &parambox,bool verbose=false);
    // minimize func starting from xstart, returns the best (cost, parameters) pair
    ppoint run(opt_func func,const vec1D &xstart) override;
  protected:
    // perturb a random subset of the coordinates of x with step size sigma
    vec1D generate_candidate(const vec1D &x,int nfunc,double sigma);
    ppoint run_single(opt_func func,const vec1D &xstart);
    ppoint run_mt(opt_func func,const vec1D &xstart);
    const DDSCfg &cfg; // not owned
    bool verbose;      // print progress to std::cout
};
29 | 
30 | #endif // DDS_H
31 | 
32 | 


--------------------------------------------------------------------------------
/src/opt/de.cpp:
--------------------------------------------------------------------------------
  1 | #include <cassert>
  2 | #include <format>
  3 | #include "de.h"
  4 | #include "../common/utils.h"
  5 | 
  6 | OptDE::OptDE(const DECfg &cfg,const box_const &parambox,bool verbose)
  7 | :Opt(parambox),cfg(cfg),verbose(verbose)
  8 | {
  9 | }
 10 | 
 11 | // select k unique elements from 0..n-1 except ie
 12 | std::vector<int> OptDE::select_k_unique_except(int n,int ie,int k)
 13 | {
 14 |   std::vector<int>r;
 15 |   if (k>=n-1) return r;
 16 | 
 17 |   std::vector<int> e(n);
 18 |   std::iota(std::begin(e),std::end(e),0);
 19 |   std::erase(e,ie);
 20 | 
 21 |   for (int i=0;i<k;i++)
 22 |   {
 23 |     int idx = rand.ru_int(0,e.size()-1);
 24 |     int val = e[idx];
 25 |     r.push_back(val);
 26 |     std::erase(e,val);
 27 |   }
 28 |   return r;
 29 | }
 30 | 
 31 | auto OptDE::generate_candidate(const opt_points &pop,const vec1D &xbest,int iagent,double mCR,double mF)
 32 | {
 33 |   const double tCR = gen_CR(mCR);
 34 |   const double tF  = gen_F(mF);
 35 |   const int R = rand.ru_int(0,ndim-1);
 36 | 
 37 |   auto gp = [&](int i) -> auto& {return pop[i].second;};
 38 | 
 39 |   // select k distinct elements from pop depending on mut_method
 40 |   auto v = select_k_unique_except(pop.size(),iagent,MutVals[cfg.mut_method]);
 41 | 
 42 |   // mutation
 43 |   vec1D xm;
 44 |   if (cfg.mut_method==BEST1BIN) {
 45 |     xm = mut_1bin(xbest,gp(v[0]),gp(v[1]),tF);
 46 |   } else if (cfg.mut_method==RAND1BIN) {
 47 |     xm = mut_1bin(gp(v[0]),gp(v[1]),gp(v[2]),tF);
 48 |   } else if (cfg.mut_method==CUR1BEST) {
 49 |     xm = mut_curbest(xbest,gp(iagent),gp(v[0]),gp(v[1]),tF);
 50 |   } else if (cfg.mut_method==CURPBEST) {
 51 |     // pop.size() can by smaller than cfg.NP
 52 |     int np = std::min(cfg.npbest,static_cast<int>(pop.size())-1);
 53 |     // np=0 reduces to CUR1BEST as gp(0)=xbest with sorted pop
 54 |     int xp = np>0?rand.ru_int(0,np):0;
 55 |     xm = mut_curbest(gp(xp),gp(iagent),gp(v[0]),gp(v[1]),tF);
 56 |   }
 57 | 
 58 |   // cross-over
 59 |   vec1D xtrial(ndim);
 60 |   const ppoint &xi=pop[iagent];
 61 |   for (int i=0;i<ndim;i++)
 62 |   {
 63 |     if (rand.event(tCR) || (i==R)) xtrial[i] = xm[i];
 64 |     else xtrial[i] = xi.second[i];
 65 |   }
 66 | 
 67 |   return std::tuple{xtrial,tCR,tF};
 68 | }
 69 | 
 70 | void OptDE::print_status(std::size_t nfunc,double fx,double mCR,double mF)
 71 | {
 72 |   std::cout << "DE " << std::format("{:5}",nfunc) << ": " << std::format("{:0.2f}",fx);
 73 |   std::cout << " (mCR=" << std::format("{:0.4f}",mCR) << " mF=" << std::format("{:0.4f}",mF);
 74 |   std::cout << ")\r";
 75 | }
 76 | 
// Minimize func with differential evolution, starting from xstart.
//   func    objective, called with a parameter vector, returns the cost
//   xstart  initial point; becomes population member 0
// Returns the best (cost, parameters) pair found.
// nfunc counts objective evaluations; the main loop stops once cfg.nfunc_max is reached.
OptDE::ppoint OptDE::run(opt_func func,const vec1D &xstart)
{
  assert(pb.size()==xstart.size());

  std::size_t nfunc=1;
  ppoint xb{func(xstart),xstart}; // eval at initial solution
  if (verbose) std::cout << xb.first << '\n';

  opt_points pop(cfg.NP); // population
  pop[0] = xb;

  // view on all members except the start point
  std::span<ppoint> pop_span(pop.begin()+1,pop.end());
  // generate random population
  for (auto &x : pop_span) {
    vec1D xt;
    if (cfg.init_method == INIT_UNIV)
      xt=gen_uniform_samples(xb.second,cfg.sigma_init);
    else if (cfg.init_method == INIT_NORM)
      xt=gen_norm_samples(xb.second,cfg.sigma_init);

    x.second = xt;
  }

  // eval initial population in parallel (excluding first)
  nfunc +=eval_pop_pool(func,pop_span,cfg.num_threads);
  // update best sample
  for (const auto &x:pop_span)
    if (x.first < xb.first)
      xb = x;

  // adaptive location parameters for the per-trial CR and F draws
  double mCR = cfg.CR;
  double mF = cfg.F;

  if (verbose) std::cout << "DE init pop " << pop.size() << " (mt=" << cfg.num_threads << "): s=" << cfg.sigma_init << ": " << xb.first << "\n";

  // trial agents
  opt_points gen_pop;
  std::vector<std::pair<double,double>> gen_mut;

  while (nfunc<cfg.nfunc_max)
  {
    if (cfg.mut_method==CURPBEST) { // sort by function value
      std::sort(begin(pop),end(pop),
        [](const auto &a,const auto &b){return a.first < b.first;});
    }

    // ensure we don't use more than nfunc_max evals
    const int num_agents = std::min(cfg.nfunc_max-nfunc,pop.size());

    // trial agents
    gen_mut.resize(num_agents);
    gen_pop.resize(num_agents);

    for (int iagent=0;iagent<num_agents;iagent++)
    {
      auto [xtrial, tCR, tF] = generate_candidate(pop,xb.second,iagent,mCR,mF);
      gen_mut[iagent]={tCR,tF}; // save (random) mutation params
      gen_pop[iagent].second = xtrial;
    }

    // evaluate trial population
    nfunc+=eval_pop_pool(func,gen_pop,cfg.num_threads);

    // greedy selection
    // CR/F values of the trials that replaced their parent in this generation
    std::vector<double>CR_succ;
    std::vector<double>F_succ;
    for (int iagent=0;iagent<num_agents;iagent++)
      if (gen_pop[iagent].first < pop[iagent].first)
      {
        pop[iagent] = gen_pop[iagent]; // replace
        CR_succ.push_back(gen_mut[iagent].first);
        F_succ.push_back(gen_mut[iagent].second);

        // replace best vector
        if (pop[iagent].first < xb.first)
          xb = pop[iagent];
      }

    if (verbose) print_status(nfunc,xb.first,mCR,mF);

    if (nfunc >= cfg.nfunc_max) break;

    // exponential smoothing with rate cfg.c towards the means of the successful values
    // NOTE(review): meanL is presumably the Lehmer mean, and the result of both
    // means for an empty input is not visible here - confirm in MathUtils
    mCR = (1.0-cfg.c)*mCR + cfg.c*MathUtils::mean(CR_succ);
    mF  = (1.0-cfg.c)*mF  + cfg.c*MathUtils::meanL(F_succ);
  }
  if (verbose) std::cout << '\n';
  return xb;
}
165 | 
166 | vec1D OptDE::mut_1bin(const vec1D &xb,const vec1D &x1,const vec1D &x2,double F)
167 | {
168 |   vec1D xm(ndim);
169 |   for (int i=0;i<ndim;i++) {
170 |     double y=xb[i] + F*(x1[i] - x2[i]);
171 |     xm[i] = reflect(y, pb[i].xmin,pb[i].xmax);
172 |   }
173 |   return xm;
174 | }
175 | 
176 | vec1D OptDE::mut_curbest(const vec1D &xbest,const vec1D &xb,const vec1D &x1,const vec1D &x2,double F)
177 | {
178 |   vec1D xm(ndim);
179 |   for (int i=0;i<ndim;i++) {
180 |     double y=xb[i] + F*(xbest[i] - xb[i]) + F*(x1[i] - x2[i]);
181 |     xm[i] = reflect(y, pb[i].xmin,pb[i].xmax);
182 |   }
183 |   return xm;
184 | }
185 | 


--------------------------------------------------------------------------------
/src/opt/de.h:
--------------------------------------------------------------------------------
 1 | #ifndef DE_H
 2 | #define DE_H
 3 | 
 4 | #include "opt.h"
 5 | 
// Differential Evolution
// Population based minimizer with several mutation strategies and
// per-generation adaptation of the crossover rate (CR) and scale factor (F).
class OptDE : public Opt {
  public:
    // mutation strategies
    enum MutMethod {BEST1BIN,RAND1BIN,CUR1BEST,CURPBEST};
    // number of random donor vectors each mutation strategy needs
    // (per-instance, non-const member: it is looked up with operator[])
    std::unordered_map<MutMethod, int> MutVals = {
      {BEST1BIN,2},
      {RAND1BIN,3},
      {CUR1BEST,2},
      {CURPBEST,2}
    };
    // how the initial population is sampled around the start point
    enum InitMethod {INIT_UNIV,INIT_NORM};
    struct DECfg
    {
      int NP=30;        // population size (including the start point)
      double CR=0.5;    // initial value of the adaptive crossover-rate parameter
      double F=0.5;     // initial value of the adaptive scale-factor parameter
      double c=0.1;     // smoothing rate for the adaptation of CR and F
      MutMethod mut_method=CURPBEST;
      InitMethod init_method=INIT_NORM;
      double pbest=0.1; // fraction of the population treated as "p-best"
      // highest p-best index; derived from pbest and NP when the struct is
      // initialized, it is NOT recomputed if NP or pbest are assigned later
      int npbest=std::clamp(static_cast<int>(std::round(pbest*NP))-1,0,NP-1);
      double sigma_init=0.15;    // spread of the initial population, relative to the box width
      std::size_t num_threads=1; // worker threads used to evaluate a population
      std::size_t nfunc_max=0;   // budget of objective evaluations
    };

    // cfg is stored by reference (see member below): it must outlive the optimizer
    OptDE(const DECfg &cfg,const box_const &parambox,bool verbose=false);

    // minimize func starting from xstart, returns the best (cost, parameters) pair
    ppoint run(opt_func func,const vec1D &xstart) override;


  protected:
    // deduced return type: only usable after the definition in de.cpp
    auto generate_candidate(const opt_points &pop,const vec1D &xbest,int iagent,double mCR,double mF);
    void print_status(std::size_t nfunc,double fx,double mCR,double mF);
    // k distinct indices from 0..n-1, excluding t
    std::vector<int> select_k_unique_except(int n,int t,int k);

    // draw a crossover rate from rand.r_norm(mCR,0.1), clamped to [0.01,1]
    double gen_CR(double mCR)
    {
      return std::clamp(rand.r_norm(mCR,0.1),0.01,1.0);
    }
    // draw a scale factor from rand.r_cauchy(mF,0.1), clamped to [0.01,1]
    double gen_F(double mF)
    {
      return std::clamp(rand.r_cauchy(mF,0.1),0.01,1.0);
    }

    vec1D mut_1bin(const vec1D &xb,const vec1D &x1,const vec1D &x2,double F);
    vec1D mut_curbest(const vec1D &xbest,const vec1D &xb,const vec1D &x1,const vec1D &x2,double F);

    const DECfg &cfg; // not owned
    bool verbose;     // print progress to std::cout
};
57 | 
#endif // DE_H
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/src/opt/opt.cpp:
--------------------------------------------------------------------------------
  1 | #include "opt.h"
  2 | #include <cassert>
  3 | 
  4 | Opt::Opt(const box_const &parambox)
  5 | :rand(0),pb(parambox),ndim(parambox.size())
  6 | {
  7 | 
  8 | };
  9 | 
 10 | // evaluate span of points with ps.size() parallel threads
 11 | std::size_t Opt::eval_points_mt(opt_func func,std::span<ppoint> ps)
 12 | {
 13 |   std::vector <std::future<double>> threads;
 14 |   threads.reserve(ps.size());
 15 | 
 16 |   for (std::size_t i=0;i<ps.size();i++) {
 17 |     auto arg=ps[i].second;
 18 |     threads.emplace_back(std::async(std::launch::async, [func, arg]() {
 19 |       return func(arg);
 20 |     }));
 21 |   }
 22 | 
 23 |   if (threads.size()!=ps.size())
 24 |     std::cerr << "  warning: eval_points_mt: thread count too low\n";
 25 | 
 26 |   for (std::size_t i=0;i<threads.size();i++) {
 27 |     ps[i].first=threads[i].get();
 28 |     if (std::isnan(ps[i].first))
 29 |       std::cerr << " warning: nan in eval_points_mt\n";
 30 |   }
 31 | 
 32 |   #if 0 // check for thread safety
 33 |     std::vector<double> rt(ps.size());
 34 |     for (std::size_t i=0;i<ps.size();i++)
 35 |       rt[i] = func(ps[i].second);
 36 | 
 37 |     for (std::size_t i=0;i<ps.size();i++)
 38 |       if (ps[i].first != rt[i])
 39 |         std::cerr << "  warning: mt res (" << i << "): " << ps[i].first << ' ' << rt[i] << '\n';
 40 |   #endif
 41 | 
 42 |   return threads.size();
 43 | }
 44 | 
 45 | // evaluate population parallel in rounds of num_threads
 46 | // all threads should have the same work load
 47 | std::size_t Opt::eval_pop(opt_func func,std::span<ppoint> pop,std::size_t num_threads)
 48 | {
 49 |   std::size_t n=0;
 50 |   while (n < pop.size())
 51 |   {
 52 |     std::size_t start=n;
 53 |     std::size_t ende=std::min(pop.size(),n+num_threads);
 54 |     std::size_t k=eval_points_mt(func,std::span{begin(pop)+start,begin(pop) + ende});
 55 | 
 56 |     n+=k;
 57 |   }
 58 |   return n;
 59 | }
 60 | 
 61 | // evaluate population with a simple thread-pool using a shared atomic counter
 62 | // more efficient if work load is different per thread
 63 | std::size_t Opt::eval_pop_pool(opt_func func,std::span<ppoint> pop,std::size_t num_threads)
 64 | {
 65 |   // shared atomic counter among threads
 66 |   std::atomic<std::size_t> index{0};
 67 | 
 68 |   auto worker = [&]() {
 69 |     while (true) {
 70 |       // return counter, inc after - only need atomic uniqueness not synchronization visibility
 71 |       std::size_t i = index.fetch_add(1,std::memory_order_relaxed);
 72 |       if (i >= pop.size()) break; // index oob - nothing to do
 73 | 
 74 |       double result = func(pop[i].second);
 75 |       pop[i].first = result;
 76 | 
 77 |       if (std::isnan(result)) {
 78 |           std::cerr << " warning: nan in eval_pop\n";
 79 |       }
 80 |     }
 81 |   };
 82 | 
 83 |   // launch workers
 84 |   std::vector<std::thread> threads;
 85 |   for (std::size_t i=0;i<std::min(pop.size(),num_threads);i++) {
 86 |     threads.emplace_back(worker);
 87 |   }
 88 | 
 89 |   for (auto &t:threads) {
 90 |     t.join();
 91 |   }
 92 | 
 93 |   return pop.size();
 94 | }
 95 | 
 96 | vec1D Opt::scale(const vec1D &x) {
 97 |   vec1D v_out(x.size());
 98 |   for (size_t i=0;i<x.size();i++)
 99 |     v_out[i] = (x[i]-pb[i].xmin) / (pb[i].xmax-pb[i].xmin);
100 |   return v_out;
101 | }
102 | vec1D Opt::unscale(const vec1D &x)
103 | {
104 |   vec1D v_out(x.size());
105 |   for (size_t i=0;i<x.size();i++)
106 |     v_out[i] = (x[i] * (pb[i].xmax-pb[i].xmin)) + pb[i].xmin;
107 |   return v_out;
108 | }
109 | 
110 | // generate random normal distributed sample around x with sigma r
111 | double Opt::gen_norm(const double x,const tboxconst &box,const double r)
112 | {
113 |   double sigma=r*(box.xmax-box.xmin);
114 |   double xnew=x+sigma*rand.r_norm();
115 |   return reflect(xnew,box.xmin,box.xmax);
116 | }
117 | double Opt::unscale(double r,const tboxconst &box)
118 | {
119 |   return (r*(box.xmax-box.xmin)) + box.xmin;
120 | }
121 | 
122 | vec1D Opt::gen_norm_samples(const vec1D &xb,double radius)
123 | {
124 |   assert(pb.size()==xb.size());
125 |   const int n=pb.size();
126 |   vec1D v_out(n);
127 |   for (int i=0;i<n;i++)
128 |     v_out[i] = gen_norm(xb[i],pb[i],radius);
129 |   return v_out;
130 | }
131 | vec1D Opt::gen_uniform_samples(const vec1D &xb, double radius)
132 | {
133 |   assert(pb.size()==xb.size());
134 |   const int n=pb.size();
135 |   vec1D v_out(n);
136 |   for (int i=0;i<n;i++) {
137 |     double s=radius*(pb[i].xmax-pb[i].xmin);
138 |     double rnd=rand.r_int(-1,+1);
139 |     double xn=xb[i] + rnd*s;
140 |     v_out[i] = reflect(xn,pb[i].xmin,pb[i].xmax);
141 |   }
142 |   return v_out;
143 | }
144 | 
145 | vec1D Opt::gen_uniform_sample()
146 | {
147 |   int n=pb.size();
148 |   vec1D v_out(n);
149 |   for (int i=0;i<n;i++) {
150 |     v_out[i] = unscale(rand.r_01closed(),pb[i]); //sample from [0,1] and scale
151 |   }
152 |   return v_out;
153 | }
154 | 
155 | // reflect xnew at boundaries
156 | double Opt::reflect(double xnew,double xmin,double xmax) {
157 |   if (xnew<xmin) {
158 |     xnew=xmin+(xmin-xnew);
159 |     if (xnew>xmax) xnew=xmin;
160 |   }
161 |   if (xnew>xmax) {
162 |     xnew=xmax-(xnew-xmax);
163 |     if (xnew<xmin) xnew=xmax;
164 |   }
165 |   return xnew;
166 | }
167 | 
168 | double Opt::reset(double xnew,double xmin,double xmax) {
169 |   if (xnew < xmin)
170 |     return xmin;
171 |   else if (xnew > xmax)
172 |     return xmax;
173 |   else
174 |     return xnew;
175 | }
176 | 
177 | 


--------------------------------------------------------------------------------
/src/opt/opt.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPTIMIZER_H
 2 | #define OPTIMIZER_H
 3 | 
 4 | #include <functional>
 5 | #include <future>
 6 | #include "../global.h"
 7 | #include "../common/rand.h"
 8 | 
// general minimization for multivariate problems using box-constraints
// Abstract base class: concrete optimizers implement run() and use the
// sampling, bound-handling and parallel-evaluation helpers below.
class Opt {
    inline static const std::string SLOPT_VERSION="0.3.0";
  public:
    struct tboxconst {double xmin,xmax;};           // admissible range of one parameter
    using ppoint = std::pair<double,vec1D>;         // (cost, parameter vector)
    using opt_points = std::vector<ppoint>;
    using box_const = std::vector <tboxconst>;      // one range per parameter
    using opt_func = std::function<double(const vec1D &param)>; // objective to minimize

    Opt(const box_const &parambox);
    // minimize func starting at xstart, returns the best (cost, parameters) pair
    virtual ppoint run(opt_func func,const vec1D &xstart) = 0;
    virtual ~Opt() = default;
  protected:
    // parallel evaluation helpers: the cost is written to .first of every point,
    // the return value is the number of evaluations
    // func is called from several threads at the same time
    std::size_t eval_pop(opt_func func,std::span<ppoint> pop,std::size_t num_threads);      // rounds of num_threads points
    std::size_t eval_pop_pool(opt_func func,std::span<ppoint> pop,std::size_t num_threads); // workers pull points from a shared counter
    std::size_t eval_points_mt(opt_func func,std::span<ppoint> ps);                         // one async task per point

    // scale to [0,1]
    vec1D scale(const vec1D &x);
    vec1D unscale(const vec1D &x);
    double unscale(double r,const tboxconst &box);

    // generate random normal distributed sample around x with sigma r
    // (r is relative to the box width, the result is reflected into the box)
    double gen_norm(const double x,const tboxconst &box,const double r);

    // sample a whole point around xb, r relative to the box width of each parameter
    vec1D gen_norm_samples(const vec1D &xb,double r);
    vec1D gen_uniform_samples(const vec1D &xb, double r);

    // sample a point from the whole box
    vec1D gen_uniform_sample();
    // reflect xnew at boundaries
    double reflect(double xnew,double xmin,double xmax);
    // move xnew onto the violated boundary
    double reset(double xnew,double xmin,double xmax);

    Random rand;         // constructed with the fixed argument 0 in opt.cpp
    const box_const pb;  // copy of the box constraints
    const int ndim;      // number of parameters (pb.size())
};
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/src/opt/ssc.h:
--------------------------------------------------------------------------------
 1 | #ifndef SSC_H
 2 | #define SSC_H
 3 | 
 4 | // adaptive step-size control
 5 | 
 6 | class SSC0
 7 | {
 8 |   public:
 9 |     SSC0(int nsucc_max=3,int nfail_max=50)
10 |     :nsucc_max(nsucc_max),nfail_max(nfail_max)
11 |     {
12 |       nsucc = nfail=0;
13 |     }
14 |     double update(double sigma,double lambda)
15 |     {
16 |       if (lambda > 0.0) {
17 |         nsucc+=1;
18 |         nfail=0;
19 |       } else {
20 |         nsucc=0;
21 |         nfail+=1;
22 |       }
23 | 
24 |       if (nsucc >= nsucc_max) {
25 |         sigma=sigma*2.0;
26 |         nsucc=0;
27 |       } else if (nfail >= nfail_max) {
28 |         sigma=sigma/2.0;
29 |         nfail=0;
30 |       }
31 |       return std::clamp(sigma,0.05,0.5);
32 |     }
33 |   protected:
34 |     int nsucc_max,nfail_max,nsucc,nfail;
35 | };
36 | 
// Success-rate based step-size control.
// p_target: target success probability
// p_c: learning rate for the exponentially smoothed success probability
// p_d: gain of the multiplicative increase/decrease of sigma
// lambda_min, lambda_max: bounds the returned sigma is clamped to
class SSC1
{
  public:
    SSC1(double p_target=0.10,double p_c=0.10,double p_d=0.05,double lambda_min=0.05,double lambda_max=0.25)
    :p_succ(p_target),
     p_target(p_target),p_c(p_c),p_d(p_d),
     lmin(lambda_min),lmax(lambda_max)
    {
    }
    // sigma:  current step size
    // lambda: success ratio observed in the last step
    // returns the new, clamped step size
    double update(double sigma,double lambda)
    {
      // update empirical success prob by exp. smoothing
      p_succ=(1.0-p_c)*p_succ + p_c*lambda;
      // grow sigma while above the target rate, shrink it while below
      const double gain=std::exp(p_d * (p_succ-p_target) / (1.0-p_target));
      return std::clamp(sigma * gain,lmin,lmax);
    }
    double p_succ; // smoothed success probability, starts at p_target
  protected:
    double p_target,p_c,p_d;
    double lmin,lmax;
};
61 | 
62 | #if 0
63 |   /*if (p_succ > p_target)
64 |     sigma = std::min(sigma*1.01,0.5);
65 |   else if (p_succ < p_target) {
66 |     sigma = std::max(sigma/1.01,0.05);
67 |   }*/
68 |   double pd=0.01;
69 |   sigma = sigma * std::exp(pd * (p_succ-p_target) / (1.0-p_target));
70 |   sigma = std::max(std::min(sigma,0.5),0.05);
71 | #endif
72 | 
73 | 
74 | #endif // header guard
75 | 
76 | 


--------------------------------------------------------------------------------
/src/pred/bias.h:
--------------------------------------------------------------------------------
  1 | #ifndef BIAS_H
  2 | #define BIAS_H
  3 | 
  4 | #include "../global.h"
  5 | #include "../common/utils.h"
  6 | #include "lms.h"
  7 | 
  8 | #define BIAS_ROUND_PRED 1
  9 | #define BIAS_MIX_N 3
 10 | #define BIAS_MIX_NUMCTX 4
 11 | #define BIAS_MIX 0
 12 | #define BIAS_NAVG 5
 13 | #define BIAS_CLAMPW 0
 14 | 
 15 | class BiasEstimator {
 16 |   class CntAvg {
 17 |     struct bias_cnt {
 18 |       int cnt;
 19 |       double val;
 20 |     };
 21 |     public:
 22 |       CntAvg(int nb_scale=5,int freq0=4)
 23 |       :nscale(1<<nb_scale)
 24 |       {
 25 |          bias.cnt=freq0;
 26 |          bias.val=0.0;
 27 |       }
 28 |       double get() const
 29 |       {
 30 |         return bias.val/double(bias.cnt);
 31 |       }
 32 |       void update(double delta) {
 33 |         bias.val+=delta;
 34 |         bias.cnt++;
 35 |         if (bias.cnt>=nscale) {
 36 |           bias.val/=2.0;
 37 |           bias.cnt>>=1;
 38 |         }
 39 |       }
 40 |     private:
 41 |       const int nscale;
 42 |       bias_cnt bias;
 43 |   };
 44 | 
 45 |   public:
 46 |     BiasEstimator(double lms_mu=0.003,int nb_scale=5,double nd_sigma=1.5,double nd_lambda=0.998)
 47 |     :
 48 |     #if BIAS_MIX == 0
 49 |       mix_ada(BIAS_MIX_NUMCTX,SSLMS(BIAS_MIX_N,lms_mu)),
 50 |     #elif BIAS_MIX == 1
 51 |       mix_ada(BIAS_MIX_NUMCTX,LAD_ADA(BIAS_MIX_N,lms_mu,0.96)),
 52 |     #elif BIAS_MIX == 2
 53 |       mix_ada(BIAS_MIX_NUMCTX,LMS_ADA(BIAS_MIX_N,lms_mu,0.965,0.005)),
 54 |     #endif
 55 |     hist_input(8),hist_delta(8),
 56 |     cnt0(1<<6,CntAvg(nb_scale)),
 57 |     cnt1(1<<6,CntAvg(nb_scale)),
 58 |     cnt2(1<<6,CntAvg(nb_scale)),
 59 |     sigma(nd_sigma),
 60 |     run_mv(nd_lambda)
 61 |     {
 62 |       ctx0=ctx1=ctx2=mix_ctx=0;
 63 |       px=0.0;
 64 |     }
 65 |     void CalcContext(double p)
 66 |     {
 67 |       int b0=hist_input[0]>p?0:1;
 68 |       //int b1=hist_input[1]>p?0:1;
 69 | 
 70 |       int b2=hist_delta[0]<0?0:1;
 71 |       int b3=hist_delta[1]<0?0:1;
 72 |       int b4=hist_delta[2]<0?0:1;
 73 |       //int b42=hist_delta[3]<0?0:1;
 74 |       int b5=hist_delta[1]<hist_delta[0]?0:1;
 75 |       int b6=hist_delta[2]<hist_delta[1]?0:1;
 76 |       int b7=hist_delta[3]<hist_delta[2]?0:1;
 77 |       int b8=hist_delta[4]<hist_delta[3]?0:1;
 78 | 
 79 |       int b9=(fabs(hist_delta[0]))>32?0:1;
 80 |       int b10=2*hist_input[0]-hist_input[1]>p?0:1;
 81 |       int b11=3*hist_input[0]-3*hist_input[1]+hist_input[2]>p?0:1;
 82 | 
 83 |       double sum=0;
 84 |       for (int i=0;i<BIAS_NAVG;i++)
 85 |         sum+=fabs(hist_delta[i]);
 86 |       sum /= static_cast<double>(BIAS_NAVG);
 87 | 
 88 |       int t=0;
 89 |       if (sum>512) t=2;
 90 |       //else if (sum>128) t=2;
 91 |       else if (sum>32) t=1;
 92 | 
 93 |       ctx0=0;
 94 |       ctx0+=b0<<0;
 95 |       //ctx0+=b1<<1;
 96 |       ctx0+=b2<<1;
 97 |       ctx0+=b9<<2;
 98 |       ctx0+=b10<<3;
 99 |       ctx0+=b11<<4;
100 | 
101 |       ctx1=0;
102 |       ctx1+=b2<<0;
103 |       ctx1+=b3<<1;
104 |       ctx1+=b4<<2;
105 |       //ctx1+=t<<3;
106 | 
107 |       ctx2=0;
108 |       ctx2+=b5<<0;
109 |       ctx2+=b6<<1;
110 |       ctx2+=b7<<2;
111 |       ctx2+=b8<<3;
112 |       //ctx2+=mix_ctx<<4;
113 | 
114 |       mix_ctx=t;
115 |     }
116 |     double Predict(double pred)
117 |     {
118 |       px=pred;
119 | 
120 |       CalcContext(pred);
121 | 
122 |       vec1D pb(BIAS_MIX_N);
123 |       pb[0]=cnt0[ctx0].get();
124 |       pb[1]=cnt1[ctx1].get();
125 |       pb[2]=cnt2[ctx2].get();
126 |       return pred+mix_ada[mix_ctx].Predict(pb);
127 |     }
128 |     void Update(double val) {
129 |       #ifdef BIAS_ROUND_PRED
130 |         const double delta=val-std::round(px);
131 |       #else
132 |         const double delta=val-px;
133 |       #endif
134 |       miscUtils::RollBack(hist_input,val);
135 |       miscUtils::RollBack(hist_delta,delta);
136 | 
137 |       const auto [mean,var] = run_mv.get();
138 | 
139 |       const double q=sigma*sqrt(var);
140 |       const double lb=mean-q;
141 |       const double ub=mean+q;
142 | 
143 |       if ( (delta>lb) && (delta<ub))
144 |       {
145 |         cnt0[ctx0].update(delta);
146 |         cnt1[ctx1].update(delta);
147 |         cnt2[ctx2].update(delta);
148 |       }
149 | 
150 |       run_mv.Update(delta);
151 |       mix_ada[mix_ctx].Update(delta);
152 |       #if BIAS_CLAMPW == 1
153 |         for (int i=0;i<BIAS_MIX_N;i++)
154 |           mix_ada[mix_ctx].w[i] = std::max(mix_ada[mix_ctx].w[i],0.);
155 |       #endif
156 |     }
157 |   private:
158 |     #if BIAS_MIX == 0
159 |       std::vector<SSLMS> mix_ada;
160 |     #elif BIAS_MIX == 1
161 |       std::vector<LAD_ADA> mix_ada;
162 |     #elif BIAS_MIX == 2
163 |       std::vector<LMS_ADA> mix_ada;
164 |     #endif
165 |     vec1D hist_input,hist_delta;
166 |     int ctx0,ctx1,ctx2,mix_ctx;
167 |     double px;
168 |     //double alpha,p,bias0,bias1,bias2;
169 |     std::vector<CntAvg> cnt0,cnt1,cnt2;
170 |     const double sigma;
171 |     RunMeanVar run_mv;
172 | };
173 | 
174 | 
175 | #endif // BIAS_H
176 | 


--------------------------------------------------------------------------------
/src/pred/blend.h:
--------------------------------------------------------------------------------
 1 | #ifndef BLEND_H
 2 | #define BLEND_H
 3 | 
 4 | #include "lms.h"
 5 | 
 6 | // blend two expert outputs via sigmoid
 7 | class Blend2 {
 8 |   public:
 9 |     Blend2(double beta=0.95,double theta0=1.0,double theta1=0.0,double scale=5.0)
10 |     :w(0.5),
11 |      th0(theta0),th1(theta1),scale(scale),
12 |      rsum(beta)
13 |     {
14 |     }
15 |     double Predict(double p0,double p1) const
16 |     {
17 |       return w*p0 + (1.0-w)*p1;
18 |     }
19 | 
20 |     void Update(double score0,double score1)
21 |     {
22 |       rsum.Update(score1-score0);
23 | 
24 |       double delta=rsum.Get();
25 |       double z = th0*delta + th1;
26 |       z = std::clamp(z,-scale,scale);
27 |       w = 1.0 / (1.0 + std::exp(-z));
28 |    }
29 |   protected:
30 |     double w,th0,th1,scale;
31 |     RunSumEMA rsum;
32 | };
33 | 
34 | #endif
35 | 
36 | 


--------------------------------------------------------------------------------
/src/pred/lms.h:
--------------------------------------------------------------------------------
  1 | #ifndef LMS_H
  2 | #define LMS_H
  3 | 
  4 | #include <cmath>
  5 | #include "../global.h"
  6 | #include "../common/histbuf.h"
  7 | #include "../common/utils.h"
  8 | 
  9 | class LS_Stream {
 10 |   public:
 11 |     LS_Stream(int n)
 12 |     :n(n),x(n),w(n),pred(0.)
 13 |     {
 14 | 
 15 |     }
 16 |     double Predict()
 17 |     {
 18 |       pred=slmath::dot(x.get_span(),w);
 19 |       return pred;
 20 |     }
 21 |     virtual void Update(double val)=0;
 22 |     virtual ~LS_Stream(){};
 23 |   protected:
 24 |     int n;
 25 |     RollBuffer2<double>x;
 26 |     std::vector<double,align_alloc<double>> w;
 27 |     double pred;
 28 | };
 29 | 
 30 | class NLMS_Stream : public LS_Stream
 31 | {
 32 |   public:
 33 |     NLMS_Stream(int n,double mu,double mu_decay=1.0,double pow_decay=0.8)
 34 |     :LS_Stream(n),mutab(n),powtab(n),mu(mu)
 35 |     {
 36 |       sum_powtab=0;
 37 |       for (int i=0;i<n;i++) {
 38 |          powtab[i]=1.0/(pow(1+i,pow_decay));
 39 |          sum_powtab+=powtab[i];
 40 |          mutab[i]=pow(mu_decay,i);
 41 |       }
 42 |     }
 43 | 
 44 |     void Update(double val) override
 45 |     {
 46 |       const double spow=slmath::calc_s2pow(x.get_span(),powtab);
 47 |       const double wgrad=mu*(val-pred)*sum_powtab/(spow+SACGlobalCfg::NLMS_POW_EPS);
 48 |       for (int i=0;i<n;i++) {
 49 |         w[i]+=mutab[i]*(wgrad*x[i]);
 50 |       }
 51 |       x.push(val);
 52 |     };
 53 |     ~NLMS_Stream() override {} ;
 54 |   protected:
 55 |     std::vector<double,align_alloc<double>> mutab,powtab;
 56 |     double sum_powtab;
 57 |     double mu;
 58 | };
 59 | 
 60 | class LADADA_Stream : public LS_Stream
 61 | {
 62 |   public:
 63 |     LADADA_Stream(int n,double mu,double beta=0.97)
 64 |     :LS_Stream(n),eg(n),mu(mu),beta(beta)
 65 |     {
 66 | 
 67 |     }
 68 |     void Update(double val) override
 69 |     {
 70 |       const double serr=MathUtils::sgn(val-pred); // prediction error
 71 |       for (int i=0;i<n;i++) {
 72 |         double const grad=serr*x[i];
 73 |         eg[i]=beta*eg[i]+(1.0-beta)*grad*grad; //accumulate gradients
 74 |         double g=grad*1.0/(sqrt(eg[i])+SACGlobalCfg::LMS_ADA_EPS);// update weights
 75 |         w[i]+=mu*g;
 76 |       }
 77 |       x.push(val);
 78 |     }
 79 |   protected:
 80 |     vec1D eg;
 81 |     double mu,beta;
 82 | };
 83 | 
 84 | class LMSADA_Stream : public LS_Stream
 85 | {
 86 |   public:
 87 |     LMSADA_Stream(int n,double mu,double beta=0.97,double nu=0.0)
 88 |     :LS_Stream(n),eg(n),mu(mu),beta(beta),nu(nu)
 89 |     {
 90 | 
 91 |     }
 92 |     void Update(double val) override
 93 |     {
 94 |       const double err=val-pred; // prediction error
 95 |       for (int i=0;i<n;i++) {
 96 |         double const grad=err*x[i]-nu*MathUtils::sgn(w[i]);
 97 |         eg[i]=beta*eg[i]+(1.0-beta)*grad*grad; //accumulate gradients
 98 |         double g=grad*1.0/(sqrt(eg[i])+SACGlobalCfg::LMS_ADA_EPS);// update weights
 99 |         w[i]+=mu*g;
100 |       }
101 |       x.push(val);
102 |     }
103 |   protected:
104 |     vec1D eg;
105 |     double mu,beta,nu;
106 | };
107 | 
108 | 
109 | class LMS {
110 |   protected:
111 |   public:
112 |     LMS(int n,double mu)
113 |     :n(n),x(n),w(n),mu(mu),pred(0)
114 |     {
115 |     }
116 |     double Predict(const vec1D &inp)
117 |     {
118 |       x=inp;
119 |       pred=slmath::dot(x,w);
120 |       return pred;
121 |     }
122 |     virtual void Update(double)=0;
123 |     virtual ~LMS(){};
124 |     int n;
125 |     vec1D x,w;
126 |   protected:
127 |     double mu,pred;
128 | };
129 | 
130 | class LMS_ADA : public LMS
131 | {
132 |   public:
133 |     LMS_ADA(int n,double mu,double beta=0.95,double nu=0.001)
134 |     :LMS(n,mu),eg(n),beta(beta),nu(nu)
135 |     {
136 |     }
137 |     void Update(double val) override {
138 |       const double err=val-pred; // prediction error
139 |       for (int i=0;i<n;i++) {
140 |         double const grad=err*x[i] - nu*MathUtils::sgn(w[i]); // gradient + l1-regularization
141 | 
142 |         eg[i]=beta*eg[i]+(1.0-beta)*grad*grad; //accumulate gradients
143 |         double g=grad*1.0/(sqrt(eg[i])+SACGlobalCfg::LMS_ADA_EPS);// update weights
144 |         w[i]+=mu*g;
145 |       }
146 |     }
147 |   protected:
148 |     vec1D eg;
149 |     double beta,nu;
150 | };
151 | 
152 | class LAD_ADA : public LMS
153 | {
154 |   public:
155 |     LAD_ADA(int n,double mu,double beta=0.95)
156 |     :LMS(n,mu),eg(n),beta(beta)
157 |     {
158 |     }
159 |     void Update(double val) override
160 |     {
161 |       const double serr=MathUtils::sgn(val-pred); // prediction error
162 |       for (int i=0;i<n;i++) {
163 |         double const grad=serr*x[i];
164 |         eg[i]=beta*eg[i]+(1.0-beta)*grad*grad; //accumulate gradients
165 |         double scaled_grad=grad*1.0/(sqrt(eg[i])+SACGlobalCfg::LMS_ADA_EPS);// update weights
166 |         w[i]+=mu*scaled_grad;
167 |       }
168 |     }
169 |   protected:
170 |     vec1D eg;
171 |     double beta;
172 | };
173 | 
174 | // Huber loss + ADA-Grad
175 | class HBR_ADA : public LMS
176 | {
177 |   public:
178 |     HBR_ADA(int n,double mu,double beta=0.95,double delta=4)
179 |     :LMS(n,mu),eg(n),beta(beta),delta(delta)
180 |     {
181 |     }
182 |     double get_loss(double err_g,double delta)
183 |     {
184 |       if (std::abs(err_g) <= delta)
185 |         return 0.5*err_g*err_g;
186 |       else
187 |         return delta*(std::abs(err_g) - 0.5*delta);
188 |     }
189 |     double get_grad(double err_g,double delta)
190 |     {
191 |       if (std::abs(err_g) <= delta)
192 |         return err_g;
193 |       else
194 |         return delta*MathUtils::sgn(err_g);
195 |     }
196 |     void Update(double val) override {
197 |       const double err_g=val-pred; // prediction error
198 | 
199 |       double grad_loss = get_grad(err_g,delta);
200 |       for (int i=0;i<n;i++) {
201 |         double const grad=grad_loss*x[i];
202 |         eg[i]=beta*eg[i]+(1.0-beta)*grad*grad; //accumulate gradients
203 |         const double g=grad*1.0/(sqrt(eg[i])+SACGlobalCfg::LMS_ADA_EPS);// update weights
204 |         w[i]+=mu*g;
205 |       }
206 |     }
207 |   protected:
208 |     vec1D eg;
209 |     double beta,delta;
210 | };
211 | 
212 | class LMS_ADAM : public LMS
213 | {
214 |   public:
215 |     LMS_ADAM(int n,double mu,double beta1=0.9,double beta2=0.999)
216 |     :LMS(n,mu),M(n),S(n),beta1(beta1),beta2(beta2)
217 |     {
218 |       power_beta1=1.0;
219 |       power_beta11=beta1;
220 |       power_beta2=1.0;
221 |     }
222 |     void Update(double val) override {
223 |       power_beta1*=beta1;
224 |       power_beta11*=beta1;
225 |       power_beta2*=beta2;
226 |       const double err=val-pred; // prediction error
227 |       for (int i=0;i<n;i++) {
228 |         double const grad=err*x[i]; // gradient
229 | 
230 |         M[i]=beta1*M[i]+(1.0-beta1)*grad;
231 |         S[i]=beta2*S[i]+(1.0-beta2)*(grad*grad);
232 | 
233 |         /*double m_hat=beta1*M[i]/(1.0-power_beta11)+((1.0-beta1)*grad/(1.0-power_beta1));
234 |         double n_hat=beta2*S[i]/(1.0-power_beta2);*/
235 |         double m_hat=M[i]/(1.0-power_beta1);
236 |         double n_hat=S[i]/(1.0-power_beta2);
237 |         w[i]+=mu*m_hat/(sqrt(n_hat)+SACGlobalCfg::LMS_ADA_EPS);
238 |       }
239 |     }
240 |   private:
241 |     vec1D M,S;
242 |     double beta1,beta2,power_beta1,power_beta11,power_beta2;
243 | };
244 | 
245 | // sign-sign lms algorithm
246 | class SSLMS : public LMS {
247 |   public:
248 |       SSLMS(int n,double mu)
249 |       :LMS(n,mu)
250 |       {
251 |       }
252 |       void Update(double val) override
253 |       {
254 |         double e=val-pred;
255 |         const double wf=mu*MathUtils::sgn(e);
256 |         for (int i=0;i<n;i++) {
257 |            w[i]+=wf*MathUtils::sgn(x[i]);
258 |         }
259 |       }
260 | };
261 | 
262 | #endif // LMS_H
263 | 


--------------------------------------------------------------------------------
/src/pred/lms_cascade.h:
--------------------------------------------------------------------------------
  1 | #ifndef LMS_CASCADE_H
  2 | #define LMS_CASCADE_H
  3 | 
#include "lms.h"
#include "rls.h"
#include "blend.h"
#include "../common/utils.h"
#include <memory>
#include <vector>
  8 | 
  9 | /*
 10 |   double e0=std::abs(target-px0);
 11 |   double e1=(target-px1)*(target-px1);
 12 |   rp0.Update(e0);
 13 |   rp1.Update(e1);
 14 | 
 15 |   // log-likelihood under source model
 16 |   double pl0=MathUtils::calc_loglik_L1(e0,std::max(rp0.sum,1E-8));
 17 |   double pl1=MathUtils::calc_loglik_L2(e1,std::max(rp1.sum,1E-8));
 18 |   double nbits0 = -pl0 / std::log(2.0);
 19 |   double nbits1 = -pl1 / std::log(2.0);
 20 | 
 21 |   cv2.Update(nbits0,nbits1);
 22 | */
 23 | 
 24 | 
 25 | 
 26 | // Blend 2xLMS-ADA using L1 + L2 loss
 27 | // using absolute error as scoring function
 28 | class Blend2LMS_L1 {
 29 |   public:
 30 |     Blend2LMS_L1(int n,double lms_mu,double lms_beta,double blend_beta=0.95)
 31 |     :n(n),px0(0.0),px1(0.0),
 32 |      mix0(n,lms_mu,lms_beta),
 33 |      mix1(n,lms_mu,lms_beta),
 34 |      cw2(blend_beta)
 35 |     {
 36 |       if constexpr(SACGlobalCfg::LMS_MIX_INIT)
 37 |         for (int i=0;i<n-1;i++)
 38 |           mix0.w[i] = mix1.w[i] = 1.0/(i+1);
 39 |     }
 40 |     double GetWeight(int index) const
 41 |     {
 42 |       return cw2.Predict(mix0.w[index],mix1.w[index]);
 43 |     }
 44 |     double Predict(const vec1D &input)
 45 |     {
 46 |       px0=mix0.Predict(input);
 47 |       px1=mix1.Predict(input);
 48 |       return cw2.Predict(px0,px1);
 49 |     }
 50 |     void UpdateMixer(double target)
 51 |     {
 52 |       mix0.Update(target);
 53 |       mix1.Update(target);
 54 |       if constexpr(SACGlobalCfg::LMS_MIX_CLAMPW)
 55 |         for (int i=0;i<n;i++) {
 56 |           mix0.w[i]=std::max(mix0.w[i],0.0);
 57 |           mix1.w[i]=std::max(mix1.w[i],0.0);
 58 |         }
 59 |     }
 60 |     void UpdateBlend(double target)
 61 |     {
 62 |       double e0=std::abs(target-px0);
 63 |       double e1=std::abs(target-px1);
 64 |       cw2.Update(e0,e1);
 65 |     }
 66 |     int n;
 67 |     double px0,px1;
 68 |     LAD_ADA mix0;
 69 |     LMS_ADA mix1;
 70 |     Blend2 cw2;
 71 | };
 72 | 
 73 | 
 74 | class Cascade {
 75 |   public:
 76 |     Cascade(const std::vector<int> &vn,const std::vector<double>&vmu,
 77 |                const std::vector<double>&vmudecay,const std::vector<double> &vpowdecay,
 78 |                double mu_mix,double mu_mix_beta,int lm_n,double lm_alpha)
 79 |     :n(vn.size()),p(n+1),
 80 |      mix(n+1,mu_mix,mu_mix_beta),
 81 |      lm(lm_n,lm_alpha),
 82 |      clms(n)
 83 |     {
 84 |       for (int i=0;i<n;i++)
 85 |         clms[i]=new NLMS_Stream(vn[i],vmu[i],vmudecay[i],vpowdecay[i]);
 86 |     }
 87 |     double Predict()
 88 |     {
 89 |       for (int i=0;i<n;i++)
 90 |           p[i]=clms[i]->Predict();
 91 | 
 92 |       p[n]=lm.Predict();
 93 |       return mix.Predict(p);
 94 |     }
 95 |     void Update(const double target)
 96 |     {
 97 |       mix.UpdateMixer(target);
 98 | 
 99 |       double t=target;
100 |       for (int i=0;i<n; i++) {
101 |         clms[i]->Update(t);
102 |         t-=mix.GetWeight(i)*p[i];
103 |       }
104 |       lm.UpdateHist(t);
105 |       mix.UpdateBlend(target);
106 |     }
107 |     ~Cascade()
108 |     {
109 |       for (int i=0;i<n;i++) delete clms[i];
110 |     }
111 |   private:
112 |     int n;
113 |     vec1D p;
114 |     Blend2LMS_L1 mix;
115 |     RLS lm;
116 |     std::vector<LS_Stream*> clms;
117 | };
118 | 
119 | #endif
120 | 


--------------------------------------------------------------------------------
/src/pred/lpc.h:
--------------------------------------------------------------------------------
 1 | #ifndef LPC_H
 2 | #define LPC_H
 3 | 
 4 | #include "../common/utils.h"
 5 | #include "../common/math.h"
 6 | 
 7 | //#define INIT_COV
 8 | 
 9 | class OLS {
10 |   public:
11 |     OLS(int n,int kmax=1,double lambda=0.998,double nu=0.001,double beta_sum=0.6,double beta_pow=0.75,double beta_add=2)
12 |     :x(n),
13 |     chol(n),
14 |     w(n),b(n),mcov(n,vec1D(n)),
15 |     n(n),kmax(kmax),lambda(lambda),nu(n*nu),
16 |     beta_pow(beta_pow),beta_add(beta_add),esum(beta_sum)
17 |     {
18 |       km=0;
19 |       pred=0.0;
20 |       #ifdef INIT_COV
21 |         for (int i=0;i<n;i++) mcov[i][i]=1.0;
22 |       #endif
23 |     }
24 |     double Predict()
25 |     {
26 |       pred=slmath::dot(x,w);
27 |       return pred;
28 |     }
29 | 
30 |     void Update(double val)
31 |     {
32 |       // update estimate of covariance matrix
33 |       esum.Update(fabs(val-pred));
34 |       double c0=pow(esum.Get()+beta_add,-beta_pow);
35 | 
36 |       for (int j=0;j<n;j++) {
37 |         // only update lower triangular
38 |         for (int i=0;i<=j;i++) mcov[j][i]=lambda*mcov[j][i]+c0*(x[j]*x[i]);
39 |         b[j]=lambda*b[j]+c0*(x[j]*val);
40 |       }
41 | 
42 |       km++;
43 |       if (km>=kmax) {
44 |         if (!chol.Factor(mcov,nu)) chol.Solve(b,w);
45 |         km=0;
46 |       }
47 |     }
48 |     vec1D x;
49 |   protected:
50 |     slmath::Cholesky chol;
51 |     vec1D w,b;
52 |     vec2D mcov;
53 |     int n,kmax,km;
54 |     double lambda,nu,pred;
55 |     double beta_pow,beta_add;
56 |     RunSumGEO esum;
57 | };
58 | 
59 | 
60 | #endif // LPC_H
61 | 


--------------------------------------------------------------------------------
/src/pred/rls.cpp:
--------------------------------------------------------------------------------
 1 | #include "rls.h"
 2 | #include "../common/math.h"
 3 | #include "../common/utils.h"
 4 | 
 5 | RLS::RLS(int n,double gamma,double nu)
 6 | :n(n),
 7 | px(0.),gamma(gamma),
 8 | hist(n),w(n),
 9 | P(n,vec1D(n)), // inverse covariance matrix
10 | alc(gamma)
11 | {
12 |   for (int i=0;i<n;i++)
13 |     P[i][i]=1.0/nu;
14 | }
15 | 
16 | double RLS::Predict()
17 | {
18 |   px=slmath::dot(hist,w);
19 |   return px;
20 | }
21 | 
22 | double RLS::Predict(const vec1D &input)
23 | {
24 |   hist=input;
25 |   return Predict();
26 | }
27 | 
28 | void RLS::Update(double val)
29 | {
30 |   const double err=val-px;
31 | 
32 |   vec1D ph=slmath::mul(P,hist); //phi=hist P hist
33 |   // a priori variance of prediction
34 |   double phi=slmath::dot(hist,ph);
35 | 
36 |   double alpha=gamma;
37 |   if constexpr(SACGlobalCfg::RLS_ALC) {
38 |     // Normalized Innovation Squared
39 |     // quantifies how "unexpected" the observation is
40 |     // relative to the models uncertainty phi
41 |     double metric = (err*err);//(phi+1E-3);
42 |     alpha=alc.update(metric);
43 |   };
44 | 
45 |   //update inverse of covariance matrix
46 |   //P(n)=1/lambda*P(n-1)-1/lambda * k(n)*x^T(n)*P(n-1)
47 |   double denom=1./(alpha+phi);
48 |   double inv_alpha=1.0/(alpha);
49 |   for (int i=0;i<n;i++)
50 |     for (int j=0;j<=i;j++) {
51 |       double m=ph[i]*ph[j]; // outer product of ph
52 |       double v=(P[i][j] - denom * m) * inv_alpha;
53 |       P[i][j] = P[j][i] = v;
54 |     }
55 | 
56 |   // update weights
57 |   for (int i=0;i<n;i++)
58 |       w[i]+=err*(denom*ph[i]);
59 | }
60 | 
61 | void RLS::UpdateHist(double val)
62 | {
63 |   Update(val);
64 |   miscUtils::RollBack(hist,val);
65 | }
66 | 


--------------------------------------------------------------------------------
/src/pred/rls.h:
--------------------------------------------------------------------------------
 1 | #ifndef RLS_H
 2 | #define RLS_H
 3 | 
 4 | #include "../global.h"
 5 | #include "../common/utils.h"
 6 | #include <cmath>
 7 | 
 8 | // adaptive lambda control
 9 | template <miscUtils::MapMode tmap_mode>
10 | class ALC
11 | {
12 |   public:
13 |     ALC(double gamma=1.0,double beta=0.95)
14 |     :gamma(gamma),lambda_min(0.99),lambda_max(0.999),
15 |      msum(beta)
16 |     {
17 |     }
18 | 
19 |     double update(double metric)
20 |     {
21 |       msum.Update(metric);
22 | 
23 |       // normalize metric by average
24 |       double mnorm = metric/(msum.Get() + 1E-5);
25 |       // map with decay function
26 |       // high mnorm -> low alpha (faster adaption), low mnorm -> high alpha
27 |       double m=miscUtils::decay_map<tmap_mode>(gamma,mnorm);
28 |       return lambda_min + (lambda_max-lambda_min)*m;
29 |     }
30 |   protected:
31 |     double gamma,lambda_min,lambda_max;
32 |     RunSum <> msum;
33 | };
34 | 
35 | // Recursive Least Squares algorithm
36 | class RLS {
37 |   public:
38 |     explicit RLS(int n,double gamma,double nu=1);
39 |     double Predict();
40 |     double Predict(const vec1D &pred);
41 |     void Update(double val);
42 |     void UpdateHist(double val);
43 |     int n;
44 |   private:
45 |     double px,gamma;
46 |     vec1D hist,w;
47 |     vec2D P;
48 |     ALC<miscUtils::MapMode::exp> alc;
49 | };
50 | 
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------