├── randomdata
    ├── measure.sh
    ├── bias0p1.bin
    ├── bias0p2.bin
    ├── bias0p3.bin
    ├── bias0p4.bin
    ├── bias0p5.bin
    ├── bias0p6.bin
    ├── bias0p7.bin
    ├── bias0p8.bin
    ├── bias0p9.bin
    ├── corr0p1.bin
    ├── corr0p2.bin
    ├── corr0p3.bin
    ├── corr0p4.bin
    ├── corr0p5.bin
    ├── corr0p6.bin
    ├── corr0p7.bin
    ├── corr0p8.bin
    ├── corr0p9.bin
    ├── uniform.bin
    ├── corrm0p1.bin
    ├── corrm0p2.bin
    ├── corrm0p3.bin
    ├── corrm0p4.bin
    ├── corrm0p5.bin
    ├── corrm0p6.bin
    ├── corrm0p7.bin
    ├── corrm0p8.bin
    ├── corrm0p9.bin
    ├── filenames
    └── generate.sh
├── .gitignore
├── filename_parse.h
├── Makefile
├── longest_run_cdf.h
├── mathy_things.h
├── CITATION.cff
├── markov2p.h
├── longest_run_cdf.c
├── mathy_things.c
├── README.md
├── filename_parse.c
├── LICENSE
├── markov2p.c
└── djent.c


/randomdata/measure.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run djent (must be on PATH) on every *.bin file in the current directory: -b = binary input, -t = terse output.
3 | djent -b -t *.bin
4 | 
5 | 


--------------------------------------------------------------------------------
/randomdata/bias0p1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p1.bin


--------------------------------------------------------------------------------
/randomdata/bias0p2.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p2.bin


--------------------------------------------------------------------------------
/randomdata/bias0p3.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p3.bin


--------------------------------------------------------------------------------
/randomdata/bias0p4.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p4.bin


--------------------------------------------------------------------------------
/randomdata/bias0p5.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p5.bin


--------------------------------------------------------------------------------
/randomdata/bias0p6.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p6.bin


--------------------------------------------------------------------------------
/randomdata/bias0p7.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p7.bin


--------------------------------------------------------------------------------
/randomdata/bias0p8.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p8.bin


--------------------------------------------------------------------------------
/randomdata/bias0p9.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p9.bin


--------------------------------------------------------------------------------
/randomdata/corr0p1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p1.bin


--------------------------------------------------------------------------------
/randomdata/corr0p2.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p2.bin


--------------------------------------------------------------------------------
/randomdata/corr0p3.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p3.bin


--------------------------------------------------------------------------------
/randomdata/corr0p4.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p4.bin


--------------------------------------------------------------------------------
/randomdata/corr0p5.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p5.bin


--------------------------------------------------------------------------------
/randomdata/corr0p6.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p6.bin


--------------------------------------------------------------------------------
/randomdata/corr0p7.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p7.bin


--------------------------------------------------------------------------------
/randomdata/corr0p8.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p8.bin


--------------------------------------------------------------------------------
/randomdata/corr0p9.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p9.bin


--------------------------------------------------------------------------------
/randomdata/uniform.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/uniform.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p1.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p2.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p2.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p3.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p3.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p4.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p4.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p5.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p5.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p6.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p6.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p7.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p7.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p8.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p8.bin


--------------------------------------------------------------------------------
/randomdata/corrm0p9.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p9.bin


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Object files
 2 | *.o
 3 | *.ko
 4 | *.obj
 5 | *.elf
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Libraries
12 | *.lib
13 | *.a
14 | *.la
15 | *.lo
16 | 
17 | # Shared objects (inc. Windows DLLs)
18 | *.dll
19 | *.so
20 | *.so.*
21 | *.dylib
22 | 
23 | # Executables
24 | *.exe
25 | *.out
26 | *.app
27 | *.i*86
28 | *.x86_64
29 | *.hex
30 | 
31 | # Debug files
32 | *.dSYM/
33 | *.su
34 | 


--------------------------------------------------------------------------------
/filename_parse.h:
--------------------------------------------------------------------------------
 1 | /* find_vpattern(): look for vpattern in str. Return the match to found. Return True if found */
 2 | /* NOTE(review): the t/cid/proc variants presumably do the same for their own patterns -- confirm in filename_parse.c */
 3 | extern double voltage;
 4 | extern double temperature;
 5 | extern unsigned char deviceid[256];
 6 | extern unsigned char process[256];
 7 | /* NOTE(review): the globals above are presumably filled in by parse_the_filename() -- TODO confirm */
 8 | int find_vpattern(char *str,char *found);
 9 | int find_tpattern(char *str,char *found) ;
10 | int find_cidpattern(char *str,char *found); 
11 | int find_procpattern(char *str,char *found); 
12 | void parse_the_filename(char *filename);
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/randomdata/filenames:
--------------------------------------------------------------------------------
 1 | # A comment
 2 | bias0p1.bin
 3 | bias0p2.bin
 4 | bias0p3.bin
 5 | bias0p4.bin
 6 | bias0p5.bin
 7 | bias0p6.bin
 8 | bias0p7.bin
 9 | bias0p8.bin
10 | bias0p9.bin
11 | corr0p1.bin
12 | corr0p2.bin
13 | corr0p3.bin
14 | corr0p4.bin
15 | corr0p5.bin
16 | corr0p6.bin
17 | corr0p7.bin
18 | corr0p8.bin
19 | corr0p9.bin
20 | corrm0p1.bin
21 | corrm0p2.bin
22 | corrm0p3.bin
23 | corrm0p4.bin
24 | corrm0p5.bin
25 | corrm0p6.bin
26 | corrm0p7.bin
27 | corrm0p8.bin
28 | corrm0p9.bin
29 | uniform.bin
30 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CC = gcc
 2 | CFLAGS = -I/usr/local/include -m64 -g -Wall
 3 | LDFLAGS = -L/usr/local/lib 
 4 | LDLIBS = -lm -lgmp -lmpfr
 5 | .PHONY: all install clean  # these name actions, not files; a stray file called "clean" must not disable the target
 6 | all: djent
 7 | # One compile rule per translation unit; each object is rebuilt when a listed source or header changes.
 8 | longest_run_cdf.o: longest_run_cdf.c longest_run_cdf.h
 9 | 	$(CC) -c $(CFLAGS) -o longest_run_cdf.o longest_run_cdf.c
10 | 
11 | mathy_things.o: mathy_things.c mathy_things.h
12 | 	$(CC) -c $(CFLAGS) -o mathy_things.o mathy_things.c
13 | 
14 | filename_parse.o: filename_parse.c filename_parse.h
15 | 	$(CC) -c $(CFLAGS) -o filename_parse.o filename_parse.c
16 | 
17 | markov2p.o: markov2p.c markov2p.h
18 | 	$(CC) -c $(CFLAGS) -o markov2p.o markov2p.c
19 | 
20 | djent.o: djent.c markov2p.h filename_parse.h mathy_things.h
21 | 	$(CC) -c $(CFLAGS) -o djent.o djent.c
22 | # Final link; LDLIBS pulls in libm, GMP and MPFR.
23 | djent: djent.o markov2p.o filename_parse.o mathy_things.o longest_run_cdf.o
24 | 	$(CC) $(CFLAGS) $(LDFLAGS) longest_run_cdf.o mathy_things.o filename_parse.o markov2p.o djent.o -o djent $(LDLIBS)
25 | # Build djent first if it is missing or stale, then copy it into /usr/local/bin.
26 | install: djent
27 | 	cp djent /usr/local/bin
28 | # Delete every object file and the djent binary.
29 | clean:
30 | 	rm -f longest_run_cdf.o
31 | 	rm -f filename_parse.o
32 | 	rm -f mathy_things.o
33 | 	rm -f markov2p.o
34 | 	rm -f djent.o
35 | 	rm -f djent
36 | 
37 | 


--------------------------------------------------------------------------------
/longest_run_cdf.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     djent - A reimplementation of Fourmilab's ent with several improvements. 
 3 |     
 4 |     Copyright (C) 2017  David Johnston
 5 | 
 6 |     This program is free software; you can redistribute it and/or modify
 7 |     it under the terms of the GNU General Public License as published by
 8 |     the Free Software Foundation; either version 2 of the License, or
 9 |     (at your option) any later version.
10 | 
11 |     This program is distributed in the hope that it will be useful,
12 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 |     GNU General Public License for more details.
15 | 
16 |     You should have received a copy of the GNU General Public License along
17 |     with this program; if not, write to the Free Software Foundation, Inc.,
18 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 | 
20 |     -----
21 |     
22 |     Contact. David Johnston dj@deadhat.com
23 | */
24 | 
25 | #ifndef NO_GMP
26 | #include <mpfr.h>
27 | #endif
28 | // ui_n is the longest run length n and ui_r the length r of the data sequence; the result is returned as a double.
29 | // Return the probability of the longest run of heads being less than or equal to n
30 | // in a sequence of r uniform coin tosses. Use MPFR to avoid overflows.
31 | double longest_run_cdf(unsigned int ui_n,unsigned int ui_r);
32 | 


--------------------------------------------------------------------------------
/mathy_things.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     djent - A reimplementation of Fourmilab's ent with several improvements. 
 3 |     
 4 |     Copyright (C) 2017  David Johnston
 5 | 
 6 |     This program is free software; you can redistribute it and/or modify
 7 |     it under the terms of the GNU General Public License as published by
 8 |     the Free Software Foundation; either version 2 of the License, or
 9 |     (at your option) any later version.
10 | 
11 |     This program is distributed in the hope that it will be useful,
12 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 |     GNU General Public License for more details.
15 | 
16 |     You should have received a copy of the GNU General Public License along
17 |     with this program; if not, write to the Free Software Foundation, Inc.,
18 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 | 
20 |     -----
21 |     
22 |     Contact. David Johnston dj@deadhat.com
23 | */
24 | 
25 | #define LOG_SQRT_PI 0.5723649429247000870717135 /* log (sqrt (pi)) */
26 | #define I_SQRT_PI   0.5641895835477562869480795 /* 1 / sqrt (pi) */
27 | #define BIGX        20.0         /* max value to represent exp (x) */
28 | #define ex(x)       (((x) < -BIGX) ? 0.0 : exp(x)) /* exp(x), but 0.0 once x drops below -BIGX; needs <math.h> at the point of use */
29 | /* NOTE: uint64_t and size_t are used below but this header includes nothing; include <stdint.h> and <stddef.h> before it. */
30 | uint64_t ipow(uint64_t base, uint64_t exp); /* base raised to exp in uint64_t arithmetic */
31 | double zcdf(double z); /* helper used by chisqp() for odd df */
32 | double chisqp(double ax, size_t df); /* chi-square P value; returns 1.0 when ax <= 0 or df < 1 */
33 | 
34 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | # This CITATION.cff file was generated with cffinit.
 2 | # Visit https://bit.ly/cffinit to generate yours today!
 3 | 
 4 | cff-version: 1.2.0
 5 | title: djent
 6 | message: >-
 7 |   If you use this software, please cite it using the
 8 |   metadata from this file.
 9 | type: software
10 | authors:
11 |   - given-names: David
12 |     family-names: Johnston
13 |     email: dj@deadhat.com
14 |     orcid: 'https://orcid.org/0009-0002-5149-9414'
15 | repository-code: 'https://github.com/dj-on-github/djent'
16 | abstract: >-
17 |   djent is a reimplementation of the Fourmilab/John Walker
18 |   random number test program ent.
19 | 
20 | 
21 |   The improvements are:
22 | 
23 | 
24 |   Multiple input file names can be provided at once. This
25 |   works nicely with the CSV format output.
26 | 
27 |   -h works as well as -u to get the help information.
28 | 
29 |   The filename is present in CSV output
30 | 
31 |   The symbol size can be any number of bits up to 32. ent
32 |   was constrained to 1 or 8.
33 | 
34 |   The SCC test can be either wrap-around or not wrap-around.
35 | 
36 |   The SCC result can be given a lag value to get a LAG-N
37 |   correlation coefficient.
38 | 
39 |   A list of filenames to analyze can be read from a text
40 |   file using -i filename.
41 | 
42 |   Test condition details (Volts, temp, id etc.) can be
43 |   parsed from the filename and included in output.
44 | 
45 |   MCV Min Entropy is estimated in addition to Shannon
46 |   Entropy. The symbol and entropy are both reported
47 | 
48 |   The longest run and the symbol in the longest run are
49 |   reported. For 1 bit-per-symbol analysis, a p-value is
50 |   computed of the probability of a uniform random bit
51 |   sequence having a longest run length equal to or less than
52 |   the measured run length.
53 | license: GPL-2.0
54 | 


--------------------------------------------------------------------------------
/randomdata/generate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | djenrandom -b -s -k 10 -m correlated --correlation=-0.1 > corrm0p1.bin  # negative correlations -0.1 .. -0.9 go to corrm0p1 .. corrm0p9
 3 | djenrandom -b -s -k 10 -m correlated --correlation=-0.2 > corrm0p2.bin
 4 | djenrandom -b -s -k 10 -m correlated --correlation=-0.3 > corrm0p3.bin
 5 | djenrandom -b -s -k 10 -m correlated --correlation=-0.4 > corrm0p4.bin
 6 | djenrandom -b -s -k 10 -m correlated --correlation=-0.5 > corrm0p5.bin
 7 | djenrandom -b -s -k 10 -m correlated --correlation=-0.6 > corrm0p6.bin
 8 | djenrandom -b -s -k 10 -m correlated --correlation=-0.7 > corrm0p7.bin
 9 | djenrandom -b -s -k 10 -m correlated --correlation=-0.8 > corrm0p8.bin
10 | djenrandom -b -s -k 10 -m correlated --correlation=-0.9 > corrm0p9.bin
11 | # Positive correlations 0.1 .. 0.9 go to corr0p1.bin .. corr0p9.bin
12 | djenrandom -b -s -k 10 -m correlated --correlation=0.1 > corr0p1.bin
13 | djenrandom -b -s -k 10 -m correlated --correlation=0.2 > corr0p2.bin
14 | djenrandom -b -s -k 10 -m correlated --correlation=0.3 > corr0p3.bin
15 | djenrandom -b -s -k 10 -m correlated --correlation=0.4 > corr0p4.bin
16 | djenrandom -b -s -k 10 -m correlated --correlation=0.5 > corr0p5.bin
17 | djenrandom -b -s -k 10 -m correlated --correlation=0.6 > corr0p6.bin
18 | djenrandom -b -s -k 10 -m correlated --correlation=0.7 > corr0p7.bin
19 | djenrandom -b -s -k 10 -m correlated --correlation=0.8 > corr0p8.bin
20 | djenrandom -b -s -k 10 -m correlated --correlation=0.9 > corr0p9.bin
21 | # Biased model, --bias 0.1 .. 0.9, goes to bias0p1.bin .. bias0p9.bin
22 | djenrandom -b -s -k 10 -m biased --bias=0.1 > bias0p1.bin
23 | djenrandom -b -s -k 10 -m biased --bias=0.2 > bias0p2.bin
24 | djenrandom -b -s -k 10 -m biased --bias=0.3 > bias0p3.bin
25 | djenrandom -b -s -k 10 -m biased --bias=0.4 > bias0p4.bin
26 | djenrandom -b -s -k 10 -m biased --bias=0.5 > bias0p5.bin
27 | djenrandom -b -s -k 10 -m biased --bias=0.6 > bias0p6.bin
28 | djenrandom -b -s -k 10 -m biased --bias=0.7 > bias0p7.bin
29 | djenrandom -b -s -k 10 -m biased --bias=0.8 > bias0p8.bin
30 | djenrandom -b -s -k 10 -m biased --bias=0.9 > bias0p9.bin
31 | # No -m model option here; output is named uniform.bin. NOTE(review): -b/-s/-k are djenrandom flags not documented in this repo -- see that tool's help.
32 | djenrandom -b -s -k 10 > uniform.bin
33 | 
34 | 


--------------------------------------------------------------------------------
/markov2p.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /*
 3 |     djrandom - A utility to generate random numbers.
 4 |     
 5 |     Copyright (C) 2017  David Johnston
 6 | 
 7 |     This program is free software; you can redistribute it and/or modify
 8 |     it under the terms of the GNU General Public License as published by
 9 |     the Free Software Foundation; either version 2 of the License, or
10 |     (at your option) any later version.
11 | 
12 |     This program is distributed in the hope that it will be useful,
13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |     GNU General Public License for more details.
16 | 
17 |     You should have received a copy of the GNU General Public License along
18 |     with this program; if not, write to the Free Software Foundation, Inc.,
19 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 | 
21 |     -----
22 |     
23 |     Contact. David Johnston dj@deadhat.com
24 | */
25 | 
26 | #include <stdio.h>
27 | #include <string.h>
28 | #include <stdlib.h>
29 | #include <sys/stat.h>
30 | #include <math.h>
31 | #include <stdint.h>
32 | #include <inttypes.h>
33 | 
34 | #include <unistd.h>
35 | #include <string.h>
36 | /* NOTE(review): these five codes look like the possible results of most_probable_transition_pair() -- confirm in markov2p.c */
37 | #define EQUIPROBABLE 0
38 | #define P000_MAX 1
39 | #define P111_MAX 2
40 | #define P101_MAX 3
41 | #define P010_MAX 4
42 | /* NOTE(review): p01 and p10 are presumably the 0->1 and 1->0 transition probabilities of a two-parameter Markov model -- TODO confirm */
43 | double symbol_prob(double p01, double p10, uint64_t x, int bitwidth) ;
44 | double max(double x, double y) ;
45 | uint64_t mk_symbol(int prefix, int tbp, int postfix, int bitwidth) ;
46 | uint64_t mk_symbol_nopostfix(int prefix, int tbp, int bitwidth) ;
47 | int most_probable_transition_pair(double p01, double p10) ;
48 | uint64_t most_probable_symbol_odd(double p01, double p10,int bitwidth) ;
49 | uint64_t most_probable_symbol_even(double p01, double p10,int bitwidth) ;
50 | uint64_t most_probable_symbol(double p01, double p10,int bitwidth) ;
51 | double symbol_max_probability(double p01, double p10,int bitwidth,uint64_t *mcv) ;
52 | double p_to_entropy(double p01, double p10,int bitwidth, double *mcv_prob, uint64_t *mcv) ;
53 | int near(double x,double y, double epsilon) ;
54 | //void pick_point(double *p01, double *p10, double desired, double epsilon, int bitwidth, t_rngstate* rngstate) ;
55 | void make_sample_table(double p01, double p10, int bitwidth, int **sampletable0, int **sampletable1) ;
56 | 
57 | 


--------------------------------------------------------------------------------
/longest_run_cdf.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |     djent - A reimplementation of Fourmilab's ent with several improvements. 
  3 |     
  4 |     Copyright (C) 2017  David Johnston
  5 | 
  6 |     This program is free software; you can redistribute it and/or modify
  7 |     it under the terms of the GNU General Public License as published by
  8 |     the Free Software Foundation; either version 2 of the License, or
  9 |     (at your option) any later version.
 10 | 
 11 |     This program is distributed in the hope that it will be useful,
 12 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14 |     GNU General Public License for more details.
 15 | 
 16 |     You should have received a copy of the GNU General Public License along
 17 |     with this program; if not, write to the Free Software Foundation, Inc.,
 18 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 19 | 
 20 |     -----
 21 |     
 22 |     Contact. David Johnston dj@deadhat.com
 23 | */
 24 | 
 25 | #include <inttypes.h> 
 26 | #include <stdio.h>
 27 | #include <string.h>
 28 | #include <stdlib.h>
 29 | #include <sys/stat.h>
 30 | #include <math.h>
 31 | #include <stdint.h>
 32 | #include <string.h>
 33 | #include <ctype.h>
 34 | 
 35 | 
 36 | #ifndef NO_GMP
 37 | #include <mpfr.h>
 38 | #endif
 39 | 
 40 | // Return the probability of the longest run of heads being less than or equal to n
 41 | // in a sequence of r uniform coin tosses. Use MPFR to avoid overflows.
 42 | double longest_run_cdf(unsigned int ui_n,unsigned int ui_r) { // n=longest run. r = length of data sequence
 43 |     double answer;
 44 |     mpfr_set_default_prec(1024); // default precision, in bits, for the mpfr_init calls below
 45 |     mpfr_t n;
 46 |     mpfr_t r;
 47 |     mpfr_t topa;
 48 |     mpfr_t bottoma;
 49 |     mpfr_t first;
 50 |     mpfr_t topb;
 51 |     mpfr_t nplusone;
 52 |     mpfr_t bottomb;
 53 |     mpfr_t nplus2over2;
 54 |     mpfr_t second;
 55 |     mpfr_t mpfans;
 56 |     mpfr_set_default_prec(1024); // same call as above; repeating it changes nothing
 57 |     // Every temporary initialised here is cleared again just before the return.
 58 |     mpfr_init(topa);
 59 |     mpfr_init(nplusone);
 60 |     mpfr_init(bottoma);
 61 |     mpfr_init(first);
 62 |     mpfr_init(topb);
 63 |     mpfr_init(bottomb);
 64 |     mpfr_init(nplus2over2);
 65 |     mpfr_init(second);
 66 |     mpfr_init(mpfans);
 67 |     mpfr_init_set_ui(n,ui_n,MPFR_RNDN);
 68 |     mpfr_init_set_ui(r,ui_r,MPFR_RNDN);
 69 |     // topa = r + 1
 70 |     mpfr_add_ui(topa,r,1,MPFR_RNDN);
 71 |     // nplusone = n + 1
 72 |     mpfr_add_ui(nplusone,n,1,MPFR_RNDN);
 73 | 
 74 |     // bottoma = 2^(n+1) - n - 2
 75 |     mpfr_exp2(bottoma,nplusone,MPFR_RNDN);
 76 |     mpfr_sub(bottoma,bottoma,n,MPFR_RNDN);
 77 |     mpfr_sub_ui(bottoma,bottoma,2,MPFR_RNDN);
 78 |     // first = exp( -(r+1) / (2^(n+1) - n - 2) )
 79 |     mpfr_div(first,topa,bottoma,MPFR_RNDN);
 80 |     mpfr_neg(first,first,MPFR_RNDN);
 81 | 
 82 |     mpfr_exp(first,first,MPFR_RNDN);
 83 | 
 84 |     // Second factor: topb = 2^(n+1) - 1
 85 |     mpfr_exp2(topb,nplusone,MPFR_RNDN);
 86 |     mpfr_sub_ui(topb,topb,1,MPFR_RNDN);
 87 |     // bottomb = 2^(n+1) - (n+2)/2
 88 |     mpfr_exp2(bottomb,nplusone,MPFR_RNDN);
 89 | 
 90 |     mpfr_add_ui(nplus2over2,n,2,MPFR_RNDN);
 91 |     mpfr_div_ui(nplus2over2,nplus2over2,2,MPFR_RNDN);
 92 | 
 93 |     mpfr_sub(bottomb,bottomb,nplus2over2,MPFR_RNDN);
 94 |     // second = topb / bottomb
 95 |     mpfr_div(second,topb,bottomb,MPFR_RNDN);
 96 | 
 97 |     // Final: answer = first * second, converted to a double
 98 |     mpfr_mul(mpfans,first,second,MPFR_RNDN);
 99 |     answer = mpfr_get_d(mpfans,MPFR_RNDN);
100 | 
101 |     mpfr_clear(topa);
102 |     mpfr_clear(nplusone);
103 |     mpfr_clear(bottoma);
104 |     mpfr_clear(first);
105 |     mpfr_clear(topb);
106 |     mpfr_clear(bottomb);
107 |     mpfr_clear(nplus2over2);
108 |     mpfr_clear(second);
109 |     mpfr_clear(mpfans);
110 |     mpfr_clear(n);
111 |     mpfr_clear(r);
112 | 
113 |     return answer;
114 | 
115 | 
116 | }
117 | 


--------------------------------------------------------------------------------
/mathy_things.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |     djent - A reimplementation of Fourmilab's ent with several improvements. 
  3 |     
  4 |     Copyright (C) 2017  David Johnston
  5 | 
  6 |     This program is free software; you can redistribute it and/or modify
  7 |     it under the terms of the GNU General Public License as published by
  8 |     the Free Software Foundation; either version 2 of the License, or
  9 |     (at your option) any later version.
 10 | 
 11 |     This program is distributed in the hope that it will be useful,
 12 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14 |     GNU General Public License for more details.
 15 | 
 16 |     You should have received a copy of the GNU General Public License along
 17 |     with this program; if not, write to the Free Software Foundation, Inc.,
 18 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 19 | 
 20 |     -----
 21 |     
 22 |     Contact. David Johnston dj@deadhat.com
 23 | */
 24 | 
 25 | #include <inttypes.h> 
 26 | #include <stdio.h>
 27 | #include <string.h>
 28 | #include <stdlib.h>
 29 | #include <sys/stat.h>
 30 | #include <math.h>
 31 | #include <stdint.h>
 32 | #include <string.h>
 33 | #include <ctype.h>
 34 | /* ipow: return base raised to the power exp by repeated squaring; uint64_t arithmetic wraps modulo 2^64. ipow(x, 0) is 1. */
 35 | uint64_t ipow(uint64_t base, uint64_t exp)
 36 | {
 37 |     uint64_t result = 1;
 38 |     while (exp)
 39 |     {
 40 |         if (exp & 1)            /* low bit set: fold the current power of base into the result */
 41 |             result *= base;
 42 |         exp >>= 1;              /* move on to the next bit of the exponent */
 43 |         base *= base;           /* base, base^2, base^4, ... */
 44 |     }
 45 | 
 46 |     return result;
 47 | }
 48 | 
 49 | /* Chi Square P value computation */
 50 | /* zcdf: cumulative probability P(Z <= z) of a standard normal deviate, from two polynomial fits (one for |z| < 2, one for 2 <= |z| < 6). */
 51 | double zcdf(double z) {
 52 |     double w;
 53 |     double x;
 54 |     double y;
 55 |     double result;
 56 | 
 57 |     if (z == 0.0) return 0.5;
 58 | 
 59 |     y = fabs(z)/2.0;
 60 |     /* For |z| >= 6 the CDF has saturated: 1.0 in the upper tail, 0.0 in the lower tail (previously 0.0 for both signs). */
 61 |     if (y >= 3.0) return (z > 0.0) ? 1.0 : 0.0;
 62 |     
 63 |     if (y < 1.0) {
 64 |         w = y * y;
 65 |         x =         0.000124818987;
 66 |         x = x * w - 0.001075204047;
 67 |         x = x * w + 0.005198775019;
 68 |         x = x * w - 0.019198292004;
 69 |         x = x * w + 0.059054035642;
 70 |         x = x * w - 0.151968751364;
 71 |         x = x * w + 0.319152932694;
 72 |         x = x * w - 0.531923007300;
 73 |         x = x * w + 0.797884560593;
 74 |         x = x * 2.0 * y;
 75 |     } else {
 76 |         y -= 2.0;
 77 |         x =        -0.000045255659;
 78 |         x = x * y + 0.000152529290;
 79 |         x = x * y - 0.000019538132;
 80 |         x = x * y - 0.000676904986;
 81 |         x = x * y + 0.001390604284;
 82 |         x = x * y - 0.000794620820;
 83 |         x = x * y - 0.002034254874;
 84 |         x = x * y + 0.006549791214;
 85 |         x = x * y - 0.010557625006;
 86 |         x = x * y + 0.011630447319;
 87 |         x = x * y - 0.009279453341;
 88 |         x = x * y + 0.005353579108;
 89 |         x = x * y - 0.002141268741;
 90 |         x = x * y + 0.000535310849;
 91 |         x = x * y + 0.999936657524;
 92 |     }
 93 | 
 94 |     /* x now approximates P(|Z| <= |z|); turn it into the one-sided CDF according to the sign of z. */
 95 |     if (z > 0.0) {
 96 |         result = (x/2.0)+0.5;
 97 |     } else {
 98 |         result = (0.5 - (x/2.0));
 99 |     }
100 | 
101 |     return result;
102 | }
103 | 
104 | #define LOG_SQRT_PI 0.5723649429247000870717135 /* log (sqrt (pi)) */
105 | #define I_SQRT_PI   0.5641895835477562869480795 /* 1 / sqrt (pi) */
106 | #define BIGX        20.0         /* max value to represent exp (x) */
107 | #define ex(x)       (((x) < -BIGX) ? 0.0 : exp(x))
108 | /* chisqp: P value for the chi-square value ax; df is presumably the degrees of freedom, and the result presumably the upper-tail probability -- confirm against ent. */
109 | double chisqp(double ax, size_t df) {
110 |     double x;
111 |     double a;
112 |     double y;
113 |     double s;
114 |     double e;
115 |     double c;
116 |     double z;
117 |     int dfeven;
118 |     /* dfeven selects between the even-df and odd-df forms used in every step below. */
119 |     dfeven=0;
120 |     if ((df % 2)==0) dfeven = 1;
121 | 
122 |     x = ax;
123 |     /* Non-positive ax or df < 1: return 1.0 without further work. */
124 |     if (x <= 0.0 || df < 1) return 1.0;
125 | 
126 |     a = x/2.0;
127 |     /* y is assigned only for df > 1; the df == 1 path (odd, and not > 2) never reads it. */
128 |     if (df > 1)  y = ex(-a);
129 |     /* Leading term: y for even df, 2*zcdf(-sqrt(x)) for odd df. */
130 |     if (dfeven == 1) s = y;
131 |     else s = 2.0 * zcdf(-sqrt(x));
132 |     /* df <= 2 returns s as is; larger df add one term per unit step of z up to (df-1)/2. */
133 |     if (df > 2) {
134 |         x = (df - 1.0)/2.0; /* x is reused as the upper limit for z */
135 |         if (dfeven==1) z = 1.0;
136 |         else z = 0.5;
137 |         /* a > BIGX: build each term from logarithms and pass it through ex(). */
138 |         if (a > BIGX) {
139 |             if (dfeven==1) e = 0.0;
140 |             else e = LOG_SQRT_PI;
141 |             
142 |             c = log(a);
143 |             
144 |             while (z <= x) {
145 |                 e = log(z) + e;
146 |                 s += ex(c * z - a - e);
147 |                 z += 1.0;
148 |             }
149 |             return (s);
150 |         } else {
151 |         if (dfeven==1) e = 1.0;
152 |         else e = (I_SQRT_PI / sqrt(a));
153 |         c = 0.0;
154 |         while (z <= x) {
155 |             e = e * (a / z);
156 |             c = c + e;
157 |             z += 1.0;
158 |             }
159 |         return (c * y + s);
160 |         }
161 |     } else {
162 |         return s;
163 |     }
164 | }
165 | 
166 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # djent
  2 | djent is a reimplementation of the Fourmilab/John Walker random number test program ent.
  3 | 
  4 | The improvements are:
  5 | 
  6 | * Multiple input file names can be provided at once. This works nicely with the CSV format output.
  7 | * -h works as well as -u to get the help information.
  8 | * The filename is present in CSV output
  9 | * The symbol size can be any number of bits up to 32. ent was constrained to 1 or 8.
 10 | * The SCC test can be either wrap-around or not wrap-around.
 11 | * The SCC result can be given a lag value to get a LAG-N correlation coefficient.
 12 | * A list of filenames to analyze can be read from a text file using -i filename.
 13 | * Test condition details (Volts, temp, id etc.) can be parsed from the filename and included in output. 
 14 | * MCV Min Entropy is estimated in addition to Shannon Entropy. The symbol and entropy are both reported
 15 | * The longest run and the symbol in the longest run are reported. For 1 bit-per-symbol analysis, a p-value is also computed: the probability of a uniform random bit sequence having a longest run length equal to or less than the measured run length. 
 16 | 
 17 | ```
 18 | djent -h
 19 | Usage: djent [-brRpcCuhds] [-l <n>] [-i <input file list filename>] [filename] [filename2] ...
 20 | 
 21 | Compute statistics of random data.
 22 |   Author: David Johnston, dj@deadhat.com
 23 | 
 24 |   -i <filename>  --inputfilelist=<filename> Read list of filenames from <filename>
 25 |   -p             --parse_filename           Extract CID, Process, Voltage and Temperature from filename.
 26 |                                             The values will be included in the output.
 27 |   -l <n>         --symbol_length=<n>        Treat incoming data symbols as bitlength n. Default is 8.
 28 |   -b             --binary                   Treat incoming data as binary. Default bit length will be -l 1
 29 |   -r             --byte_reverse             Reverse the bit order in incoming bytes
 30 |   -R             --word_reverse             Reverse the byte order in incoming 4 byte words
 31 |   -c             --occurrence               Print symbol occurrence counts
 32 |   -C             --longest                  Print symbol longest run counts
 33 |   -w             --scc_wrap                 Treat data as cyclical in SCC
 34 |   -n <n>         --lagn=<n>                 Lag gap in SCC. Default=1
 35 |   -f             --fold                     Fold uppercase letters to lower case
 36 |   -t             --terse                    Terse output
 37 |   -e             --ent_exact                Exactly match output format of ent
 38 |   -s             --suppress_header          Suppress the header in terse output
 39 |   -h or -u       --help                     Print this text
 40 | 
 41 |  Notes
 42 |    * By default djent is in hex mode where it reads ascii hex data and converts it to binary to analyze.
 43 |      In hex mode, the symbol length defaults to 8, so normal hex files can be treated as a representation
 44 |      of bytes. The symbol length can be changed to any value between 1 and 32 bits using the -l <n> option.
 45 |    * With the -b option djent switches to binary mode: each byte is read as raw binary and the symbol length defaults to 1.
 46 |    * To analyze ascii text instead of hex ascii, you need djent to treat each byte as a separate symbol, so
 47 |      use binary mode with a symbol length of 8. I.E. djent -b -l 8 <filename>
 48 |    * By default djent treats the MSB of each byte as the first. This can be switched so that djent treats
 49 |      the LSB as the first bit in each byte using the -r option.
 50 |    * Terse output is requested using -t. This outputs in CSV format. The first line is the header. If
 51 |      multiple files are provided, there will be one line of CSV output per file in addition to the header.
 52 |      The CSV header can be suppressed with -s.
 53 |    * To analyze multiple files, just give multiple file names on the command line. To read data in from
 54 |      the command line, don't provide a filename and pipe the data in. <datasource> | djent
 55 |    * The parse filename option -p picks out four patterns from the filename to include in the output.
 56 |      This is so that it is easy to plot test conditions that are commonly encoded in a filename.
 57 |      Fields are delimited by underscores. The four patterns for CID, process, Voltage and Temperature are:
 58 |      _CID-<componentID>_ , _PROC-<process info>_, _<x>p<y>V_ and _<x>p<y>C_ . 'p' is the decimal point.
 59 |    * To compute the statistics, djent builds a frequency table of the symbols. This can be displayed
 60 |      using the -c option. The size of this table is what limits the maximum symbol size. For each
 61 |      of the 2^n symbols, a 64 bit entry in a table is created. So for n=32, that's 32GBytes so the ability
 62 |      to handle large symbol sizes is limited by the available memory and the per process allocation limit.
 63 |    * The serial correlation coefficient is not wrap around by default, meaning that it does not compare
 64 |      the last value in the data with the first. To get wrap around behaviour, use the -w option.
 65 |    * The Lag-N correlation coefficient can be computed by using the -n <n> option. This causes the SCC
 66 |      computation to compare each Xth symbol with the (X+n)th symbol instead of the (X+1)th symbol.
 67 |      If you use wrap around with Lag-N, then the wrap around will reach n bits further into the start
 68 |      of the sequence.
 69 |    * The byte reverse option -r reverses the order of bits within each byte. The word reverse option -R
 70 |      reverses the order of bytes within each 32 bit word, from 3,2,1,0 to 0,1,2,3. Both -R and -r can
 71 |      be used together. With -R, data that isn't a multiple of 32 bits long will be padded with
 72 |      zeros, which may not be what you want. A padding warning will be sent to STDERR.
 73 |    * Instead of providing data file names on the command line, djent can be told to read a list of files
 74 |      from a text file. The file must have one filename per line. Lines beginning with # will be ignored.
 75 |      Use the -i <filename> option to request that djent reads the file list from <filename>.
 76 | 
 77 |  Examples
 78 |    Print this help
 79 |      djent -h
 80 | 
 81 |    Analyze hex file from stdin
 82 |      cat datafile.hex | djent
 83 | 
 84 |    Analyze binary file
 85 |      djent -b datafile.bin
 86 | 
 87 |    Analyze several files with CSV output
 88 |      djent -t data1.hex data2.hex data3.hex
 89 | 
 90 |    Analyze ascii symbols - Read in binary and set symbol size to 8.
 91 |      djent -b -l 8  textfile.txt
 92 | 
 93 |    Analyze binary file with parsable filename.
 94 |      djent -b -t -p  rawdata_CID-X23_PROC-TTFT_1p2V_25p0C_.bin
 95 | ```
 96 |   
 97 | 
 98 | 
 99 |  
100 | 


--------------------------------------------------------------------------------
/filename_parse.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*
  3 |     djent - A reimplementation of Fourmilab's ent with several improvements. 
  4 |     
  5 |     Copyright (C) 2017  David Johnston
  6 | 
  7 |     This program is free software; you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation; either version 2 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License along
 18 |     with this program; if not, write to the Free Software Foundation, Inc.,
 19 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 20 | 
 21 |     -----
 22 |     
 23 |     Contact. David Johnston dj@deadhat.com
 24 | */
 25 | 
 26 | /* Visual Studio C doesnt have a regex library. So this does the
 27 |  * pattern search instead so I can compile on windows, linux and macos.
 28 |  */
 29 | 
 30 | 
 31 | #include <inttypes.h> 
 32 | #include <stdio.h>
 33 | #include <string.h>
 34 | #include <stdlib.h>
 35 | #include <sys/stat.h>
 36 | #include <math.h>
 37 | #include <stdint.h>
 38 | #include <string.h>
 39 | #include <ctype.h>
 40 | 
 41 | #include "filename_parse.h"
 42 | 
 43 | /* look for vpattern in str. Return the match to found. Return True if found */
 44 | int find_vpattern(char *str,char *found) {
 45 |     size_t len;
 46 |     int i;
 47 |     int start;
 48 |     int end;
 49 |     int pos;
 50 |     int state;
 51 |     int done;
 52 |     char c;
 53 |     len = strlen(str);
 54 |     start = 0;
 55 |     end = 0;
 56 |     done = 0;
 57 | 
 58 |     /* A little state machine to match the _<int>p<int>V_ pattern */
 59 |     pos = 0;
 60 |     state = 1;
 61 |     done = 0;
 62 |     do {
 63 |         c = str[pos];
 64 |         if (state == 1) { /* _ */
 65 |             if (c=='_') {
 66 |                 state++;
 67 |                 start=pos;
 68 |             }
 69 |             pos++;
 70 |         } else if (state == 2) { /* first int */
 71 |             if (isdigit((char)c)) {
 72 |                 state++;
 73 |             } else {
 74 |                 state = 1;
 75 |             }
 76 |             pos++;
 77 |         } else if (state == 3) { /* rest of int */
 78 |             if (isdigit((char)c)) {
 79 |                 ; /* stay here */
 80 |             } else if ((c=='p') || (c=='.')) { /* decimal point */
 81 |                 state++;
 82 |             } else {
 83 |                 state = 1;
 84 |             }
 85 |             pos++;
 86 |         } else if (state == 4) { /* first int */
 87 |             if (isdigit((char)c)) {
 88 |                 state++;
 89 |             } else {
 90 |                 state = 1;
 91 |             }
 92 |             pos++;
 93 |         } else if (state == 5) { /* rest of int */
 94 |             if (isdigit((char)c)) {
 95 |                 ; /* stay here */
 96 |             } else if (c=='V') { /* V */
 97 |                 state++;
 98 |             } else {
 99 |                 state = 1;
100 |             }
101 |             pos++;
102 |         } else if (state == 6) { /* _ */
103 |             if ((c=='_') || (c=='.')) { // Allow 1p0V.bin  instead of 1p0V_.bin.
104 |                 done = 1;
105 |                 end = pos;
106 |             } else {
107 |                 state = 1;
108 |             }
109 |             pos++;
110 |         }
111 | 
112 |     } while ((pos < len) && (done == 0));
113 |     
114 |     if (done == 0) return 0;
115 | 
116 |     for(i=start;i<=end;i++) {
117 |         found[i-start]=str[i];
118 |     }
119 |     found[i-start] = 0x00;
120 |     return 1;
121 |    
122 | }
123 | 
/*
 * Look for the temperature pattern _<int>p<int>C_ in str.
 *
 * The integer part may start with '-' for negative temperatures. The decimal
 * point may be written as 'p' or '.', and the closing delimiter may be '_'
 * or '.' so that 10p0C.bin is accepted as well as 10p0C_.bin.
 *
 * str   - NUL terminated string to search.
 * found - receives a NUL terminated copy of the first match, including the
 *         opening and closing delimiters. It is left untouched when there
 *         is no match. The copy is not bounds checked, so the caller must
 *         provide enough room; strlen(str)+1 bytes is always sufficient.
 *
 * Returns 1 if the pattern was found, 0 otherwise.
 */
int find_tpattern(char *str,char *found) {
    size_t len;
    size_t start;
    size_t pos;
    size_t i;
    int state;
    int accepted;
    unsigned char c;

    if ((str == NULL) || (found == NULL)) return 0;

    len = strlen(str);
    start = 0;
    state = 1;

    /* A little state machine to match the _<int>p<int>C_ pattern */
    for (pos = 0; pos < len; pos++) {
        /* isdigit() is only defined for unsigned char values and EOF */
        c = (unsigned char)str[pos];
        accepted = 0;

        switch (state) {
        case 2: /* sign or first digit of the integer part */
            if (isdigit(c) || (c == '-')) {
                state = 3;
                accepted = 1;
            }
            break;
        case 3: /* rest of the integer part, then the decimal point */
            if (isdigit(c)) {
                accepted = 1;
            } else if ((c=='p') || (c=='.')) {
                state = 4;
                accepted = 1;
            }
            break;
        case 4: /* first digit of the fractional part */
            if (isdigit(c)) {
                state = 5;
                accepted = 1;
            }
            break;
        case 5: /* rest of the fractional part, then the unit letter */
            if (isdigit(c)) {
                accepted = 1;
            } else if (c=='C') {
                state = 6;
                accepted = 1;
            }
            break;
        case 6: /* closing delimiter */
            if ((c=='_') || (c=='.')) {
                for (i = start; i <= pos; i++) {
                    found[i-start] = str[i];
                }
                found[i-start] = 0x00;
                return 1;
            }
            break;
        default: /* state 1: hunting for the opening '_' */
            break;
        }

        if (!accepted) {
            /*
             * The character did not continue the current attempt. If it is
             * an underscore it may open the next field, so restart from it
             * rather than discarding it. Otherwise "_12_25p0C_" would never
             * match, because the '_' that ends "_12" is also the one that
             * starts "_25p0C_".
             */
            if (c == '_') {
                state = 2;
                start = pos;
            } else {
                state = 1;
            }
        }
    }

    return 0;
}
204 | 
205 | 
/*
 * Look for the component ID pattern _CID-<id>_ in str, where <id> is one or
 * more characters other than '_'.
 *
 * str   - NUL terminated string to search.
 * found - receives a NUL terminated copy of the first match, including the
 *         opening and closing underscores. It is left untouched when there
 *         is no match. The copy is not bounds checked, so the caller must
 *         provide enough room; strlen(str)+1 bytes is always sufficient.
 *
 * Returns 1 if the pattern was found, 0 otherwise.
 */
int find_cidpattern(char *str,char *found) {
    static const char prefix[] = "_CID-";
    const size_t prefix_len = sizeof(prefix) - 1;
    size_t len;
    size_t start;
    size_t pos;
    size_t i;
    size_t matched;   /* number of leading prefix characters matched so far */
    size_t id_len;    /* number of ID characters seen after the prefix */
    char c;

    if ((str == NULL) || (found == NULL)) return 0;

    len = strlen(str);
    start = 0;
    matched = 0;
    id_len = 0;

    for (pos = 0; pos < len; pos++) {
        c = str[pos];

        if (matched < prefix_len) {
            /* Still matching the literal "_CID-" prefix */
            if (c == prefix[matched]) {
                if (matched == 0) start = pos;
                matched++;
            } else if (c == '_') {
                /*
                 * A mismatching underscore may itself open the field, so
                 * restart from it instead of throwing it away. Without this
                 * "__CID-X_" is never matched.
                 */
                start = pos;
                matched = 1;
            } else {
                matched = 0;
            }
        } else if (c != '_') {
            /* Inside the ID */
            id_len++;
        } else if (id_len > 0) {
            /* Closing underscore after a non-empty ID: copy out the match */
            for (i = start; i <= pos; i++) {
                found[i-start] = str[i];
            }
            found[i-start] = 0x00;
            return 1;
        } else {
            /* Empty ID ("_CID-_"): this underscore may open a new field */
            start = pos;
            matched = 1;
        }
    }

    return 0;
}
277 | 
/*
 * Look for the process pattern _PROC-<name>_ in str, where <name> is one or
 * more characters other than '_'.
 *
 * str   - NUL terminated string to search.
 * found - receives a NUL terminated copy of the first match, including the
 *         opening and closing underscores. It is left untouched when there
 *         is no match. The copy is not bounds checked, so the caller must
 *         provide enough room; strlen(str)+1 bytes is always sufficient.
 *
 * Returns 1 if the pattern was found, 0 otherwise.
 */
int find_procpattern(char *str,char *found) {
    static const char prefix[] = "_PROC-";
    const size_t prefix_len = sizeof(prefix) - 1;
    size_t len;
    size_t start;
    size_t pos;
    size_t i;
    size_t matched;   /* number of leading prefix characters matched so far */
    size_t name_len;  /* number of name characters seen after the prefix */
    char c;

    if ((str == NULL) || (found == NULL)) return 0;

    len = strlen(str);
    start = 0;
    matched = 0;
    name_len = 0;

    for (pos = 0; pos < len; pos++) {
        c = str[pos];

        if (matched < prefix_len) {
            /* Still matching the literal "_PROC-" prefix */
            if (c == prefix[matched]) {
                if (matched == 0) start = pos;
                matched++;
            } else if (c == '_') {
                /*
                 * A mismatching underscore may itself open the field, so
                 * restart from it instead of throwing it away. Without this
                 * "__PROC-X_" is never matched.
                 */
                start = pos;
                matched = 1;
            } else {
                matched = 0;
            }
        } else if (c != '_') {
            /* Inside the process name */
            name_len++;
        } else if (name_len > 0) {
            /* Closing underscore after a non-empty name: copy out the match */
            for (i = start; i <= pos; i++) {
                found[i-start] = str[i];
            }
            found[i-start] = 0x00;
            return 1;
        } else {
            /* Empty name ("_PROC-_"): this underscore may open a new field */
            start = pos;
            matched = 1;
        }
    }

    return 0;
}
353 | 
354 | void parse_the_filename(char *filename) {
355 | 
356 |     char match[256];
357 |     int i;
358 | 
359 |     if (find_vpattern(filename,match)) {
360 |         for (i=0;i<strlen(match);i++) {
361 |             if (match[i]=='p') match[i] = '.';
362 |         }
363 |         sscanf(match,"_%lfV_",&voltage);
364 |     } else {
365 |         fprintf(stderr,"Regex error scanning for _<num>p<num>V_:\n");
366 |         voltage = 0.0;
367 |     }
368 | 
369 | 
370 |     if (find_tpattern(filename,match)) {
371 |         for (i=0;i<strlen(match);i++) {
372 |             if (match[i]=='p') match[i] = '.';
373 |         }
374 |         sscanf(match,"_%lfC_",&temperature);
375 |     } else {
376 |         fprintf(stderr,"Regex error scanning for _<num>p<num>C_:\n");
377 |         temperature = 0.0;
378 |     }
379 | 
380 |     if (find_cidpattern(filename,match)) {
381 |         match[strlen(match)-1]=0x00;
382 |         sscanf(match,"_CID-%s",(char *)&deviceid);
383 |     } else {
384 |         fprintf(stderr,"Regex error scanning for _CID-<ID>__:\n");
385 |     }
386 | 
387 |     if (find_procpattern(filename,match)) {
388 |         match[strlen(match)-1]=0x00;
389 |         sscanf(match,"_PROC-%s",(char *)&process);
390 |     } else {
391 |         fprintf(stderr,"Regex error scanning for _PROC-<ID>__:\n");
392 |     }
393 | }
394 | 
395 | 
396 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/markov2p.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*
  3 |     djrandom - A utility to generate random numbers.
  4 |     
  5 |     Copyright (C) 2017  David Johnston
  6 | 
  7 |     This program is free software; you can redistribute it and/or modify
  8 |     it under the terms of the GNU General Public License as published by
  9 |     the Free Software Foundation; either version 2 of the License, or
 10 |     (at your option) any later version.
 11 | 
 12 |     This program is distributed in the hope that it will be useful,
 13 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |     GNU General Public License for more details.
 16 | 
 17 |     You should have received a copy of the GNU General Public License along
 18 |     with this program; if not, write to the Free Software Foundation, Inc.,
 19 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 20 | 
 21 |     -----
 22 |     
 23 |     Contact. David Johnston dj@deadhat.com
 24 | */
 25 | 
 26 | #include <stdio.h>
 27 | #include <string.h>
 28 | #include <stdlib.h>
 29 | #include <sys/stat.h>
 30 | #include <math.h>
 31 | #include <stdint.h>
 32 | #include <inttypes.h>
 33 | 
 34 | #include <unistd.h>
 35 | #include <string.h>
 36 | 
 37 | //#include "djenrandommodel.h"
 38 | #include "markov2p.h"
 39 | 
 40 | #define KNRM  "\x1B[0m"
 41 | #define KRED  "\x1B[31m"
 42 | #define KGRN  "\x1B[32m"
 43 | #define KYEL  "\x1B[33m"
 44 | #define KBLU  "\x1B[34m"
 45 | #define KMAG  "\x1B[35m"
 46 | #define KCYN  "\x1B[36m"
 47 | #define KWHT  "\x1B[37m"
 48 | 
 49 | 
// A library for converting between points, scc, bias and entropy
// with the 2 parameter markov model.
 52 | 
 53 | extern int verbose_mode;
 54 | 
 55 | char msymboltext[255];
 56 | 
 57 | void print_symbol(uint64_t x, int bitwidth) {
 58 |     int i;
 59 | 
 60 |     for(i=0;i<bitwidth;i++) {
 61 |         if (((x >> (bitwidth-1-i)) & 0x01)==0) msymboltext[i]='0';
 62 |         else msymboltext[i]='1';
 63 |     }
 64 |     msymboltext[bitwidth]=(char)0;
 65 | }
 66 | 
 67 | // Make two probability density functions for all the 2^bitwidth symbols
 68 | // One for when the previous bit is 0, one for when it is 1.
 69 | void make_pdf(double p01, double p10, int bitwidth, double *table0, double *table1) {
 70 |     double p00;
 71 |     double p11;
 72 |     double plist0;
 73 |     double plist1;
 74 |     int bp;
 75 |     int x; 
 76 |     int i;
 77 |     double sum0 = 0.0;
 78 |     double sum1 = 0.0;
 79 |     
 80 |     p00 = 1.0-p01;
 81 |     p11 = 1.0-p10;
 82 |    
 83 |     // For each symbol
 84 |     for (x=0;x<(1 << bitwidth);x++) {
 85 |         //fprintf(stderr," MAKE_PDF symbol %02x \n",x); 
 86 |         if ((p01==0.5) && (p10==0.5)){
 87 |             table0[x] = 1.0/(1<<bitwidth);
 88 |             table1[x] = 1.0/(1<<bitwidth);
 89 |             sum0 += table0[x];
 90 |             sum1 = sum0;
 91 |         } else {
 92 |             plist0 = 1.0;
 93 |             plist1 = 1.0;
 94 | 
 95 |             if ((x & 0x1)==0) { // first bit with previous last bit
 96 |                 plist0 *= p00;
 97 |                 plist1 *= p10;
 98 |                 //if (verbose_mode==1) {
 99 |                 //    if (x==0xaa) {
100 |                 //        fprintf(stderr," 0XAA !!\n");
101 |                 //        fprintf(stderr,"plist0_%d = %1.4f  plist1_%d = %1.6f\n",0,p00,0,p10);
102 |                 //    }
103 |                 //    if (x==0xa9) {
104 |                 //        fprintf(stderr," 0XA9 !!\n");
105 |                 //        fprintf(stderr,"plist0_%d = %1.4f  plist1_%d = %1.4f\n",0,p00,0,p10);
106 |                 //    }
107 |                 //}
108 |             } else {
109 |                 plist0 *= p01;
110 |                 plist1 *= p11;
111 |                     //if (verbose_mode==1) {
112 |                     //    if (x==0xaa) {
113 |                     //        fprintf(stderr," 0XAA !!\n");
114 |                     //        fprintf(stderr,"plist0_%d = %1.4f  plist1_%d = %1.4f\n",0,p01,0,p11);
115 |                     //    }
116 |                     //    if (x==0xa9) {
117 |                     //        fprintf(stderr," 0XA9 !!\n");
118 |                     //        fprintf(stderr,"plist0_%d = %1.4f  plist1_%d = %1.4f\n",0,p01,0,p11);
119 |                     //    }
120 |                     //}
121 |             }
122 | 
123 |             for (i=0;i<(bitwidth-1);i++) {
124 |                 bp = ((x >> i) & 0x3);  // Get the bit pair
125 |                 if (bp==0) {
126 |                     plist0 *= p00;
127 |                     plist1 *= p00;
128 |                     //if (verbose_mode==1) {
129 |                     //    if ((x==0xaa) || (x==0xa9)) {
130 |                     //        fprintf(stderr,"plist0_%d = %1.4f  plist1_%d = %1.4f\n",0,p00,0,p00);
131 |                     //    }
132 |                     //}
133 |                 } else if (bp==1) {
134 |                     plist0 *= p10;
135 |                     plist1 *= p10;
136 |                     //if (verbose_mode==1) {
137 |                     //    if ((x==0xaa) || (x==0xa9)) {
138 |                     //        fprintf(stderr,"plist0_%d = %1.4f  plist1_%d = %1.4f\n",0,p10,0,p10);
139 |                     //    }
140 |                     //}
141 |                 } else if (bp==2) {
142 |                     plist0 *= p01;
143 |                     plist1 *= p01;
144 |                     //if (verbose_mode==1) {
145 |                     //    if ((x==0xaa) || (x==0xa9)) {
146 |                     //        fprintf(stderr,"plist0_%d = %1.4f  plist1_%d = %1.4f\n",0,p01,0,p01);
147 |                     //    }
148 |                     //}
149 |                 } else if (bp==3) {
150 |                     plist0 *= p11;
151 |                     plist1 *= p11;
152 |                     //if (verbose_mode==1) {
153 |                     //    if ((x==0xaa) || (x==0xa9)) {
154 |                     //        fprintf(stderr,"plist0_%d = %1.4f  plist1_%d = %1.4f\n",0,p11,0,p11);
155 |                     //    }
156 |                     //}
157 |                 }
158 |             }
159 |             //if (verbose_mode==1) {
160 |             //    if ((x==0xaa) || (x==0xa9)) {
161 |             //        fprintf(stderr,"  FINAL plist0 %1.6f  plist1 %1.6f\n",plist0,plist1);
162 |             //    }
163 |             //} 
164 |             table0[x] = plist0;
165 |             table1[x] = plist1;
166 |             //if (verbose_mode==1) {
167 |             //    if ((x==0xaa) || (x==0xa9)) {
168 |             //        fprintf(stderr,"  SET table0[%02x]= %1.6f  table1[%02x] %1.6f\n",x,plist0,x,plist1);
169 |             //    }
170 |             //}   
171 |             sum0 += plist0;
172 |             sum1 += plist1;      
173 |         } // end if else
174 |         
175 |     } // end for
176 | 
177 |     for (i=0;i<256;i++) {
178 |         table0[i] = table0[i]/sum0;
179 |         table1[i] = table1[i]/sum1;
180 |     }
181 | 
182 |     //if (verbose_mode==1) {
183 |     //    fprintf(stderr, "END MAKE_PDF() pdf_table0[a9]=%1.6f\n",table0[0xa9]);
184 |     //    fprintf(stderr, "END MAKE_PDF() pdf_table0[aa]=%1.6f\n",table0[0xaa]);
185 |     //    fprintf(stderr, "END MAKE_PDF() pdf_table1[a9]=%1.6f\n",table1[0xa9]);
186 |     //    fprintf(stderr, "END MAKE_PDF() pdf_table1[aa]=%1.6f\n",table1[0xaa]);
187 |     //}
188 | }
189 |     
// Build the two cumulative distribution tables over all 2^bitwidth symbols:
// table0 for a preceding bit of 0, table1 for a preceding bit of 1.
// Entry x holds the running total of the symbol probabilities 0..x, and
// both tables are finally scaled so that their last entry is 1.
// Bit 0 of a symbol is the first bit after the preceding bit.
void make_cdf(double p01, double p10, int bitwidth, double *table0, double *table1) {
    const double stay0 = 1.0 - p01;   // P(0 followed by 0)
    const double stay1 = 1.0 - p10;   // P(1 followed by 1)
    // Indexed by the bit pair (sym >> k) & 0x3.
    const double step[4] = { stay0, p10, p01, stay1 };
    const int count = 1 << bitwidth;
    const int uniform = (p01 == 0.5) && (p10 == 0.5);
    double top0;
    double top1;
    int sym;
    int k;

    for (sym = 0; sym < count; sym++) {
        double w0;
        double w1;

        if (uniform) {
            w0 = 1.0 / count;
            w1 = 1.0 / count;
        } else {
            // Transition into the symbol's first bit.
            if (sym & 0x1) {
                w0 = p01;
                w1 = stay1;
            } else {
                w0 = stay0;
                w1 = p10;
            }
            // Transitions between the symbol's own bits.
            for (k = 0; k < (bitwidth - 1); k++) {
                const double t = step[(sym >> k) & 0x3];
                w0 *= t;
                w1 *= t;
            }
        }

        // Accumulate onto the previous entry (the first entry stands alone).
        table0[sym] = (sym == 0) ? w0 : table0[sym - 1] + w0;
        table1[sym] = (sym == 0) ? w1 : table1[sym - 1] + w1;
    }

    // Rescale so the final cumulative value is exactly 1.
    top0 = table0[count - 1];
    top1 = table1[count - 1];
    for (k = 0; k < count; k++) {
        table0[k] = table0[k] / top0;
        table1[k] = table1[k] / top1;
    }
}
263 | 
// Fill one sample table from a cumulative distribution.
// Slots [cdf[x-1]*nslots, cdf[x]*nslots) receive symbol x.
static void fill_slots_from_cdf(int *slots, const double *cdf, int nsymbols, int nslots) {
    int base = 0;
    int x;
    int i;

    for (x = 0; x < nsymbols; x++) {
        int boundary = (int)(cdf[x] * nslots);
        if (boundary > nslots) boundary = nslots;   // never write past the table
        for (i = base; i < boundary; i++) {
            slots[i] = x; // assign the symbol into table
        }
        base = boundary; // the next symbol's run starts here
    }

    // Guard against rounding leaving the tail unassigned: give any
    // remaining slots to the last symbol so no entry is left uninitialised.
    for (i = (base < 0) ? 0 : base; i < nslots; i++) {
        slots[i] = nsymbols - 1;
    }
}

// Build two 2^20-entry lookup tables for generating Markov-model symbols.
//
//   p01, p10     : transition probabilities of the 2 parameter model.
//   bitwidth     : symbol width in bits.
//   sampletable0 : out, receives a malloc'd table to use when the previous
//                  bit was 0.
//   sampletable1 : out, same for a previous bit of 1.
//
// Each table holds symbol values laid out in proportion to the symbol
// probabilities (taken from make_cdf), so indexing it with a uniformly
// distributed 20 bit value selects a symbol with that distribution.
// The caller releases both tables with free_sample_table().
// Exits the process with status 1 if memory cannot be allocated.
void make_sample_table(double p01, double p10, int bitwidth, int **sampletable0, int **sampletable1) {
    const int nsymbols = 1 << bitwidth;
    const int nslots = 1 << 20;
    double *table0;
    double *table1;
    int *st0;
    int *st1;

    // Only the CDF tables are needed. The PDF tables that used to be
    // allocated and filled here were never read and never freed.
    table0 = (double *)malloc(sizeof(double)*nsymbols);
    table1 = (double *)malloc(sizeof(double)*nsymbols);

    if ((table0==0) || (table1==0))  {
        fprintf(stderr,"Error, could not allocate symbol table for Markov symbol lookups\n");
        exit(1);
    }

    make_cdf(p01, p10, bitwidth, table0, table1);

    st0 = (int *)malloc(sizeof(int)*nslots);
    st1 = (int *)malloc(sizeof(int)*nslots);

    if ((st0==0) || (st1==0)) {
        fprintf(stderr,"Error, could not allocate 1M int sample tables for Markov generator\n");
        exit(1);
    }

    // populate the 1M tables with symbols according to the CDF.
    //   Iterate over all 2^bitwidth symbols (this was hard-coded to 256,
    //   which over-read the CDF for bitwidth < 8 and left slots
    //   unassigned for bitwidth > 8).
    fill_slots_from_cdf(st0, table0, nsymbols, nslots);
    fill_slots_from_cdf(st1, table1, nsymbols, nslots);

    free(table0);
    free(table1);

    *sampletable0 = st0;
    *sampletable1 = st1;
}
390 | 
// Release the two tables handed out by make_sample_table().
void free_sample_table(int *sampletable0, int *sampletable1) {
    int *tables[2];
    int k;

    tables[0] = sampletable0;
    tables[1] = sampletable1;
    for (k = 0; k < 2; k++) {
        free(tables[k]);
    }
}
395 | 
396 | 
397 | 
// Compute the probability of the bitwidth-bit symbol x under the
// markov 2 parameter model, given the markov model
// parameters p01 and p10.
//
// The symbol is read most significant bit first. The probability is
// averaged over the two possible values of the bit preceding the symbol,
// weighted by the stationary probabilities p0 = 1-mu and p1 = mu, where
// mu = p01/(p10+p01).
//
// Side effect: the text form of x is written to msymboltext via
// print_symbol().
double symbol_prob(double p01, double p10, uint64_t x, int bitwidth) {
    const double p00 = 1.0 - p01;
    const double p11 = 1.0 - p10;
    const double mu = p01/(p10+p01);
    const double p0 = 1.0 - mu;
    const double p1 = mu;
    // Indexed by the bit pair (x >> s) & 0x3: the high bit of the pair is
    // the earlier bit, the low bit is the one that follows it.
    const double pair_prob[4] = { p00, p01, p10, p11 };
    double plist0;
    double plist1;
    int s;

    print_symbol(x,bitwidth);

    // NOTE(review): the unbiased case returns 1.0 rather than the
    // per-symbol probability 2^-bitwidth. Kept unchanged because the
    // callers are not visible from here - confirm the intent.
    if ((p01==0.5) && (p10==0.5)) return 1.0;

    // Transition from the preceding bit into the symbol's first (top) bit.
    if (((x >> (bitwidth-1)) & 0x1)==0) {
        plist0 = p00;
        plist1 = p10;
    }
    else {
        plist0 = p01;
        plist1 = p11;
    }

    // The bitwidth-1 transitions inside the symbol, from the top pair down
    // to the pair (bit 1, bit 0). The loop previously stopped one pair
    // early, so the symbol's last bit never affected the result.
    for (s = bitwidth-2; s >= 0; s--) {
        const double t = pair_prob[(x >> s) & 0x3];
        plist0 *= t;
        plist1 *= t;
    }

    return (p0 * plist0) + (p1 * plist1);
}
484 | 
// Return the larger of x and y; x is returned when neither compares
// greater than the other.
double max(double x, double y) {
    return (y > x) ? y : x;
}
490 | 
// Assemble a bit pattern: start from prefix, append the two-bit value tbp
// (bitwidth-2)/2 times, then append the single bit postfix.
uint64_t mk_symbol(int prefix, int tbp, int postfix, int bitwidth) {
    uint64_t word = prefix;
    int remaining = (bitwidth-2)/2;   // number of two-bit groups to append

    while (remaining-- > 0) {
        word = (word << 2) + tbp;
    }

    return (word << 1) + postfix;
}
507 | 
// Assemble a bit pattern: start from prefix and append the two-bit value
// tbp (bitwidth-1)/2 times. No trailing bit is added.
uint64_t mk_symbol_nopostfix(int prefix, int tbp, int bitwidth) {
    uint64_t word = prefix;
    int remaining = (bitwidth-1)/2;   // number of two-bit groups to append

    while (remaining-- > 0) {
        word = (word << 2) + tbp;
    }

    return word;
}
524 | 
525 | int most_probable_transition_pair(double p01, double p10) {
526 |     double p010;
527 |     double p101;
528 |     double p000;
529 |     double p111;
530 |     double p00;
531 |     double p11;
532 |     double p0;
533 |     double p1;
534 |     
535 |     double mu;
536 | 
537 |     mu = p01/(p10+p01);
538 |     p0 = 1.0-mu;
539 |     p1 = mu;
540 |     
541 |     p00 = 1.0 - p01;
542 |     p11 = 1.0 - p10;
543 |         
544 |     p010 = p0 * p01 * p10;
545 |     p101 = p1 * p10 * p01;
546 |     p000 = p0 * p00 * p00;
547 |     p111 = p1 * p11 * p11;
548 |     
549 |     if      ((p111 >= p000) && (p111 >= p101) && (p111 >= p010)) {
550 |             return P111_MAX;
551 |     }
552 |     else if ((p000 >= p111) && (p000 >= p101) && (p000 >= p010)) {
553 |             return P000_MAX;
554 |     }
555 |     else if ((p101 >= p111) && (p101 >= p000) && (p101 >= p010)) {
556 |             return P101_MAX;
557 |     }
558 |     else if ((p010 >= p111) && (p010 >= p000) && (p010 >= p101)) {
559 |             return P010_MAX;
560 |     }
561 |     
562 |     return EQUIPROBABLE;
563 | 
564 | }
565 | 
566 | uint64_t most_probable_symbol_odd(double p01, double p10,int bitwidth) {
567 |     uint64_t mps;
568 |     int i;
569 |         
570 |     if (most_probable_transition_pair(p01, p10) == P000_MAX) {
571 |         mps = 0;
572 |     } else if (most_probable_transition_pair(p01, p10) == P111_MAX) {
573 |         for (i=0; i<((bitwidth-1)>>1); i++) {
574 |             mps = mps << 2;
575 |             mps = mps + 3;
576 |         }
577 |         mps = mps << 1;
578 |         mps = mps + 1;
579 |     } else if (most_probable_transition_pair(p01, p10) == P010_MAX) {
580 |         for (i=0; i<((bitwidth-1)>>1); i++) {
581 |             mps = mps << 2;
582 |             mps = mps + 1;
583 |         }
584 |         mps = mps << 1;
585 |         mps = mps + 0;
586 |     } else if (most_probable_transition_pair(p01, p10) == P101_MAX) {
587 |         for (i=0; i<((bitwidth-1)>>1); i++) {
588 |             mps = mps << 2;
589 |             mps = mps + 2;
590 |         }
591 |         mps = mps << 1;
592 |         mps = mps + 1;
593 |     } else {     // Equiprobable case, any value will do.
594 |         mps = 0;
595 |     }
596 |     return mps;
597 | }
598 | 
599 | uint64_t most_probable_symbol_even(double p01, double p10,int bitwidth) {
600 |     uint64_t mps;
601 |     int i;
602 |     double p00;
603 |     double p11;
604 |     //double p0;
605 |     //double p1;
606 |     
607 |     //double mu;
608 | 
609 |     //mu = p01/(p10+p01);
610 |     //p0 = 1.0-mu;
611 |     //p1 = mu;
612 |     
613 |     p00 = 1.0 - p01;
614 |     p11 = 1.0 - p10;
615 |     
616 |     mps = 0;
617 |         
618 |     if (most_probable_transition_pair(p01, p10) == P000_MAX) {
619 |         mps = 0;
620 |     } else if (most_probable_transition_pair(p01, p10) == P111_MAX) {
621 |         for (i=0; i<(bitwidth >> 1); i++) {
622 |             mps = mps << 2;
623 |             mps = mps + 3;
624 |         }
625 |     } else if (most_probable_transition_pair(p01, p10) == P010_MAX) {
626 |         for (i=0; i<((bitwidth-2) >> 1); i++) {
627 |             mps = mps << 2;
628 |             mps = mps + 1;
629 |         }
630 |         mps = mps << 2;
631 |         if (p01 > p00) {
632 |             mps = mps + 1;
633 |         } else {
634 |             mps = mps + 0;
635 |         }
636 |         
637 |     } else if (most_probable_transition_pair(p01, p10) == P101_MAX) {
638 |         for (i=0; i<((bitwidth-2) >> 1); i++) {
639 |             mps = mps << 2;
640 |             mps = mps + 2;
641 |         }
642 |         mps = mps << 2;
643 |         if (p11 > p10) {
644 |             mps = mps + 3;
645 |         } else {
646 |             mps = mps + 2;
647 |         }
648 |     } else {     // Equiprobable case, any value will do.
649 |         mps = 0;
650 |     }
651 |     return mps;
652 | }
653 | 
654 | uint64_t most_probable_symbol(double p01, double p10,int bitwidth) {
655 |     uint64_t mps;
656 |     
657 |     if ((bitwidth & 0x01)==0x01)
658 |         mps = most_probable_symbol_odd(p01,p10,bitwidth);
659 |     else
660 |         mps = most_probable_symbol_even(p01,p10,bitwidth);
661 |     
662 |     
663 |     if (verbose_mode>1) fprintf(stderr,"   MCV = 0x%" PRIx64 " \n",mps);
664 |     return mps;
665 |     
666 | }
667 | 
668 | double symbol_max_probability(double p01, double p10,int bitwidth,uint64_t *mcv) {
669 |     double mu;
670 |     double p00;
671 |     double p11;
672 |     double p0;
673 |     double p1;
674 |     uint64_t mps;
675 |     
676 |     double p_0mps;
677 |     double p_1mps;
678 |     double p_mps;
679 |     
680 |     int bits[65];
681 |     int i;
682 |     int j;
683 |     
684 |     for (i=0;i<65;i++) bits[i] = 0;
685 |     
686 |     mu = p01/(p10+p01);
687 |     p0 = 1.0-mu;
688 |     p1 = mu;
689 |     
690 |     p00 = 1.0 - p01;
691 |     p11 = 1.0 - p10;
692 |     
693 |     mps = most_probable_symbol(p01,p10,bitwidth);
694 |     *mcv = mps;
695 |     
696 |     // unpack the symbol bits into an array of bits
697 |     bits[0] = 0;   // first with x[-1]=0
698 |     for (i=0; i<bitwidth; i++) {
699 |         bits[i+1] = (mps >> (bitwidth-1-i)) & 0x01;
700 |     }
701 |     
702 |     if (verbose_mode>1) {
703 |         fprintf(stderr,"   unrolled bits 0 prefix = ");
704 |         for(j=0;j<(bitwidth+1);j++) {
705 |             fprintf(stderr,"%d",bits[j]);
706 |         }
707 |         fprintf(stderr,"\n");
708 |     }
709 |     
710 |     // Compute the symbol probability by going through the
711 |     // bits and multiplying the transition probabilities.
712 |     p_0mps = 1.0;
713 |     if (verbose_mode>1) fprintf(stderr,"   Prob = 1.0");
714 |     for (i=0;i<bitwidth; i++) {
715 |         if      ((bits[i]==0) && (bits[i+1]==0)) {
716 |             p_0mps = p_0mps * p00;
717 |             if (verbose_mode>1) fprintf(stderr, " * P00");
718 |         }
719 |         else if ((bits[i]==0) && (bits[i+1]==1)) {
720 |             p_0mps = p_0mps * p01;
721 |             if (verbose_mode>1) fprintf(stderr, " * P01");
722 |         }                     
723 |         else if ((bits[i]==1) && (bits[i+1]==0)) {
724 |             p_0mps = p_0mps * p10;
725 |             if (verbose_mode>1) fprintf(stderr, " * P10");
726 |         }
727 |         else if ((bits[i]==1) && (bits[i+1]==1)) {
728 |             p_0mps = p_0mps * p11;
729 |             if (verbose_mode>1) fprintf(stderr, " * P11");
730 |         }      
731 |     }    
732 |     if (verbose_mode>1) fprintf(stderr,"\n");
733 | 
734 |     
735 |     bits[0] = 1;   // then with x[-1]=1
736 |     
737 |     if (verbose_mode>1) {
738 |         fprintf(stderr,"   unrolled bits 1 prefix = ");
739 |         for(j=0;j<(bitwidth+1);j++) {
740 |             fprintf(stderr,"%d",bits[j]);
741 |         }
742 |         fprintf(stderr,"\n");
743 |     }
744 |     
745 |     p_1mps = 1.0;
746 |     if (verbose_mode>1) fprintf(stderr,"   Prob = 1.0");
747 |     for (i=0;i<bitwidth; i++) {
748 |         if      ((bits[i]==0) && (bits[i+1]==0)) {
749 |             p_1mps = p_1mps * p00;
750 |             if (verbose_mode>1) fprintf(stderr, " * P00");
751 |         }
752 |         else if ((bits[i]==0) && (bits[i+1]==1)) {
753 |             p_1mps = p_1mps * p01;
754 |             if (verbose_mode>1) fprintf(stderr, " * P01");
755 |         }                     
756 |         else if ((bits[i]==1) && (bits[i+1]==0)) {
757 |             p_1mps = p_1mps * p10;
758 |             if (verbose_mode>1) fprintf(stderr, " * P10");
759 |         }
760 |         else if ((bits[i]==1) && (bits[i+1]==1)) {
761 |             p_1mps = p_1mps * p11;
762 |             if (verbose_mode>1) fprintf(stderr, " * P11");
763 |         }      
764 |     }    
765 |     if (verbose_mode>1) fprintf(stderr,"\n");
766 |     
767 |     if (verbose_mode>1) {
768 |         fprintf(stderr,"   %sMCV BITS = ",KRED);
769 |         for (i=0; i<bitwidth;i++) {
770 |             fprintf(stderr,"%d",bits[i+1]);
771 |         }
772 |         fprintf(stderr,"%s\n",KWHT);
773 |     }
774 |         
775 |     p_mps = (p0 * p_0mps) + (p1 * p_1mps);
776 |     return p_mps;
777 | }
778 |     
779 |     
/*
 * Convert Markov transition probabilities into a per-bit entropy figure.
 *
 * Looks up the most probable bitwidth-bit symbol and its probability via
 * symbol_max_probability(), hands both back through *mcv and *mcv_prob,
 * and returns -log2(probability) divided by bitwidth.
 */
double p_to_entropy(double p01, double p10,int bitwidth, double *mcv_prob, uint64_t *mcv) {
    uint64_t top_symbol;
    double top_probability;

    top_probability = symbol_max_probability(p01, p10, bitwidth, &top_symbol);

    *mcv_prob = top_probability;
    *mcv = top_symbol;

    return -log2(top_probability)/bitwidth;
}
792 |     
/*
 * Returns 1 when y lies strictly inside the open interval
 * (x-epsilon, x+epsilon), otherwise 0.
 */
int near(double x,double y, double epsilon) {
    /* Written as negated comparisons so that a NaN operand yields 0. */
    if (!(y > x-epsilon)) return 0;
    if (!(y < x+epsilon)) return 0;
    return 1;
}
796 | 
797 | //void pick_point(double *p01, double *p10, double desired, double epsilon, int bitwidth, t_rngstate* rngstate) {
798 | //    int chosen_param;
799 | //    int chosen_side;
800 | //    int rand1;
801 | //    int rand2;
802 | //    double startpoint01;
803 | //    double startpoint10;
804 | //    double endpoint01;
805 | //    double endpoint10;
806 | //    double choice01;
807 | //    double choice10;
808 | //    double mcv_prob = -1.0;
809 | //    double Hc;
810 | //    uint64_t mcv;
811 | //    
812 | //    double edge_entropy;
813 | //    
814 | //    do {
815 | //        rand1 = getrand16(rngstate);
816 | //        rand2 = getrand16(rngstate);
817 | //        chosen_param = rand1 & 0x01;
818 | //        chosen_side = rand2 & 0x01;
819 | //        if (verbose_mode > 1) {
820 | //            fprintf(stderr,"      rand1         %04x\n", rand1);
821 | //            fprintf(stderr,"      rand2         %04x\n", rand2);
822 | //            fprintf(stderr,"      chosen_param  %04x\n", chosen_param);
823 | //            fprintf(stderr,"      chosen_side   %04x\n", chosen_side);
824 | //        }
825 | //        
826 | //        if (chosen_param==0) {
827 | //            *p01 = (double)chosen_side;
828 | //            *p10 = get_rand_double(rngstate);
829 | //        }
830 | //        else {
831 | //            *p10 = (double)chosen_side;
832 | //            *p01 = get_rand_double(rngstate);
833 | //        }
834 | //        edge_entropy=p_to_entropy(*p01, *p10, bitwidth, &mcv_prob, &mcv);
835 | //        
836 | //    } while (edge_entropy > desired);
837 | //    
838 | //    startpoint01 = 0.5;
839 | //    startpoint10 = 0.5;
840 | //    endpoint01 = *p01;
841 | //    endpoint10 = *p10;
842 | //    
843 | //    choice01 = (startpoint01 + endpoint01)/2.0;
844 | //    choice10 = (startpoint10 + endpoint10)/2.0;
845 | //    Hc = p_to_entropy(choice01, choice10, bitwidth, &mcv_prob, &mcv);
846 | //    
847 | //    if (verbose_mode > 1) {
848 | //    fprintf(stderr,"PICKING for entropy %f\n", desired);
849 | //    fprintf(stderr,"                bitwidth  %d\n", bitwidth);
850 | //    fprintf(stderr,"      first startpoint01  %f\n", startpoint01);
851 | //    fprintf(stderr,"      first startpoint10  %f\n", startpoint10);
852 | //    fprintf(stderr,"        first endpoint01  %f\n", endpoint01);
853 | //    fprintf(stderr,"        first endpoint10  %f\n", endpoint10);
854 | //    fprintf(stderr,"          first mid P01 = %f\n", choice01);
855 | //    fprintf(stderr,"          first mid P10 = %f\n", choice10);
856 | //    fprintf(stderr,"        start Hc    %f\n", Hc);
857 | //    }
858 | //    
859 | //    fflush(stdout);
860 | //
861 | //    while (!near(Hc, desired, epsilon)) {
862 | //        if (verbose_mode>1) fprintf(stderr,"WHILE ...\n");
863 | //        if (Hc > desired) {
864 | //            startpoint01 = choice01;
865 | //            startpoint10 = choice10;
866 | //        }
867 | //        else {
868 | //            endpoint01 = choice01;
869 | //            endpoint10 = choice10;
870 | //        }
871 | //        choice01 = (startpoint01 + endpoint01)/2.0;
872 | //        choice10 = (startpoint10 + endpoint10)/2.0;
873 | //        
874 | //        if (verbose_mode > 1) {
875 | //        fprintf(stderr,"          bitwidth  %d\n", bitwidth);       
876 | //        fprintf(stderr,"      startpoint01  %f\n", startpoint01);
877 | //        fprintf(stderr,"      startpoint10  %f\n", startpoint10);
878 | //        fprintf(stderr,"        endpoint01  %f\n", endpoint01);
879 | //        fprintf(stderr,"        endpoint10  %f\n", endpoint10);       
880 | //        fprintf(stderr,"   mid P01 = %f\n", choice01);
881 | //        fprintf(stderr,"   mid P10 = %f\n", choice10);
882 | //        }
883 | //        Hc = p_to_entropy(choice01,choice10,bitwidth,&mcv_prob, &mcv);
884 | //        if (verbose_mode > 1) {
885 | //            fprintf(stderr,"   Hc  = %f\n", Hc);
886 | //            fprintf(stderr,"   %sMCV Probability = %f%s\n",KCYN,mcv_prob,KWHT);
887 | //            fflush(stdout);
888 | //        }
889 | //    }
890 | //    
891 | //    if (verbose_mode >1) {
892 | //    fprintf(stderr," ** Chose P01 = %f\n", choice01);
893 | //    fprintf(stderr," ** Chose P10 = %f\n", choice10);
894 | //    }
895 | //    *p01 = choice01;
896 | //    *p10 = choice10;
897 | //    
898 | //}
899 | 
900 | 
901 | 


--------------------------------------------------------------------------------
/djent.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |     djent - A reimplementation of Fourmilab's ent with several improvements. 
   3 |     
   4 |     Copyright (C) 2017  David Johnston
   5 | 
   6 |     This program is free software; you can redistribute it and/or modify
   7 |     it under the terms of the GNU General Public License as published by
   8 |     the Free Software Foundation; either version 2 of the License, or
   9 |     (at your option) any later version.
  10 | 
  11 |     This program is distributed in the hope that it will be useful,
  12 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 |     GNU General Public License for more details.
  15 | 
  16 |     You should have received a copy of the GNU General Public License along
  17 |     with this program; if not, write to the Free Software Foundation, Inc.,
  18 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19 | 
  20 |     -----
  21 |     
  22 |     Contact. David Johnston dj@deadhat.com
  23 | */
  24 | 
  25 | /* 0 for no messages. */
  26 | #define DEBUG 10
  27 | 
  28 | #define __STDC_FORMAT_MACROS
  29 | #include <inttypes.h> 
  30 | #include <stdio.h>
  31 | #include <string.h>
  32 | #include <stdlib.h>
  33 | #include <sys/stat.h>
  34 | #include <math.h>
  35 | #include <stdint.h>
  36 | #include <string.h>
  37 | #include <ctype.h>
  38 | #include "markov2p.h"
  39 | #include "filename_parse.h"
  40 | #include "mathy_things.h"
  41 | #include "longest_run_cdf.h"
  42 | 
  43 | /* #include <regex.h>*/
  44 | 
  45 | 
  46 | #ifdef _WIN32
  47 | /* #include "vsdjent/stdafx.h" */
#include "ya_getopt/ya_getopt.h" /* NOTE: VS2015 does not have getopt. Put ya_getopt in the directory. From here https://github.com/kubo/ya_getopt */
  49 | #else
  50 | #include <unistd.h> 
  51 | #include <getopt.h>
  52 | #define  errno_t int
  53 | #endif
  54 | 
  55 | #define MAX_ERROR_MSG 0x1000
  56 | #define QUEUESIZE 4096
  57 | #define BUFFSIZE  2048
  58 | 
  59 | #ifndef M_PI
  60 | #define M_PI 3.1415926535897932384626
  61 | #endif
  62 | 
/* Read buffer: fill_byte_queue() freads file data into it and, in hex mode,
 * hex2bin() converts it from hex text to binary in place. */
unsigned char buffer[BUFFSIZE];
/* Staging buffer used by the word reverse path of fill_byte_queue(). */
unsigned char buffer2[BUFFSIZE+4];
/* Circular byte FIFO: filled by fill_byte_queue(), drained by get_symbol(). */
unsigned char queue[QUEUESIZE];
unsigned int queue_start;     /* FIFO read index: next byte get_symbol() takes */
unsigned int queue_end;       /* FIFO write index: next slot fill_byte_queue() writes */
size_t queue_size;            /* Number of bytes currently held in the FIFO */

/* Diagnostic verbosity. Code guarded by verbose_mode>1 traces to stderr. */
int verbose_mode =0;

/* Number of bytes currently staged in buffer2. */
unsigned int buffer2_size;

/* Bit extraction state used when pulling symbols off the FIFO.
 * All of these are reset by init_byte_queue(). */
unsigned int current_byte;            /* Byte most recently taken from the queue */
unsigned int bits_used_from_byte;     /* Zeroed when a fresh byte is fetched */
unsigned int got_byte;                /* 1 when current_byte holds a fetched byte */
int64_t      current_symbol;          /* Symbol being assembled by get_symbol() */
unsigned int bits_in_current_symbol;
/* NOTE(review): most of the globals below are used outside this section of
 * the file. Comments marked "presumably" are inferred from names and the
 * help text only and should be confirmed against the code that uses them. */
int outcount;
/* Presumably working storage for the lag-N serial correlation - TODO confirm. */
uint64_t scc_fifo[256];
uint64_t scc_first_lagn[256];

uint64_t symbol_count;
uint64_t mean_total;

/* Presumably the -t, -e and -s command line flags - TODO confirm. */
int terse;
int ent_exact;
int suppress_header;
/* Running count of data bytes; fill_byte_queue() adds to it as data is queued. */
uint64_t filebytes;

/* Presumably the fields extracted from the filename by -p - TODO confirm. */
double voltage;
double temperature;
unsigned char deviceid[256];
unsigned char process[256];
unsigned char processing[256];

int opt;
/* Symbol size in bits; init_byte_queue() derives symbol_mask from it. */
unsigned int symbol_length;
/* 1 = input is hex text; fill_byte_queue() then runs hex2bin() on each read. */
int hexmode;
int print_occurrence;
int print_longest;
/* 1 = fill_byte_queue() passes every byte through tolower(). */
int fold;
int lagn;
/* 1 = fill_byte_queue() maps bytes through byte_reverse_table. */
int byte_reverse;
/* 1 = fill_byte_queue() reverses the byte order of each 4 byte word. */
int word_reverse;
int parse_filename;

int use_stdin;
char *filename;
FILE *fp;
int terse_index;
int not_eof;
int64_t symbol;
double markov_entropy;

char inputlistfilename[256];
int using_inputlistfile;

double ent;

/* Presumably the symbol frequency table described in the help text - TODO confirm. */
uint64_t occurrence_size;
uint64_t *occurrence_count;
uint64_t occurrence_total;
int no_occurrence_space;

/* Presumably the longest-run statistics printed by -C - TODO confirm. */
uint64_t longest_size;
uint64_t longest_position;
uint64_t longest_new_pos;
uint64_t longest_byte_pos;
uint64_t *longest_count = (uint64_t*)0;
uint64_t longest_total;
int no_longest_space;

uint64_t longest_last_symbol;
uint64_t longest_run;
uint64_t longest_longest;
uint64_t longest_longest_symbol;

double chisq;
double chisq_sum;
double *chisq_prob;

/* Presumably state for the Monte Carlo pi estimate fed by
 * update_monte_carlo() - TODO confirm. */
uint64_t mp;
uint64_t monty_total_count;
uint64_t monty_inside_count;

double radiussquared;
double position_x;
double position_y;
double montepi;
uint64_t monte[6];


uint64_t count1;
uint64_t count0;

uint64_t count00;
uint64_t count01;
uint64_t count10;
uint64_t count11;

/* Set by init_byte_queue() to 2^symbol_length - 1. */
uint64_t symbol_mask;

uint64_t t1;
uint64_t t2;
uint64_t t3;
    
int scc_first;
uint64_t first_symbol;
uint64_t scc_previous;
uint64_t scc_count;
int scc_wrap;

uint64_t aeqb_count;
uint64_t mean_count;
double   other_scc;

/* Presumably the final per-file results that get reported - TODO confirm. */
double    result_mean;
uint64_t result_chisq_count;
double   result_chisq_distribution;
double   result_chisq_percent;
double  result_entropy;
double  result_min_entropy;
uint32_t result_min_entropy_symbol;
double  result_pi;
double  result_pierr;
double  result_compression;
double  result_scc;
double  result_p01;
double  result_p10;
double  result_longest_pvalue;
 192 | 
/*
 * Bit reversal lookup table: entry b holds the value of byte b with its
 * eight bits in reverse order (e.g. 0x01 -> 0x80, 0x02 -> 0x40).
 * fill_byte_queue() indexes it when byte_reverse is 1.
 */
const unsigned char byte_reverse_table[] = {
  0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0, 
  0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8, 
  0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4, 
  0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC, 
  0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2, 
  0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
  0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6, 
  0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
  0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
  0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9, 
  0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
  0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
  0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3, 
  0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
  0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7, 
  0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF
};
 211 | 
 212 | 
 213 | void update_monte_carlo(unsigned char symbol);
 214 | 
 215 | void display_usage() {
 216 |     fprintf(stderr, "Usage: djent [-brRpcCuhds] [-l <n>] [-i <input file list filename>] [filename] [filename2] ...\n");
 217 |     fprintf(stderr, "\n");
 218 |     fprintf(stderr, "Compute statistics of random data.\n");
 219 |     fprintf(stderr, "  Author: David Johnston, dj@deadhat.com\n");
 220 |     fprintf(stderr, "\n");
 221 | 
 222 |     fprintf(stderr, "  -i <filename>  --inputfilelist=<filename> Read list of filenames from <filename>\n");
 223 |     fprintf(stderr, "  -p             --parse_filename           Extract CID, Process, Voltage and Temperature from filename.\n");
 224 |     fprintf(stderr, "                                            The values will be included in the output.\n");
 225 |     fprintf(stderr, "  -l <n>         --symbol_length=<n>        Treat incoming data symbols as bitlength n. Default is 8.\n");
 226 |     fprintf(stderr, "  -b             --binary                   Treat incoming data as binary. Default bit length will be -l 1\n");
 227 |     fprintf(stderr, "  -r             --byte_reverse             Reverse the bit order in incoming bytes\n");
 228 |     fprintf(stderr, "  -R             --word_reverse             Reverse the byte order in incoming 4 byte words\n");
 229 |     fprintf(stderr, "  -c             --occurrence               Print symbol occurrence counts\n");
 230 |     fprintf(stderr, "  -C             --longest                  Print symbol longest run counts\n");
 231 |     fprintf(stderr, "  -w             --scc_wrap                 Treat data as cyclical in SCC\n");
 232 |     fprintf(stderr, "  -n <n>         --lagn=<n>                 Lag gap in SCC. Default=1\n");
 233 |     fprintf(stderr, "  -S <n>         --skip=<n>                 Skip over <n> initial symbols\n");
 234 |     fprintf(stderr, "  -L <n>         --substring=<n>            Analyse no more that <n> symbols\n");
 235 |     fprintf(stderr, "  -f             --fold                     Fold uppercase letters to lower case\n");
 236 |     fprintf(stderr, "  -t             --terse                    Terse output\n");
 237 |     fprintf(stderr, "  -e             --ent_exact                Exactly match output format of ent\n");
 238 |     fprintf(stderr, "  -s             --suppress_header          Suppress the header in terse output\n");
 239 |     fprintf(stderr, "  -h or -u       --help                     Print this text\n");
 240 | 
 241 |     fprintf(stderr, "\n Notes\n");
 242 |     fprintf(stderr,   "   * By default djent is in hex mode where it reads ascii hex data and converts it to binary to analyze.\n");
 243 |     fprintf(stderr,   "     In hex mode, the symbol length defaults to 8, so normal hex files can be treated as a representation\n");
 244 |     fprintf(stderr,   "     of bytes. The symbol length can be changed to any value between 1 and 32 bits using the -l <n> option.\n");
 245 |     fprintf(stderr,   "   * With the -b option djent switches to binary reads in each byte as binary with a symbol length of 1.\n");
 246 |     fprintf(stderr,   "   * To analyze ascii text instead of hex ascii, you need djent to treat each byte as a separate symbol, so\n");
 247 |     fprintf(stderr,   "     use binary mode with a symbol length of 8. I.E. djent -b -l 8 <filename>\n");
 248 |     fprintf(stderr,   "   * By default djent treats the MSB of each byte as the first. This can be switched so that djent treats\n");
 249 |     fprintf(stderr,   "     the LSB as the first bit in each byte using the -r option.\n");
 250 |     fprintf(stderr,   "   * Terse output is requested using -t. This outputs in CSV format. The first line is the header. If\n");
 251 |     fprintf(stderr,   "     multiple files are provided, there will be one line of CSV output per file in addition to the header.\n");
 252 |     fprintf(stderr,   "     The CSV header can be suppressed with -s.\n");
 253 |     fprintf(stderr,   "   * To analyze multiple files, just give multiple file names on the command line. To read data in from\n");
 254 |     fprintf(stderr,   "     the command line, don't provide a filename and pipe the data in. <datasource> | djent\n");
 255 |     fprintf(stderr,   "   * The parse filename option =p picks takes four patterns from the filename to include in the output,\n");
 256 |     fprintf(stderr,   "     This is so that it is easy to plot test conditions that are commonly encoded in a filename.\n");
 257 |     fprintf(stderr,   "     Fields are delimited by uderscores. The four patters for CID, process, Voltage and Temperature are:\n");
 258 |     fprintf(stderr,   "     _CID-<componentID>_ , _PROC-<process info>_, _<x>p<y>V_ and _<x>p<y>C_ . 'p' is the decimal point.\n");
 259 |     fprintf(stderr,   "   * To compute the statistics, djent builds a frequency table of the symbols. This can be displayed\n");
 260 |     fprintf(stderr,   "     using the -c option. The size of this table is what limits the the maximum symbol size. For each\n");
 261 |     fprintf(stderr,   "     of the 2^n symbols, a 64 bit entry in a table is created. So for n=32, that's 32GBytes so the ability\n");
 262 |     fprintf(stderr,   "     to handle large symbol sizes is limited by the available memory and the per process allocation limit.\n");
 263 |     fprintf(stderr,   "   * The serial correlation coefficient is not wrap around by default, meaning that it does not compare\n");
 264 |     fprintf(stderr,   "     the last value in the data with the first. To get wrap around behaviour, use the -w option.\n");
 265 |     fprintf(stderr,   "   * The Lag-N correlation coefficient can be computed by using the -n <n> option. This causes the SCC\n");
 266 |     fprintf(stderr,   "     computation to compare each Xth symbol with the (X+n)th symbol instead of the (X+1)th symbol.\n");
 267 |     fprintf(stderr,   "     If you use wrap around with Lag-N, then the wrap around will reach n bits further into the start\n");
 268 |     fprintf(stderr,   "     of the sequence.\n");
 269 |     fprintf(stderr,   "   * The byte reverse option -r reverses the order of bits within each byte. The word reverse option -R\n");    
 270 |     fprintf(stderr,   "     reverses the order of bytes within each 32 bit word, from 3,2,1,0 to 0,1,2,3. Both -R and -r can\n");    
 271 |     fprintf(stderr,   "     be used together. Using -R with a data that isn't a multiple of 32 bits long will get padded with\n");    
 272 |     fprintf(stderr,   "     zeros, which may not be what you want. A padding warning will be sent to STDERR.\n");    
 273 |     fprintf(stderr,   "   * Instead of providing data file names on the command line, djent can be told to read a list of files\n");
 274 |     fprintf(stderr,   "     from a text file. The file must have one filename per line. Lines beginning with # will be ignored.\n");
 275 |     fprintf(stderr,   "     Use the -i <filename> option to request that djent reads the file list from <filename>.\n");
 276 | 
 277 |     fprintf(stderr, "\n Examples\n");
 278 |     fprintf(stderr,   "   Print this help\n");
 279 |     fprintf(stderr,   "     djent -h\n\n");
 280 |     fprintf(stderr,   "   Analyze hex file from stdin\n");
 281 |     fprintf(stderr,   "     cat datafile.hex | djent\n\n");
 282 |     fprintf(stderr,   "   Analyze binary file\n");
 283 |     fprintf(stderr,   "     djent -b datafile.bin\n\n");
 284 |     fprintf(stderr,   "   Analyze several files with CSV output\n");
 285 |     fprintf(stderr,   "     djent -t data1.hex data2.hex data3.hex\n\n");
 286 |     fprintf(stderr,   "   Analyze ascii symbols - Read in binary and set symbol size to 8.\n");
 287 |     fprintf(stderr,   "     djent -b -l 8  textfile.txt\n\n");
 288 |     fprintf(stderr,   "   Analyze binary file with parsable filename.\n");
 289 |     fprintf(stderr,   "     djent -b -t -p  rawdata_CID-X23_PROC-TTFT_1p2V_25p0C_.bin\n");
 290 | 
 291 | }
 292 | 
 293 | int count_lines_in_file(char *filename) {
 294 |      FILE *fp = fopen(filename,"r");
 295 |      int ch=0;
 296 |      int lines=0;
 297 | 
 298 |      if (fp == NULL) return 0;
 299 |      lines++;
 300 |      while ((ch = fgetc(fp)) != EOF) {
 301 |          if ((char)ch == '\n') lines++;
 302 |      }
 303 |      fclose(fp);
 304 |      return lines;
 305 | }
 306 | 
 307 | 
 308 | /* The queue
 309 |  *
 310 |  * This implements a FIFO into which bytes are pushed from a file and 
 311 |  * from which symbols (of the chosen size) are pulled from the other end.
 312 |  * Data from the file is read into buffer and that data is used to fill the
 313 |  * input side of the queue. The queue is twice as big as the buffer so the 
 314 |  * buffer read is done when the queue is less than half full.
 315 |  * It treats bits within bytes as big endian (I.E. MSB arrived first from ES).
 316 |  * There will be an option to switch to little endian at some point.
 317 |  */
 318 |  
 319 |  
 320 | void init_byte_queue() {
 321 |     int i;
 322 |     
 323 |     /* printf("Init Byte Queue\n"); */
 324 |     queue_start = 0;
 325 |     queue_end = 0;
 326 |     queue_size = 0;
 327 |     
 328 |     got_byte = 0;
 329 |     current_byte = 0;
 330 |     bits_used_from_byte = 0;
 331 |     current_symbol = 0;
 332 |     bits_in_current_symbol = 0;
 333 |     
 334 |     for (i=0;i<QUEUESIZE;i++) queue[i]=0;
 335 |     
 336 |     symbol_mask = ipow((uint64_t)2,(uint64_t)symbol_length)-1;
 337 | }
 338 | 
 339 | int ishex(unsigned char c) {
 340 |     int result;
 341 |     result = 0;
 342 |     if      (c == '0') result = 1;
 343 |     else if (c == '1') result = 1;
 344 |     else if (c == '2') result = 1;
 345 |     else if (c == '3') result = 1;
 346 |     else if (c == '4') result = 1;
 347 |     else if (c == '5') result = 1;
 348 |     else if (c == '6') result = 1;
 349 |     else if (c == '7') result = 1;
 350 |     else if (c == '8') result = 1;
 351 |     else if (c == '9') result = 1;
 352 |     else if (c == 'a') result = 1;
 353 |     else if (c == 'b') result = 1;
 354 |     else if (c == 'c') result = 1;
 355 |     else if (c == 'd') result = 1;
 356 |     else if (c == 'e') result = 1;
 357 |     else if (c == 'f') result = 1;
 358 |     else if (c == 'A') result = 1;
 359 |     else if (c == 'B') result = 1;
 360 |     else if (c == 'C') result = 1;
 361 |     else if (c == 'D') result = 1;
 362 |     else if (c == 'E') result = 1;
 363 |     else if (c == 'F') result = 1;
 364 | 
 365 |     return result;
 366 | }
 367 | 
 368 | int ishexorx(unsigned char c) {
 369 |     int result;
 370 |     result = 0;
 371 |     if (ishex(c) == 1) result = 1;
 372 |     else if (c == 'x') result = 1;
 373 | 
 374 |     return result;
 375 | }
 376 | 
/* The two hex characters currently being collected by hex2bin(). */
unsigned char hexpair[2];

/* hex2bin() state machine state, kept between calls:
 * 0 = waiting for the first character of a pair,
 * 1 = have the first character, waiting for the second,
 * 2 = have both characters. */
int hexstate;
 380 | 
 381 | void init_hex2bin() {
 382 |     hexpair[0]=0x00;
 383 |     hexpair[1]=0x00;
 384 |     
 385 |     hexstate = 0;
 386 | }
 387 | 
 388 | /* 
 389 |  * This converts input hex text to binary. It uses a little
 390 |  * state machine to pull in 2 characters then convert them
 391 |  * to a byte. The state machine state is maintained across
 392 |  * calls so we do not lose values at read buffer boundaries.
 393 |  *
 394 |  * In the processing of the second character, an x will be
 395 |  * accepted if the first character is 0, so we get '0x'.
 396 |  * This allows the 0x prefixes to be eliminated without
 397 |  * accidentally treating the 0 as part of the data.
 398 |  */
 399 |  
 400 | size_t hex2bin(unsigned char *buffer, size_t len) {
 401 |     int outpos = 0;
 402 |     int scanpos = 0;
 403 |     unsigned char c;
 404 |     int byte;
 405 |     int nybble;
 406 |     /* Fetch characters until we get a hex 1.
 407 |      * Shift it into hexpair
 408 |      * If we have a valid hex pair put it in the buffer as binary
 409 |      * If we have 0x, drop it, including the 0.
 410 |      * If we have non hex, drop it.
 411 |      */
 412 | 
 413 |     do {
 414 |         if (hexstate == 0) {
 415 |             c = buffer[scanpos];
 416 |             if (ishex(c) == 1) {
 417 |                 hexpair[0] = c;
 418 |                 hexstate = 1;
 419 |             }
 420 |             scanpos++;
 421 |         }
 422 |         else if (hexstate == 1) {
 423 |             c = buffer[scanpos];
 424 |             if (((ishexorx(c) == 1) && (hexpair[0]=='0')) || (ishex(c)==1)){
 425 |                 hexpair[1] = c;
 426 |                 hexstate = 2;
 427 |             }
 428 |             scanpos++;
 429 |         }
 430 |         else if (hexstate == 2) {
 431 |             if ((hexpair[0]=='0') && (hexpair[1]=='x')) {
 432 |                 hexstate = 0;
 433 |             }
 434 |             else { /* we have a valid hex pair */
 435 |                 nybble = 0;                
 436 |                 if ((((int)hexpair[0])>47) && (((int)hexpair[0])<58)){ /* 0-9 */
 437 |                     nybble = (int)hexpair[0] - 48;
 438 |                 }
 439 |                 else if ((((int)hexpair[0])>64) && (((int)hexpair[0])<71)){ /* A-F */
 440 |                     nybble = (int)hexpair[0] - 55;
 441 |                 }
 442 |                 else if ((((int)hexpair[0])>96) && (((int)hexpair[0])<103)){ /* a-f */
 443 |                     nybble = (int)hexpair[0] - 87;
 444 |                 }
 445 | 
 446 |                 nybble = nybble << 4;
 447 | 
 448 |                 if ((((int)hexpair[1])>47) && (((int)hexpair[1])<58)){ /* 0-9 */
 449 |                     byte = nybble + (int)hexpair[1] - 48;
 450 |                 }
 451 |                 else if ((((int)hexpair[1])>64) && (((int)hexpair[1])<71)){ /* A-F */
 452 |                     byte = nybble + (int)hexpair[1] - 55;
 453 |                 }
 454 |                 else if ((((int)hexpair[1])>96) && (((int)hexpair[1])<103)){ /* a-f */
 455 |                     byte = nybble + (int)hexpair[1] - 87;
 456 |                 }
 457 | 
 458 |                 buffer[outpos++] = (unsigned char)byte;
 459 |                 hexstate = 0;
 460 |             }
 461 |         }              
 462 |     } while (scanpos <= len);
 463 | 
 464 |     return outpos; /* return the number of bytes converted */
 465 | }
 466 | 
 467 | size_t fill_byte_queue(FILE *fp) {
 468 |     size_t len;
 469 |     size_t space;
 470 |     unsigned int i;
 471 |     unsigned int j;
 472 |     size_t total_len;
 473 |     // unsigned int buff2_remaining;
 474 | 
 475 |     total_len = 0;
 476 |     /* Pull in a loop until there is less than BUFFSIZE space in thequeue */
 477 |     do {
 478 |         space = QUEUESIZE-queue_size; /* Dont pull more data than needed */
 479 |         if (space > BUFFSIZE) space = BUFFSIZE;
 480 |         
 481 |         /* ("  queue: space=%d\n",space); */
 482 |         len = fread(buffer, (size_t)1,(size_t)space, fp);
 483 |         if (len==0) {
 484 |             /* printf("  queue: len = %d\n",len); */
 485 |             return total_len;
 486 |         }
 487 | 
 488 |         /* Convert hex buffer to binary if we are in hex mode */
 489 |         if (hexmode == 1) len = hex2bin(buffer,len); 
 490 |         
 491 |         /* Fold upper case to lower */
 492 |         if (fold==1) {
 493 |             for (i=0;i<len;i++) {
 494 |                 buffer[i]=tolower(buffer[i]);
 495 |             }
 496 |         }
 497 | 
 498 |         /* If we aren't doing word reverse, just move the buffer to the queue */ 
 499 |         if (word_reverse == 0) {
 500 |             /*  Transfer buffer to queue */
 501 |             for (i=0;i<len;i++) {
 502 |                 if (byte_reverse == 1) {
 503 |                     queue[(queue_end+i) % QUEUESIZE] = byte_reverse_table[buffer[i]];
 504 |                 } else {
 505 |                     queue[(queue_end+i) % QUEUESIZE] = buffer[i];
 506 |                 }
 507 |                 
 508 |                 /* Call the monte carlo update that operated over bytes, not symbols */
 509 |                 update_monte_carlo(buffer[i]);
 510 |             }
 511 |             
 512 |             filebytes += len;
 513 |             
 514 |             queue_size += len;
 515 |             queue_end = ((queue_end + len) % QUEUESIZE);
 516 | 
 517 |             total_len += len;
 518 |         } else {  /* word_reverse == 1  so use the word reverse buffer */
 519 |             
 520 |             /* Transfer buffer to word reverse queue */
 521 |             /*printf(" Transfer buffer to word reverse queue \n");*/
 522 |             for (i=0;i<len;i++) {
 523 |                 buffer2[buffer2_size++]=buffer[i];
 524 |             }
 525 | 
 526 |             i = 0;
 527 |             do {
 528 |                 /* printf( "    i = %d,  buffer2_size = %d \n",i,buffer2_size);*/
 529 |                 for (j=0;j<4;j++) {
 530 |                     /* printf( "    j = %d\n",j); */
 531 |                     if (byte_reverse == 1) {
 532 |                         queue[(queue_end+j) % QUEUESIZE] = byte_reverse_table[buffer2[(i*4)+(3-j)]];
 533 |                         update_monte_carlo(byte_reverse_table[buffer2[(i*4)+(3-j)]]);
 534 |                     } else {
 535 |                         queue[(queue_end+j) % QUEUESIZE] = buffer2[(i*4)+(3-j)];
 536 |                         update_monte_carlo(buffer2[(i*4)+j]);
 537 |                     }
 538 |                 }
 539 |                 //buff2_remaining -= 4;
 540 |                 buffer2_size -= 4;
 541 |                 queue_size += 4;
 542 |                 queue_end = (queue_end+4) % QUEUESIZE;
 543 |                 total_len += 4;
 544 |                 i++;
 545 |             } while (buffer2_size>3);
 546 | 
 547 |             /* Pad any leftover */
 548 |             if (buffer2_size != 0) {
 549 |                 for (j=0;j<4;j++) {
 550 |                     if (j < buffer2_size) {
 551 |                         queue[(queue_end+j) % QUEUESIZE] = 0x00;
 552 |                         update_monte_carlo(0x00);
 553 |                     } else {
 554 |                         if (byte_reverse == 1) {
 555 |                             queue[(queue_end+j) % QUEUESIZE] = byte_reverse_table[buffer2[(i*4)+(3-j)]];
 556 |                             update_monte_carlo(byte_reverse_table[buffer2[(i*4)+(3-j)]]);
 557 |                         } else {
 558 |                             queue[(queue_end+j) % QUEUESIZE] = buffer2[(i*4)+(3-j)];
 559 |                             update_monte_carlo(buffer2[(i*4)+(3-j)]);
 560 |                         }
 561 |                     }
 562 |                 }
 563 |                 fprintf(stderr,"Warning: Padded %d extra zeroes to make 4 byte boundary for word reverse\n",buffer2_size);
 564 |             }
 565 |             buffer2_size = 0;
 566 |         }
 567 |     } while ((QUEUESIZE-queue_size) > BUFFSIZE);
 568 |     return total_len;
 569 | }
 570 | 
 571 | /* pull symbol length bits off the start of the queue */
 572 | int64_t get_symbol(uint64_t symbol_length) {
 573 | 
 574 |     unsigned int temp;
 575 |     
 576 |     current_symbol = 0;
 577 |     
 578 |     /* Get a byte if we don't have one */
 579 |     if (got_byte == 0) {
 580 |         if (((queue_size*8) < symbol_length)) return -1; /* Uh oh. Empty */
 581 |         
 582 |         current_byte = queue[queue_start];
 583 |         queue_start = (queue_start+1) % QUEUESIZE;
 584 |         queue_size -= 1;
 585 |         bits_used_from_byte = 0;
 586 |         got_byte=1;
 587 |     }
 588 |     
 589 |     /* Move bits from current byte pulled from queue to current symbol */
 590 |     if (symbol_length == 1) { /* Optimize for the single bit size case */
 591 |         current_symbol = (current_byte & 0x80) >> 7;
 592 |         current_byte <<= 1;
 593 |         bits_used_from_byte++;
 594 |         if (bits_used_from_byte == 8) {
 595 |             got_byte = 0;
 596 |             bits_used_from_byte = 0;
 597 |         }
 598 |         return current_symbol;
 599 |     } else if (symbol_length == 8) { /* optimize for the byte size case */
 600 |         current_symbol = current_byte;
 601 |         got_byte = 0;
 602 |         return current_symbol;
 603 |     } else {  /* Symbol Length != 8 or 1, do it bit by bit */
 604 |               /* Later maybe optimize when > 7 bits needed */
 605 |         /* Take upper symbol_length bits */
 606 |         bits_in_current_symbol = 0;
 607 |         do {
 608 |             temp = (current_byte & 0x80) >> 7;
 609 |             current_byte = (current_byte << 1) & 0xff;
 610 |             bits_used_from_byte++;
 611 |             if (bits_used_from_byte == 8) {
 612 |                 got_byte = 0;
 613 |                 bits_used_from_byte = 0;
 614 |             }
 615 |             current_symbol = ((current_symbol << 1) | temp) & symbol_mask;
 616 |             bits_in_current_symbol++;
 617 |             
 618 |             /* fetch a new byte from queue if we aren't done yet */
 619 |             if (got_byte == 0) {
 620 |                 if (((queue_size*8) < symbol_length)) return -1; /* Uh oh. Empty */
 621 |         
 622 |                 current_byte = queue[queue_start];
 623 |                 queue_start = (queue_start+1) % QUEUESIZE;
 624 |                 queue_size -= 1;
 625 |                 bits_used_from_byte = 0;
 626 |                 got_byte=1;
 627 |             }
 628 |         } while (bits_in_current_symbol < symbol_length);
 629 |         return current_symbol;
 630 |     }
 631 | }
 632 | 
 633 | 
 634 | /* The initialize routines for the various metrics */
 635 | 
 636 | void init_mean() {
 637 |     outcount = 0;
 638 |     mean_total = 0;
 639 | };
 640 | 
 641 | void init_entropy() {
 642 |     ent = 0.0;
 643 | };
 644 | 
 645 | void init_occurrences() {
 646 |     uint64_t i;
 647 |     
 648 |     no_occurrence_space = 0;
 649 |     
 650 |     occurrence_total = 0;
 651 |     if (symbol_length > 32) {
 652 |         fprintf(stderr,"Error, symbol length cannot be longer than 32 bits for occurrence count table\n");
 653 |         exit(1);
 654 |     }
 655 |     occurrence_size = ipow(2,symbol_length);
 656 |     fflush(stdout);
 657 |     occurrence_count = (uint64_t *) malloc (sizeof(uint64_t)*occurrence_size);
 658 |     /* printf("mallocating %lld bytes\n", (sizeof(uint64_t)*occurrence_size));
 659 |      */
 660 |     if (occurrence_count == NULL) {
 661 |         #ifdef _WIN32
 662 |         fprintf(stderr,"Warning, unable to allocate %lld bytes of memory for the occurrence count\n",(sizeof(uint64_t)*occurrence_size));
 663 |         #elif __llvm__
 664 |         fprintf(stderr,"Warning, unable to allocate %lld bytes of memory for the occurrence count\n",(sizeof(uint64_t)*occurrence_size));
 665 |         #elif __linux__
 666 |         fprintf(stderr,"Warning, unable to allocate %ld bytes of memory for the occurrence count\n",(sizeof(uint64_t)*occurrence_size));
 667 |         #endif
 668 |         no_occurrence_space = 1;
 669 |     }
 670 | 
 671 |     for (i=0;i<occurrence_size;i++) occurrence_count[i] = 0;
 672 | };
 673 | 
 674 | void init_longest() {
 675 |     uint64_t i;
 676 |     
 677 |     no_longest_space = 0;
 678 |     
 679 |     longest_total = 0;
 680 |     longest_position = 0;
 681 |     if (symbol_length > 32) {
 682 |         fprintf(stderr,"Error, symbol length cannot be longer than 32 bits for longest count table\n");
 683 |         exit(1);
 684 |     }
 685 |     longest_size = ipow(2,symbol_length);
 686 |     fflush(stdout);
 687 | 
 688 |     longest_count = (uint64_t *) malloc (sizeof(uint64_t)*longest_size);
 689 |     /* printf("mallocating %lld bytes\n", (sizeof(uint64_t)*occurrence_size));
 690 |      */
 691 |     if (longest_count == NULL) {
 692 |         #ifdef _WIN32
 693 |         fprintf(stderr,"Warning, unable to allocate %lld bytes of memory for the longest run table\n",(sizeof(uint64_t)*longest_size));
 694 |         #elif __llvm__
 695 |         fprintf(stderr,"Warning, unable to allocate %lld bytes of memory for the longest run table\n",(sizeof(uint64_t)*longest_size));
 696 |         #elif __linux__
 697 |         fprintf(stderr,"Warning, unable to allocate %ld bytes of memory for the longest run table\n",(sizeof(uint64_t)*longest_size));
 698 |         #endif
 699 |         no_longest_space = 1;
 700 |     } else {
 701 |         for (i=0;i<longest_size;i++) longest_count[i] = 0;
 702 |     }
 703 | 
 704 |     longest_last_symbol=0;
 705 |     longest_run=0;
 706 |     longest_longest=0;
 707 |     longest_longest_symbol=0;
 708 | 
 709 |     longest_new_pos=0;
 710 |     longest_byte_pos=0;
 711 | 
 712 | };
 713 | 
 714 | void init_chisq() {
 715 |     int i;
 716 |     chisq = 0.0;
 717 |     chisq_prob = (double *) malloc (sizeof(double)*occurrence_size);
 718 |     /* printf("mallocating %lld bytes for chisq probability table\n", (sizeof(double)*occurrence_size));
 719 |     */
 720 |     if (chisq_prob == NULL) {
 721 |         exit(1);
 722 |     }
 723 |     for (i=0;i<occurrence_size;i++) chisq_prob[i] = 0.0;
 724 | };
 725 | 
 726 | void init_filesize() {
 727 |     /* Nothing to do here */
 728 | };
 729 | 
 730 | void init_monte_carlo() {
 731 |     mp = 0;
 732 |     monty_total_count = 0;
 733 |     monty_inside_count = 0;
 734 |     radiussquared = (256.0 * 256.0 * 256.0) - 1;
 735 |     radiussquared = radiussquared*radiussquared;
 736 | 
 737 | };
 738 | 
 739 | void init_compression() {
 740 |     /* nothing to do here */
 741 | };
 742 | 
 743 | void init_otherscc() {
 744 |     aeqb_count = 0;
 745 |     mean_count = 0;
 746 | }
 747 | 
 748 | void init_scc() {
 749 |     t1 = 0;
 750 |     t2 = 0;
 751 |     t3 = 0;
 752 |     
 753 |     scc_first = 1;
 754 |     scc_previous = 0;
 755 |     scc_count = 0;
 756 |     first_symbol = 0;
 757 |     
 758 |     count00=0;
 759 |     count01=0;
 760 |     count10=0;
 761 |     count11=0;
 762 | };           
 763 |                 
 764 | /* The update routines for the various metrics */
 765 |         
 766 | void update_mean(uint64_t symbol) {
 767 |     mean_total += symbol;
 768 |     /* printf(" mean_total = %lld,  count=%lld\n",mean_total,symbol_count); */
 769 | };
 770 | 
 771 | void update_entropy(uint64_t symbol) {
 772 |     /* nothin to do here */
 773 | };
 774 | 
 775 | void update_occurrences(uint64_t symbol) {
 776 |     occurrence_count[symbol]++;
 777 |     occurrence_total++;
 778 | };
 779 | 
 780 | void update_longest(uint64_t symbol, uint64_t symbol_pos) {
 781 |     uint64_t symbol_byte_pos;
 782 | 
 783 |     symbol_byte_pos = (symbol_length * symbol_pos)/8;
 784 |     
 785 |     if (symbol == longest_last_symbol) {
 786 |         longest_run++;
 787 |         if (longest_run > longest_count[symbol]) {
 788 |             longest_count[symbol] = longest_run;
 789 |         }
 790 |         if (longest_run > longest_longest) {
 791 |             longest_longest = longest_run;
 792 |             longest_longest_symbol = symbol;
 793 |             longest_position = longest_new_pos;
 794 |         }
 795 |     } else {
 796 |         longest_run=1;
 797 |         longest_last_symbol=symbol;
 798 |         longest_new_pos = symbol_byte_pos; 
 799 |     }
 800 | 
 801 | }
 802 | 
 803 | void update_chisq(uint64_t symbol) {
 804 |     /* Nothing to do here */
 805 | };
 806 | 
 807 | void update_filesize(uint64_t symbol) {
 808 |     /* Nothing to do here */
 809 | };
 810 | 
 811 | void update_monte_carlo(unsigned char symbol) {
 812 |     int mj;
 813 | 
 814 |     monte[mp++] = symbol;
 815 | 
 816 |     if (mp > 5) {
 817 |         mp = 0;
 818 |         monty_total_count++;
 819 |         position_x = 0;
 820 |         position_y = 0;
 821 |         for (mj = 0; mj < 3; mj++) {
 822 |             position_x = (position_x * 256.0) + monte[mj];
 823 |             position_y = (position_y * 256.0) + monte[3 + mj];
 824 |         }
 825 |         if (((position_x * position_x) + (position_y *  position_y)) <= radiussquared) {
 826 |             monty_inside_count++;
 827 |         }
 828 |     }
 829 | };
 830 | 
 831 | void update_compression(uint64_t symbol) {
 832 |  /* nothing to do here */
 833 | };
 834 | 
 835 | void update_scc(uint64_t symbol) {
 836 |     int i;
 837 |     if (lagn==1) {
 838 |         /* We need lagn+1 symbols to start, so skip the first symbol(s) */
 839 |         scc_count++;
 840 |     
 841 |         if (scc_first==1) {
 842 |             scc_first = 0;
 843 |             first_symbol = symbol;
 844 |         } else {
 845 |             t1 += (scc_previous * symbol);
 846 |             if (scc_previous == symbol) aeqb_count += 1; // Other SCC
 847 |         }
 848 |         mean_count += symbol; //Other SCC
 849 |         t2 += symbol*symbol;
 850 |         t3 += symbol;
 851 |         
 852 |     
 853 |         /* printf("symbol %02X, count=%llu t1= %llX,  t2= %llx, t3= %llx\n",symbol,scc_count,t1,t2,t3); */
 854 |         scc_previous = symbol;
 855 |     } else { /* lagn > 1 */
 856 |         scc_count++;
 857 |     
 858 |         if (scc_count <= lagn) {
 859 |             scc_fifo[scc_count-1]=symbol;
 860 |             scc_first_lagn[scc_count-1]=symbol;
 861 |         } else {
 862 |             t1 += (scc_fifo[0] * symbol);
 863 |             if (scc_fifo[0] == symbol) aeqb_count += 1;
 864 |             for(i=0;i<lagn;i++) {
 865 |                 scc_fifo[i]=scc_fifo[i+1];
 866 |             }
 867 |             mean_count += symbol;
 868 |             scc_fifo[lagn]=symbol;
 869 |             t2 += symbol*symbol;
 870 |             t3 += symbol;           
 871 |         }
 872 |         /* printf("symbol %02X, count=%llu t1= %llX,  t2= %llx, t3= %llx\n",symbol,scc_count,t1,t2,t3); */
 873 |          
 874 |     }
 875 | };
 876 | 
 877 | /* The finalization routines for the various metrics */
 878 |         
 879 | void finalize_mean() {
 880 |     double mean;
 881 |     mean = (double)mean_total/(double)symbol_count; 
 882 | 
 883 |     result_mean = mean;
 884 |     return;
 885 | 
 886 |     if (terse==1) printf("%f,",mean);
 887 |     else printf("   Mean = %f\n",mean);
 888 | };
 889 | 
 890 | void compute_markov() {
 891 |     double p01;
 892 |     double p10;
 893 |     double mcv_prob;
 894 |     uint64_t mcv;
 895 |     
 896 |     p01 = result_mean*(1.0-result_scc);
 897 |     p10 = (1.0-result_mean)*(1.0-result_scc);
 898 | 
 899 |     result_p01 = p01;
 900 |     result_p10 = p10;
 901 | 
 902 |     markov_entropy = p_to_entropy(p01,p10,8, &mcv_prob, &mcv);
 903 | }
 904 | 
 905 | void finalize_entropy() {
 906 |     unsigned int eloop;
 907 |     
 908 |     ent = 0.0;
 909 |     for (eloop = 0; eloop < occurrence_size; eloop++) {
 910 |         if (chisq_prob[eloop] > 0.0) {
 911 |             ent += (chisq_prob[eloop] * log10(1.0 / chisq_prob[eloop]) *  3.32192809488736234787);
 912 |         }
 913 |     }
 914 | 
 915 |     result_entropy = ent;
 916 |     return;
 917 | 
 918 |     if (terse == 1) printf("%f,", ent);
 919 |     else printf("   Shannon Entropy = %f\n", ent);
 920 | };
 921 | 
 922 | void finalize_occurrences() {
 923 |     unsigned int i;
 924 |     unsigned int maxc;
 925 |     unsigned int maxsymbol;
 926 |     double maxp;
 927 |     double maxp_ent;
 928 | 
 929 |     /* Find the most frequent symbol */
 930 |     maxc=0;
 931 |     maxsymbol=0;
 932 |     for (i=0;i<occurrence_size;i++) {
 933 |         if (occurrence_count[i] > maxc) {
 934 |             maxc = occurrence_count[i];
 935 |             maxsymbol = i;
 936 |         }
 937 |     }
 938 | 
 939 |     //printf("maxc: %f\n",(double)maxc);
 940 |     //printf("occurance_size: %f\n",(double)occurrence_size);
 941 |     //printf("occurance_total: %f\n",(double)occurrence_total);
 942 |     maxp = ((double)maxc)/((double)occurrence_total);
 943 |     //printf("maxp: %f\n",maxp);
 944 |     maxp_ent = (-log10(maxp)/log10(2))/symbol_length;
 945 |     //printf("maxp_ent: %f\n",maxp_ent);
 946 |     result_min_entropy = maxp_ent;
 947 |     result_min_entropy_symbol = maxsymbol;
 948 | 
 949 |     if (terse != 1) {
 950 |         printf("   Min Entropy (by max occurrence of symbol %x) = %f\n", maxsymbol, maxp_ent);
 951 |     }
 952 | };
 953 | 
 954 | void finalize_longest() {
 955 |     result_longest_pvalue = longest_run_cdf((unsigned int)longest_longest, (unsigned int)symbol_count); 
 956 |     
 957 |     if (symbol_length != 8) {
 958 |         longest_byte_pos = (symbol_count*symbol_length)/8;
 959 |     } else {
 960 |         longest_byte_pos = symbol_count;
 961 |     }
 962 |      
 963 | }
 964 | 
 965 | void finalize_chisq() {
 966 |     uint64_t i;
 967 |     double diff;
 968 |     double chisq_final_prob;
 969 |     
 970 |     double expected;
 971 |     expected = (double)occurrence_total / (double)occurrence_size;
 972 |     for (i=0; i < occurrence_size; i++) {
 973 |         diff = (double)(occurrence_count[i]) - expected;
 974 |         chisq_prob[i] = ((double)occurrence_count[i])/occurrence_total;
 975 |         chisq      += (diff*diff)/expected;
 976 |         chisq_sum  += (double)(i * occurrence_count[i]);
 977 |     }
 978 |    
 979 |     chisq_final_prob = chisqp(chisq, (occurrence_size-1)); 
 980 |     result_chisq_count = occurrence_total;
 981 |     result_chisq_distribution = chisq;
 982 |     result_chisq_percent = chisq_final_prob * 100;
 983 | 
 984 |     return;
 985 | };
 986 | 
 987 | void finalize_filesize() {
 988 | };
 989 | 
 990 | void finalize_monte_carlo() {
 991 |     double pierr;
 992 |     double montepi;
 993 | 
 994 |     montepi = 4.0 * (((double)monty_inside_count) / monty_total_count);
 995 | 
 996 |     pierr = (fabs(M_PI - montepi) / M_PI)*100.0;
 997 | 
 998 |     result_pi = montepi;
 999 |     result_pierr = pierr;
1000 | 
1001 |     return;
1002 | 
1003 | };
1004 | 
1005 | void finalize_compression() {
1006 |     double compression;
1007 | 
1008 |     compression = (100.0 * (symbol_length - ent)) / symbol_length;
1009 | 
1010 |     result_compression = compression;
1011 | 
1012 |     return;
1013 | 
1014 | };
1015 | 
1016 | void finalize_scc() {
1017 |     double scc;
1018 |     int64_t top;
1019 |     int64_t bottom;
1020 |     int i;
1021 | 
1022 |     double paeqb;
1023 |     double bias;
1024 | 
1025 |     if (scc_wrap==1) {
1026 |         if (lagn==1) {
1027 |             t1 += (scc_previous * first_symbol);
1028 |             t2 += first_symbol*first_symbol;
1029 |             t3 += first_symbol;
1030 |         } else {
1031 |             for (i=0;i<lagn;i++) {
1032 |                 t1 += (scc_fifo[i] * scc_first_lagn[i]);
1033 |                 t2 += scc_first_lagn[i]*scc_first_lagn[i];
1034 |                 t3 += scc_first_lagn[i];
1035 |             }
1036 |         }
1037 |     } else {
1038 |         scc_count -= lagn;
1039 |     }
1040 | 
1041 |     /* need signed arithmetic because we are subtracting */
1042 |     top = (int64_t)(scc_count * t1) - (int64_t)(t3*t3);
1043 |     bottom = (int64_t)(scc_count * t2) - (int64_t)(t3*t3);
1044 |     scc = (double)top/(double)bottom;
1045 | 
1046 |     result_scc = scc;
1047 | 
1048 |     // This computation is to try to use the A=B count
1049 |     // The bias masks the serial correlation
1050 |     // Hence the * 1 - (2*abs(bias-0.5)) part.
1051 | 
1052 |     bias = (double)t3/(double)scc_count;
1053 |     paeqb = (double)aeqb_count/(double)scc_count;
1054 | 
1055 |     // Conversion assuming no bias.
1056 |     other_scc = ((2*paeqb)-1); 
1057 |     // Adjust using the bias - more bias pulls SCC towards 0.
1058 |     other_scc = other_scc * pow((1.0-(2.0*fabs(bias-0.5))),2);
1059 | 
1060 |     // Debugging output for A==B computation.
1061 |     //printf("OtherSCC\n     BIAS = %f\n     paeqb = %f\n     other_scc = %f\n",bias,paeqb,other_scc); 
1062 |     //printf("     (1.0-(2.0*fabs(bias-0.5))) = %f\n",(1.0-(2.0*fabs(bias-0.5))));
1063 |     //printf("     full other scc = %f\n",(((2*paeqb)-1)*  (1.0-(2.0*fabs(bias-0.5)))));
1064 |     return;
1065 | };
1066 | 
1067 | 
1068 | /********
1069 | * main() is mostly about parsing and qualifying the command line options.
1070 | */
1071 | 
1072 | int main(int argc, char** argv)
1073 | {
1074 |     int i;
1075 |     int skip_symbol;
1076 |     int got_skip;
1077 |     int skip_amount;
1078 |     int got_substring;
1079 |     int substring;
1080 | 
1081 |     /* Defaults */
1082 |     symbol_length = 8;
1083 |     hexmode = 1;
1084 |     print_occurrence = 0;
1085 |     print_longest = 0;
1086 |     fold = 0;
1087 |     terse = 0;
1088 |     use_stdin = 1;
1089 |     fp = NULL;
1090 |     terse_index = 0;
1091 |     scc_wrap = 0;
1092 |     lagn = 1;
1093 |     using_inputlistfile = 0;
1094 |     suppress_header = 0;
1095 |     byte_reverse = 0;
1096 |     parse_filename = 0;
1097 |     word_reverse = 0; 
1098 |     buffer2_size = 0;
1099 |     ent_exact = 0;
1100 |     got_skip = 0;
1101 |     skip_amount = 0;
1102 |     got_substring = 0;
1103 |     substring = 0;
1104 |     #define ERRSTRINGSIZE 256
1105 |     #define ERRSTRINGSIZE 256
1106 |     #ifdef _WIN32
1107 |     errno_t err;
1108 |     char errstring[ERRSTRINGSIZE];
1109 |     #endif
1110 |     int filenumber = 0;
1111 |     
1112 |     int got_symbol_length=0;
1113 |     
1114 |     char optString[] = "bprRcCwftehusS:i:n:l:L:";
1115 |     int longIndex;
1116 |     static const struct option longOpts[] = {
1117 |     { "symbol_length", required_argument, NULL, 'l' },
1118 |     { "binary", no_argument, NULL, 'b' },
1119 |     { "byte_reverse", no_argument, NULL, 'r' },
1120 |     { "word_reverse", no_argument, NULL, 'R' },
1121 |     { "occurrence", no_argument, NULL, 'c' },
1122 |     { "fold", no_argument, NULL, 'f' },
1123 |     { "parse_filename", no_argument, NULL, 'p' },
1124 |     { "inputlistfile", required_argument, NULL, 'i' },
1125 |     { "scc_wrap", no_argument, NULL, 'w' },
1126 |     { "lagn", required_argument, NULL, 'n' },
1127 |     { "terse", no_argument, NULL, 't' },
1128 |     { "ent_exact", no_argument, NULL, 'e' },
1129 |     { "suppress_header", no_argument, NULL, 's' },
1130 |     { "skip",required_argument, NULL, 'S' },
1131 |     { "substring",required_argument, NULL, 'L' },
1132 |     { "help", no_argument, NULL, 'h' },
1133 |     { NULL, no_argument, NULL, 0 }
1134 |     };
1135 | 
1136 |     opt = getopt_long( argc, argv, optString, longOpts, &longIndex );
1137 |     while( opt != -1 ) {
1138 |         switch( opt ) {
1139 |             case 'b':
1140 |                 if (got_symbol_length == 0) {
1141 |                     symbol_length = 1; /* binary mode treats newlines as data */
1142 |                 }
1143 |                 hexmode = 0;
1144 |                 break;
1145 |                 
1146 |             case 'l':
1147 |                 symbol_length = atoi(optarg); /* -b -l <n> is valid. symbol length
1148 |                                                * will be <n> and newlines will be treated
1149 |                                                * as data.
1150 |                                                */
1151 |                 got_symbol_length = 1;
1152 |                 
1153 |                 break;
1154 |             case 'i':
1155 |                 strncpy(inputlistfilename,optarg,255);
1156 |                 using_inputlistfile = 1;
1157 |                 break;
1158 |  
1159 |             case 'c':                    // Print out occurence count table
1160 |                 print_occurrence = 1;
1161 |                 break;
1162 |             
1163 |             case 'C':
1164 |                 print_longest = 1;      // Print the longest run on symbols
1165 |                 break;
1166 |  
1167 |             case 'p':
1168 |                 parse_filename = 1;    // Parse the filename for voltage, temp,
1169 |                 break;                 // condition and ID to include in output
1170 | 
1171 |             case 'r':
1172 |                 byte_reverse = 1;      // Reverse the order of bit within bytes
1173 |                 break;
1174 | 
1175 |             case 'R':
1176 |                 word_reverse = 1;      // reverse the order of bytes within a word
1177 |                 break;
1178 |                             
1179 |             case 'w':
1180 |                 scc_wrap = 1;          // Compute SCC, treating the data as being
1181 |                 break;                 // circular, like in Knuth's version
1182 |                 
1183 |             case 'n':
1184 |                 lagn = atoi(optarg);   // Compute the lagn correlation.
1185 |                 break;                 // n=1 is equivalent to normal SCC
1186 |                                 
1187 |             case 'f':
1188 |                 fold = 1;              // Fold upper case to lower 
1189 |                 break;
1190 |             
1191 |             case 't':
1192 |                 terse = 1;             // Terse output - output as CSV
1193 |                 break;
1194 |            
1195 |             case 'e':
1196 |                 ent_exact = 1;         // Copy the output format of
1197 |                 break;                 // John Walker's ent
1198 |  
1199 |             case 's':
1200 |                 suppress_header = 1;   // Don't print the header of CSV
1201 |                 break;                 // output
1202 |  
1203 |             case 'S':
1204 |                 got_skip = 1;          // Skip initial symbols
1205 |                 skip_amount = atoi(optarg);
1206 |                 break;                 // output
1207 |  
1208 |             case 'L':
1209 |                 got_substring = 1;          // Read only <n> symbols
1210 |                 substring = atoi(optarg);
1211 |                 break;                 // output
1212 | 
1213 |             case 'u':                  // Help
1214 |             case 'h':   /* fall-through is intentional */
1215 |             case '?':
1216 |                 display_usage();
1217 |                 exit(0);
1218 |                  
1219 |             default:
1220 |                 /* You won't actually get here. */
1221 |                 break;
1222 |         }
1223 |          
1224 |         opt = getopt_long( argc, argv, optString, longOpts, &longIndex );
1225 |     } // end while
1226 |     
1227 | 
1228 |     /* Range check the var args */
1229 | 
1230 |     if ((fold==1) && (symbol_length != 8)) {
1231 |             fprintf(stderr,"Error: Fold must be used with 8 bit word size\n");
1232 |             exit(1);
1233 |     }
1234 |         
1235 |     if (symbol_length < 1) {
1236 |         fprintf(stderr,"Error: Symbol length %d must not be 0 or negative. \n",symbol_length);
1237 |         exit(1);
1238 |     }
1239 | 
1240 |     //init_byte_queue();
1241 |     
1242 |     /* Loop through the filenames */
1243 |     if ((optind==argc) && (using_inputlistfile==0)) {
1244 |         use_stdin = 1;
1245 |     }
1246 |     else {
1247 |         use_stdin = 0;
1248 |     }
1249 | 
1250 |     if ((parse_filename==1) && (use_stdin==1)) {
1251 |         fprintf(stderr,"Error: Can't parse filename when using stdin for input\n");
1252 |         exit(1);
1253 |     }
1254 | 
1255 |     // skip amount must be greater than zero
1256 |     if ((got_skip==1) && (skip_amount <1)) {
1257 |         fprintf(stderr,"Errror: skip amount must be greater than 0\n");
1258 |         exit(1);
1259 |     }
1260 | 
1261 |     // substring must be > 1
1262 |     if ((got_substring==1) && (substring <1)) {
1263 |         fprintf(stderr,"Errror: substring length must be greater than 0\n");
1264 |         exit(1);
1265 |     }
1266 | 
1267 |     terse_index = 0;
1268 |     filenumber = optind;
1269 | 
1270 |     char *filelist;
1271 |     int lines;
1272 |     int lineno;
1273 |     FILE *ifp;
1274 |     int filenamecount = 0;
1275 |     char * res;
1276 |     char line[256];
1277 | 
1278 |     filelist = (char*)0;
1279 |     /* build the list of filenames from the input list file */
1280 |     if (using_inputlistfile==1) {
1281 |         lines = count_lines_in_file(inputlistfilename);
1282 |         ifp = fopen(inputlistfilename,"r");
1283 |         if (ifp==NULL) {
1284 |             fprintf(stderr,"Error: Cannot open %s for reading\n",inputlistfilename);
1285 |             exit(1);
1286 |         }
1287 |         
1288 |         filelist = (char *)malloc(sizeof(char *)*256*lines);
1289 |         
1290 |         if (filelist==NULL) {
1291 |             fprintf(stderr,"Error: Cannot allocate memory for filename list from input file list file %s\n",inputlistfilename);
1292 |             exit(1);
1293 |         }
1294 |          
1295 |         for (lineno=0;lineno<lines;lineno++) {
1296 |             res = fgets(line, 256, ifp);
1297 |             if (res != NULL) {
1298 |                 /* mute the newlines from the file*/
1299 |                 for (i = 0;i<256;i++) {
1300 |                     if (line[i]=='\n') line[i]=0;
1301 |                 }
1302 | 
1303 |                 /* Grab the file names, skipping the ones beginning with # */
1304 |                 strncpy(&(filelist[256*filenamecount]),line,256);
1305 |                 /*printf(" Scanning file list, got :%s:\n",&(filelist[256*filenamecount]));*/
1306 |                 if (line[0]!='#') filenamecount++; /* ignore lines beginning with # */ 
1307 |             }
1308 |         }
1309 |         fclose(ifp);
1310 |         if (filenamecount==0) {
1311 |             fprintf(stderr,"Error: Did not file any filenames in input file list file %s\n",inputlistfilename);
1312 |             exit(1);
1313 |         }
1314 |         filenumber = 0; 
1315 |     }
1316 | 
1317 |     do {
1318 |         terse_index++;
1319 |         filebytes = 0;
1320 |         /* printf("OPTIND %d, filenumber %d, ARGC %d\n",optind,filenumber,argc); */
1321 |         if (use_stdin==1) {
1322 |             use_stdin = 1;
1323 |             if (hexmode != 1) freopen(NULL, "rb", stdin);
1324 |             fp = stdin;
1325 |         }
1326 |         else {
1327 |             if (using_inputlistfile==0) {
1328 |                 filename = argv[filenumber];
1329 |             } else {
1330 |                 /* Get file from input file list file */ 
1331 |                 filename = &(filelist[256*filenumber]);
1332 |                 /*printf("FILENUMBER = %d , Filename = %s\n",filenumber,filename);*/
1333 |             }
1334 |             if (parse_filename==1) parse_the_filename(filename);
1335 | 
1336 |             if (hexmode == 1) {
1337 |                 if ((terse == 0) && (ent_exact == 0))printf(" opening %s as hex text\n", filename);
1338 |                 #ifdef _WIN32
1339 |                 if ((err = fopen_s(&fp, filename, "r")) != 0) {
1340 |                     strerror_s(errstring, ERRSTRINGSIZE, err);
1341 |                     fprintf(stderr, "Error : Unable to open file %s, %s\n", filename, errstring);
1342 |                     exit(1);
1343 |                 }
1344 |                 #else
1345 |                 fp = fopen(filename,"r");
1346 |                 if (fp == NULL) {
1347 |                     fprintf(stderr, "Error : Unable to open file %s\n", filename);
1348 |                     exit(1);
1349 |                 }
1350 |                 #endif
1351 |             }
1352 |             else {
1353 |                 if ((terse == 0) && (ent_exact==0)) printf(" opening %s as binary\n", filename);
1354 |                 #ifdef _WIN32
1355 |                 if ((err = fopen_s(&fp, filename, "rb")) != 0) {
1356 |                     strerror_s(errstring, ERRSTRINGSIZE, err);
1357 |                     fprintf(stderr, "Error : Unable to open file %s, %s\n", filename, errstring);
1358 |                     exit(1);
1359 |                 }
1360 |                 #else
1361 |                 fp = fopen(filename,"rb");
1362 |                 if (fp == NULL) {
1363 |                     fprintf(stderr, "Error : Unable to open file %s\n", filename);
1364 |                     exit(1);
1365 |                 }
1366 |                 #endif
1367 |                 /*  fp = fopen(filename,"rb"); */
1368 |                 /* printf("           %x\n",(unsigned int)fp);*/
1369 |             }
1370 | 
1371 |             if ((terse == 0) && (ent_exact == 0))printf(" Symbol Size(bits) = %d\n", symbol_length);
1372 | 
1373 |         }
1374 |         
1375 |         /* Print terse header if necessary */
1376 |         if ((terse == 1) && (terse_index == 1) && (suppress_header==0)) {
1377 |             if (ent_exact == 1) {
1378 |                 if (symbol_length==1) {
1379 |                     printf("0,File-bits,Entropy,Chi-square,Mean,Monte-Carlo-Pi,Serial-Correlation\n");
1380 |                 } else {
1381 |                     printf("0,File-bytes,Entropy,Chi-square,Mean,Monte-Carlo-Pi,Serial-Correlation\n");
1382 |                 }
1383 |             }
1384 |             else if (parse_filename==1) {
1385 |                 printf("   0,     symbols,     CID, Process, Voltage,    Temp,     Entropy,  MinEntropy, MinEntropy-Symbol,  Chi-square,        Mean, Monte-Carlo-Pi, Serial-Correlation,              P01,              P10,  mkv_min_entropy,  Longest-Run-Symbol, Longest-Run-Length, Longest-Run-PValue, Longest-Run-Pos, Filename\n");
1386 |             } else {
1387 |                 printf("   0,     symbols,    Entropy,  Min_entropy, MinEntropy-Symbol,  Chi-square,        Mean, Monte-Carlo-Pi, Serial-Correlation,              P01,              P10,  mkv_min_entropy, Longest-Run-Symbol, Longest-Run-Length, Longest-Run-PValue, Longest-Run-Pos, Filename\n");
1388 |             }
1389 |         }
1390 | 
1391 |         /* Initialize the metrics */
1392 |         symbol_count = 0;
1393 | 
1394 |         fflush(stdout);
1395 |         init_byte_queue();
1396 |         init_mean();
1397 |         init_entropy();
1398 |         init_occurrences();
1399 |         init_longest();
1400 |         init_chisq();
1401 |         init_filesize();
1402 |         init_monte_carlo();
1403 |         init_compression();
1404 |         init_scc();
1405 | 
1406 |         /* Now process the file fp */
1407 |         /* Since we have multiple possible symbol sizes, first pull the bytes
1408 |             * then put them in a queue which behaves like a bitwise queue, then
1409 |             * pull the symbols from the queue.
1410 |             */
1411 | 
1412 |         // first skip the first bits if skip is chosen with -S option.
1413 | 
1414 |         if (got_skip==1) {
1415 |             if (queue_size == 0) {
1416 |                 not_eof = (int)fill_byte_queue(fp); /* get bytes from file into queue */
1417 |         
1418 |             }
1419 |             for (skip_symbol=0;skip_symbol < skip_amount; skip_symbol++) {
1420 |                 symbol = get_symbol(1);
1421 |             }
1422 |         }
1423 | 
1424 |         // Then do the main loop.
1425 | 
1426 | 
1427 |         do {
1428 | 
1429 |             if (queue_size == 0) {
1430 |                 not_eof = (int)fill_byte_queue(fp); /* get bytes from file into queue */
1431 |                 if (not_eof == 0) break;
1432 |             }
1433 |             symbol = get_symbol(symbol_length);      /* Pull a symbol from the queue */
1434 |             symbol_count++;
1435 | 
1436 |             /* Finish up if no symbols left in queue */
1437 |             if (symbol == -1) break;
1438 | 
1439 |             /* End if we reach end of substring */
1440 |             if ((got_substring==1) && ((symbol_count+skip_amount) > substring)) break;
1441 | 
1442 |             /* Then update the algorithms using the symbol */
1443 |             update_mean(symbol);
1444 |             update_entropy(symbol);
1445 |             if (no_occurrence_space == 0) update_occurrences(symbol);
1446 |             if (no_longest_space == 0) update_longest(symbol, symbol_count+skip_amount);
1447 |             update_chisq(symbol);
1448 |             update_filesize(symbol);
1449 |             /* Monte Carlo is different, it works on bytes, not symbols
1450 |                 * So we call the update from within the fill_byte_queue routine
1451 |                 */
1452 |                 /* update_monte_carlo(symbol); */
1453 | 
1454 |             update_compression(symbol);
1455 |             update_scc(symbol);
1456 |         } while (1 == 1);
1457 | 
1458 |         //symbol_count--; // Adjust for the fact symbol_count goes over by 1
1459 | 
1460 |         finalize_mean();
1461 |         if (no_occurrence_space == 0) finalize_occurrences();
1462 |         if (no_longest_space == 0) finalize_longest();
1463 |         finalize_chisq();
1464 |         finalize_entropy();
1465 |         finalize_filesize();
1466 |         finalize_monte_carlo();
1467 |         finalize_compression();
1468 |         finalize_scc();
1469 |         compute_markov();
1470 | 
1471 |         if (symbol_length != 8){
1472 |             longest_byte_pos = (longest_position*symbol_length)/8;
1473 |         } else {
1474 |             longest_byte_pos = longest_position;
1475 |         }
1476 | 
1477 |         if (terse == 1) {
1478 |             if (ent_exact==1) {
1479 |                 if (symbol_length == 1) {
1480 |                 printf("%d,%"PRIu64",%f,%f,%f,%f,%f\n",terse_index,filebytes*8,result_entropy,result_chisq_distribution,result_mean,result_pi,result_scc);
1481 |                 } else {
1482 |                 printf("%d,%"PRIu64",%f,%f,%f,%f,%f\n",terse_index,filebytes,result_entropy,result_chisq_distribution,result_mean,result_pi,result_scc);
1483 |                 }
1484 |             }
1485 |             else if ((parse_filename==1) && (symbol_length==1)) {
1486 |                 printf("%4d,%12"PRIu64",%8s,%8s,%8.2f,%8.2f,%12f,%12f,%18"PRIu32",%12f,%12f,%15f,   %16f, %16f, %16f, %16f, %19"PRIx64", %18"PRIu64", %18f, %15"PRIu64", %s\n", terse_index, symbol_count, deviceid,process,voltage,temperature,result_entropy, result_min_entropy,result_min_entropy_symbol, result_chisq_percent, result_mean, result_pi, result_scc, result_p01, result_p10,markov_entropy, longest_longest_symbol,longest_longest,result_longest_pvalue, longest_byte_pos, filename);
1487 | 
1488 |             } else if ((parse_filename==0) && (symbol_length==1)) {
1489 |                 printf("%4d,%12"PRIu64",%11f, %12f,%18"PRIx32",%12f,%12f,%15f,       %12f, %16f, %16f, %16f, %18"PRIx64", %18"PRIu64", %18f, %15"PRIu64", %s\n", terse_index, symbol_count, result_entropy, result_min_entropy,result_min_entropy_symbol, result_chisq_percent, result_mean, result_pi, result_scc, result_p01, result_p10, markov_entropy, longest_longest_symbol,longest_longest,result_longest_pvalue, longest_byte_pos, filename);
1490 |             }
1491 | 
1492 |             else if ((parse_filename==1) && (symbol_length!=1)) {
1493 |                 printf("%4d,%12"PRIu64",%8s,%8s,%8.2f,%8.2f,%12f,%12f,%18"PRIx32",%12f,%12f,%15f,   %16f, %16f, %16f, %16f,  %18"PRIx64", %18"PRIu64",             (null), %15"PRIu64", %s\n", terse_index, symbol_count, deviceid,process,voltage,temperature,result_entropy, result_min_entropy,result_min_entropy_symbol, result_chisq_percent, result_mean, result_pi, result_scc, result_p01, result_p10, markov_entropy, longest_longest_symbol,longest_longest, longest_byte_pos, filename);
1494 |             
1495 |             } else if ((parse_filename==0) && (symbol_length!=1)) {
1496 |                 printf("%4d,%12"PRIu64",%11f, %12f,%18"PRIx32",%12f,%12f,%15f,       %12f, %16f, %16f, %16f,  %18"PRIx64", %18"PRIu64",             (null), %15"PRIu64", %s\n", terse_index, symbol_count, result_entropy, result_min_entropy,result_min_entropy_symbol, result_chisq_percent, result_mean, result_pi, result_scc, result_p01, result_p10, markov_entropy, longest_longest_symbol,longest_longest, longest_byte_pos, filename);
1497 |             }
1498 |         }
1499 |         else {
1500 | 
1501 |             /* Output the occurrence count if requested */
1502 |             if ((print_occurrence==1) && (no_occurrence_space == 0) ) {
1503 |                 double fraction;
1504 |                 for (i=0; i<occurrence_size;i++) {
1505 |                     fraction = (double)occurrence_count[i]/(double)occurrence_total;
1506 |                     printf("   Value %4d , frequency=%"PRIu64" , fraction=%f\n", i, occurrence_count[i], fraction);
1507 |                 }
1508 |             }
1509 |             
1510 |             /* Output the longest run length of each symbol if requested */
1511 |             if ((print_longest==1) && (no_longest_space == 0) ) {
1512 |                 for (i=0; i<occurrence_size;i++) {
1513 |                     printf("   Symbol %x , Longest Run=%"PRIu64"\n", i, longest_count[i]);
1514 |                 }
1515 |             }
1516 | 
1517 |             /* Output the non terse results */
1518 |             if (ent_exact == 1) {
1519 |                 /* Make it look like this:
1520 |                  *
1521 |                  * Entropy = 4.676598 bits per byte.
1522 |                  *
1523 |                  * Optimum compression would reduce the size
1524 |                  * of this 57737 byte file by 41 percent.
1525 |                  * 
1526 |                  * Chi square distribution for 57737 samples is 1499055.27, and randomly
1527 |                  * would exceed this value less than 0.01 percent of the times.
1528 |                  *
1529 |                  * Arithmetic mean value of data bytes is 71.6317 (127.5 = random).
1530 |                  * Monte Carlo value for Pi is 4.000000000 (error 27.32 percent).
1531 |                  * Serial correlation coefficient is 0.515629 (totally uncorrelated = 0.0).
1532 |                  */
1533 |                 
1534 |                 printf("Entropy = %f bits per byte.\n\n",(result_entropy *(8.0/symbol_length)));
1535 |                 printf("Optimum compression would reduce the size\n");
1536 |                 printf("of this %d byte file by %d percent\n\n",(int)filebytes,(int)result_compression);
1537 |                 printf("Chi square distribution for %d samples is %f, and randomly\n",(int)result_chisq_count,result_chisq_distribution);
1538 |                 printf("would exceed this value less than %f percent of the times.\n\n",result_chisq_percent);
1539 |                 printf("Arithmetic mean value of data bytes is %f (127.5 = random).\n",result_mean);
1540 |                 printf("Monte Carlo value for Pi is %f (error %f percent).\n",result_pi,result_pierr);
1541 |                 printf("Serial correlation coefficient is %f (totally uncorrelated = 0.0).\n",result_scc);
1542 |             }
1543 |             else {
1544 |                 if (parse_filename==1) {
1545 |                 printf("   Device ID   : %s\n",deviceid);
1546 |                 printf("   Process     : %s\n",process);
1547 |                 printf("   Voltage     : %0.2lfV\n",voltage);
1548 |                 printf("   Temperature : %0.2lfC\n",temperature);
1549 |                 }
1550 |                 printf("   Analysing %"PRId64" %d-bit symbols\n",symbol_count,symbol_length);
1551 |                 printf("   Shannon IID Entropy = %f bits per symbol\n",result_entropy);
1552 |                 printf("   Optimal compression would compress by %f percent\n", result_compression);
1553 |                 #ifdef _WIN32
1554 |                 printf("   Chi square: symbol count=%llu, distribution=%1.2f, randomly exceeds %1.2f percent of the time\n", result_chisq_count, result_chisq_distribution, result_chisq_percent);
1555 |                 #elif __llvm__
1556 |                 printf("   Chi square: symbol count=%llu, distribution=%1.2f, randomly exceeds %1.2f percent of the time\n", result_chisq_count, result_chisq_distribution, result_chisq_percent);
1557 |                 #elif __linux__
1558 |                 printf("   Chi square: symbol count=%lu, distribution=%1.2f, randomly exceeds %1.2f percent of the time\n", result_chisq_count, result_chisq_distribution, result_chisq_percent);
1559 |                 #endif
1560 |                 printf("   Mean = %f\n",result_mean);
1561 |                 printf("   Monte Carlo value for Pi is %f (error %1.2f percent).\n", result_pi, result_pierr);
1562 |                 printf("   Serial Correlation = %f\n",result_scc);
1563 |                 printf("   Longest Run Symbol = %"PRIx64". Run Length = %"PRIu64"\n",longest_longest_symbol,longest_longest);
1564 |                 if (symbol_length == 1) printf("   Probabilty of longest run being <= %"PRIu64" = %f\n",longest_longest,result_longest_pvalue);
1565 |                 //printf("SCC by A=B Count is %f (totally uncorrelated = 0.0).\n",other_scc);
1566 |                 printf("   Position of Longest Run = %"PRIu64" (0x%"PRIx64"). Byte position %"PRIu64" (0x%"PRIx64")\n",longest_position, longest_position, longest_byte_pos, longest_byte_pos);
1567 |                 printf("   A 2 state Markov generator with transition probabilities P01=%f, P10=%f would generate data with entropy %f per bit with 8 bit symbols with the same mean and serial correlation\n",result_p01, result_p10, markov_entropy);
1568 |             }
1569 |         }
1570 | 
1571 |         /* Free the per-loop mallocs */     
1572 |         free(occurrence_count);
1573 |         free(longest_count);
1574 |         free(chisq_prob);
1575 |     
1576 |         if (fp != stdin) fclose(fp); 
1577 |         filenumber++;
1578 | 
1579 |         symbol_count = 0;
1580 |     } while (
1581 |                     ((filenumber < argc) && (use_stdin != 1) && (using_inputlistfile!=1)) /* still going through argv */
1582 |                 ||
1583 |                     ((filenumber < filenamecount) && (using_inputlistfile==1)) /* still going through file list file */
1584 |             );
1585 |     /* Free the per-run malloc */
1586 |     free(filelist);
1587 | 
1588 |     /* Find out what the various compilers give us
1589 |     #ifdef __llvm__
1590 |         printf("llvm\n");
1591 |     #endif
1592 | 
1593 |     #ifdef __clang__
1594 |         printf("clang\n");
1595 |     #endif
1596 |     
1597 |     #ifdef __gcc__
1598 |         printf("gcc\n");
1599 |     #endif
1600 |     #ifdef __linux__
1601 |         printf("linux\n");
1602 |     #endif
1603 |     #ifdef _WIN32
1604 |         printf("win32\n");
1605 |     #endif
1606 |     */
1607 |     return 0;
1608 | 
1609 | }
1610 | 
1611 | 
1612 | 


--------------------------------------------------------------------------------