├── randomdata ├── measure.sh ├── bias0p1.bin ├── bias0p2.bin ├── bias0p3.bin ├── bias0p4.bin ├── bias0p5.bin ├── bias0p6.bin ├── bias0p7.bin ├── bias0p8.bin ├── bias0p9.bin ├── corr0p1.bin ├── corr0p2.bin ├── corr0p3.bin ├── corr0p4.bin ├── corr0p5.bin ├── corr0p6.bin ├── corr0p7.bin ├── corr0p8.bin ├── corr0p9.bin ├── uniform.bin ├── corrm0p1.bin ├── corrm0p2.bin ├── corrm0p3.bin ├── corrm0p4.bin ├── corrm0p5.bin ├── corrm0p6.bin ├── corrm0p7.bin ├── corrm0p8.bin ├── corrm0p9.bin ├── filenames └── generate.sh ├── .gitignore ├── filename_parse.h ├── Makefile ├── longest_run_cdf.h ├── mathy_things.h ├── CITATION.cff ├── markov2p.h ├── longest_run_cdf.c ├── mathy_things.c ├── README.md ├── filename_parse.c ├── LICENSE ├── markov2p.c └── djent.c /randomdata/measure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | djent -b -t *.bin 4 | 5 | -------------------------------------------------------------------------------- /randomdata/bias0p1.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p1.bin -------------------------------------------------------------------------------- /randomdata/bias0p2.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p2.bin -------------------------------------------------------------------------------- /randomdata/bias0p3.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p3.bin -------------------------------------------------------------------------------- /randomdata/bias0p4.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p4.bin 
-------------------------------------------------------------------------------- /randomdata/bias0p5.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p5.bin -------------------------------------------------------------------------------- /randomdata/bias0p6.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p6.bin -------------------------------------------------------------------------------- /randomdata/bias0p7.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p7.bin -------------------------------------------------------------------------------- /randomdata/bias0p8.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p8.bin -------------------------------------------------------------------------------- /randomdata/bias0p9.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/bias0p9.bin -------------------------------------------------------------------------------- /randomdata/corr0p1.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p1.bin -------------------------------------------------------------------------------- /randomdata/corr0p2.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p2.bin -------------------------------------------------------------------------------- /randomdata/corr0p3.bin: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p3.bin -------------------------------------------------------------------------------- /randomdata/corr0p4.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p4.bin -------------------------------------------------------------------------------- /randomdata/corr0p5.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p5.bin -------------------------------------------------------------------------------- /randomdata/corr0p6.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p6.bin -------------------------------------------------------------------------------- /randomdata/corr0p7.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p7.bin -------------------------------------------------------------------------------- /randomdata/corr0p8.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p8.bin -------------------------------------------------------------------------------- /randomdata/corr0p9.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corr0p9.bin -------------------------------------------------------------------------------- /randomdata/uniform.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/uniform.bin -------------------------------------------------------------------------------- /randomdata/corrm0p1.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p1.bin -------------------------------------------------------------------------------- /randomdata/corrm0p2.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p2.bin -------------------------------------------------------------------------------- /randomdata/corrm0p3.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p3.bin -------------------------------------------------------------------------------- /randomdata/corrm0p4.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p4.bin -------------------------------------------------------------------------------- /randomdata/corrm0p5.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p5.bin -------------------------------------------------------------------------------- /randomdata/corrm0p6.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p6.bin -------------------------------------------------------------------------------- /randomdata/corrm0p7.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p7.bin 
-------------------------------------------------------------------------------- /randomdata/corrm0p8.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p8.bin -------------------------------------------------------------------------------- /randomdata/corrm0p9.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dj-on-github/djent/HEAD/randomdata/corrm0p9.bin -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | *.su 34 | -------------------------------------------------------------------------------- /filename_parse.h: -------------------------------------------------------------------------------- 1 | /* look for vpattern in str. Return the match to found. 
# Build rules for djent.
#
# GMP and MPFR are required; /usr/local/include and /usr/local/lib are added
# to the header and library search paths.
CC = gcc
CFLAGS = -I/usr/local/include -m64 -g -Wall
LDFLAGS = -L/usr/local/lib
# Library order matters when linking statically: a library must be listed
# before the libraries it depends on. libmpfr uses libgmp, and libm goes last.
LDLIBS = -lmpfr -lgmp -lm

# These targets name actions, not files. Declaring them phony keeps them
# working even if a file called "all", "install" or "clean" exists.
.PHONY: all install clean

all: djent

longest_run_cdf.o: longest_run_cdf.c longest_run_cdf.h
	$(CC) -c $(CFLAGS) -o longest_run_cdf.o longest_run_cdf.c

mathy_things.o: mathy_things.c mathy_things.h
	$(CC) -c $(CFLAGS) -o mathy_things.o mathy_things.c

filename_parse.o: filename_parse.c filename_parse.h
	$(CC) -c $(CFLAGS) -o filename_parse.o filename_parse.c

markov2p.o: markov2p.c markov2p.h
	$(CC) -c $(CFLAGS) -o markov2p.o markov2p.c

djent.o: djent.c markov2p.h filename_parse.h mathy_things.h
	$(CC) -c $(CFLAGS) -o djent.o djent.c

djent: djent.o markov2p.o filename_parse.o mathy_things.o longest_run_cdf.o
	$(CC) $(CFLAGS) $(LDFLAGS) longest_run_cdf.o mathy_things.o filename_parse.o markov2p.o djent.o -o djent $(LDLIBS)

# Copies the already-built binary; run "make" first.
install:
	cp djent /usr/local/bin

clean:
	rm -f longest_run_cdf.o
	rm -f filename_parse.o
	rm -f mathy_things.o
	rm -f markov2p.o
	rm -f djent.o
	rm -f djent
#ifndef MATHY_THINGS_H
#define MATHY_THINGS_H

/*
 * Declarations for the numeric helpers implemented in mathy_things.c.
 *
 * The header pulls in the standard headers it relies on so that it can be
 * included first in any translation unit.
 */
#include <stddef.h>   /* size_t */
#include <stdint.h>   /* uint64_t */
#include <math.h>     /* exp(), used by the ex() macro */

#define LOG_SQRT_PI 0.5723649429247000870717135 /* log (sqrt (pi)) */
#define I_SQRT_PI 0.5641895835477562869480795 /* 1 / sqrt (pi) */
#define BIGX 20.0 /* max value to represent exp (x) */
/* exp(x), clamped to 0.0 once x drops below -BIGX */
#define ex(x) (((x) < -BIGX) ? 0.0 : exp(x))

/* Integer power base^exp computed in uint64_t arithmetic. */
uint64_t ipow(uint64_t base, uint64_t exp);

/* Polynomial approximation of the standard normal cumulative distribution at z. */
double zcdf(double z);

/* Chi-square P value for statistic ax with df degrees of freedom. */
double chisqp(double ax, size_t df);

#endif /* MATHY_THINGS_H */
3 | 4 | cff-version: 1.2.0 5 | title: djent 6 | message: >- 7 | If you use this software, please cite it using the 8 | metadata from this file. 9 | type: software 10 | authors: 11 | - given-names: David 12 | family-names: Johnston 13 | email: dj@deadhat.com 14 | orcid: 'https://orcid.org/0009-0002-5149-9414' 15 | repository-code: 'https://github.com/dj-on-github/djent' 16 | abstract: >- 17 | djent is a reimplementation of the Fourmilab/John Walker 18 | random number test program ent. 19 | 20 | 21 | The improvements are: 22 | 23 | 24 | Multiple input file names can be provided at once. This 25 | works nicely with the CSV format output. 26 | 27 | -h works as well as -u to get the help information. 28 | 29 | The filename is present in CSV output 30 | 31 | The symbol size can be any number of bits up to 32. ent 32 | was constrained to 1 or 8. 33 | 34 | The SCC test can be either wrap-around or not wrap-around. 35 | 36 | The SCC result can be given a lag value to get a LAG-N 37 | correlation coefficient. 38 | 39 | A list of filenames to analyze can be read from a text 40 | file using -i filename. 41 | 42 | Test condition details (Volts, temp, id etc.) can be 43 | parsed from the filename and included in output. 44 | 45 | MCV Min Entropy is estimated in addition to Shannon 46 | Entropy. The symbol and entropy are both reported 47 | 48 | The longest run and the symbol in the longest run are 49 | reported. For 1 bit-per-symbol analysis, a p-value is 50 | computed of the probability of a uniform random bit 51 | sequence having a longest run length equal to or less than 52 | the measured run length. 
#!/bin/bash
# Regenerate the sample data files in this directory with djenrandom.
# Every invocation uses the same base options (-b -s -k 10); only the model
# and its parameter change. In the output names, 'p' stands for the decimal
# point and a trailing 'm' in "corrm" marks a negative correlation.

# Correlated model, correlation -0.1 .. -0.9 -> corrm0p1.bin .. corrm0p9.bin
for d in 1 2 3 4 5 6 7 8 9; do
    djenrandom -b -s -k 10 -m correlated --correlation=-0.${d} > corrm0p${d}.bin
done

# Correlated model, correlation 0.1 .. 0.9 -> corr0p1.bin .. corr0p9.bin
for d in 1 2 3 4 5 6 7 8 9; do
    djenrandom -b -s -k 10 -m correlated --correlation=0.${d} > corr0p${d}.bin
done

# Biased model, bias 0.1 .. 0.9 -> bias0p1.bin .. bias0p9.bin
for d in 1 2 3 4 5 6 7 8 9; do
    djenrandom -b -s -k 10 -m biased --bias=0.${d} > bias0p${d}.bin
done

# No model option given -> uniform.bin
djenrandom -b -s -k 10 > uniform.bin
David Johnston dj@deadhat.com 24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | #define EQUIPROBABLE 0 38 | #define P000_MAX 1 39 | #define P111_MAX 2 40 | #define P101_MAX 3 41 | #define P010_MAX 4 42 | 43 | double symbol_prob(double p01, double p10, uint64_t x, int bitwidth) ; 44 | double max(double x, double y) ; 45 | uint64_t mk_symbol(int prefix, int tbp, int postfix, int bitwidth) ; 46 | uint64_t mk_symbol_nopostfix(int prefix, int tbp, int bitwidth) ; 47 | int most_probable_transition_pair(double p01, double p10) ; 48 | uint64_t most_probable_symbol_odd(double p01, double p10,int bitwidth) ; 49 | uint64_t most_probable_symbol_even(double p01, double p10,int bitwidth) ; 50 | uint64_t most_probable_symbol(double p01, double p10,int bitwidth) ; 51 | double symbol_max_probability(double p01, double p10,int bitwidth,uint64_t *mcv) ; 52 | double p_to_entropy(double p01, double p10,int bitwidth, double *mcv_prob, uint64_t *mcv) ; 53 | int near(double x,double y, double epsilon) ; 54 | //void pick_point(double *p01, double *p10, double desired, double epsilon, int bitwidth, t_rngstate* rngstate) ; 55 | void make_sample_table(double p01, double p10, int bitwidth, int **sampletable0, int **sampletable1) ; 56 | 57 | -------------------------------------------------------------------------------- /longest_run_cdf.c: -------------------------------------------------------------------------------- 1 | /* 2 | djent - A reimplementation of Fourmilab's ent with several improvements. 3 | 4 | Copyright (C) 2017 David Johnston 5 | 6 | This program is free software; you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation; either version 2 of the License, or 9 | (at your option) any later version. 
10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; if not, write to the Free Software Foundation, Inc., 18 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 19 | 20 | ----- 21 | 22 | Contact. David Johnston dj@deadhat.com 23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | 36 | #ifndef NO_GMP 37 | #include 38 | #endif 39 | 40 | // Return the probability of the longest run of heads being less than or equal to n 41 | // in a sequence of r uniform coin tosses. Use MPFR to avoid overflows. 42 | double longest_run_cdf(unsigned int ui_n,unsigned int ui_r) { // n=longest run. r = length of data sequence 43 | double answer; 44 | mpfr_set_default_prec(1024); 45 | mpfr_t n; 46 | mpfr_t r; 47 | mpfr_t topa; 48 | mpfr_t bottoma; 49 | mpfr_t first; 50 | mpfr_t topb; 51 | mpfr_t nplusone; 52 | mpfr_t bottomb; 53 | mpfr_t nplus2over2; 54 | mpfr_t second; 55 | mpfr_t mpfans; 56 | mpfr_set_default_prec(1024); 57 | 58 | mpfr_init(topa); 59 | mpfr_init(nplusone); 60 | mpfr_init(bottoma); 61 | mpfr_init(first); 62 | mpfr_init(topb); 63 | mpfr_init(bottomb); 64 | mpfr_init(nplus2over2); 65 | mpfr_init(second); 66 | mpfr_init(mpfans); 67 | mpfr_init_set_ui(n,ui_n,MPFR_RNDN); 68 | mpfr_init_set_ui(r,ui_r,MPFR_RNDN); 69 | 70 | mpfr_add_ui(topa,r,1,MPFR_RNDN); 71 | 72 | mpfr_add_ui(nplusone,n,1,MPFR_RNDN); 73 | 74 | 75 | mpfr_exp2(bottoma,nplusone,MPFR_RNDN); 76 | mpfr_sub(bottoma,bottoma,n,MPFR_RNDN); 77 | mpfr_sub_ui(bottoma,bottoma,2,MPFR_RNDN); 78 | 79 | mpfr_div(first,topa,bottoma,MPFR_RNDN); 80 | mpfr_neg(first,first,MPFR_RNDN); 81 | 82 | 
mpfr_exp(first,first,MPFR_RNDN); 83 | 84 | // Second 85 | mpfr_exp2(topb,nplusone,MPFR_RNDN); 86 | mpfr_sub_ui(topb,topb,1,MPFR_RNDN); 87 | 88 | mpfr_exp2(bottomb,nplusone,MPFR_RNDN); 89 | 90 | mpfr_add_ui(nplus2over2,n,2,MPFR_RNDN); 91 | mpfr_div_ui(nplus2over2,nplus2over2,2,MPFR_RNDN); 92 | 93 | mpfr_sub(bottomb,bottomb,nplus2over2,MPFR_RNDN); 94 | 95 | mpfr_div(second,topb,bottomb,MPFR_RNDN); 96 | 97 | //Final 98 | mpfr_mul(mpfans,first,second,MPFR_RNDN); 99 | answer = mpfr_get_d(mpfans,MPFR_RNDN); 100 | 101 | mpfr_clear(topa); 102 | mpfr_clear(nplusone); 103 | mpfr_clear(bottoma); 104 | mpfr_clear(first); 105 | mpfr_clear(topb); 106 | mpfr_clear(bottomb); 107 | mpfr_clear(nplus2over2); 108 | mpfr_clear(second); 109 | mpfr_clear(mpfans); 110 | mpfr_clear(n); 111 | mpfr_clear(r); 112 | 113 | return answer; 114 | 115 | 116 | } 117 | -------------------------------------------------------------------------------- /mathy_things.c: -------------------------------------------------------------------------------- 1 | /* 2 | djent - A reimplementation of Fourmilab's ent with several improvements. 3 | 4 | Copyright (C) 2017 David Johnston 5 | 6 | This program is free software; you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation; either version 2 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; if not, write to the Free Software Foundation, Inc., 18 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 19 | 20 | ----- 21 | 22 | Contact. 
David Johnston dj@deadhat.com 23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | uint64_t ipow(uint64_t base, uint64_t exp) 36 | { 37 | uint64_t result = 1; 38 | while (exp) 39 | { 40 | if (exp & 1) 41 | result *= base; 42 | exp >>= 1; 43 | base *= base; 44 | } 45 | 46 | return result; 47 | } 48 | 49 | /* Chi Square P value computation */ 50 | 51 | double zcdf(double z) { 52 | double w; 53 | double x; 54 | double y; 55 | double result; 56 | 57 | if (z == 0.0) return 0.5; 58 | 59 | y = fabs(z)/2.0; 60 | 61 | if (y >= 3.0) return 0.0; 62 | 63 | if (y < 1.0) { 64 | w = y * y; 65 | x = 0.000124818987; 66 | x = x * w - 0.001075204047; 67 | x = x * w + 0.005198775019; 68 | x = x * w - 0.019198292004; 69 | x = x * w + 0.059054035642; 70 | x = x * w - 0.151968751364; 71 | x = x * w + 0.319152932694; 72 | x = x * w - 0.531923007300; 73 | x = x * w + 0.797884560593; 74 | x = x * 2.0 * y; 75 | } else { 76 | y -= 2.0; 77 | x = -0.000045255659; 78 | x = x * y + 0.000152529290; 79 | x = x * y - 0.000019538132; 80 | x = x * y - 0.000676904986; 81 | x = x * y + 0.001390604284; 82 | x = x * y - 0.000794620820; 83 | x = x * y - 0.002034254874; 84 | x = x * y + 0.006549791214; 85 | x = x * y - 0.010557625006; 86 | x = x * y + 0.011630447319; 87 | x = x * y - 0.009279453341; 88 | x = x * y + 0.005353579108; 89 | x = x * y - 0.002141268741; 90 | x = x * y + 0.000535310849; 91 | x = x * y + 0.999936657524; 92 | } 93 | 94 | 95 | if (z > 0.0) { 96 | result = (x/2.0)+0.5; 97 | } else { 98 | result = (0.5 - (x/2.0)); 99 | } 100 | 101 | return result; 102 | } 103 | 104 | #define LOG_SQRT_PI 0.5723649429247000870717135 /* log (sqrt (pi)) */ 105 | #define I_SQRT_PI 0.5641895835477562869480795 /* 1 / sqrt (pi) */ 106 | #define BIGX 20.0 /* max value to represent exp (x) */ 107 | #define ex(x) (((x) < -BIGX) ? 
0.0 : exp(x)) 108 | 109 | double chisqp(double ax, size_t df) { 110 | double x; 111 | double a; 112 | double y; 113 | double s; 114 | double e; 115 | double c; 116 | double z; 117 | int dfeven; 118 | 119 | dfeven=0; 120 | if ((df % 2)==0) dfeven = 1; 121 | 122 | x = ax; 123 | 124 | if (x <= 0.0 || df < 1) return 1.0; 125 | 126 | a = x/2.0; 127 | 128 | if (df > 1) y = ex(-a); 129 | 130 | if (dfeven == 1) s = y; 131 | else s = 2.0 * zcdf(-sqrt(x)); 132 | 133 | if (df > 2) { 134 | x = (df - 1.0)/2.0; 135 | if (dfeven==1) z = 1.0; 136 | else z = 0.5; 137 | 138 | if (a > BIGX) { 139 | if (dfeven==1) e = 0.0; 140 | else e = LOG_SQRT_PI; 141 | 142 | c = log(a); 143 | 144 | while (z <= x) { 145 | e = log(z) + e; 146 | s += ex(c * z - a - e); 147 | z += 1.0; 148 | } 149 | return (s); 150 | } else { 151 | if (dfeven==1) e = 1.0; 152 | else e = (I_SQRT_PI / sqrt(a)); 153 | c = 0.0; 154 | while (z <= x) { 155 | e = e * (a / z); 156 | c = c + e; 157 | z += 1.0; 158 | } 159 | return (c * y + s); 160 | } 161 | } else { 162 | return s; 163 | } 164 | } 165 | 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # djent 2 | djent is a reimplementation of the Fourmilab/John Walker random number test program ent. 3 | 4 | The improvements are: 5 | 6 | * Multiple input file names can be provided at once. This works nicely with the CSV format output. 7 | * -h works as well as -u to get the help information. 8 | * The filename is present in CSV output 9 | * The symbol size can be any number of bits up to 32. ent was constrained to 1 or 8. 10 | * The SCC test can be either wrap-around or not wrap-around. 11 | * The SCC result can be given a lag value to get a LAG-N correlation coefficient. 12 | * A list of filenames to analyze can be read from a text file using -i filename. 13 | * Test condition details (Volts, temp, id etc.) 
can be parsed from the filename and included in output. 14 | * MCV Min Entropy is estimated in addition to Shannon Entropy. The symbol and entropy are both reported 15 | * The longest run and the symbol in the longest run are reported. For 1 bit-per-symbol analysis, a p-value is computed of the probability of a uniform random bit sequence having a longest run length equal to or less than the measured run length. 16 | 17 | ``` 18 | djent -h 19 | Usage: djent [-brRpcCuhds] [-l ] [-i ] [filename] [filename2] ... 20 | 21 | Compute statistics of random data. 22 | Author: David Johnston, dj@deadhat.com 23 | 24 | -i --inputfilelist= Read list of filenames from 25 | -p --parse_filename Extract CID, Process, Voltage and Temperature from filename. 26 | The values will be included in the output. 27 | -l --symbol_length= Treat incoming data symbols as bitlength n. Default is 8. 28 | -b --binary Treat incoming data as binary. Default bit length will be -l 1 29 | -r --byte_reverse Reverse the bit order in incoming bytes 30 | -R --word_reverse Reverse the byte order in incoming 4 byte words 31 | -c --occurrence Print symbol occurrence counts 32 | -C --longest Print symbol longest run counts 33 | -w --scc_wrap Treat data as cyclical in SCC 34 | -n --lagn= Lag gap in SCC. Default=1 35 | -f --fold Fold uppercase letters to lower case 36 | -t --terse Terse output 37 | -e --ent_exact Exactly match output format of ent 38 | -s --suppress_header Suppress the header in terse output 39 | -h or -u --help Print this text 40 | 41 | Notes 42 | * By default djent is in hex mode where it reads ascii hex data and converts it to binary to analyze. 43 | In hex mode, the symbol length defaults to 8, so normal hex files can be treated as a representation 44 | of bytes. The symbol length can be changed to any value between 1 and 32 bits using the -l option. 45 | * With the -b option djent switches to binary reads in each byte as binary with a symbol length of 1. 
46 | * To analyze ascii text instead of hex ascii, you need djent to treat each byte as a separate symbol, so 47 | use binary mode with a symbol length of 8. I.E. djent -b -l 8 48 | * By default djent treats the MSB of each byte as the first. This can be switched so that djent treats 49 | the LSB as the first bit in each byte using the -r option. 50 | * Terse output is requested using -t. This outputs in CSV format. The first line is the header. If 51 | multiple files are provided, there will be one line of CSV output per file in addition to the header. 52 | The CSV header can be suppressed with -s. 53 | * To analyze multiple files, just give multiple file names on the command line. To read data in from 54 | the command line, don't provide a filename and pipe the data in. | djent 55 | * The parse filename option -p takes four patterns from the filename to include in the output. 56 | This is so that it is easy to plot test conditions that are commonly encoded in a filename. 57 | Fields are delimited by underscores. The four patterns for CID, process, Voltage and Temperature are: 58 | _CID-_ , _PROC-_, _pV_ and _pC_ . 'p' is the decimal point. 59 | * To compute the statistics, djent builds a frequency table of the symbols. This can be displayed 60 | using the -c option. The size of this table is what limits the maximum symbol size. For each 61 | of the 2^n symbols, a 64 bit entry in a table is created. So for n=32, that's 32GBytes so the ability 62 | to handle large symbol sizes is limited by the available memory and the per process allocation limit. 63 | * The serial correlation coefficient is not wrap around by default, meaning that it does not compare 64 | the last value in the data with the first. To get wrap around behaviour, use the -w option. 65 | * The Lag-N correlation coefficient can be computed by using the -n option. This causes the SCC 66 | computation to compare each Xth symbol with the (X+n)th symbol instead of the (X+1)th symbol. 
67 | If you use wrap around with Lag-N, then the wrap around will reach n bits further into the start 68 | of the sequence. 69 | * The byte reverse option -r reverses the order of bits within each byte. The word reverse option -R 70 | reverses the order of bytes within each 32 bit word, from 3,2,1,0 to 0,1,2,3. Both -R and -r can 71 | be used together. Using -R with data that isn't a multiple of 32 bits long will cause it to be padded with 72 | zeros, which may not be what you want. A padding warning will be sent to STDERR. 73 | * Instead of providing data file names on the command line, djent can be told to read a list of files 74 | from a text file. The file must have one filename per line. Lines beginning with # will be ignored. 75 | Use the -i option to request that djent reads the file list from <filename>. 76 | 77 | Examples 78 | Print this help 79 | djent -h 80 | 81 | Analyze hex file from stdin 82 | cat datafile.hex | djent 83 | 84 | Analyze binary file 85 | djent -b datafile.bin 86 | 87 | Analyze several files with CSV output 88 | djent -t data1.hex data2.hex data3.hex 89 | 90 | Analyze ascii symbols - Read in binary and set symbol size to 8. 91 | djent -b -l 8 textfile.txt 92 | 93 | Analyze binary file with parsable filename. 94 | djent -b -t -p rawdata_CID-X23_PROC-TTFT_1p2V_25p0C_.bin 95 | ``` 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /filename_parse.c: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | djent - A reimplementation of Fourmilab's ent with several improvements. 4 | 5 | Copyright (C) 2017 David Johnston 6 | 7 | This program is free software; you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation; either version 2 of the License, or 10 | (at your option) any later version. 
#include <ctype.h>
#include <string.h>

/* Visual Studio C has no regex library, so the filename fields are located
 * with small hand-written scanners that build on Windows, Linux and macOS.
 */

/* Copy str[start..end] (both ends inclusive) into found and NUL-terminate it. */
static void copy_match(const char *str, size_t start, size_t end, char *found) {
    size_t n = end - start + 1;

    memcpy(found, str + start, n);
    found[n] = '\0';
}

/* True when pos is inside the string and holds a decimal digit. */
static int digit_at(const char *str, size_t len, size_t pos) {
    /* isdigit() is only defined for unsigned char values (or EOF). */
    return pos < len && isdigit((unsigned char)str[pos]);
}

/*
 * Find the first "_<int>p<int><unit>" field that is followed by '_' or '.'.
 *
 * The decimal point may be written 'p' or '.'.  When allow_minus is non-zero
 * a '-' is accepted in place of the first digit of the integer part.
 * Every '_' is tried as a possible start, so a '_' that ends one field can
 * also begin the next one.
 *
 * Returns 1 and writes the match (leading '_' and trailing '_' or '.'
 * included) to found, or returns 0 and leaves found untouched.
 */
static int find_decimal_field(const char *str, char *found, char unit, int allow_minus) {
    size_t len;
    size_t start;
    size_t p;

    if (str == NULL || found == NULL) return 0;
    len = strlen(str);

    for (start = 0; start < len; start++) {
        if (str[start] != '_') continue;
        p = start + 1;

        /* integer part: first character, then any further digits */
        if (!(digit_at(str, len, p) ||
              (allow_minus && p < len && str[p] == '-'))) continue;
        p++;
        while (digit_at(str, len, p)) p++;

        /* decimal point */
        if (p >= len || (str[p] != 'p' && str[p] != '.')) continue;
        p++;

        /* fractional part: at least one digit */
        if (!digit_at(str, len, p)) continue;
        while (digit_at(str, len, p)) p++;

        /* unit letter */
        if (p >= len || str[p] != unit) continue;
        p++;

        /* terminator: '_' or '.', so both 1p0V_.bin and 1p0V.bin work */
        if (p >= len || (str[p] != '_' && str[p] != '.')) continue;

        copy_match(str, start, p, found);
        return 1;
    }
    return 0;
}

/*
 * Find the first "_<tag><id>_" field, where <id> is one or more characters
 * other than '_'.  tag must not itself contain '_'.
 *
 * Returns 1 and writes the match (both underscores included) to found, or
 * returns 0 and leaves found untouched.
 */
static int find_tagged_field(const char *str, char *found, const char *tag) {
    size_t len;
    size_t taglen;
    size_t start;
    size_t p;

    if (str == NULL || found == NULL) return 0;
    len = strlen(str);
    taglen = strlen(tag);

    for (start = 0; start < len; start++) {
        if (str[start] != '_') continue;
        p = start + 1;

        /* strncmp stops at the terminating NUL, so a short tail is safe */
        if (strncmp(str + p, tag, taglen) != 0) continue;
        p += taglen;

        /* the ID needs at least one character */
        if (p >= len || str[p] == '_') continue;
        while (p < len && str[p] != '_') p++;

        /* Ran off the end: there is no '_' after start, so nothing can match. */
        if (p >= len) break;

        copy_match(str, start, p, found);
        return 1;
    }
    return 0;
}

/*
 * Look for the voltage pattern _<int>p<int>V_ in str (the trailing '_' may
 * also be '.').  On success the matched text, delimiters included, is copied
 * to found and 1 is returned; otherwise 0 is returned.
 *
 * found is not bounds-checked: it must have room for the match plus a NUL,
 * which is never more than strlen(str)+1 bytes.
 */
int find_vpattern(char *str,char *found) {
    return find_decimal_field(str, found, 'V', 0);
}

/*
 * Look for the temperature pattern _<int>p<int>C_ in str (the trailing '_'
 * may also be '.').  A '-' is accepted as the first character of the integer
 * part.  On success the matched text, delimiters included, is copied to found
 * and 1 is returned; otherwise 0 is returned.
 *
 * found is not bounds-checked: it must have room for the match plus a NUL,
 * which is never more than strlen(str)+1 bytes.
 */
int find_tpattern(char *str,char *found) {
    return find_decimal_field(str, found, 'C', 1);
}

/*
 * Look for the component ID pattern _CID-<id>_ in str.  On success the
 * matched text, both underscores included, is copied to found and 1 is
 * returned; otherwise 0 is returned.
 *
 * found is not bounds-checked: it must have room for the match plus a NUL,
 * which is never more than strlen(str)+1 bytes.
 */
int find_cidpattern(char *str,char *found) {
    return find_tagged_field(str, found, "CID-");
}

/*
 * Look for the process pattern _PROC-<id>_ in str.  On success the matched
 * text, both underscores included, is copied to found and 1 is returned;
 * otherwise 0 is returned.
 *
 * found is not bounds-checked: it must have room for the match plus a NUL,
 * which is never more than strlen(str)+1 bytes.
 */
int find_procpattern(char *str,char *found) {
    return find_tagged_field(str, found, "PROC-");
}
{ 384 | fprintf(stderr,"Regex error scanning for _CID-__:\n"); 385 | } 386 | 387 | if (find_procpattern(filename,match)) { 388 | match[strlen(match)-1]=0x00; 389 | sscanf(match,"_PROC-%s",(char *)&process); 390 | } else { 391 | fprintf(stderr,"Regex error scanning for _PROC-__:\n"); 392 | } 393 | } 394 | 395 | 396 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 
27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /markov2p.c: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | djrandom - A utility to generate random numbers. 4 | 5 | Copyright (C) 2017 David Johnston 6 | 7 | This program is free software; you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation; either version 2 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 
16 | 17 | You should have received a copy of the GNU General Public License along 18 | with this program; if not, write to the Free Software Foundation, Inc., 19 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 20 | 21 | ----- 22 | 23 | Contact. David Johnston dj@deadhat.com 24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | //#include "djenrandommodel.h" 38 | #include "markov2p.h" 39 | 40 | #define KNRM "\x1B[0m" 41 | #define KRED "\x1B[31m" 42 | #define KGRN "\x1B[32m" 43 | #define KYEL "\x1B[33m" 44 | #define KBLU "\x1B[34m" 45 | #define KMAG "\x1B[35m" 46 | #define KCYN "\x1B[36m" 47 | #define KWHT "\x1B[37m" 48 | 49 | 50 | // A library for coverting between points, scc, bias and entropy 51 | // with the 2 parameter markov model. 52 | 53 | extern int verbose_mode; 54 | 55 | char msymboltext[255]; 56 | 57 | void print_symbol(uint64_t x, int bitwidth) { 58 | int i; 59 | 60 | for(i=0;i> (bitwidth-1-i)) & 0x01)==0) msymboltext[i]='0'; 62 | else msymboltext[i]='1'; 63 | } 64 | msymboltext[bitwidth]=(char)0; 65 | } 66 | 67 | // Make two probability density functions for all the 2^bitwidth symbols 68 | // One for when the previous bit is 0, one for when it is 1. 
69 | void make_pdf(double p01, double p10, int bitwidth, double *table0, double *table1) { 70 | double p00; 71 | double p11; 72 | double plist0; 73 | double plist1; 74 | int bp; 75 | int x; 76 | int i; 77 | double sum0 = 0.0; 78 | double sum1 = 0.0; 79 | 80 | p00 = 1.0-p01; 81 | p11 = 1.0-p10; 82 | 83 | // For each symbol 84 | for (x=0;x<(1 << bitwidth);x++) { 85 | //fprintf(stderr," MAKE_PDF symbol %02x \n",x); 86 | if ((p01==0.5) && (p10==0.5)){ 87 | table0[x] = 1.0/(1<> i) & 0x3); // Get the bit pair 125 | if (bp==0) { 126 | plist0 *= p00; 127 | plist1 *= p00; 128 | //if (verbose_mode==1) { 129 | // if ((x==0xaa) || (x==0xa9)) { 130 | // fprintf(stderr,"plist0_%d = %1.4f plist1_%d = %1.4f\n",0,p00,0,p00); 131 | // } 132 | //} 133 | } else if (bp==1) { 134 | plist0 *= p10; 135 | plist1 *= p10; 136 | //if (verbose_mode==1) { 137 | // if ((x==0xaa) || (x==0xa9)) { 138 | // fprintf(stderr,"plist0_%d = %1.4f plist1_%d = %1.4f\n",0,p10,0,p10); 139 | // } 140 | //} 141 | } else if (bp==2) { 142 | plist0 *= p01; 143 | plist1 *= p01; 144 | //if (verbose_mode==1) { 145 | // if ((x==0xaa) || (x==0xa9)) { 146 | // fprintf(stderr,"plist0_%d = %1.4f plist1_%d = %1.4f\n",0,p01,0,p01); 147 | // } 148 | //} 149 | } else if (bp==3) { 150 | plist0 *= p11; 151 | plist1 *= p11; 152 | //if (verbose_mode==1) { 153 | // if ((x==0xaa) || (x==0xa9)) { 154 | // fprintf(stderr,"plist0_%d = %1.4f plist1_%d = %1.4f\n",0,p11,0,p11); 155 | // } 156 | //} 157 | } 158 | } 159 | //if (verbose_mode==1) { 160 | // if ((x==0xaa) || (x==0xa9)) { 161 | // fprintf(stderr," FINAL plist0 %1.6f plist1 %1.6f\n",plist0,plist1); 162 | // } 163 | //} 164 | table0[x] = plist0; 165 | table1[x] = plist1; 166 | //if (verbose_mode==1) { 167 | // if ((x==0xaa) || (x==0xa9)) { 168 | // fprintf(stderr," SET table0[%02x]= %1.6f table1[%02x] %1.6f\n",x,plist0,x,plist1); 169 | // } 170 | //} 171 | sum0 += plist0; 172 | sum1 += plist1; 173 | } // end if else 174 | 175 | } // end for 176 | 177 | for (i=0;i<256;i++) { 178 
| table0[i] = table0[i]/sum0; 179 | table1[i] = table1[i]/sum1; 180 | } 181 | 182 | //if (verbose_mode==1) { 183 | // fprintf(stderr, "END MAKE_PDF() pdf_table0[a9]=%1.6f\n",table0[0xa9]); 184 | // fprintf(stderr, "END MAKE_PDF() pdf_table0[aa]=%1.6f\n",table0[0xaa]); 185 | // fprintf(stderr, "END MAKE_PDF() pdf_table1[a9]=%1.6f\n",table1[0xa9]); 186 | // fprintf(stderr, "END MAKE_PDF() pdf_table1[aa]=%1.6f\n",table1[0xaa]); 187 | //} 188 | } 189 | 190 | // Make two cumulative density functions for all the 2^bitwidth symbols 191 | // One for when the previous bit is 0, one for when it is 1. 192 | void make_cdf(double p01, double p10, int bitwidth, double *table0, double *table1) { 193 | double p00; 194 | double p11; 195 | double plist0; 196 | double plist1; 197 | int bp; 198 | int x; 199 | int i; 200 | 201 | p00 = 1.0-p01; 202 | p11 = 1.0-p10; 203 | 204 | // For each symbol 205 | for (x=0;x<(1 << bitwidth);x++) { 206 | 207 | if ((p01==0.5) && (p10==0.5)){ 208 | if (x==0) { 209 | table0[x] = 1.0/(1<>i) & 0x3; // Get the bit pair 229 | if (bp==0) { 230 | plist0 *= p00; 231 | plist1 *= p00; 232 | } else if (bp==1) { 233 | plist0 *= p10; 234 | plist1 *= p10; 235 | } else if (bp==2) { 236 | plist0 *= p01; 237 | plist1 *= p01; 238 | } else if (bp==3) { 239 | plist0 *= p11; 240 | plist1 *= p11; 241 | } 242 | } 243 | 244 | if (x==0) { 245 | table0[x] = plist0; 246 | table1[x] = plist1; 247 | } else { 248 | table0[x] = table0[x-1]+plist0; 249 | table1[x] = table1[x-1]+plist1; 250 | } 251 | } 252 | 253 | } 254 | 255 | double max0=table0[((1 << bitwidth)-1)]; 256 | double max1=table1[((1 << bitwidth)-1)]; 257 | for (i=0;i<(1<1) && ((i+1) % 8 ==0)) fprintf(stderr,"\n"); 315 | // } 316 | // 317 | // fprintf(stderr,"\n"); 318 | // fprintf(stderr,"table0 sum == %f\n",sum); 319 | //} 320 | 321 | //if (verbose_mode==1) { 322 | // sum = 0.0; 323 | // fprintf(stderr,"PDF Table 1 ==\n"); 324 | // for (i=0;i<(1<1) && ((i+1) % 8 ==0)) fprintf(stderr,"\n"); 328 | // } 329 | // 
fprintf(stderr,"\n"); 330 | // fprintf(stderr,"table1 sum == %f\n",sum); 331 | //} 332 | 333 | 334 | st0 = (int *)malloc(sizeof(int)*(1 << 20)); 335 | st1 = (int *)malloc(sizeof(int)*(1 << 20)); 336 | 337 | if ((st0==0) || (st1==0)) { 338 | fprintf(stderr,"Error, could not allocate 1M int sample tables for Markov generator\n"); 339 | exit(1); 340 | } 341 | 342 | // populate the 1M table with symbols according to the CDF. 343 | // Do this by identifying the boundaries between the runs 344 | // of the same symbols and filling in the symbols up to the 345 | // boundary. 346 | double floatpos; 347 | int baseindex=0; 348 | for (x=0;x<256;x++) { 349 | floatpos = table0[x]; 350 | index = (int)(floatpos*(1<<20)); 351 | for (i=baseindex;i ((1 << 20)-1)) index=index; //fprintf(stderr,"ERROR, st0 table index too large"); 369 | // else st0[index]=x; 370 | // index++; 371 | // } 372 | //} 373 | // 374 | //index = 0; 375 | //for (x=0;x<(1 << bitwidth); x++) { 376 | // for (i=0; i<(int)(table1[x]*(1 << 20)); i++) { 377 | // if (index > ((1 << 20)-1)) index=index; //fprintf(stderr,"ERROR, st1 table index too large"); 378 | // else st1[index]=x; 379 | // index++; 380 | // } 381 | //} 382 | 383 | 384 | free(table0); 385 | free(table1); 386 | 387 | *sampletable0 = st0; 388 | *sampletable1 = st1; 389 | } 390 | 391 | void free_sample_table(int *sampletable0, int *sampletable1) { 392 | free(sampletable0); 393 | free(sampletable1); 394 | } 395 | 396 | 397 | 398 | // Compute the min entropy per symbol for the 399 | // markov 2 parameter model, given the markov model 400 | // parameters p01 and p10. 
401 | double symbol_prob(double p01, double p10, uint64_t x, int bitwidth) { 402 | double p00; 403 | double p11; 404 | double mu; 405 | double p0; 406 | double p1; 407 | double plist0; 408 | double plist1; 409 | int bp; 410 | double p; 411 | 412 | int i; 413 | 414 | plist0=1.0; 415 | plist1=1.0; 416 | 417 | p00 = 1.0-p01; 418 | p11 = 1.0-p10; 419 | mu = p01/(p10+p01); 420 | p0 = 1.0-mu; 421 | p1 = mu; 422 | 423 | print_symbol(x,bitwidth); 424 | //fprintf(stderr," SYMBOL PROB p01=%f, p10=%f, x=%" PRIx64 " = b%s bitwidth=%d\n",p01,p10,x,symboltext,bitwidth); 425 | //fprintf(stderr," P01 = %f\n", p01); 426 | //fprintf(stderr," P10 = %f\n", p10); 427 | //fprintf(stderr," P00 = %f\n", p00); 428 | //fprintf(stderr," P11 = %f\n", p11); 429 | //fprintf(stderr," mu = %f\n", mu); 430 | //fprintf(stderr," P0 = %f\n", p0); 431 | //fprintf(stderr," P1 = %f\n", p1); 432 | 433 | if ((p01==0.5) && (p10==0.5)) return 1.0; 434 | 435 | plist0 = 1.0; 436 | plist1 = 1.0; 437 | 438 | if ((x>>(bitwidth-1) & 0x1)==0) { 439 | plist0 *= p00; 440 | plist1 *= p10; 441 | } 442 | else { 443 | plist0 *= p01; 444 | plist1 *= p11; 445 | } 446 | 447 | //fprintf(stderr," plist0=%f ",plist0); 448 | //fprintf(stderr," plist1=%f\n",plist1); 449 | 450 | for (i=0;i<(bitwidth-2);i++) { 451 | bp = ((x >> (bitwidth-2-i)) & 0x3); // Get the bit pair 452 | //fprintf(stderr," bitpair %d = %d ",i,bp); 453 | if (bp==0) { 454 | plist0 *= p00; 455 | plist1 *= p00; 456 | //fprintf(stderr," plist0=%f * p00(%f) ",plist0,p00); 457 | //fprintf(stderr," plist1=%f * p00(%f)\n",plist1,p00); 458 | } else if (bp==1) { 459 | plist0 *= p01; 460 | plist1 *= p01; 461 | //fprintf(stderr," plist0=%f * p01(%f) ",plist0,p01); 462 | //fprintf(stderr," plist1=%f * p01(%f)\n",plist1,p01); 463 | } else if (bp==2) { 464 | plist0 *= p10; 465 | plist1 *= p10; 466 | //fprintf(stderr," plist0=%f * p10(%f) ",plist0,p10); 467 | //fprintf(stderr," plist1=%f * p10(%f)\n",plist1,p10); 468 | } else if (bp==3) { 469 | plist0 *= p11; 470 | plist1 
*= p11; 471 | //fprintf(stderr," plist0=%f * p11(%f) ",plist0,p11); 472 | //fprintf(stderr," plist1=%f * p11(%f)\n",plist1,p11); 473 | } 474 | 475 | 476 | } 477 | 478 | p = (p0 * plist0) + (p1 * plist1); 479 | 480 | //fflush(stdout); 481 | return p; 482 | 483 | } 484 | 485 | double max(double x, double y) { 486 | if (x>y) return x; 487 | if (y>x) return y; 488 | return x; 489 | } 490 | 491 | uint64_t mk_symbol(int prefix, int tbp, int postfix, int bitwidth) { 492 | int rep; 493 | int i; 494 | 495 | uint64_t pattern; 496 | 497 | rep = (bitwidth-2)/2; 498 | pattern = prefix; 499 | 500 | for(i=0;i= p000) && (p111 >= p101) && (p111 >= p010)) { 550 | return P111_MAX; 551 | } 552 | else if ((p000 >= p111) && (p000 >= p101) && (p000 >= p010)) { 553 | return P000_MAX; 554 | } 555 | else if ((p101 >= p111) && (p101 >= p000) && (p101 >= p010)) { 556 | return P101_MAX; 557 | } 558 | else if ((p010 >= p111) && (p010 >= p000) && (p010 >= p101)) { 559 | return P010_MAX; 560 | } 561 | 562 | return EQUIPROBABLE; 563 | 564 | } 565 | 566 | uint64_t most_probable_symbol_odd(double p01, double p10,int bitwidth) { 567 | uint64_t mps; 568 | int i; 569 | 570 | if (most_probable_transition_pair(p01, p10) == P000_MAX) { 571 | mps = 0; 572 | } else if (most_probable_transition_pair(p01, p10) == P111_MAX) { 573 | for (i=0; i<((bitwidth-1)>>1); i++) { 574 | mps = mps << 2; 575 | mps = mps + 3; 576 | } 577 | mps = mps << 1; 578 | mps = mps + 1; 579 | } else if (most_probable_transition_pair(p01, p10) == P010_MAX) { 580 | for (i=0; i<((bitwidth-1)>>1); i++) { 581 | mps = mps << 2; 582 | mps = mps + 1; 583 | } 584 | mps = mps << 1; 585 | mps = mps + 0; 586 | } else if (most_probable_transition_pair(p01, p10) == P101_MAX) { 587 | for (i=0; i<((bitwidth-1)>>1); i++) { 588 | mps = mps << 2; 589 | mps = mps + 2; 590 | } 591 | mps = mps << 1; 592 | mps = mps + 1; 593 | } else { // Equiprobable case, any value will do. 
594 | mps = 0; 595 | } 596 | return mps; 597 | } 598 | 599 | uint64_t most_probable_symbol_even(double p01, double p10,int bitwidth) { 600 | uint64_t mps; 601 | int i; 602 | double p00; 603 | double p11; 604 | //double p0; 605 | //double p1; 606 | 607 | //double mu; 608 | 609 | //mu = p01/(p10+p01); 610 | //p0 = 1.0-mu; 611 | //p1 = mu; 612 | 613 | p00 = 1.0 - p01; 614 | p11 = 1.0 - p10; 615 | 616 | mps = 0; 617 | 618 | if (most_probable_transition_pair(p01, p10) == P000_MAX) { 619 | mps = 0; 620 | } else if (most_probable_transition_pair(p01, p10) == P111_MAX) { 621 | for (i=0; i<(bitwidth >> 1); i++) { 622 | mps = mps << 2; 623 | mps = mps + 3; 624 | } 625 | } else if (most_probable_transition_pair(p01, p10) == P010_MAX) { 626 | for (i=0; i<((bitwidth-2) >> 1); i++) { 627 | mps = mps << 2; 628 | mps = mps + 1; 629 | } 630 | mps = mps << 2; 631 | if (p01 > p00) { 632 | mps = mps + 1; 633 | } else { 634 | mps = mps + 0; 635 | } 636 | 637 | } else if (most_probable_transition_pair(p01, p10) == P101_MAX) { 638 | for (i=0; i<((bitwidth-2) >> 1); i++) { 639 | mps = mps << 2; 640 | mps = mps + 2; 641 | } 642 | mps = mps << 2; 643 | if (p11 > p10) { 644 | mps = mps + 3; 645 | } else { 646 | mps = mps + 2; 647 | } 648 | } else { // Equiprobable case, any value will do. 
649 | mps = 0; 650 | } 651 | return mps; 652 | } 653 | 654 | uint64_t most_probable_symbol(double p01, double p10,int bitwidth) { 655 | uint64_t mps; 656 | 657 | if ((bitwidth & 0x01)==0x01) 658 | mps = most_probable_symbol_odd(p01,p10,bitwidth); 659 | else 660 | mps = most_probable_symbol_even(p01,p10,bitwidth); 661 | 662 | 663 | if (verbose_mode>1) fprintf(stderr," MCV = 0x%" PRIx64 " \n",mps); 664 | return mps; 665 | 666 | } 667 | 668 | double symbol_max_probability(double p01, double p10,int bitwidth,uint64_t *mcv) { 669 | double mu; 670 | double p00; 671 | double p11; 672 | double p0; 673 | double p1; 674 | uint64_t mps; 675 | 676 | double p_0mps; 677 | double p_1mps; 678 | double p_mps; 679 | 680 | int bits[65]; 681 | int i; 682 | int j; 683 | 684 | for (i=0;i<65;i++) bits[i] = 0; 685 | 686 | mu = p01/(p10+p01); 687 | p0 = 1.0-mu; 688 | p1 = mu; 689 | 690 | p00 = 1.0 - p01; 691 | p11 = 1.0 - p10; 692 | 693 | mps = most_probable_symbol(p01,p10,bitwidth); 694 | *mcv = mps; 695 | 696 | // unpack the symbol bits into an array of bits 697 | bits[0] = 0; // first with x[-1]=0 698 | for (i=0; i> (bitwidth-1-i)) & 0x01; 700 | } 701 | 702 | if (verbose_mode>1) { 703 | fprintf(stderr," unrolled bits 0 prefix = "); 704 | for(j=0;j<(bitwidth+1);j++) { 705 | fprintf(stderr,"%d",bits[j]); 706 | } 707 | fprintf(stderr,"\n"); 708 | } 709 | 710 | // Compute the symbol probability by going through the 711 | // bits and multiplying the transition probabilities. 
712 | p_0mps = 1.0; 713 | if (verbose_mode>1) fprintf(stderr," Prob = 1.0"); 714 | for (i=0;i1) fprintf(stderr, " * P00"); 718 | } 719 | else if ((bits[i]==0) && (bits[i+1]==1)) { 720 | p_0mps = p_0mps * p01; 721 | if (verbose_mode>1) fprintf(stderr, " * P01"); 722 | } 723 | else if ((bits[i]==1) && (bits[i+1]==0)) { 724 | p_0mps = p_0mps * p10; 725 | if (verbose_mode>1) fprintf(stderr, " * P10"); 726 | } 727 | else if ((bits[i]==1) && (bits[i+1]==1)) { 728 | p_0mps = p_0mps * p11; 729 | if (verbose_mode>1) fprintf(stderr, " * P11"); 730 | } 731 | } 732 | if (verbose_mode>1) fprintf(stderr,"\n"); 733 | 734 | 735 | bits[0] = 1; // then with x[-1]=1 736 | 737 | if (verbose_mode>1) { 738 | fprintf(stderr," unrolled bits 1 prefix = "); 739 | for(j=0;j<(bitwidth+1);j++) { 740 | fprintf(stderr,"%d",bits[j]); 741 | } 742 | fprintf(stderr,"\n"); 743 | } 744 | 745 | p_1mps = 1.0; 746 | if (verbose_mode>1) fprintf(stderr," Prob = 1.0"); 747 | for (i=0;i1) fprintf(stderr, " * P00"); 751 | } 752 | else if ((bits[i]==0) && (bits[i+1]==1)) { 753 | p_1mps = p_1mps * p01; 754 | if (verbose_mode>1) fprintf(stderr, " * P01"); 755 | } 756 | else if ((bits[i]==1) && (bits[i+1]==0)) { 757 | p_1mps = p_1mps * p10; 758 | if (verbose_mode>1) fprintf(stderr, " * P10"); 759 | } 760 | else if ((bits[i]==1) && (bits[i+1]==1)) { 761 | p_1mps = p_1mps * p11; 762 | if (verbose_mode>1) fprintf(stderr, " * P11"); 763 | } 764 | } 765 | if (verbose_mode>1) fprintf(stderr,"\n"); 766 | 767 | if (verbose_mode>1) { 768 | fprintf(stderr," %sMCV BITS = ",KRED); 769 | for (i=0; i x-epsilon) && (y 1) { 820 | // fprintf(stderr," rand1 %04x\n", rand1); 821 | // fprintf(stderr," rand2 %04x\n", rand2); 822 | // fprintf(stderr," chosen_param %04x\n", chosen_param); 823 | // fprintf(stderr," chosen_side %04x\n", chosen_side); 824 | // } 825 | // 826 | // if (chosen_param==0) { 827 | // *p01 = (double)chosen_side; 828 | // *p10 = get_rand_double(rngstate); 829 | // } 830 | // else { 831 | // *p10 = 
(double)chosen_side; 832 | // *p01 = get_rand_double(rngstate); 833 | // } 834 | // edge_entropy=p_to_entropy(*p01, *p10, bitwidth, &mcv_prob, &mcv); 835 | // 836 | // } while (edge_entropy > desired); 837 | // 838 | // startpoint01 = 0.5; 839 | // startpoint10 = 0.5; 840 | // endpoint01 = *p01; 841 | // endpoint10 = *p10; 842 | // 843 | // choice01 = (startpoint01 + endpoint01)/2.0; 844 | // choice10 = (startpoint10 + endpoint10)/2.0; 845 | // Hc = p_to_entropy(choice01, choice10, bitwidth, &mcv_prob, &mcv); 846 | // 847 | // if (verbose_mode > 1) { 848 | // fprintf(stderr,"PICKING for entropy %f\n", desired); 849 | // fprintf(stderr," bitwidth %d\n", bitwidth); 850 | // fprintf(stderr," first startpoint01 %f\n", startpoint01); 851 | // fprintf(stderr," first startpoint10 %f\n", startpoint10); 852 | // fprintf(stderr," first endpoint01 %f\n", endpoint01); 853 | // fprintf(stderr," first endpoint10 %f\n", endpoint10); 854 | // fprintf(stderr," first mid P01 = %f\n", choice01); 855 | // fprintf(stderr," first mid P10 = %f\n", choice10); 856 | // fprintf(stderr," start Hc %f\n", Hc); 857 | // } 858 | // 859 | // fflush(stdout); 860 | // 861 | // while (!near(Hc, desired, epsilon)) { 862 | // if (verbose_mode>1) fprintf(stderr,"WHILE ...\n"); 863 | // if (Hc > desired) { 864 | // startpoint01 = choice01; 865 | // startpoint10 = choice10; 866 | // } 867 | // else { 868 | // endpoint01 = choice01; 869 | // endpoint10 = choice10; 870 | // } 871 | // choice01 = (startpoint01 + endpoint01)/2.0; 872 | // choice10 = (startpoint10 + endpoint10)/2.0; 873 | // 874 | // if (verbose_mode > 1) { 875 | // fprintf(stderr," bitwidth %d\n", bitwidth); 876 | // fprintf(stderr," startpoint01 %f\n", startpoint01); 877 | // fprintf(stderr," startpoint10 %f\n", startpoint10); 878 | // fprintf(stderr," endpoint01 %f\n", endpoint01); 879 | // fprintf(stderr," endpoint10 %f\n", endpoint10); 880 | // fprintf(stderr," mid P01 = %f\n", choice01); 881 | // fprintf(stderr," mid P10 = %f\n", 
choice10); 882 | // } 883 | // Hc = p_to_entropy(choice01,choice10,bitwidth,&mcv_prob, &mcv); 884 | // if (verbose_mode > 1) { 885 | // fprintf(stderr," Hc = %f\n", Hc); 886 | // fprintf(stderr," %sMCV Probability = %f%s\n",KCYN,mcv_prob,KWHT); 887 | // fflush(stdout); 888 | // } 889 | // } 890 | // 891 | // if (verbose_mode >1) { 892 | // fprintf(stderr," ** Chose P01 = %f\n", choice01); 893 | // fprintf(stderr," ** Chose P10 = %f\n", choice10); 894 | // } 895 | // *p01 = choice01; 896 | // *p10 = choice10; 897 | // 898 | //} 899 | 900 | 901 | -------------------------------------------------------------------------------- /djent.c: -------------------------------------------------------------------------------- 1 | /* 2 | djent - A reimplementation of Fourmilab's ent with several improvements. 3 | 4 | Copyright (C) 2017 David Johnston 5 | 6 | This program is free software; you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation; either version 2 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; if not, write to the Free Software Foundation, Inc., 18 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 19 | 20 | ----- 21 | 22 | Contact. David Johnston dj@deadhat.com 23 | */ 24 | 25 | /* 0 for no messages. 
*/ 26 | #define DEBUG 10 27 | 28 | #define __STDC_FORMAT_MACROS 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "markov2p.h" 39 | #include "filename_parse.h" 40 | #include "mathy_things.h" 41 | #include "longest_run_cdf.h" 42 | 43 | /* #include */ 44 | 45 | 46 | #ifdef _WIN32 47 | /* #include "vsdjent/stdafx.h" */ 48 | #include "ya_getopt/ya_getopt.h" /* NOTE: VS2015 goes not have getopt. Put ya_getopt in the directory. From here https://github.com/kubo/ya_getopt */ 49 | #else 50 | #include 51 | #include 52 | #define errno_t int 53 | #endif 54 | 55 | #define MAX_ERROR_MSG 0x1000 56 | #define QUEUESIZE 4096 57 | #define BUFFSIZE 2048 58 | 59 | #ifndef M_PI 60 | #define M_PI 3.1415926535897932384626 61 | #endif 62 | 63 | unsigned char buffer[BUFFSIZE]; 64 | unsigned char buffer2[BUFFSIZE+4]; 65 | unsigned char queue[QUEUESIZE]; 66 | unsigned int queue_start; /* FIFO pointers */ 67 | unsigned int queue_end; 68 | size_t queue_size; 69 | 70 | int verbose_mode =0; 71 | 72 | unsigned int buffer2_size; 73 | 74 | unsigned int current_byte; 75 | unsigned int bits_used_from_byte; 76 | unsigned int got_byte; 77 | int64_t current_symbol; 78 | unsigned int bits_in_current_symbol; 79 | int outcount; 80 | uint64_t scc_fifo[256]; 81 | uint64_t scc_first_lagn[256]; 82 | 83 | uint64_t symbol_count; 84 | uint64_t mean_total; 85 | 86 | int terse; 87 | int ent_exact; 88 | int suppress_header; 89 | uint64_t filebytes; 90 | 91 | double voltage; 92 | double temperature; 93 | unsigned char deviceid[256]; 94 | unsigned char process[256]; 95 | unsigned char processing[256]; 96 | 97 | int opt; 98 | unsigned int symbol_length; 99 | int hexmode; 100 | int print_occurrence; 101 | int print_longest; 102 | int fold; 103 | int lagn; 104 | int byte_reverse; 105 | int word_reverse; 106 | int parse_filename; 107 | 108 | int use_stdin; 109 | char *filename; 110 | FILE *fp; 111 | int terse_index; 112 | int 
not_eof; 113 | int64_t symbol; 114 | double markov_entropy; 115 | 116 | char inputlistfilename[256]; 117 | int using_inputlistfile; 118 | 119 | double ent; 120 | 121 | uint64_t occurrence_size; 122 | uint64_t *occurrence_count; 123 | uint64_t occurrence_total; 124 | int no_occurrence_space; 125 | 126 | uint64_t longest_size; 127 | uint64_t longest_position; 128 | uint64_t longest_new_pos; 129 | uint64_t longest_byte_pos; 130 | uint64_t *longest_count = (uint64_t*)0; 131 | uint64_t longest_total; 132 | int no_longest_space; 133 | 134 | uint64_t longest_last_symbol; 135 | uint64_t longest_run; 136 | uint64_t longest_longest; 137 | uint64_t longest_longest_symbol; 138 | 139 | double chisq; 140 | double chisq_sum; 141 | double *chisq_prob; 142 | 143 | uint64_t mp; 144 | uint64_t monty_total_count; 145 | uint64_t monty_inside_count; 146 | 147 | double radiussquared; 148 | double position_x; 149 | double position_y; 150 | double montepi; 151 | uint64_t monte[6]; 152 | 153 | 154 | uint64_t count1; 155 | uint64_t count0; 156 | 157 | uint64_t count00; 158 | uint64_t count01; 159 | uint64_t count10; 160 | uint64_t count11; 161 | 162 | uint64_t symbol_mask; 163 | 164 | uint64_t t1; 165 | uint64_t t2; 166 | uint64_t t3; 167 | 168 | int scc_first; 169 | uint64_t first_symbol; 170 | uint64_t scc_previous; 171 | uint64_t scc_count; 172 | int scc_wrap; 173 | 174 | uint64_t aeqb_count; 175 | uint64_t mean_count; 176 | double other_scc; 177 | 178 | double result_mean; 179 | uint64_t result_chisq_count; 180 | double result_chisq_distribution; 181 | double result_chisq_percent; 182 | double result_entropy; 183 | double result_min_entropy; 184 | uint32_t result_min_entropy_symbol; 185 | double result_pi; 186 | double result_pierr; 187 | double result_compression; 188 | double result_scc; 189 | double result_p01; 190 | double result_p10; 191 | double result_longest_pvalue; 192 | 193 | const unsigned char byte_reverse_table[] = { 194 | 
0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0, 195 | 0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8, 196 | 0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4, 197 | 0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC, 198 | 0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2, 199 | 0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA, 200 | 0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6, 201 | 0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE, 202 | 0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1, 203 | 0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9, 204 | 0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5, 205 | 0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD, 206 | 0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3, 207 | 0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB, 208 | 0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7, 209 | 0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF 210 | }; 211 | 212 | 213 | void update_monte_carlo(unsigned char symbol); 214 | 215 | void display_usage() { 216 | fprintf(stderr, "Usage: djent [-brRpcCuhds] [-l ] [-i ] [filename] [filename2] ...\n"); 217 | fprintf(stderr, "\n"); 218 | fprintf(stderr, "Compute statistics of random data.\n"); 219 | fprintf(stderr, " Author: David Johnston, dj@deadhat.com\n"); 220 | fprintf(stderr, "\n"); 221 | 222 | fprintf(stderr, " -i --inputfilelist= Read list of filenames from \n"); 223 | fprintf(stderr, " -p --parse_filename Extract CID, Process, Voltage and Temperature from filename.\n"); 224 | fprintf(stderr, " The 
values will be included in the output.\n"); 225 | fprintf(stderr, " -l --symbol_length= Treat incoming data symbols as bitlength n. Default is 8.\n"); 226 | fprintf(stderr, " -b --binary Treat incoming data as binary. Default bit length will be -l 1\n"); 227 | fprintf(stderr, " -r --byte_reverse Reverse the bit order in incoming bytes\n"); 228 | fprintf(stderr, " -R --word_reverse Reverse the byte order in incoming 4 byte words\n"); 229 | fprintf(stderr, " -c --occurrence Print symbol occurrence counts\n"); 230 | fprintf(stderr, " -C --longest Print symbol longest run counts\n"); 231 | fprintf(stderr, " -w --scc_wrap Treat data as cyclical in SCC\n"); 232 | fprintf(stderr, " -n --lagn= Lag gap in SCC. Default=1\n"); 233 | fprintf(stderr, " -S --skip= Skip over initial symbols\n"); 234 | fprintf(stderr, " -L --substring= Analyse no more that symbols\n"); 235 | fprintf(stderr, " -f --fold Fold uppercase letters to lower case\n"); 236 | fprintf(stderr, " -t --terse Terse output\n"); 237 | fprintf(stderr, " -e --ent_exact Exactly match output format of ent\n"); 238 | fprintf(stderr, " -s --suppress_header Suppress the header in terse output\n"); 239 | fprintf(stderr, " -h or -u --help Print this text\n"); 240 | 241 | fprintf(stderr, "\n Notes\n"); 242 | fprintf(stderr, " * By default djent is in hex mode where it reads ascii hex data and converts it to binary to analyze.\n"); 243 | fprintf(stderr, " In hex mode, the symbol length defaults to 8, so normal hex files can be treated as a representation\n"); 244 | fprintf(stderr, " of bytes. The symbol length can be changed to any value between 1 and 32 bits using the -l option.\n"); 245 | fprintf(stderr, " * With the -b option djent switches to binary reads in each byte as binary with a symbol length of 1.\n"); 246 | fprintf(stderr, " * To analyze ascii text instead of hex ascii, you need djent to treat each byte as a separate symbol, so\n"); 247 | fprintf(stderr, " use binary mode with a symbol length of 8. I.E. 
djent -b -l 8 \n"); 248 | fprintf(stderr, " * By default djent treats the MSB of each byte as the first. This can be switched so that djent treats\n"); 249 | fprintf(stderr, " the LSB as the first bit in each byte using the -r option.\n"); 250 | fprintf(stderr, " * Terse output is requested using -t. This outputs in CSV format. The first line is the header. If\n"); 251 | fprintf(stderr, " multiple files are provided, there will be one line of CSV output per file in addition to the header.\n"); 252 | fprintf(stderr, " The CSV header can be suppressed with -s.\n"); 253 | fprintf(stderr, " * To analyze multiple files, just give multiple file names on the command line. To read data in from\n"); 254 | fprintf(stderr, " the command line, don't provide a filename and pipe the data in. | djent\n"); 255 | fprintf(stderr, " * The parse filename option =p picks takes four patterns from the filename to include in the output,\n"); 256 | fprintf(stderr, " This is so that it is easy to plot test conditions that are commonly encoded in a filename.\n"); 257 | fprintf(stderr, " Fields are delimited by uderscores. The four patters for CID, process, Voltage and Temperature are:\n"); 258 | fprintf(stderr, " _CID-_ , _PROC-_, _pV_ and _pC_ . 'p' is the decimal point.\n"); 259 | fprintf(stderr, " * To compute the statistics, djent builds a frequency table of the symbols. This can be displayed\n"); 260 | fprintf(stderr, " using the -c option. The size of this table is what limits the the maximum symbol size. For each\n"); 261 | fprintf(stderr, " of the 2^n symbols, a 64 bit entry in a table is created. So for n=32, that's 32GBytes so the ability\n"); 262 | fprintf(stderr, " to handle large symbol sizes is limited by the available memory and the per process allocation limit.\n"); 263 | fprintf(stderr, " * The serial correlation coefficient is not wrap around by default, meaning that it does not compare\n"); 264 | fprintf(stderr, " the last value in the data with the first. 
To get wrap around behaviour, use the -w option.\n"); 265 | fprintf(stderr, " * The Lag-N correlation coefficient can be computed by using the -n option. This causes the SCC\n"); 266 | fprintf(stderr, " computation to compare each Xth symbol with the (X+n)th symbol instead of the (X+1)th symbol.\n"); 267 | fprintf(stderr, " If you use wrap around with Lag-N, then the wrap around will reach n bits further into the start\n"); 268 | fprintf(stderr, " of the sequence.\n"); 269 | fprintf(stderr, " * The byte reverse option -r reverses the order of bits within each byte. The word reverse option -R\n"); 270 | fprintf(stderr, " reverses the order of bytes within each 32 bit word, from 3,2,1,0 to 0,1,2,3. Both -R and -r can\n"); 271 | fprintf(stderr, " be used together. Using -R with a data that isn't a multiple of 32 bits long will get padded with\n"); 272 | fprintf(stderr, " zeros, which may not be what you want. A padding warning will be sent to STDERR.\n"); 273 | fprintf(stderr, " * Instead of providing data file names on the command line, djent can be told to read a list of files\n"); 274 | fprintf(stderr, " from a text file. The file must have one filename per line. 
Lines beginning with # will be ignored.\n"); 275 | fprintf(stderr, " Use the -i option to request that djent reads the file list from .\n"); 276 | 277 | fprintf(stderr, "\n Examples\n"); 278 | fprintf(stderr, " Print this help\n"); 279 | fprintf(stderr, " djent -h\n\n"); 280 | fprintf(stderr, " Analyze hex file from stdin\n"); 281 | fprintf(stderr, " cat datafile.hex | djent\n\n"); 282 | fprintf(stderr, " Analyze binary file\n"); 283 | fprintf(stderr, " djent -b datafile.bin\n\n"); 284 | fprintf(stderr, " Analyze several files with CSV output\n"); 285 | fprintf(stderr, " djent -t data1.hex data2.hex data3.hex\n\n"); 286 | fprintf(stderr, " Analyze ascii symbols - Read in binary and set symbol size to 8.\n"); 287 | fprintf(stderr, " djent -b -l 8 textfile.txt\n\n"); 288 | fprintf(stderr, " Analyze binary file with parsable filename.\n"); 289 | fprintf(stderr, " djent -b -t -p rawdata_CID-X23_PROC-TTFT_1p2V_25p0C_.bin\n"); 290 | 291 | } 292 | 293 | int count_lines_in_file(char *filename) { 294 | FILE *fp = fopen(filename,"r"); 295 | int ch=0; 296 | int lines=0; 297 | 298 | if (fp == NULL) return 0; 299 | lines++; 300 | while ((ch = fgetc(fp)) != EOF) { 301 | if ((char)ch == '\n') lines++; 302 | } 303 | fclose(fp); 304 | return lines; 305 | } 306 | 307 | 308 | /* The queue 309 | * 310 | * This implements a FIFO into which bytes are pushed from a file and 311 | * from which symbols (of the chosen size) are pulled from the other end. 312 | * Data from the file is read into buffer and that data is used to fill the 313 | * input side of the queue. The queue is twice as big as the buffer so the 314 | * buffer read is done when the queue is less than half full. 315 | * It treats bits within bytes as big endian (I.E. MSB arrived first from ES). 316 | * There will be an option to switch to little endian at some point. 
317 | */ 318 | 319 | 320 | void init_byte_queue() { 321 | int i; 322 | 323 | /* printf("Init Byte Queue\n"); */ 324 | queue_start = 0; 325 | queue_end = 0; 326 | queue_size = 0; 327 | 328 | got_byte = 0; 329 | current_byte = 0; 330 | bits_used_from_byte = 0; 331 | current_symbol = 0; 332 | bits_in_current_symbol = 0; 333 | 334 | for (i=0;i47) && (((int)hexpair[0])<58)){ /* 0-9 */ 437 | nybble = (int)hexpair[0] - 48; 438 | } 439 | else if ((((int)hexpair[0])>64) && (((int)hexpair[0])<71)){ /* A-F */ 440 | nybble = (int)hexpair[0] - 55; 441 | } 442 | else if ((((int)hexpair[0])>96) && (((int)hexpair[0])<103)){ /* a-f */ 443 | nybble = (int)hexpair[0] - 87; 444 | } 445 | 446 | nybble = nybble << 4; 447 | 448 | if ((((int)hexpair[1])>47) && (((int)hexpair[1])<58)){ /* 0-9 */ 449 | byte = nybble + (int)hexpair[1] - 48; 450 | } 451 | else if ((((int)hexpair[1])>64) && (((int)hexpair[1])<71)){ /* A-F */ 452 | byte = nybble + (int)hexpair[1] - 55; 453 | } 454 | else if ((((int)hexpair[1])>96) && (((int)hexpair[1])<103)){ /* a-f */ 455 | byte = nybble + (int)hexpair[1] - 87; 456 | } 457 | 458 | buffer[outpos++] = (unsigned char)byte; 459 | hexstate = 0; 460 | } 461 | } 462 | } while (scanpos <= len); 463 | 464 | return outpos; /* return the number of bytes converted */ 465 | } 466 | 467 | size_t fill_byte_queue(FILE *fp) { 468 | size_t len; 469 | size_t space; 470 | unsigned int i; 471 | unsigned int j; 472 | size_t total_len; 473 | // unsigned int buff2_remaining; 474 | 475 | total_len = 0; 476 | /* Pull in a loop until there is less than BUFFSIZE space in thequeue */ 477 | do { 478 | space = QUEUESIZE-queue_size; /* Dont pull more data than needed */ 479 | if (space > BUFFSIZE) space = BUFFSIZE; 480 | 481 | /* (" queue: space=%d\n",space); */ 482 | len = fread(buffer, (size_t)1,(size_t)space, fp); 483 | if (len==0) { 484 | /* printf(" queue: len = %d\n",len); */ 485 | return total_len; 486 | } 487 | 488 | /* Convert hex buffer to binary if we are in hex mode */ 489 | if 
(hexmode == 1) len = hex2bin(buffer,len); 490 | 491 | /* Fold upper case to lower */ 492 | if (fold==1) { 493 | for (i=0;i3); 546 | 547 | /* Pad any leftover */ 548 | if (buffer2_size != 0) { 549 | for (j=0;j<4;j++) { 550 | if (j < buffer2_size) { 551 | queue[(queue_end+j) % QUEUESIZE] = 0x00; 552 | update_monte_carlo(0x00); 553 | } else { 554 | if (byte_reverse == 1) { 555 | queue[(queue_end+j) % QUEUESIZE] = byte_reverse_table[buffer2[(i*4)+(3-j)]]; 556 | update_monte_carlo(byte_reverse_table[buffer2[(i*4)+(3-j)]]); 557 | } else { 558 | queue[(queue_end+j) % QUEUESIZE] = buffer2[(i*4)+(3-j)]; 559 | update_monte_carlo(buffer2[(i*4)+(3-j)]); 560 | } 561 | } 562 | } 563 | fprintf(stderr,"Warning: Padded %d extra zeroes to make 4 byte boundary for word reverse\n",buffer2_size); 564 | } 565 | buffer2_size = 0; 566 | } 567 | } while ((QUEUESIZE-queue_size) > BUFFSIZE); 568 | return total_len; 569 | } 570 | 571 | /* pull symbol length bits off the start of the queue */ 572 | int64_t get_symbol(uint64_t symbol_length) { 573 | 574 | unsigned int temp; 575 | 576 | current_symbol = 0; 577 | 578 | /* Get a byte if we don't have one */ 579 | if (got_byte == 0) { 580 | if (((queue_size*8) < symbol_length)) return -1; /* Uh oh. 
Empty */ 581 | 582 | current_byte = queue[queue_start]; 583 | queue_start = (queue_start+1) % QUEUESIZE; 584 | queue_size -= 1; 585 | bits_used_from_byte = 0; 586 | got_byte=1; 587 | } 588 | 589 | /* Move bits from current byte pulled from queue to current symbol */ 590 | if (symbol_length == 1) { /* Optimize for the single bit size case */ 591 | current_symbol = (current_byte & 0x80) >> 7; 592 | current_byte <<= 1; 593 | bits_used_from_byte++; 594 | if (bits_used_from_byte == 8) { 595 | got_byte = 0; 596 | bits_used_from_byte = 0; 597 | } 598 | return current_symbol; 599 | } else if (symbol_length == 8) { /* optimize for the byte size case */ 600 | current_symbol = current_byte; 601 | got_byte = 0; 602 | return current_symbol; 603 | } else { /* Symbol Length != 8 or 1, do it bit by bit */ 604 | /* Later maybe optimize when > 7 bits needed */ 605 | /* Take upper symbol_length bits */ 606 | bits_in_current_symbol = 0; 607 | do { 608 | temp = (current_byte & 0x80) >> 7; 609 | current_byte = (current_byte << 1) & 0xff; 610 | bits_used_from_byte++; 611 | if (bits_used_from_byte == 8) { 612 | got_byte = 0; 613 | bits_used_from_byte = 0; 614 | } 615 | current_symbol = ((current_symbol << 1) | temp) & symbol_mask; 616 | bits_in_current_symbol++; 617 | 618 | /* fetch a new byte from queue if we aren't done yet */ 619 | if (got_byte == 0) { 620 | if (((queue_size*8) < symbol_length)) return -1; /* Uh oh. 
Empty */ 621 | 622 | current_byte = queue[queue_start]; 623 | queue_start = (queue_start+1) % QUEUESIZE; 624 | queue_size -= 1; 625 | bits_used_from_byte = 0; 626 | got_byte=1; 627 | } 628 | } while (bits_in_current_symbol < symbol_length); 629 | return current_symbol; 630 | } 631 | } 632 | 633 | 634 | /* The initialize routines for the various metrics */ 635 | 636 | void init_mean() { 637 | outcount = 0; 638 | mean_total = 0; 639 | }; 640 | 641 | void init_entropy() { 642 | ent = 0.0; 643 | }; 644 | 645 | void init_occurrences() { 646 | uint64_t i; 647 | 648 | no_occurrence_space = 0; 649 | 650 | occurrence_total = 0; 651 | if (symbol_length > 32) { 652 | fprintf(stderr,"Error, symbol length cannot be longer than 32 bits for occurrence count table\n"); 653 | exit(1); 654 | } 655 | occurrence_size = ipow(2,symbol_length); 656 | fflush(stdout); 657 | occurrence_count = (uint64_t *) malloc (sizeof(uint64_t)*occurrence_size); 658 | /* printf("mallocating %lld bytes\n", (sizeof(uint64_t)*occurrence_size)); 659 | */ 660 | if (occurrence_count == NULL) { 661 | #ifdef _WIN32 662 | fprintf(stderr,"Warning, unable to allocate %lld bytes of memory for the occurrence count\n",(sizeof(uint64_t)*occurrence_size)); 663 | #elif __llvm__ 664 | fprintf(stderr,"Warning, unable to allocate %lld bytes of memory for the occurrence count\n",(sizeof(uint64_t)*occurrence_size)); 665 | #elif __linux__ 666 | fprintf(stderr,"Warning, unable to allocate %ld bytes of memory for the occurrence count\n",(sizeof(uint64_t)*occurrence_size)); 667 | #endif 668 | no_occurrence_space = 1; 669 | } 670 | 671 | for (i=0;i 32) { 682 | fprintf(stderr,"Error, symbol length cannot be longer than 32 bits for longest count table\n"); 683 | exit(1); 684 | } 685 | longest_size = ipow(2,symbol_length); 686 | fflush(stdout); 687 | 688 | longest_count = (uint64_t *) malloc (sizeof(uint64_t)*longest_size); 689 | /* printf("mallocating %lld bytes\n", (sizeof(uint64_t)*occurrence_size)); 690 | */ 691 | if 
(longest_count == NULL) { 692 | #ifdef _WIN32 693 | fprintf(stderr,"Warning, unable to allocate %lld bytes of memory for the longest run table\n",(sizeof(uint64_t)*longest_size)); 694 | #elif __llvm__ 695 | fprintf(stderr,"Warning, unable to allocate %lld bytes of memory for the longest run table\n",(sizeof(uint64_t)*longest_size)); 696 | #elif __linux__ 697 | fprintf(stderr,"Warning, unable to allocate %ld bytes of memory for the longest run table\n",(sizeof(uint64_t)*longest_size)); 698 | #endif 699 | no_longest_space = 1; 700 | } else { 701 | for (i=0;i longest_count[symbol]) { 788 | longest_count[symbol] = longest_run; 789 | } 790 | if (longest_run > longest_longest) { 791 | longest_longest = longest_run; 792 | longest_longest_symbol = symbol; 793 | longest_position = longest_new_pos; 794 | } 795 | } else { 796 | longest_run=1; 797 | longest_last_symbol=symbol; 798 | longest_new_pos = symbol_byte_pos; 799 | } 800 | 801 | } 802 | 803 | void update_chisq(uint64_t symbol) { 804 | /* Nothing to do here */ 805 | }; 806 | 807 | void update_filesize(uint64_t symbol) { 808 | /* Nothing to do here */ 809 | }; 810 | 811 | void update_monte_carlo(unsigned char symbol) { 812 | int mj; 813 | 814 | monte[mp++] = symbol; 815 | 816 | if (mp > 5) { 817 | mp = 0; 818 | monty_total_count++; 819 | position_x = 0; 820 | position_y = 0; 821 | for (mj = 0; mj < 3; mj++) { 822 | position_x = (position_x * 256.0) + monte[mj]; 823 | position_y = (position_y * 256.0) + monte[3 + mj]; 824 | } 825 | if (((position_x * position_x) + (position_y * position_y)) <= radiussquared) { 826 | monty_inside_count++; 827 | } 828 | } 829 | }; 830 | 831 | void update_compression(uint64_t symbol) { 832 | /* nothing to do here */ 833 | }; 834 | 835 | void update_scc(uint64_t symbol) { 836 | int i; 837 | if (lagn==1) { 838 | /* We need lagn+1 symbols to start, so skip the first symbol(s) */ 839 | scc_count++; 840 | 841 | if (scc_first==1) { 842 | scc_first = 0; 843 | first_symbol = symbol; 844 | } else { 
845 | t1 += (scc_previous * symbol); 846 | if (scc_previous == symbol) aeqb_count += 1; // Other SCC 847 | } 848 | mean_count += symbol; //Other SCC 849 | t2 += symbol*symbol; 850 | t3 += symbol; 851 | 852 | 853 | /* printf("symbol %02X, count=%llu t1= %llX, t2= %llx, t3= %llx\n",symbol,scc_count,t1,t2,t3); */ 854 | scc_previous = symbol; 855 | } else { /* lagn > 1 */ 856 | scc_count++; 857 | 858 | if (scc_count <= lagn) { 859 | scc_fifo[scc_count-1]=symbol; 860 | scc_first_lagn[scc_count-1]=symbol; 861 | } else { 862 | t1 += (scc_fifo[0] * symbol); 863 | if (scc_fifo[0] == symbol) aeqb_count += 1; 864 | for(i=0;i 0.0) { 911 | ent += (chisq_prob[eloop] * log10(1.0 / chisq_prob[eloop]) * 3.32192809488736234787); 912 | } 913 | } 914 | 915 | result_entropy = ent; 916 | return; 917 | 918 | if (terse == 1) printf("%f,", ent); 919 | else printf(" Shannon Entropy = %f\n", ent); 920 | }; 921 | 922 | void finalize_occurrences() { 923 | unsigned int i; 924 | unsigned int maxc; 925 | unsigned int maxsymbol; 926 | double maxp; 927 | double maxp_ent; 928 | 929 | /* Find the most frequent symbol */ 930 | maxc=0; 931 | maxsymbol=0; 932 | for (i=0;i maxc) { 934 | maxc = occurrence_count[i]; 935 | maxsymbol = i; 936 | } 937 | } 938 | 939 | //printf("maxc: %f\n",(double)maxc); 940 | //printf("occurance_size: %f\n",(double)occurrence_size); 941 | //printf("occurance_total: %f\n",(double)occurrence_total); 942 | maxp = ((double)maxc)/((double)occurrence_total); 943 | //printf("maxp: %f\n",maxp); 944 | maxp_ent = (-log10(maxp)/log10(2))/symbol_length; 945 | //printf("maxp_ent: %f\n",maxp_ent); 946 | result_min_entropy = maxp_ent; 947 | result_min_entropy_symbol = maxsymbol; 948 | 949 | if (terse != 1) { 950 | printf(" Min Entropy (by max occurrence of symbol %x) = %f\n", maxsymbol, maxp_ent); 951 | } 952 | }; 953 | 954 | void finalize_longest() { 955 | result_longest_pvalue = longest_run_cdf((unsigned int)longest_longest, (unsigned int)symbol_count); 956 | 957 | if (symbol_length != 8) 
{ 958 | longest_byte_pos = (symbol_count*symbol_length)/8; 959 | } else { 960 | longest_byte_pos = symbol_count; 961 | } 962 | 963 | } 964 | 965 | void finalize_chisq() { 966 | uint64_t i; 967 | double diff; 968 | double chisq_final_prob; 969 | 970 | double expected; 971 | expected = (double)occurrence_total / (double)occurrence_size; 972 | for (i=0; i < occurrence_size; i++) { 973 | diff = (double)(occurrence_count[i]) - expected; 974 | chisq_prob[i] = ((double)occurrence_count[i])/occurrence_total; 975 | chisq += (diff*diff)/expected; 976 | chisq_sum += (double)(i * occurrence_count[i]); 977 | } 978 | 979 | chisq_final_prob = chisqp(chisq, (occurrence_size-1)); 980 | result_chisq_count = occurrence_total; 981 | result_chisq_distribution = chisq; 982 | result_chisq_percent = chisq_final_prob * 100; 983 | 984 | return; 985 | }; 986 | 987 | void finalize_filesize() { 988 | }; 989 | 990 | void finalize_monte_carlo() { 991 | double pierr; 992 | double montepi; 993 | 994 | montepi = 4.0 * (((double)monty_inside_count) / monty_total_count); 995 | 996 | pierr = (fabs(M_PI - montepi) / M_PI)*100.0; 997 | 998 | result_pi = montepi; 999 | result_pierr = pierr; 1000 | 1001 | return; 1002 | 1003 | }; 1004 | 1005 | void finalize_compression() { 1006 | double compression; 1007 | 1008 | compression = (100.0 * (symbol_length - ent)) / symbol_length; 1009 | 1010 | result_compression = compression; 1011 | 1012 | return; 1013 | 1014 | }; 1015 | 1016 | void finalize_scc() { 1017 | double scc; 1018 | int64_t top; 1019 | int64_t bottom; 1020 | int i; 1021 | 1022 | double paeqb; 1023 | double bias; 1024 | 1025 | if (scc_wrap==1) { 1026 | if (lagn==1) { 1027 | t1 += (scc_previous * first_symbol); 1028 | t2 += first_symbol*first_symbol; 1029 | t3 += first_symbol; 1030 | } else { 1031 | for (i=0;i is valid. symbol length 1148 | * will be and newlines will be treated 1149 | * as data. 
1150 | */ 1151 | got_symbol_length = 1; 1152 | 1153 | break; 1154 | case 'i': 1155 | strncpy(inputlistfilename,optarg,255); 1156 | using_inputlistfile = 1; 1157 | break; 1158 | 1159 | case 'c': // Print out occurence count table 1160 | print_occurrence = 1; 1161 | break; 1162 | 1163 | case 'C': 1164 | print_longest = 1; // Print the longest run on symbols 1165 | break; 1166 | 1167 | case 'p': 1168 | parse_filename = 1; // Parse the filename for voltage, temp, 1169 | break; // condition and ID to include in output 1170 | 1171 | case 'r': 1172 | byte_reverse = 1; // Reverse the order of bit within bytes 1173 | break; 1174 | 1175 | case 'R': 1176 | word_reverse = 1; // reverse the order of bytes within a word 1177 | break; 1178 | 1179 | case 'w': 1180 | scc_wrap = 1; // Compute SCC, treating the data as being 1181 | break; // circular, like in Knuth's version 1182 | 1183 | case 'n': 1184 | lagn = atoi(optarg); // Compute the lagn correlation. 1185 | break; // n=1 is equivalent to normal SCC 1186 | 1187 | case 'f': 1188 | fold = 1; // Fold upper case to lower 1189 | break; 1190 | 1191 | case 't': 1192 | terse = 1; // Terse output - output as CSV 1193 | break; 1194 | 1195 | case 'e': 1196 | ent_exact = 1; // Copy the output format of 1197 | break; // John Walker's ent 1198 | 1199 | case 's': 1200 | suppress_header = 1; // Don't print the header of CSV 1201 | break; // output 1202 | 1203 | case 'S': 1204 | got_skip = 1; // Skip initial symbols 1205 | skip_amount = atoi(optarg); 1206 | break; // output 1207 | 1208 | case 'L': 1209 | got_substring = 1; // Read only symbols 1210 | substring = atoi(optarg); 1211 | break; // output 1212 | 1213 | case 'u': // Help 1214 | case 'h': /* fall-through is intentional */ 1215 | case '?': 1216 | display_usage(); 1217 | exit(0); 1218 | 1219 | default: 1220 | /* You won't actually get here. 
*/ 1221 | break; 1222 | } 1223 | 1224 | opt = getopt_long( argc, argv, optString, longOpts, &longIndex ); 1225 | } // end while 1226 | 1227 | 1228 | /* Range check the var args */ 1229 | 1230 | if ((fold==1) && (symbol_length != 8)) { 1231 | fprintf(stderr,"Error: Fold must be used with 8 bit word size\n"); 1232 | exit(1); 1233 | } 1234 | 1235 | if (symbol_length < 1) { 1236 | fprintf(stderr,"Error: Symbol length %d must not be 0 or negative. \n",symbol_length); 1237 | exit(1); 1238 | } 1239 | 1240 | //init_byte_queue(); 1241 | 1242 | /* Loop through the filenames */ 1243 | if ((optind==argc) && (using_inputlistfile==0)) { 1244 | use_stdin = 1; 1245 | } 1246 | else { 1247 | use_stdin = 0; 1248 | } 1249 | 1250 | if ((parse_filename==1) && (use_stdin==1)) { 1251 | fprintf(stderr,"Error: Can't parse filename when using stdin for input\n"); 1252 | exit(1); 1253 | } 1254 | 1255 | // skip amount must be greater than zero 1256 | if ((got_skip==1) && (skip_amount <1)) { 1257 | fprintf(stderr,"Errror: skip amount must be greater than 0\n"); 1258 | exit(1); 1259 | } 1260 | 1261 | // substring must be > 1 1262 | if ((got_substring==1) && (substring <1)) { 1263 | fprintf(stderr,"Errror: substring length must be greater than 0\n"); 1264 | exit(1); 1265 | } 1266 | 1267 | terse_index = 0; 1268 | filenumber = optind; 1269 | 1270 | char *filelist; 1271 | int lines; 1272 | int lineno; 1273 | FILE *ifp; 1274 | int filenamecount = 0; 1275 | char * res; 1276 | char line[256]; 1277 | 1278 | filelist = (char*)0; 1279 | /* build the list of filenames from the input list file */ 1280 | if (using_inputlistfile==1) { 1281 | lines = count_lines_in_file(inputlistfilename); 1282 | ifp = fopen(inputlistfilename,"r"); 1283 | if (ifp==NULL) { 1284 | fprintf(stderr,"Error: Cannot open %s for reading\n",inputlistfilename); 1285 | exit(1); 1286 | } 1287 | 1288 | filelist = (char *)malloc(sizeof(char *)*256*lines); 1289 | 1290 | if (filelist==NULL) { 1291 | fprintf(stderr,"Error: Cannot allocate 
memory for filename list from input file list file %s\n",inputlistfilename); 1292 | exit(1); 1293 | } 1294 | 1295 | for (lineno=0;lineno substring)) break; 1441 | 1442 | /* Then update the algorithms using the symbol */ 1443 | update_mean(symbol); 1444 | update_entropy(symbol); 1445 | if (no_occurrence_space == 0) update_occurrences(symbol); 1446 | if (no_longest_space == 0) update_longest(symbol, symbol_count+skip_amount); 1447 | update_chisq(symbol); 1448 | update_filesize(symbol); 1449 | /* Monte Carlo is different, it works on bytes, not symbols 1450 | * So we call the update from within the fill_byte_queue routine 1451 | */ 1452 | /* update_monte_carlo(symbol); */ 1453 | 1454 | update_compression(symbol); 1455 | update_scc(symbol); 1456 | } while (1 == 1); 1457 | 1458 | //symbol_count--; // Adjust for the fact symbol_count goes over by 1 1459 | 1460 | finalize_mean(); 1461 | if (no_occurrence_space == 0) finalize_occurrences(); 1462 | if (no_longest_space == 0) finalize_longest(); 1463 | finalize_chisq(); 1464 | finalize_entropy(); 1465 | finalize_filesize(); 1466 | finalize_monte_carlo(); 1467 | finalize_compression(); 1468 | finalize_scc(); 1469 | compute_markov(); 1470 | 1471 | if (symbol_length != 8){ 1472 | longest_byte_pos = (longest_position*symbol_length)/8; 1473 | } else { 1474 | longest_byte_pos = longest_position; 1475 | } 1476 | 1477 | if (terse == 1) { 1478 | if (ent_exact==1) { 1479 | if (symbol_length == 1) { 1480 | printf("%d,%"PRIu64",%f,%f,%f,%f,%f\n",terse_index,filebytes*8,result_entropy,result_chisq_distribution,result_mean,result_pi,result_scc); 1481 | } else { 1482 | printf("%d,%"PRIu64",%f,%f,%f,%f,%f\n",terse_index,filebytes,result_entropy,result_chisq_distribution,result_mean,result_pi,result_scc); 1483 | } 1484 | } 1485 | else if ((parse_filename==1) && (symbol_length==1)) { 1486 | printf("%4d,%12"PRIu64",%8s,%8s,%8.2f,%8.2f,%12f,%12f,%18"PRIu32",%12f,%12f,%15f, %16f, %16f, %16f, %16f, %19"PRIx64", %18"PRIu64", %18f, %15"PRIu64", 
%s\n", terse_index, symbol_count, deviceid,process,voltage,temperature,result_entropy, result_min_entropy,result_min_entropy_symbol, result_chisq_percent, result_mean, result_pi, result_scc, result_p01, result_p10,markov_entropy, longest_longest_symbol,longest_longest,result_longest_pvalue, longest_byte_pos, filename); 1487 | 1488 | } else if ((parse_filename==0) && (symbol_length==1)) { 1489 | printf("%4d,%12"PRIu64",%11f, %12f,%18"PRIx32",%12f,%12f,%15f, %12f, %16f, %16f, %16f, %18"PRIx64", %18"PRIu64", %18f, %15"PRIu64", %s\n", terse_index, symbol_count, result_entropy, result_min_entropy,result_min_entropy_symbol, result_chisq_percent, result_mean, result_pi, result_scc, result_p01, result_p10, markov_entropy, longest_longest_symbol,longest_longest,result_longest_pvalue, longest_byte_pos, filename); 1490 | } 1491 | 1492 | else if ((parse_filename==1) && (symbol_length!=1)) { 1493 | printf("%4d,%12"PRIu64",%8s,%8s,%8.2f,%8.2f,%12f,%12f,%18"PRIx32",%12f,%12f,%15f, %16f, %16f, %16f, %16f, %18"PRIx64", %18"PRIu64", (null), %15"PRIu64", %s\n", terse_index, symbol_count, deviceid,process,voltage,temperature,result_entropy, result_min_entropy,result_min_entropy_symbol, result_chisq_percent, result_mean, result_pi, result_scc, result_p01, result_p10, markov_entropy, longest_longest_symbol,longest_longest, longest_byte_pos, filename); 1494 | 1495 | } else if ((parse_filename==0) && (symbol_length!=1)) { 1496 | printf("%4d,%12"PRIu64",%11f, %12f,%18"PRIx32",%12f,%12f,%15f, %12f, %16f, %16f, %16f, %18"PRIx64", %18"PRIu64", (null), %15"PRIu64", %s\n", terse_index, symbol_count, result_entropy, result_min_entropy,result_min_entropy_symbol, result_chisq_percent, result_mean, result_pi, result_scc, result_p01, result_p10, markov_entropy, longest_longest_symbol,longest_longest, longest_byte_pos, filename); 1497 | } 1498 | } 1499 | else { 1500 | 1501 | /* Output the occurrence count if requested */ 1502 | if ((print_occurrence==1) && (no_occurrence_space == 0) ) { 1503 | double 
fraction; 1504 | for (i=0; i