import sys
from scipy.stats import norm as normal
from scipy.stats import mannwhitneyu as Utest
import numpy as np
# import matplotlib.pyplot as plt


# Module-level state shared by the functions below.
# F, G : sorted score samples of algorithms A and B (the empirical CDFs).
# n, m : their sizes.  Fb, Gb (set by buildNewCDFs) hold bootstrap resamples.
F = []
G = []
n = 0
m = 0


def buildOrigCDFs(f, g):
    """Sort the two score samples and cache them (plus sizes) as the
    empirical CDFs F (algorithm A) and G (algorithm B)."""
    global F
    global G
    global n
    global m
    F = np.sort(f)
    n = len(F)
    G = np.sort(g)
    m = len(G)


def buildNewCDFs(f, g):
    """Sort bootstrap resamples and cache them as the empirical CDFs Fb, Gb."""
    global Fb
    global Gb
    Fb = np.sort(f)
    Gb = np.sort(g)


def _quantile(values, size, p):
    # Empirical inverse CDF: the smallest order statistic whose rank covers
    # probability p, clamped to the first/last observation at the boundaries.
    index = int(np.ceil(p * size))
    if index >= size:
        return values[size - 1]
    elif index == 0:
        return values[0]
    return values[index - 1]


def invG(p):
    """Empirical quantile function of the original G sample."""
    return _quantile(G, m, p)


def invF(p):
    """Empirical quantile function of the original F sample."""
    return _quantile(F, n, p)


def invGnew(p, M):
    """Empirical quantile function of the bootstrap Gb sample of size M."""
    return _quantile(Gb, M, p)


def invFnew(p, N):
    """Empirical quantile function of the bootstrap Fb sample of size N."""
    return _quantile(Fb, N, p)


def epsilon(dp):
    """Return the epsilon quotient eps(F, G): the ratio between the
    integrated squared positive part of G^-1(p) - F^-1(p) and the
    integrated squared difference, approximated on a grid of step dp.

    0.0 means F stochastically dominates G; values near 1.0 mean the
    opposite.  Returns 0.0 (with a warning) when the denominator is 0,
    i.e. the two empirical quantile functions coincide on the grid.
    """
    s = 0.0
    se = 0.0
    for p in np.arange(0, 1, dp):
        temp = invG(p) - invF(p)
        tempe = max(temp, 0)  # only the region where F^-1(p) < G^-1(p) counts
        s = s + temp * temp * dp
        se = se + tempe * tempe * dp
    if s != 0:
        return se / s
    else:
        print("The denominator is 0")
        return 0.0


def epsilonNew(dp, N, M):
    """Epsilon quotient computed on the bootstrap CDFs Fb (size N) and
    Gb (size M); same definition as epsilon() above.

    NOTE(review): the middle of this function was garbled in the retrieved
    source; the body below mirrors the intact epsilon() implementation and
    the ASD definition from Dror et al. (2019) — confirm against upstream.
    """
    denom = 0.0
    numer = 0.0
    for p in np.arange(0, 1, dp):
        diff = invGnew(p, M) - invFnew(p, N)  # check when F-1(t) < G-1(t)
        pos = max(diff, 0)
        numer = numer + pos * pos * dp
        denom = denom + diff * diff * dp
    if denom != 0:
        return numer / denom
    print("The denominator is 0")
    return 0.0


def MannWhitney(data_A, data_B):
    """One-sided Mann-Whitney U test (alternative: A 'less' than B).

    Relies on the module-level sample sizes n, m set by buildOrigCDFs();
    the normal approximation is only valid when both exceed 20, so smaller
    samples return the conservative p-value 1.0.
    """
    if n < 20 or m < 20:
        print("Use only when the number of observation in each sample is > 20")
        return 1.0
    _, pval = Utest(data_A, data_B, alternative='less')
    return pval
##############################################################
def main():
    """CLI entry point.

    Usage: python ASD.py <scores_A_file> <scores_B_file> <alpha>
    Each scores file holds one float per line.  Prints the minimal epsilon
    for which algorithm A is almost stochastically greater than B at
    significance level alpha, plus a verdict.
    """
    # Bug fix: sys.argv[3] is read below, so THREE arguments are required
    # (the original checked `< 3` and crashed on exactly two arguments).
    if len(sys.argv) < 4:
        print("Not enough arguments\n")
        sys.exit()

    filename_A = sys.argv[1]    # scores from algorithm A
    filename_B = sys.argv[2]    # scores from algorithm B
    alpha = float(sys.argv[3])  # significance level of statistical test

    with open(filename_A) as f:
        data_A = f.read().splitlines()

    with open(filename_B) as f:
        data_B = f.read().splitlines()

    data_A = list(map(float, data_A))
    data_B = list(map(float, data_B))

    buildOrigCDFs(data_A, data_B)

    # constants
    dp = 0.005  # differential of the variable p - for integral calculations
    N = 1000    # num of samples from F for sigma estimate
    M = 1000    # num of samples from G for sigma estimate
    B = 1000    # bootstrap iterations for sigma estimate

    # calculate the epsilon quotient on the original samples
    eps_FnGm = epsilon(dp)

    # estimate the variance of the bootstrap epsilon statistic
    const = np.sqrt((1.0 * N * M) / (N + M + 0.0))
    samples = []
    for b in range(B):
        # resample from the original empirical CDFs via inverse transform
        uniF = np.random.uniform(0, 1, N)
        uniG = np.random.uniform(0, 1, M)
        Fvalues = [invF(u) for u in uniF]
        Gvalues = [invG(u) for u in uniG]
        buildNewCDFs(Fvalues, Gvalues)
        samples.append(epsilonNew(dp, N, M))

    sigma = np.std(samples)

    # one-sided lower confidence bound on epsilon, clipped to [0, 1]
    min_epsilon = min(max(eps_FnGm - (1 / const) * sigma * normal.ppf(alpha), 0.0), 1.0)
    print("The minimal epsilon for which Algorithm A is almost "
          "stochastically greater than algorithm B is ", min_epsilon)
    if min_epsilon <= 0.5 and min_epsilon > 0.0:
        print("since epsilon <= 0.5 we will claim that A is "
              "better than B with significance level alpha=", alpha)
    elif min_epsilon == 0.0:
        # typo fix: "stochatically" -> "stochastically"
        print('since epsilon = 0, algorithm A is stochastically dominant over B')
    else:
        print("since epsilon > 0.5 we will claim that A "
              "is not better than B with significance level alpha=", alpha)

    # print(MannWhitney(data_A, data_B))


if __name__ == '__main__':
    main()
0.0: 176 | print('since epsilon = 0, algorithm A is stochatically dominant over B') 177 | 178 | else: 179 | print("since epsilon > 0.5 we will claim that A " 180 | "is not better than B with significance level alpha=", alpha) 181 | 182 | # print(MannWhitney(data_A, data_B) 0.5 we will claim that A is not better than B with significance level alpha= ______ 70 | ``` 71 | For more details about the meaning of the output please read our paper: Deep Dominance - How to properly compare deep neural models. 72 | 73 | ### Citation 74 | If you make use of this code for research purposes, we'll appreciate citing the following: 75 | ``` 76 | @InProceedings{P, 77 | author = "Dror, Rotem 78 | and Shlomov, Segev 79 | and Reichart, Roi", 80 | title = "Deep Dominance - How to Properly Compare Deep Neural Models", 81 | booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", 82 | year = "2019", 83 | publisher = "Association for Computational Linguistics", 84 | pages = "", 85 | location = "Florence, Italy", 86 | url = "" 87 | } 88 | ``` 89 | 90 | ## Contact Information 91 | This file and the code was written by Rotem Dror. The methods are described in the above paper [(Dror et al., 2019)](https://www.aclweb.org/anthology/P19-1266/). For questions please write to: rtmdrr@seas.upenn.edu 92 | 93 | 94 | 95 | 96 | --------------------------------------------------------------------------------