├── README.md └── corrstats.py /README.md: -------------------------------------------------------------------------------- 1 | CorrelationStats 2 | ================ 3 | 4 | This Python script enables you to compute statistical significance tests 5 | on both dependent and independent correlation coefficients. For each case, two methods are available 6 | to choose from. 7 | 8 | For details, please refer to: http://www.philippsinger.info/?p=347 9 | -------------------------------------------------------------------------------- /corrstats.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for calculating the statistically significant differences between two dependent or independent correlation 3 | coefficients. 4 | The Fisher and Steiger methods are adopted from the R package http://personality-project.org/r/html/paired.r.html 5 | and are described in detail in the book 'Statistical Methods for Psychology' 6 | The Zou method is adopted from http://seriousstats.wordpress.com/2012/02/05/comparing-correlations/ 7 | Credit goes to the authors of the above-mentioned packages! 
# Author: Philipp Singer (www.philippsinger.info)

from __future__ import division

__author__ = 'psinger'

import numpy as np
from scipy.stats import t, norm
from math import atanh, pow
from numpy import tanh


def rz_ci(r, n, conf_level=0.95):
    """
    Confidence interval for a correlation coefficient via Fisher's z-transformation
    @param r: correlation coefficient
    @param n: number of elements the coefficient was computed from
    @param conf_level: confidence level of the interval
    @return: numpy array (lower, upper) holding the interval bounds on the r scale
    """
    # Standard error on the z scale; true division is guaranteed by the
    # __future__ import at the top of the module.
    zr_se = pow(1 / (n - 3), .5)
    moe = norm.ppf(1 - (1 - conf_level) / float(2)) * zr_se
    z = atanh(r)
    # Build the interval on the z scale, then map both bounds back with tanh.
    return tanh((z - moe, z + moe))


def rho_rxy_rxz(rxy, rxz, ryz):
    """
    Correlation term between the two coefficients rxy and rxz, as used by the
    'zou' method of dependent_corr
    @param rxy: correlation coefficient between x and y
    @param rxz: correlation coefficient between x and z
    @param ryz: correlation coefficient between y and z
    @return: num / den as computed below
    """
    num = (ryz - 1 / 2. * rxy * rxz) * (1 - pow(rxy, 2) - pow(rxz, 2) - pow(ryz, 2)) + pow(ryz, 3)
    den = (1 - pow(rxy, 2)) * (1 - pow(rxz, 2))
    return num / float(den)


def _zou_interval(r1, r2, ci1, ci2, rho=0.0):
    """
    Combine the individual intervals of r1 and r2 into an interval for r1 - r2
    @param r1: first correlation coefficient
    @param r2: second correlation coefficient
    @param ci1: (lower, upper) interval of r1
    @param ci2: (lower, upper) interval of r2
    @param rho: correlation between r1 and r2; 0 drops the cross term (independent case)
    @return: lower and upper bound of the interval for r1 - r2
    """
    l1, u1 = ci1
    l2, u2 = ci2
    diff = r1 - r2
    lower = diff - pow(pow(r1 - l1, 2) + pow(u2 - r2, 2) - 2 * rho * (r1 - l1) * (u2 - r2), 0.5)
    upper = diff + pow(pow(u1 - r1, 2) + pow(r2 - l2, 2) - 2 * rho * (u1 - r1) * (r2 - l2), 0.5)
    return lower, upper


def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'):
    """
    Calculates the statistic significance between two dependent correlation coefficients
    @param xy: correlation coefficient between x and y
    @param xz: correlation coefficient between x and z
    @param yz: correlation coefficient between y and z
    @param n: number of elements in x, y and z
    @param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method
    @param conf_level: confidence level, only works for 'zou' method
    @param method: defines the method uses, 'steiger' or 'zou'
    @return: t and p-val for 'steiger'; lower and upper bound of the interval for xy - xz for 'zou'
    @raise ValueError: if method is neither 'steiger' nor 'zou'
    """
    if method == 'steiger':
        d = xy - xz
        determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
        av = (xy + xz) / 2
        cube = (1 - yz) * (1 - yz) * (1 - yz)

        t2 = d * np.sqrt((n - 1) * (1 + yz) / (((2 * (n - 1) / (n - 3)) * determin + av * av * cube)))
        # sf() is the upper tail computed directly; 1 - cdf() rounds to 0.0
        # once the statistic gets large.
        p = t.sf(abs(t2), n - 3)

        if twotailed:
            p *= 2

        return t2, p
    elif method == 'zou':
        # Each interval is computed once and unpacked inside the helper.
        ci_xy = rz_ci(xy, n, conf_level=conf_level)
        ci_xz = rz_ci(xz, n, conf_level=conf_level)
        return _zou_interval(xy, xz, ci_xy, ci_xz, rho=rho_rxy_rxz(xy, xz, yz))
    else:
        # ValueError is still caught by callers handling the generic Exception.
        raise ValueError('Wrong method!')


def independent_corr(xy, ab, n, n2=None, twotailed=True, conf_level=0.95, method='fisher'):
    """
    Calculates the statistic significance between two independent correlation coefficients
    @param xy: correlation coefficient between x and y
    @param ab: correlation coefficient between a and b
    @param n: number of elements in xy
    @param n2: number of elements in ab (if distinct from n)
    @param twotailed: whether to calculate a one or two tailed test, only works for 'fisher' method
    @param conf_level: confidence level, only works for 'zou' method
    @param method: defines the method uses, 'fisher' or 'zou'
    @return: z and p-val for 'fisher' (z is returned as an absolute value);
             lower and upper bound of the interval for xy - ab for 'zou'
    @raise ValueError: if method is neither 'fisher' nor 'zou'
    """
    # Resolve the default before branching so that both methods accept an
    # omitted n2 (the 'zou' branch needs it as well).
    if n2 is None:
        n2 = n

    if method == 'fisher':
        xy_z = 0.5 * np.log((1 + xy) / (1 - xy))
        ab_z = 0.5 * np.log((1 + ab) / (1 - ab))

        se_diff_r = np.sqrt(1 / (n - 3) + 1 / (n2 - 3))
        diff = xy_z - ab_z
        z = abs(diff / se_diff_r)
        # Upper tail via sf() keeps precision for very small p-values.
        p = norm.sf(z)
        if twotailed:
            p *= 2

        return z, p
    elif method == 'zou':
        ci_xy = rz_ci(xy, n, conf_level=conf_level)
        ci_ab = rz_ci(ab, n2, conf_level=conf_level)
        return _zou_interval(xy, ab, ci_xy, ci_ab)
    else:
        # ValueError is still caught by callers handling the generic Exception.
        raise ValueError('Wrong method!')


# Example calls; these run whenever the module is loaded.
print(dependent_corr(.40, .50, .10, 103, method='steiger'))
print(independent_corr(0.5, 0.6, 103, 103, method='fisher'))

#print dependent_corr(.396, .179, .088, 200, method='zou')
#print independent_corr(.560, .588, 100, 353, method='zou') 113 | --------------------------------------------------------------------------------