├── README.md
└── corrstats.py


/README.md:
--------------------------------------------------------------------------------
1 | CorrelationStats
2 | ================
3 | 
4 | This Python script lets you compute statistical significance tests
5 | for both dependent and independent correlation coefficients. Two methods are available for each case:
6 | 'steiger' or 'zou' for dependent correlations, and 'fisher' or 'zou' for independent ones.
7 | 
8 | For details, please refer to: http://www.philippsinger.info/?p=347
9 | 


--------------------------------------------------------------------------------
/corrstats.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for testing the statistical significance of the difference between two dependent or independent correlation
  3 | coefficients.
  4 | The Fisher and Steiger method is adopted from the R package http://personality-project.org/r/html/paired.r.html
  5 | and is described in detail in the book 'Statistical Methods for Psychology'
  6 | The Zou method is adopted from http://seriousstats.wordpress.com/2012/02/05/comparing-correlations/
  7 | Credit goes to the authors of the above-mentioned packages!
  8 | 
  9 | Author: Philipp Singer (www.philippsinger.info)
 10 | """
 11 | 
 12 | from __future__ import division
 13 | 
 14 | __author__ = 'psinger'
 15 | 
 16 | import numpy as np
 17 | from scipy.stats import t, norm
 18 | from math import atanh, pow
 19 | from numpy import tanh
 20 | 
 21 | def rz_ci(r, n, conf_level = 0.95):
 22 |     zr_se = pow(1/(n - 3), .5)
 23 |     moe = norm.ppf(1 - (1 - conf_level)/float(2)) * zr_se
 24 |     zu = atanh(r) + moe
 25 |     zl = atanh(r) - moe
 26 |     return tanh((zl, zu))
 27 | 
 28 | def rho_rxy_rxz(rxy, rxz, ryz):
 29 |     num = (ryz-1/2.*rxy*rxz)*(1-pow(rxy,2)-pow(rxz,2)-pow(ryz,2))+pow(ryz,3)
 30 |     den = (1 - pow(rxy,2)) * (1 - pow(rxz,2))
 31 |     return num/float(den)
 32 | 
 33 | def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'):
 34 |     """
 35 |     Calculates the statistic significance between two dependent correlation coefficients
 36 |     @param xy: correlation coefficient between x and y
 37 |     @param xz: correlation coefficient between x and z
 38 |     @param yz: correlation coefficient between y and z
 39 |     @param n: number of elements in x, y and z
 40 |     @param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method
 41 |     @param conf_level: confidence level, only works for 'zou' method
 42 |     @param method: defines the method uses, 'steiger' or 'zou'
 43 |     @return: t and p-val
 44 |     """
 45 |     if method == 'steiger':
 46 |         d = xy - xz
 47 |         determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
 48 |         av = (xy + xz)/2
 49 |         cube = (1 - yz) * (1 - yz) * (1 - yz)
 50 | 
 51 |         t2 = d * np.sqrt((n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + av * av * cube)))
 52 |         p = 1 - t.cdf(abs(t2), n - 3)
 53 | 
 54 |         if twotailed:
 55 |             p *= 2
 56 | 
 57 |         return t2, p
 58 |     elif method == 'zou':
 59 |         L1 = rz_ci(xy, n, conf_level=conf_level)[0]
 60 |         U1 = rz_ci(xy, n, conf_level=conf_level)[1]
 61 |         L2 = rz_ci(xz, n, conf_level=conf_level)[0]
 62 |         U2 = rz_ci(xz, n, conf_level=conf_level)[1]
 63 |         rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
 64 |         lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
 65 |         upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
 66 |         return lower, upper
 67 |     else:
 68 |         raise Exception('Wrong method!')
 69 | 
 70 | def independent_corr(xy, ab, n, n2 = None, twotailed=True, conf_level=0.95, method='fisher'):
 71 |     """
 72 |     Calculates the statistic significance between two independent correlation coefficients
 73 |     @param xy: correlation coefficient between x and y
 74 |     @param xz: correlation coefficient between a and b
 75 |     @param n: number of elements in xy
 76 |     @param n2: number of elements in ab (if distinct from n)
 77 |     @param twotailed: whether to calculate a one or two tailed test, only works for 'fisher' method
 78 |     @param conf_level: confidence level, only works for 'zou' method
 79 |     @param method: defines the method uses, 'fisher' or 'zou'
 80 |     @return: z and p-val
 81 |     """
 82 | 
 83 |     if method == 'fisher':
 84 |         xy_z = 0.5 * np.log((1 + xy)/(1 - xy))
 85 |         ab_z = 0.5 * np.log((1 + ab)/(1 - ab))
 86 |         if n2 is None:
 87 |             n2 = n
 88 | 
 89 |         se_diff_r = np.sqrt(1/(n - 3) + 1/(n2 - 3))
 90 |         diff = xy_z - ab_z
 91 |         z = abs(diff / se_diff_r)
 92 |         p = (1 - norm.cdf(z))
 93 |         if twotailed:
 94 |             p *= 2
 95 | 
 96 |         return z, p
 97 |     elif method == 'zou':
 98 |         L1 = rz_ci(xy, n, conf_level=conf_level)[0]
 99 |         U1 = rz_ci(xy, n, conf_level=conf_level)[1]
100 |         L2 = rz_ci(ab, n2, conf_level=conf_level)[0]
101 |         U2 = rz_ci(ab, n2, conf_level=conf_level)[1]
102 |         lower = xy - ab - pow((pow((xy - L1), 2) + pow((U2 - ab), 2)), 0.5)
103 |         upper = xy - ab + pow((pow((U1 - xy), 2) + pow((ab - L2), 2)), 0.5)
104 |         return lower, upper
105 |     else:
106 |         raise Exception('Wrong method!')
107 | 
if __name__ == "__main__":
    # Demo output, shown only when the file is run as a script so that
    # importing the module does not print anything.
    print(dependent_corr(.40, .50, .10, 103, method='steiger'))
    print(independent_corr(0.5 , 0.6, 103, 103, method='fisher'))

    # Example calls for the 'zou' method (returns a confidence interval):
    # print(dependent_corr(.396, .179, .088, 200, method='zou'))
    # print(independent_corr(.560, .588, 100, 353, method='zou'))
113 | 


--------------------------------------------------------------------------------