├── ADHD.txt
├── README.md
├── hsb2.csv
├── mrstat.ipynb
└── mrstat.py
/ADHD.txt:
--------------------------------------------------------------------------------
1 | D0 D60
2 | 57 62
3 | 27 49
4 | 32 30
5 | 31 34
6 | 34 38
7 | 38 36
8 | 71 77
9 | 33 51
10 | 34 45
11 | 53 42
12 | 36 43
13 | 42 57
14 | 26 36
15 | 52 58
16 | 36 35
17 | 55 60
18 | 36 33
19 | 42 49
20 | 36 33
21 | 54 59
22 | 34 35
23 | 29 37
24 | 33 45
25 | 33 29
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mrstat - удобный скрипт для статистического анализа
2 |
--------------------------------------------------------------------------------
/hsb2.csv:
--------------------------------------------------------------------------------
1 | "id","female","race","ses","schtyp","prog","read","write","math","science","socst"
2 | 70,0,4,1,1,1,57,52,41,47,57
3 | 121,1,4,2,1,3,68,59,53,63,61
4 | 86,0,4,3,1,1,44,33,54,58,31
5 | 141,0,4,3,1,3,63,44,47,53,56
6 | 172,0,4,2,1,2,47,52,57,53,61
7 | 113,0,4,2,1,2,44,52,51,63,61
8 | 50,0,3,2,1,1,50,59,42,53,61
9 | 11,0,1,2,1,2,34,46,45,39,36
10 | 84,0,4,2,1,1,63,57,54,58,51
11 | 48,0,3,2,1,2,57,55,52,50,51
12 | 75,0,4,2,1,3,60,46,51,53,61
13 | 60,0,4,2,1,2,57,65,51,63,61
14 | 95,0,4,3,1,2,73,60,71,61,71
15 | 104,0,4,3,1,2,54,63,57,55,46
16 | 38,0,3,1,1,2,45,57,50,31,56
17 | 115,0,4,1,1,1,42,49,43,50,56
18 | 76,0,4,3,1,2,47,52,51,50,56
19 | 195,0,4,2,2,1,57,57,60,58,56
20 | 114,0,4,3,1,2,68,65,62,55,61
21 | 85,0,4,2,1,1,55,39,57,53,46
22 | 167,0,4,2,1,1,63,49,35,66,41
23 | 143,0,4,2,1,3,63,63,75,72,66
24 | 41,0,3,2,1,2,50,40,45,55,56
25 | 20,0,1,3,1,2,60,52,57,61,61
26 | 12,0,1,2,1,3,37,44,45,39,46
27 | 53,0,3,2,1,3,34,37,46,39,31
28 | 154,0,4,3,1,2,65,65,66,61,66
29 | 178,0,4,2,2,3,47,57,57,58,46
30 | 196,0,4,3,2,2,44,38,49,39,46
31 | 29,0,2,1,1,1,52,44,49,55,41
32 | 126,0,4,2,1,1,42,31,57,47,51
33 | 103,0,4,3,1,2,76,52,64,64,61
34 | 192,0,4,3,2,2,65,67,63,66,71
35 | 150,0,4,2,1,3,42,41,57,72,31
36 | 199,0,4,3,2,2,52,59,50,61,61
37 | 144,0,4,3,1,1,60,65,58,61,66
38 | 200,0,4,2,2,2,68,54,75,66,66
39 | 80,0,4,3,1,2,65,62,68,66,66
40 | 16,0,1,1,1,3,47,31,44,36,36
41 | 153,0,4,2,1,3,39,31,40,39,51
42 | 176,0,4,2,2,2,47,47,41,42,51
43 | 177,0,4,2,2,2,55,59,62,58,51
44 | 168,0,4,2,1,2,52,54,57,55,51
45 | 40,0,3,1,1,1,42,41,43,50,41
46 | 62,0,4,3,1,1,65,65,48,63,66
47 | 169,0,4,1,1,1,55,59,63,69,46
48 | 49,0,3,3,1,3,50,40,39,49,47
49 | 136,0,4,2,1,2,65,59,70,63,51
50 | 189,0,4,2,2,2,47,59,63,53,46
51 | 7,0,1,2,1,2,57,54,59,47,51
52 | 27,0,2,2,1,2,53,61,61,57,56
53 | 128,0,4,3,1,2,39,33,38,47,41
54 | 21,0,1,2,1,1,44,44,61,50,46
55 | 183,0,4,2,2,2,63,59,49,55,71
56 | 132,0,4,2,1,2,73,62,73,69,66
57 | 15,0,1,3,1,3,39,39,44,26,42
58 | 67,0,4,1,1,3,37,37,42,33,32
59 | 22,0,1,2,1,3,42,39,39,56,46
60 | 185,0,4,2,2,2,63,57,55,58,41
61 | 9,0,1,2,1,3,48,49,52,44,51
62 | 181,0,4,2,2,2,50,46,45,58,61
63 | 170,0,4,3,1,2,47,62,61,69,66
64 | 134,0,4,1,1,1,44,44,39,34,46
65 | 108,0,4,2,1,1,34,33,41,36,36
66 | 197,0,4,3,2,2,50,42,50,36,61
67 | 140,0,4,2,1,3,44,41,40,50,26
68 | 171,0,4,2,1,2,60,54,60,55,66
69 | 107,0,4,1,1,3,47,39,47,42,26
70 | 81,0,4,1,1,2,63,43,59,65,44
71 | 18,0,1,2,1,3,50,33,49,44,36
72 | 155,0,4,2,1,1,44,44,46,39,51
73 | 97,0,4,3,1,2,60,54,58,58,61
74 | 68,0,4,2,1,2,73,67,71,63,66
75 | 157,0,4,2,1,1,68,59,58,74,66
76 | 56,0,4,2,1,3,55,45,46,58,51
77 | 5,0,1,1,1,2,47,40,43,45,31
78 | 159,0,4,3,1,2,55,61,54,49,61
79 | 123,0,4,3,1,1,68,59,56,63,66
80 | 164,0,4,2,1,3,31,36,46,39,46
81 | 14,0,1,3,1,2,47,41,54,42,56
82 | 127,0,4,3,1,2,63,59,57,55,56
83 | 165,0,4,1,1,3,36,49,54,61,36
84 | 174,0,4,2,2,2,68,59,71,66,56
85 | 3,0,1,1,1,2,63,65,48,63,56
86 | 58,0,4,2,1,3,55,41,40,44,41
87 | 146,0,4,3,1,2,55,62,64,63,66
88 | 102,0,4,3,1,2,52,41,51,53,56
89 | 117,0,4,3,1,3,34,49,39,42,56
90 | 133,0,4,2,1,3,50,31,40,34,31
91 | 94,0,4,3,1,2,55,49,61,61,56
92 | 24,0,2,2,1,2,52,62,66,47,46
93 | 149,0,4,1,1,1,63,49,49,66,46
94 | 82,1,4,3,1,2,68,62,65,69,61
95 | 8,1,1,1,1,2,39,44,52,44,48
96 | 129,1,4,1,1,1,44,44,46,47,51
97 | 173,1,4,1,1,1,50,62,61,63,51
98 | 57,1,4,2,1,2,71,65,72,66,56
99 | 100,1,4,3,1,2,63,65,71,69,71
100 | 1,1,1,1,1,3,34,44,40,39,41
101 | 194,1,4,3,2,2,63,63,69,61,61
102 | 88,1,4,3,1,2,68,60,64,69,66
103 | 99,1,4,3,1,1,47,59,56,66,61
104 | 47,1,3,1,1,2,47,46,49,33,41
105 | 120,1,4,3,1,2,63,52,54,50,51
106 | 166,1,4,2,1,2,52,59,53,61,51
107 | 65,1,4,2,1,2,55,54,66,42,56
108 | 101,1,4,3,1,2,60,62,67,50,56
109 | 89,1,4,1,1,3,35,35,40,51,33
110 | 54,1,3,1,2,1,47,54,46,50,56
111 | 180,1,4,3,2,2,71,65,69,58,71
112 | 162,1,4,2,1,3,57,52,40,61,56
113 | 4,1,1,1,1,2,44,50,41,39,51
114 | 131,1,4,3,1,2,65,59,57,46,66
115 | 125,1,4,1,1,2,68,65,58,59,56
116 | 34,1,1,3,2,2,73,61,57,55,66
117 | 106,1,4,2,1,3,36,44,37,42,41
118 | 130,1,4,3,1,1,43,54,55,55,46
119 | 93,1,4,3,1,2,73,67,62,58,66
120 | 163,1,4,1,1,2,52,57,64,58,56
121 | 37,1,3,1,1,3,41,47,40,39,51
122 | 35,1,1,1,2,1,60,54,50,50,51
123 | 87,1,4,2,1,1,50,52,46,50,56
124 | 73,1,4,2,1,2,50,52,53,39,56
125 | 151,1,4,2,1,3,47,46,52,48,46
126 | 44,1,3,1,1,3,47,62,45,34,46
127 | 152,1,4,3,1,2,55,57,56,58,61
128 | 105,1,4,2,1,2,50,41,45,44,56
129 | 28,1,2,2,1,1,39,53,54,50,41
130 | 91,1,4,3,1,3,50,49,56,47,46
131 | 45,1,3,1,1,3,34,35,41,29,26
132 | 116,1,4,2,1,2,57,59,54,50,56
133 | 33,1,2,1,1,2,57,65,72,54,56
134 | 66,1,4,2,1,3,68,62,56,50,51
135 | 72,1,4,2,1,3,42,54,47,47,46
136 | 77,1,4,1,1,2,61,59,49,44,66
137 | 61,1,4,3,1,2,76,63,60,67,66
138 | 190,1,4,2,2,2,47,59,54,58,46
139 | 42,1,3,2,1,3,46,52,55,44,56
140 | 2,1,1,2,1,3,39,41,33,42,41
141 | 55,1,3,2,2,2,52,49,49,44,61
142 | 19,1,1,1,1,1,28,46,43,44,51
143 | 90,1,4,3,1,2,42,54,50,50,52
144 | 142,1,4,2,1,3,47,42,52,39,51
145 | 17,1,1,2,1,2,47,57,48,44,41
146 | 122,1,4,2,1,2,52,59,58,53,66
147 | 191,1,4,3,2,2,47,52,43,48,61
148 | 83,1,4,2,1,3,50,62,41,55,31
149 | 182,1,4,2,2,2,44,52,43,44,51
150 | 6,1,1,1,1,2,47,41,46,40,41
151 | 46,1,3,1,1,2,45,55,44,34,41
152 | 43,1,3,1,1,2,47,37,43,42,46
153 | 96,1,4,3,1,2,65,54,61,58,56
154 | 138,1,4,2,1,3,43,57,40,50,51
155 | 10,1,1,2,1,1,47,54,49,53,61
156 | 71,1,4,2,1,1,57,62,56,58,66
157 | 139,1,4,2,1,2,68,59,61,55,71
158 | 110,1,4,2,1,3,52,55,50,54,61
159 | 148,1,4,2,1,3,42,57,51,47,61
160 | 109,1,4,2,1,1,42,39,42,42,41
161 | 39,1,3,3,1,2,66,67,67,61,66
162 | 147,1,4,1,1,2,47,62,53,53,61
163 | 74,1,4,2,1,2,57,50,50,51,58
164 | 198,1,4,3,2,2,47,61,51,63,31
165 | 161,1,4,1,1,2,57,62,72,61,61
166 | 112,1,4,2,1,2,52,59,48,55,61
167 | 69,1,4,1,1,3,44,44,40,40,31
168 | 156,1,4,2,1,2,50,59,53,61,61
169 | 111,1,4,1,1,1,39,54,39,47,36
170 | 186,1,4,2,2,2,57,62,63,55,41
171 | 98,1,4,1,1,3,57,60,51,53,37
172 | 119,1,4,1,1,1,42,57,45,50,43
173 | 13,1,1,2,1,3,47,46,39,47,61
174 | 51,1,3,3,1,1,42,36,42,31,39
175 | 26,1,2,3,1,2,60,59,62,61,51
176 | 36,1,3,1,1,1,44,49,44,35,51
177 | 135,1,4,1,1,2,63,60,65,54,66
178 | 59,1,4,2,1,2,65,67,63,55,71
179 | 78,1,4,2,1,2,39,54,54,53,41
180 | 64,1,4,3,1,3,50,52,45,58,36
181 | 63,1,4,1,1,1,52,65,60,56,51
182 | 79,1,4,2,1,2,60,62,49,50,51
183 | 193,1,4,2,2,2,44,49,48,39,51
184 | 92,1,4,3,1,1,52,67,57,63,61
185 | 160,1,4,2,1,2,55,65,55,50,61
186 | 32,1,2,3,1,3,50,67,66,66,56
187 | 23,1,2,1,1,2,65,65,64,58,71
188 | 158,1,4,2,1,1,52,54,55,53,51
189 | 25,1,2,2,1,1,47,44,42,42,36
190 | 188,1,4,3,2,2,63,62,56,55,61
191 | 52,1,3,1,1,2,50,46,53,53,66
192 | 124,1,4,1,1,3,42,54,41,42,41
193 | 175,1,4,3,2,1,36,57,42,50,41
194 | 184,1,4,2,2,3,50,52,53,55,56
195 | 30,1,2,3,1,2,41,59,42,34,51
196 | 179,1,4,2,2,2,47,65,60,50,56
197 | 31,1,2,2,2,1,55,59,52,42,56
198 | 145,1,4,2,1,3,42,46,38,36,46
199 | 187,1,4,2,2,1,57,41,57,55,52
200 | 118,1,4,2,1,1,55,62,58,58,61
201 | 137,1,4,3,1,2,63,65,65,53,61
202 |
--------------------------------------------------------------------------------
/mrstat.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# MR STAT \n",
8 | "by glebmikh"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "На данный момент сообщество python не имеет полноценного пакета для статистического анализа:\n",
16 | "\n",
17 | "- реализации статистических тестов разбросаны по разным библиотекам;\n",
18 | "- некоторые из основных тестов не имеют стандартной реализации.\n",
19 | "\n",
20 | "Я решил, что будет удобно собрать в один скрипт все стандартные реализации и дополнить их недостающими методами. После этого больше не придется вспоминать, где что лежит, или копировать куски кода из предыдущих проектов."
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "### Содержание"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "- [1. Одновыборочный ти-тест](#1.-Одновыборочный-ти-тест)\n",
35 | "\t- [1.1 Гипотеза о среднем](#1.1-Гипотеза-о-среднием)\n",
36 | "\t- [1.2 Доверительный интервал для среднего](#1.2-Доверительный-интервал-для-среднего)\n",
37 | "- [2. Тесты на распределение](#2.-Тесты-на-распределение)\n",
38 | "\t- [2.1 Критерий Шапиро-Уилка](#2.1-Критерий-Шапиро-Уилка)\n",
39 | "\t- [2.2 КуКу график](#2.2-КуКу-график)\n",
40 | "\t- [2.3 Тест Колмогорова-Смирнова для проверки формы распределения](#2.3-Тест-Колмогорова-Смирнова-для-проверки-формы-распределения)\n",
41 | "\t- [2.4 Двухвыборочный тест Колмогорова-Смирнова](#2.4-Двухвыборочный-тест-Колмогорова-Смирнова)\n",
42 | "- [3. Двухвыборочные ти-тесты](#3.-Двухвыборочные-ти-тесты)\n",
43 | "\t- [3.1 Ти-тест для двух независимых выборок](#3.1-Ти-тест-для-двух-независимых-выборок)\n",
44 | "\t- [3.2 Доверительный интервал разности средних для двух независимых выборок](#3.2-Доверительный-интервал-разности-средних-для-двух-независимых-выборок)\n",
45 | "- [4. Пропорция](#4.-Пропорция)\n",
46 | "\t- [4.1. Тест для одной доли](#4.1.-Тест-для-одной-доли)\n",
47 | "\t- [4.2 Доверительный интервал для одной доли](#4.2-Доверительная-интервал-для-одной-доли)\n",
48 | "\t- [4.3 Размер выборки для заданной доли и ширины интервала](#4.3-Размер-выборки-для-заданной-доли-и-ширины-интервала)\n",
49 | "- [5. Две доли и АБ тестинг](#5.-Две-доли-и-АБ-тестинг)\n",
50 | "\t- [5.1 Тест разности двух независимых долей](#5.1-Тест-разности-двух-независимых-долей)\n",
51 | "\t- [5.2 Доверительный интервал для разности двух независимых долей](#5.2-Доверительный-интервал-для-разности-двух-незавимых-долей)\n",
52 | "\t- [5.3 Тест Хи-квадрат](#5.3-Тест-Хи-квадрат)\n",
53 | "\t- [5.4 Точный тест Фишера](#5.4-Точный-тест-Фишера)\n",
54 | "\t- [5.5 Размер выборок для двух пропорций](#5.5-Размер-выборок-для-для-двух-пропорций)\n",
55 | "- [6. Непараметрические критерии](#6.-Непараметрические-критерии)\n",
56 | "\t- [6.1 Критерий знаков](#6.1-Критерий-знаков)\n",
57 | "\t- [6.2 Критерий знаковых рангов Вилкоксона](#6.2-Критерий-знаковых-рангов-Вилкоксона)\n",
58 | "\t- [6.3 Критерий Манна-Уитни](#6.3-Критерий-Манна-Уитни)\n",
59 | "\t- [6.4 Бутстреп](#6.4-Бутстреп)\n",
60 | "- [7. Корреляция](#7.-Корреляция)\n",
61 | "\t- [7.1 Коэффициент корреляции Пирсона](#7.1-Коэффициет-корреляции-Пирсона)\n",
62 | "\t- [7.2 Коэффициент корреляции Спирмена](#7.2-Коэффициет-корреляции-Спирмена)\n",
63 | "\t- [7.3 Коэффициент Крамера](#7.3-Коэффициент-Крамера)\n",
64 | "- [8. Связанные выборки](#8.-Связанные-выборки)\n",
65 | "\t- [8.1 Ти-тест для связанных выборок](#8.1-Ти-тест-для-связанных-выборок)\n",
66 | "\t- [8.2 Тест для разности двух долей - связанные выборки](#8.2-Тест-для-разности-двух-долей---связанные-выборки)\n",
67 | "\t- [8.3 Доверительный интервал для разности долей ](#8.3-Доверительный-интервал-для-разности-долей-)\n",
68 | "\t- [8.4 Непараметрические критерии для связанных выборок](#8.4-Непараметрические-критерии-для-связанных-выборок)\n",
69 | "- [9. Дисперсионный анализ](#9.-Дисперсионный-анализ)\n",
70 | "\t- [9.1 Однофакторная ANOVA](#9.1-Однофакторная-ANOVA)\n",
71 | "\t- [9.2 Критерий Краскела-Уоллиса](#9.2-Критерий-Краскела-Уоллиса)\n",
72 | "\t- [9.3 Двухфакторная ANOVA](#9.3-Двухфакторная-ANOVA)"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "В данном руководстве я буду использовать дата-сет [hsb2](https://github.com/rpruim/OpenIntro/blob/master/data/hsb2.csv) и примеры из статьи [What statistical analysis should I use?](http://www.ats.ucla.edu/stat/stata/whatstat/whatstat.htm), которые дополню некоторыми другими полезными случаями. В данных hsb2 содержится 200 наблюдений об учениках старших классов: социодемографические данные и оценки, полученные на тестах по разным предметам."
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "Если импорт выдаст ошибки - просто установите недостающие библиотеки."
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 1,
92 | "metadata": {
93 | "collapsed": true
94 | },
95 | "outputs": [],
96 | "source": [
97 | "import mrstat\n",
98 | "import numpy as np\n",
99 | "from matplotlib import pyplot as plt\n",
100 | "from scipy import stats\n",
101 | "import pandas as pd\n",
102 | "%matplotlib inline\n",
103 | "hsb = pd.read_csv('hsb2.csv')"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 2,
109 | "metadata": {
110 | "collapsed": false
111 | },
112 | "outputs": [
113 | {
114 | "data": {
115 | "text/html": [
116 | "
\n",
117 | "
\n",
118 | " \n",
119 | " \n",
120 | " | \n",
121 | " id | \n",
122 | " female | \n",
123 | " race | \n",
124 | " ses | \n",
125 | " schtyp | \n",
126 | " prog | \n",
127 | " read | \n",
128 | " write | \n",
129 | " math | \n",
130 | " science | \n",
131 | " socst | \n",
132 | "
\n",
133 | " \n",
134 | " \n",
135 | " \n",
136 | " 0 | \n",
137 | " 70 | \n",
138 | " 0 | \n",
139 | " 4 | \n",
140 | " 1 | \n",
141 | " 1 | \n",
142 | " 1 | \n",
143 | " 57 | \n",
144 | " 52 | \n",
145 | " 41 | \n",
146 | " 47 | \n",
147 | " 57 | \n",
148 | "
\n",
149 | " \n",
150 | " 1 | \n",
151 | " 121 | \n",
152 | " 1 | \n",
153 | " 4 | \n",
154 | " 2 | \n",
155 | " 1 | \n",
156 | " 3 | \n",
157 | " 68 | \n",
158 | " 59 | \n",
159 | " 53 | \n",
160 | " 63 | \n",
161 | " 61 | \n",
162 | "
\n",
163 | " \n",
164 | " 2 | \n",
165 | " 86 | \n",
166 | " 0 | \n",
167 | " 4 | \n",
168 | " 3 | \n",
169 | " 1 | \n",
170 | " 1 | \n",
171 | " 44 | \n",
172 | " 33 | \n",
173 | " 54 | \n",
174 | " 58 | \n",
175 | " 31 | \n",
176 | "
\n",
177 | " \n",
178 | " 3 | \n",
179 | " 141 | \n",
180 | " 0 | \n",
181 | " 4 | \n",
182 | " 3 | \n",
183 | " 1 | \n",
184 | " 3 | \n",
185 | " 63 | \n",
186 | " 44 | \n",
187 | " 47 | \n",
188 | " 53 | \n",
189 | " 56 | \n",
190 | "
\n",
191 | " \n",
192 | " 4 | \n",
193 | " 172 | \n",
194 | " 0 | \n",
195 | " 4 | \n",
196 | " 2 | \n",
197 | " 1 | \n",
198 | " 2 | \n",
199 | " 47 | \n",
200 | " 52 | \n",
201 | " 57 | \n",
202 | " 53 | \n",
203 | " 61 | \n",
204 | "
\n",
205 | " \n",
206 | "
\n",
207 | "
"
208 | ],
209 | "text/plain": [
210 | " id female race ses schtyp prog read write math science socst\n",
211 | "0 70 0 4 1 1 1 57 52 41 47 57\n",
212 | "1 121 1 4 2 1 3 68 59 53 63 61\n",
213 | "2 86 0 4 3 1 1 44 33 54 58 31\n",
214 | "3 141 0 4 3 1 3 63 44 47 53 56\n",
215 | "4 172 0 4 2 1 2 47 52 57 53 61"
216 | ]
217 | },
218 | "execution_count": 2,
219 | "metadata": {},
220 | "output_type": "execute_result"
221 | }
222 | ],
223 | "source": [
224 | "hsb.head()"
225 | ]
226 | },
227 | {
228 | "cell_type": "markdown",
229 | "metadata": {
230 | "collapsed": true
231 | },
232 | "source": [
233 | "### 1. Одновыборочный ти-тест"
234 | ]
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "metadata": {},
239 | "source": [
240 | "#### 1.1 Гипотеза о среднием"
241 | ]
242 | },
243 | {
244 | "cell_type": "markdown",
245 | "metadata": {},
246 | "source": [
247 | "Проверим гипотезу, что средняя оценка на тесте по письму (write) равна 50 пунктам."
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": 3,
253 | "metadata": {
254 | "collapsed": false
255 | },
256 | "outputs": [
257 | {
258 | "data": {
259 | "text/plain": [
260 | "Ttest_1sampResult(statistic=4.140324966963024, pvalue=5.1209194607163552e-05)"
261 | ]
262 | },
263 | "execution_count": 3,
264 | "metadata": {},
265 | "output_type": "execute_result"
266 | }
267 | ],
268 | "source": [
269 | "mrstat.ttest_1samp(hsb['write'],50)"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "P-value получилось меньше 0.05 - это значит, что гипотеза о среднем, равном 50, отвергается."
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {},
282 | "source": [
283 | "#### 1.2 Доверительный интервал для среднего"
284 | ]
285 | },
286 | {
287 | "cell_type": "markdown",
288 | "metadata": {},
289 | "source": [
290 | "Чтобы получить представление о среднем для write, построим 95% доверительный интервал."
291 | ]
292 | },
293 | {
294 | "cell_type": "code",
295 | "execution_count": 4,
296 | "metadata": {
297 | "collapsed": false
298 | },
299 | "outputs": [
300 | {
301 | "data": {
302 | "text/plain": [
303 | "(51.461359138353302, 54.088640861646695)"
304 | ]
305 | },
306 | "execution_count": 4,
307 | "metadata": {},
308 | "output_type": "execute_result"
309 | }
310 | ],
311 | "source": [
312 | "mrstat.zconfint(hsb['write'])"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "Данный интервал с 95% вероятностью содержит истинное среднее для оценки write. Также доверительный интервал можно записать в следующем виде:"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 5,
325 | "metadata": {
326 | "collapsed": false
327 | },
328 | "outputs": [
329 | {
330 | "name": "stdout",
331 | "output_type": "stream",
332 | "text": [
333 | "52.775 +/- 1.31364086165\n"
334 | ]
335 | }
336 | ],
337 | "source": [
338 | "lb, hb = mrstat.zconfint(hsb['write'])\n",
339 | "print hsb.write.mean(), '+/-', (hb-lb)/2."
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {},
345 | "source": [
346 | "### 2. Тесты на распределение"
347 | ]
348 | },
349 | {
350 | "cell_type": "markdown",
351 | "metadata": {},
352 | "source": [
353 | "#### 2.1 Критерий Шапиро-Уилка"
354 | ]
355 | },
356 | {
357 | "cell_type": "markdown",
358 | "metadata": {},
359 | "source": [
360 | "Проверяем гипотезу о том, что случайная величина распределена нормально."
361 | ]
362 | },
363 | {
364 | "cell_type": "code",
365 | "execution_count": 6,
366 | "metadata": {
367 | "collapsed": false
368 | },
369 | "outputs": [
370 | {
371 | "data": {
372 | "text/plain": [
373 | "(0.9470317363739014, 9.865516403806396e-07)"
374 | ]
375 | },
376 | "execution_count": 6,
377 | "metadata": {},
378 | "output_type": "execute_result"
379 | }
380 | ],
381 | "source": [
382 | "mrstat.shapiro(hsb['write'])"
383 | ]
384 | },
385 | {
386 | "cell_type": "markdown",
387 | "metadata": {},
388 | "source": [
389 | "P-value < 0.05, следовательно, гипотеза о нормальности распределения оценок по write отвергается. Также не лишним будет всегда взглянуть на гистограмму."
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": 7,
395 | "metadata": {
396 | "collapsed": false
397 | },
398 | "outputs": [
399 | {
400 | "data": {
401 | "text/plain": [
402 | ""
403 | ]
404 | },
405 | "execution_count": 7,
406 | "metadata": {},
407 | "output_type": "execute_result"
408 | },
409 | {
410 | "data": {
411 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEZVJREFUeJzt3X+I3HV+x/HX62KKIXskEe2wROleQSziYmyGq8VyzGo9\nclqqQpHKVWK1rH/cBf8IlPT+aBU5CMWcbaEcpGd64dq7JRwnSs4quTR7Ioje7jVmE6N4yMq5xOTS\n0+DY4JG7d/+Yb2Auzux8Z+Y7M9/5+HzAsPP9Md957Wcnr/3uN9/vjCNCAIDx95lRBwAAFINCB4BE\nUOgAkAgKHQASQaEDQCIodABIBIUOAImg0AEgERQ6ACTismE+2ZVXXhlTU1Mtl3300Udav379MOP0\nhJzFG5es5CzWuOSURp91cXHxbERc1XHFiBjabevWrdHOkSNH2i4rE3IWb1yykrNY45IzYvRZJS1E\njo7lkAsAJIJCB4BEUOgAkAgKHQASQaEDQCIodABIBIUOAImg0AEgERQ6ACRiqJf+AyiPqV0/HNlz\nL+++c2TPnTL20AEgERQ6ACSCQgeARFDoAJAICh0AEkGhA0AiOha67cttv2r7NdsnbD+WzX/U9ort\no9ntjsHHBQC0k+c89I8l3RoRddtrJb1k+7+yZU9GxBODiwcAyKtjoWcff1TPJtdmtxhkKABA93Id\nQ7e9xvZRSWckHYqIV7JFO2wfs73P9qaBpQQAdOTGDnjOle2Nkp6WtEPSLySdVWNv/XFJkxHxYIvH\nzEqalaRKpbJ1bm6u5bbr9bomJia6zT905CzeuGRNLefSyrkhpGltevOGsRlPafQ/+5mZmcWIqHZa\nr6tClyTbfy/p/5qPndueknQwIm5Y7bHVajUWFhZaLpufn1etVusqyyiQs3jjkjW1nKN+L5dxGU9p\n9D9727kKPc9ZLldle+ayvU7S7ZLesD3ZtNo9ko73GhYA0L88Z7lMStpve40avwAORMRB29+xvUWN\nQy7Lkh4eXEwAQCd5znI5JummFvPvH0giAEBPuFIUABJBoQNAIih0AEgEhQ4AiaDQASARFDoAJIJC\nB4BEUOgAkAgKHQASQaEDQCIodABIBIUOAImg0AEgERQ6ACSCQgeARFDoAJAICh0AEkGhA0AiKHQA\nSETHQrd9ue1Xbb9m+4Ttx7L5V9g+ZPut7OumwccFALSTZw/9Y0m3RsSNkrZI2mb7Zkm7JB2OiGsl\nHc6mAQAj0rHQo6GeTa7NbiHpLkn7s/n7Jd09kIQAgFxyHUO3vcb2UUlnJB2KiFckVSLiVLbKe5Iq\nA8oIAMjBEZF/ZXujpKcl7ZD0UkRsbFr2fkR84ji67VlJs5JUqVS2zs3Ntdx2vV7XxMREd+lHgJzF\nG5esqeVcWjk3hDTtVdZJp88P9zmnN2/o6XGj/tnPzMwsRkS103qXdbPRiPjA9hFJ2ySdtj0ZEads\nT6qx997qMXsl7ZWkarUatVqt5bbn5+fVblmZkLN445I1tZwP7Prh4MOsYuf0Be1Z6qqC+rb85VpP\njxuXn32es1yuyvbMZXudpNslvSHpWUnbs9W2S3pmUCEBAJ3l+fU4KWm/7TVq/AI4EBEHbb8s6YDt\nhyS9I+neAeYEAHTQsdAj4pikm1rM/19Jtw0iFACge1wpCgCJoNABIBEUOgAkgkIHgERQ6ACQCAod\nABIx3Mu0gJKaynHV5M7pCwO5unJ5952FbxOfTuyhA0AiKHQASASFDgCJoNABIBEUOgAkgkIHgERQ\n6ACQCAodABJBoQNAIih0AEgEhQ4AiaDQASARHQvd9jW2j9h+3fYJ249k8x+1vWL7aHa7Y/BxAQDt\n5Hm3xQuSdkbET21/VtKi7UPZsicj4onBxQMA5NWx0CPilKRT2f0PbZ+UtHnQwQAA3enqGLrtKUk3\nSXolm7XD9jHb+2xvKjgbAKALjoh8K9
oTkn4s6esR8QPbFUlnJYWkxyVNRsSDLR43K2lWkiqVyta5\nubmW26/X65qYmOjpmxgmchavDFmXVs51XKeyTjp9vvjnnt68odDt5R3PPN/zIA1qPFfT61iP+jU6\nMzOzGBHVTuvlKnTbayUdlPRCRHyjxfIpSQcj4obVtlOtVmNhYaHlsvn5edVqtY5ZRo2cxStD1ryf\nWLRnqfgP+Sr6E4vyjmee73mQBjWeq+l1rEf9GrWdq9DznOViSU9JOtlc5rYnm1a7R9LxXoICAIqR\n59fjLZLul7Rk+2g272uS7rO9RY1DLsuSHh5IQgBALnnOcnlJklsseq74OACAXnGlKAAkgkIHgERQ\n6ACQCAodABJBoQNAIih0AEjEcC/TAoAR6vXq2J3TF/RAn1fWFn1FcCvsoQNAIih0AEgEhQ4AiaDQ\nASARFDoAJIJCB4BEUOgAkAgKHQASQaEDQCIodABIBJf+AyNW9Ic1F3GZOsYTe+gAkIiOhW77GttH\nbL9u+4TtR7L5V9g+ZPut7OumwccFALSTZw/9gqSdEXG9pJslfcX29ZJ2STocEddKOpxNAwBGpGOh\nR8SpiPhpdv9DSSclbZZ0l6T92Wr7Jd09qJAAgM66OoZue0rSTZJekVSJiFPZovckVQpNBgDoiiMi\n34r2hKQfS/p6RPzA9gcRsbFp+fsR8Ynj6LZnJc1KUqVS2To3N9dy+/V6XRMTEz18C8NFzuKVIevS\nyrmO61TWSafPDyFMn8hZvCKyTm/e0PNjZ2ZmFiOi2mm9XIVue62kg5JeiIhvZPPelFSLiFO2JyXN\nR8R1q22nWq3GwsJCy2Xz8/Oq1Wods4waOYtXhqx5Th3cOX1Be5bKf6YvOYtXRNZ+PrHIdq5Cz3OW\niyU9JenkxTLPPCtpe3Z/u6RnegkKAChGnl85t0i6X9KS7aPZvK9J2i3pgO2HJL0j6d7BRAQA5NGx\n0CPiJUlus/i2YuMAAHrFlaIAkAgKHQASQaEDQCIodABIBIUOAImg0AEgEeNxmdanVD8ffNDPhxz0\nc0UbgNFhDx0AEkGhA0AiKHQASASFDgCJoNABIBEUOgAkgkIHgERQ6ACQCAodABJBoQNAIrj0H5/Q\nz1sO9KL5bQp42wGgd+yhA0AiOha67X22z9g+3jTvUdsrto9mtzsGGxMA0EmePfRvS9rWYv6TEbEl\nuz1XbCwAQLc6FnpEvCjpl0PIAgDoQz/H0HfYPpYdktlUWCIAQE8cEZ1XsqckHYyIG7LpiqSzkkLS\n45ImI+LBNo+dlTQrSZVKZevc3FzL56jX65qYmOj+OxiyYeZcWjnX82Mr66TT5wsMM0DNWac3bxhJ\nhjxjPS5jSs7iFZG1n9f2zMzMYkRUO63XU6HnXXaparUaCwsLLZfNz8+rVqt1zDJqw8zZ7ycW7Vka\nj7NSm7OO6rTFPGM9LmNKzuIVkbWf17btXIXe0yEX25NNk/dIOt5uXQDAcHT8lWP7e5Jqkq60/a6k\nf5BUs71FjUMuy5IeHmBGAEAOHQs9Iu5rMfupAWQBAPSBK0UBIBEUOgAkgkIHgERQ6ACQCAodABJB\noQNAIih0AEgEhQ4AiaDQASARFDoAJIJCB4BEUOgAkAgKHQASQaEDQCIodABIBIUOAImg0AEgEePx\nCa341Ojng7GBTzv20AEgER0L3fY+22dsH2+ad4XtQ7bfyr5uGmxMAEAnefbQvy1p2yXzdkk6HBHX\nSjqcTQMARqhjoUfEi5J+ecnsuyTtz+7vl3R3wbkAAF3q9Rh6JSJOZfffk1QpKA8AoEeOiM4r2VOS\nDkbEDdn0BxGxsWn5+xHR8ji67VlJs5JUqVS2zs3NtXyOer2uiYmJbvMP3TBzLq2c6/mxlXXS6fMF\nhhmgcclKzmKNS06pmKzTmzf0/NiZmZnFiKh2Wq/X0xZP256MiFO2JyWdabdiROyVtFeSqtVq1Gq1\nlu
vNz8+r3bIyGWbOB/o4hW/n9AXtWRqPs1LHJSs5izUuOaVisi5/uVZMmFX0esjlWUnbs/vbJT1T\nTBwAQK/ynLb4PUkvS7rO9ru2H5K0W9Lttt+S9KfZNABghDr+DRER97VZdFvBWQAAfeBKUQBIBIUO\nAImg0AEgERQ6ACSCQgeARFDoAJCI8bhMS+X54IOd0xf6uoITAAaFPXQASASFDgCJoNABIBEUOgAk\ngkIHgERQ6ACQCAodABJBoQNAIih0AEgEhQ4AiaDQASARFDoAJKKvN+eyvSzpQ0m/lnQhIqpFhAIA\ndK+Id1uciYizBWwHANAHDrkAQCL6LfSQ9CPbi7ZniwgEAOiNI6L3B9ubI2LF9u9KOiRpR0S8eMk6\ns5JmJalSqWydm5trua16va6JiYm2z7W0cq7nnEWqrJNOnx91is7GJac0PlnJWaxxySkVk3V684ae\nHzszM7OY5/8o+yr039qQ/aikekQ80W6darUaCwsLLZfNz8+rVqu13X6ZPrFoz1L5P+hpXHJK45OV\nnMUal5xSMVmXd9/Z82Nt5yr0ng+52F5v+7MX70v6oqTjvW4PANCffn7lVCQ9bfvidr4bEc8XkgoA\n0LWeCz0i3pZ0Y4FZAAB94LRFAEgEhQ4AiaDQASARFDoAJIJCB4BEUOgAkAgKHQASQaEDQCIodABI\nBIUOAImg0AEgERQ6ACSCQgeARFDoAJAICh0AEkGhA0AiKHQASASFDgCJoNABIBF9FbrtbbbftP0z\n27uKCgUA6F7PhW57jaR/lfQlSddLus/29UUFAwB0p5899M9L+llEvB0Rv5I0J+muYmIBALrVT6Fv\nlvTzpul3s3kAgBFwRPT2QPsvJG2LiL/Jpu+X9EcR8dVL1puVNJtNXifpzTabvFLS2Z7CDBc5izcu\nWclZrHHJKY0+6+9FxFWdVrqsjydYkXRN0/TV2bzfEhF7Je3ttDHbCxFR7SPPUJCzeOOSlZzFGpec\n0vhk7eeQy08kXWv7c7Z/R9JfSnq2mFgAgG71vIceERdsf1XSC5LWSNoXEScKSwYA6Eo/h1wUEc9J\neq6gLB0Py5QEOYs3LlnJWaxxySmNSdae/1MUAFAuXPoPAIkYeqHbvtz2q7Zfs33C9mPZ/CtsH7L9\nVvZ107Cz5cz5qO0V20ez2x2jzHmR7TW2/8f2wWy6VOPZrEXW0o2p7WXbS1mehWxeKce0TdYyjulG\n29+3/Ybtk7b/uIxj2iZn6cazlVHsoX8s6daIuFHSFknbbN8saZekwxFxraTD2fQotcspSU9GxJbs\nVtT/IfTrEUknm6bLNp7NLs0qlXNMZ7I8F09XK/OYXppVKt+Y/rOk5yPiDyTdqMZroIxj2iqnVL7x\n/IShF3o01LPJtdkt1HjbgP3Z/P2S7h52tmar5Cwd21dLulPSt5pml2o8L2qTdVyUckzHge0Nkr4g\n6SlJiohfRcQHKtmYrpJzLIzkGHr2J/dRSWckHYqIVyRVIuJUtsp7kiqjyNasTU5J2mH7mO19ZfgT\nUdI/SfpbSb9pmle68cy0yiqVb0xD0o9sL2ZXO0vlHdNWWaVyjennJP1C0r9nh9u+ZXu9yjem7XJK\n5RrPlkZS6BHx64jYosbVpZ+3fcMly0Ml2Btuk/Obkn5fjcMwpyTtGWFE2f4zSWciYrHdOmUZz1Wy\nlmpMM3+S/ey/JOkrtr/QvLAsY5pplbVsY3qZpD+U9M2IuEnSR7rk8EpJxrRdzrKNZ0sjPcsl+1Pm\niKRtkk7bnpSk7OuZUWZr1pwzIk5nRf8bSf+mxrtOjtItkv7c9rIa73h5q+3/UDnHs2XWEo6pImIl\n+3pG0tNqZCrjmLbMWsIxfVfSu01/5X5fjeIs25i2zFnC8WxpFGe5XGV7Y3Z/naTbJb2hxtsGbM9W\n2y7pmWFna9Yu58UXX+YeScdHke+iiPi7iLg6IqbUePuF/46Iv1LJ
xlNqn7VsY2p7ve3PXrwv6YtZ\nptKNabusZRvTiHhP0s9tX5fNuk3S6yrZmLbLWbbxbKevK0V7NClpvxsfkPEZSQci4qDtlyUdsP2Q\npHck3TuCbM3a5fyO7S1q/Gm4LOnhEWZczW6VazxX848lG9OKpKdtS41/I9+NiOdt/0TlG9N2Wcv4\nOt0h6T/deO+ntyX9tbJ/WyUb01Y5/6WE4/kJXCkKAIngSlEASASFDgCJoNABIBEUOgAkgkIHgERQ\n6ACQCAodABJBoQNAIv4fx3wV4k2PReUAAAAASUVORK5CYII=\n",
412 | "text/plain": [
413 | ""
414 | ]
415 | },
416 | "metadata": {},
417 | "output_type": "display_data"
418 | }
419 | ],
420 | "source": [
421 | "hsb['write'].hist()"
422 | ]
423 | },
424 | {
425 | "cell_type": "markdown",
426 | "metadata": {},
427 | "source": [
428 | "Распределение действительно несимметричное и не очень похоже на нормальное. Попробуем найти нормальное распределение среди оценок."
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": 8,
434 | "metadata": {
435 | "collapsed": false
436 | },
437 | "outputs": [
438 | {
439 | "name": "stdout",
440 | "output_type": "stream",
441 | "text": [
442 | "read (0.9797889590263367, 0.005552584305405617)\n",
443 | "write (0.9470317363739014, 9.865516403806396e-07)\n",
444 | "math (0.976807177066803, 0.002145080827176571)\n",
445 | "science (0.9852479696273804, 0.03476548567414284)\n",
446 | "socst (0.9606784582138062, 2.343731830478646e-05)\n"
447 | ]
448 | }
449 | ],
450 | "source": [
451 | "for col in hsb.columns[-5:]:\n",
452 | " print col, mrstat.shapiro(hsb[col])"
453 | ]
454 | },
455 | {
456 | "cell_type": "markdown",
457 | "metadata": {},
458 | "source": [
459 | "Самый большой p-value получился у science. Посмотрим на гистограмму."
460 | ]
461 | },
462 | {
463 | "cell_type": "code",
464 | "execution_count": 9,
465 | "metadata": {
466 | "collapsed": false
467 | },
468 | "outputs": [
469 | {
470 | "data": {
471 | "text/plain": [
472 | ""
473 | ]
474 | },
475 | "execution_count": 9,
476 | "metadata": {},
477 | "output_type": "execute_result"
478 | },
479 | {
480 | "data": {
481 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEiJJREFUeJzt3XFsnHd9x/H3l7ZTs5ol6dqdrJTNoFWdqnpNFwuYmKZz\nS1FpK1qkCVGVKh1MRhpUnRTEAn9sZQgpfxBgqtCkCDqiUbA6RpUq7UAh1CAmVGaXULe0VRFLR6M0\npqTNcImYAt/94SeZmzrc2b67J/4975dk3fP87jk/32/O97knv7vnLjITSdLa95q6C5Ak9YaBLkmF\nMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSrEuYPc2UUXXZQjIyOD3OWyvfzyy1xwwQV1\nl1ELe29m79Ds/tdC7zMzMy9k5sWdthtooI+MjDA9PT3IXS7b1NQU7Xa77jJqYe/tusuoTZP7Xwu9\nR8Sz3WznlIskFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBVioGeKSmer2UPH\nuH37g3WXMVAHd9xQdwnqsa6P0CPinIj4fkTsrdYvjIh9EfFMdbmxf2VKkjpZzpTLncCTi9a3A/sz\n81Jgf7UuSapJV4EeEZcANwCfWzR8E7C7Wt4N3Nzb0iRJy9HtEfpngA8Dv1401srMw9Xy80Crl4VJ\nkpYnMvM3bxBxI3B9Zv51RLSBD2XmjRHxUmZuWLTdi5n5qnn0iJgAJgBardaWycnJnjbQa/Pz8wwN\nDdVdRi2a3Pvc0WMcOV53FYM1umn9qeUm3/droffx8fGZzBzrtF0373J5C/COiLgeOB/4nYj4InAk\nIoYz83BEDANzS904M3cBuwDGxsbybP/c4bXw2cj90uTe7753Dztnm/Wmr4O3tk8tN/m+L6n3jlMu\nmfmRzLwkM0eAdwPfzMz3AA8AW6vNtgJ7+lalJKmj1ZxYtAO4NiKeAd5arUuSarKs/2Nm5hQwVS3/\nDLim9yVJklbCU/8lqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJA\nl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYXoGOgRcX5EfC8ifhART0TEx6rxuyLiUEQcqH6u73+5\nkqQz6eYbi34JXJ2Z8xFxHvCdiPj36rpPZ+Yn+1eeJKlbHQM9MxOYr1bPq36yn0VJkpavqzn0iDgn\nIg4Ac8C+zHykuuqOiHgsIu6JiI19q1KS1FEsHIB3uXHEBuB+4A7gp8ALLBytfxwYzsz3LnGbCWAC\noNVqbZmcnOxB2f0zPz/P0NBQ3WXUosm9zx09xpHjdVcxWKOb1p9abvJ9vxZ6Hx8fn8nMsU7bLSvQ\nASLi74BfLJ47j4gRYG9mXvGbbjs2NpbT09PL2t+gTU1N0W636y6jFk3u/e5797BztpuXlMpxcMcN\np5abfN+vhd4joqtA7+ZdLhdXR+ZExDrgWuCpiBhetNk7gcdXWqwkafW6OSQZBnZHxDksPAHcl5l7\nI+JfImIzC1MuB4H3969MSVIn3bzL5THgqiXGb+tLRZKkFfFMUUkqhIEuSYUw0CWpEAa6JBXCQJek\nQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSpE\nN98pen5EfC8ifhART0TEx6rxCyNiX0Q8U11u7H+5kqQz6eYI/ZfA1Zl5JbAZuC4i3gxsB/Zn5qXA\n/mpdklSTjoGeC+ar1fOqnwRuAnZX47uBm/tSoSSpK5GZnTeKOAeYAf4Q+Gxm/m1EvJSZG6rrA3jx\n5Pppt50AJgBardaWycnJXtbfc/Pz8wwNDdVdRi2a3Pvc0WMcOV53FYM1umn9qeUm3/droffx8fGZ\nzBzrtN253fyyzPwVsDkiNgD3R8QVp1
2fEbHkM0Nm7gJ2AYyNjWW73e5ml7WZmpribK+xX5rc+933\n7mHnbFcPh2IcvLV9arnJ931JvS/rXS6Z+RLwMHAdcCQihgGqy7nelydJ6lY373K5uDoyJyLWAdcC\nTwEPAFurzbYCe/pVpCSps27+jzkM7K7m0V8D3JeZeyPiu8B9EfE+4FngXX2sU5LUQcdAz8zHgKuW\nGP8ZcE0/ipIkLZ9nikpSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiGa9fFy\nkk4Z2f7gqeVtoye4fdF6vx3cccPA9tUkHqFLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQnTzFXSv\ni4iHI+KHEfFERNxZjd8VEYci4kD1c33/y5UknUk370M/AWzLzEcj4rXATETsq677dGZ+sn/lSZK6\n1c1X0B0GDlfLP4+IJ4FN/S5MkrQ8y5pDj4gRFr5f9JFq6I6IeCwi7omIjT2uTZK0DJGZ3W0YMQR8\nC/hEZn41IlrAC0ACHweGM/O9S9xuApgAaLVaWyYnJ3tVe1/Mz88zNDRUdxm1aHLvc0ePceR43VXU\np7WOgfY/umn94HbWwVr4ux8fH5/JzLFO23UV6BFxHrAX+HpmfmqJ60eAvZl5xW/6PWNjYzk9Pd1x\nf3Wampqi3W7XXUYtmtz73ffuYedscz/aaNvoiYH2fzZ9lsta+LuPiK4CvZt3uQTweeDJxWEeEcOL\nNnsn8PhKCpUk9UY3T8lvAW4DZiPiQDX2UeCWiNjMwpTLQeD9falQktSVbt7l8h0glrjqod6XI0la\nqeZOGuqMRgb4udinO5vmVqW1xlP/JakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw\n0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKkQ3X0H3uoh4OCJ+GBFPRMSd1fiFEbEvIp6p\nLjf2v1xJ0pl0c4R+AtiWmZcDbwY+EBGXA9uB/Zl5KbC/Wpck1aRjoGfm4cx8tFr+OfAksAm4Cdhd\nbbYbuLlfRUqSOlvWHHpEjABXAY8Arcw8XF31PNDqaWWSpGWJzOxuw4gh4FvAJzLzqxHxUmZuWHT9\ni5n5qnn0iJgAJgBardaWycnJ3lTeJ/Pz8wwNDdVdRi1O9j576FhtNYxuWl/LfueOHuPI8Vp2fVZo\nrWOg/dd1Py9lLTzmx8fHZzJzrNN2XX1JdEScB/wbcG9mfrUaPhIRw5l5OCKGgbmlbpuZu4BdAGNj\nY9lut7vZZW2mpqY422vsl5O9317nl0Tf2q5lv3ffu4eds839zvRtoycG2n9d9/NSSnrMd/MulwA+\nDzyZmZ9adNUDwNZqeSuwp/flSZK61c1T8luA24DZiDhQjX0U2AHcFxHvA54F3tWfEiVJ3egY6Jn5\nHSDOcPU1vS1HkrRSzZ00XANGBjyXvW30RK3z55JWx1P/JakQBrokFcJAl6RCGOiSVAgDXZIKYaBL\nUiEMdEkqhIEuSYUw0CWpEJ4pKmngBn0W9EkHd9xQy34HxSN0SSqEgS5JhTDQJakQBrokFcIXRXVW\nqevFsm2jtexW6qluvoLunoiYi4jHF43dFRGHIuJA9XN9f8uUJHXSzZTLF4Drlhj/dGZurn4e6m1Z\nkqTl6hjomflt4OgAapEkrcJqXhS9IyIeq6ZkNvasIknSikRmdt4oYgTYm5lXVOst4AUggY8Dw5n5\n3jPcdgKYAGi1WlsmJyd7Uni/zM/PMzQ0VHcZAMweOjbQ/bXWwZHjA93lWaPJvUNz+h/dtP5VY2fT\nY/5MxsfHZzJzrNN2Kwr0bq873djYWE5PT3fcX52mpqZot9t1lwHU8yXRO2eb+canJvcOzel/qVP/\nz6bH/JlERFeBvqIpl4gYXrT6TuDxM20rSRqMjk/JEfFloA1cFBHPAX8PtCNiMwtTLgeB9/exRklS\nFz
oGembessTw5/tQiyRpFTz1X5IKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12S\nCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgrRMdAj4p6ImIuIxxeNXRgR+yLi\nmepyY3/LlCR10s0R+heA604b2w7sz8xLgf3VuiSpRh0DPTO/DRw9bfgmYHe1vBu4ucd1SZKWKTKz\n80YRI8DezLyiWn8pMzdUywG8eHJ9idtOABMArVZry+TkZG8q75P5+XmGhoZeMTZ76FhN1QxWax0c\nOV53FfVocu/QnP5HN61/1dhSj/mzzfj4+ExmjnXa7tzV7igzMyLO+KyQmbuAXQBjY2PZbrdXu8u+\nmpqa4vQab9/+YD3FDNi20RPsnF31n8Sa1OTeoTn9H7y1/aqxpR7za9VK3+VyJCKGAarLud6VJEla\niZUG+gPA1mp5K7CnN+VIklaqm7ctfhn4LnBZRDwXEe8DdgDXRsQzwFurdUlSjTpOmmXmLWe46poe\n1yJJWgXPFJWkQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgph\noEtSIQx0SSqEgS5JhTDQJakQBrokFWJV3wobEQeBnwO/Ak50863UkqT+6MXXfI9n5gs9+D2SpFVw\nykWSCrHaQE/gGxExExETvShIkrQykZkrv3HEpsw8FBG/B+wD7sjMb5+2zQQwAdBqtbZMTk6upt6+\nm5+fZ2ho6BVjs4eO1VTNYLXWwZHjdVdRjyb3Ds3pf3TT+leNLfWYP9uMj4/PdPMa5aoC/RW/KOIu\nYD4zP3mmbcbGxnJ6eron++uXqakp2u32K8ZGtj9YTzEDtm30BDtne/GyytrT5N6hOf0f3HHDq8aW\nesyfbSKiq0Bf8ZRLRFwQEa89uQy8DXh8pb9PkrQ6q3lKbgH3R8TJ3/OlzPxaT6qSJC3bigM9M38M\nXNnDWiRJq+DbFiWpEAa6JBXCQJekQhjoklQIA12SClH+mQSSVFnqJMFtoye4fQAnDy51UlOveYQu\nSYUw0CWpEAa6JBXCQJekQqyZF0UH9YmHg3qBRJJ6zSN0SSqEgS5JhTDQJakQBrokFcJAl6RCrCrQ\nI+K6iHg6In4UEdt7VZQkaflW852i5wCfBd4OXA7cEhGX96owSdLyrOYI/Y3AjzLzx5n5v8AkcFNv\nypIkLddqAn0T8JNF689VY5KkGkRmruyGEX8BXJeZf1Wt3wa8KTM/eNp2E8BEtXoZ8PTKyx2Ii4AX\n6i6iJvbeXE3ufy30/geZeXGnjVZz6v8h4HWL1i+pxl4hM3cBu1axn4GKiOnMHKu7jjrYezN7h2b3\nX1Lvq5ly+U/g0oh4fUT8FvBu4IHelCVJWq4VH6Fn5omI+CDwdeAc4J7MfKJnlUmSlmVVn7aYmQ8B\nD/WolrPFmpke6gN7b64m919M7yt+UVSSdHbx1H9JKkRjAz0izo+I70XEDyLiiYj4WDV+YUTsi4hn\nqsuNddfaLxFxTkR8PyL2VutN6v1gRMxGxIGImK7GGtF/RGyIiK9ExFMR8WRE/GmDer+sus9P/vxP\nRPxNKf03NtCBXwJXZ+aVwGbguoh4M7Ad2J+ZlwL7q/VS3Qk8uWi9Sb0DjGfm5kVvWWtK//8IfC0z\n/wi4koW/gUb0nplPV/f5ZmAL8AvgfkrpPzMb/wP8NvAo8CYWTnwarsaHgafrrq9PPV/Cwh/u1cDe\naqwRvVf9HQQuOm2s+P6B9cB/Ub1+1qTel/i3eBvwHyX13+Qj9JNTDgeAOWBfZj4CtDLzcLXJ80Cr\ntgL76zPAh4FfLxprSu8ACXwjImaqs5mhGf2/Hvgp8M/VdNvnIuICmtH76d4NfLlaLqL/Rgd6Zv4q\nF/7rdQnwxoi44rTrk4UHflEi4kZgLjNnzrRNqb0v8mfVff924AMR
8eeLryy4/3OBPwH+KTOvAl7m\ntOmFgns/pToZ8h3Av55+3Vruv9GBflJmvgQ8DFwHHImIYYDqcq7O2vrkLcA7IuIgC5+SeXVEfJFm\n9A5AZh6qLudYmEN9I83o/znguep/owBfYSHgm9D7Ym8HHs3MI9V6Ef03NtAj4uKI2FAtrwOuBZ5i\n4eMLtlabbQX21FNh/2TmRzLzkswcYeG/nd/MzPfQgN4BIuKCiHjtyWUW5lIfpwH9Z+bzwE8i4rJq\n6BrghzSg99Pcwv9Pt0Ah/Tf2xKKI+GNgNwsfW/Aa4L7M/IeI+F3gPuD3gWeBd2Xm0foq7a+IaAMf\nyswbm9J7RLyBhaNyWJiC+FJmfqJB/W8GPgf8FvBj4C+pHgMU3jucehL/b+ANmXmsGivivm9soEtS\naRo75SJJpTHQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqxP8BiBp5J9CujDUAAAAASUVO\nRK5CYII=\n",
482 | "text/plain": [
483 | ""
484 | ]
485 | },
486 | "metadata": {},
487 | "output_type": "display_data"
488 | }
489 | ],
490 | "source": [
491 | "hsb.science.hist()"
492 | ]
493 | },
494 | {
495 | "cell_type": "markdown",
496 | "metadata": {},
497 | "source": [
498 | "Распределение действительно более симметричное и напоминает нормальное."
499 | ]
500 | },
501 | {
502 | "cell_type": "markdown",
503 | "metadata": {},
504 | "source": [
505 | "#### 2.2 КуКу график"
506 | ]
507 | },
508 | {
509 | "cell_type": "markdown",
510 | "metadata": {},
511 | "source": [
512 | "Это визуальный способ проверить распределение на нормальность. Посмотрим на write."
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": 10,
518 | "metadata": {
519 | "collapsed": false
520 | },
521 | "outputs": [
522 | {
523 | "data": {
524 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xmc1XP7x/HX1YK7ckeLZGnKdt8GlRpL9nCTfblxR4gi\nsuV2W+/87LnthEjkLnfDXVFkS0SSkPaVO1IIbbKGarp+f3y+o9M0c86ZmXPmzJzzfj4e53HO93u+\ny3VGznU+u7k7IiKSu2plOgAREcksJQIRkRynRCAikuOUCEREcpwSgYhIjlMiEBHJcUoEkrXM7CYz\nG1LBc88xswlx3n/VzLqWdqyZ/WRmO1TkvuWMcZyZnZfu+0j2UyKQasXMFprZL9GX6RIzG2RmDTId\nV0nufpS7Dy7jvQbuvgAgiv+2it4nFX8PM2tpZm5mdSoah2Q3JQKpjo5z9wZAO6AAuL7kARbkyr/f\nhH8PkcrIlf+RpAZy98XAq8Du8HtVSB8zexdYBexgZtuY2Sgz+9bMPjGz80tcZjMzG2pmP5rZVDNr\nU/yGmV1rZp9G7801s5NKnGtm9rCZfW9mH5nZYTFvlFktE/363snMegBdgKujX/QvmtlVZvZcieMf\nNLO+5f17lLhGLTO73swWmdlSM3vKzBpGb4+Pnr+L4uiQ6F6SW5QIpNoys+2Bo4FpMbvPAnoAmwOL\ngP8CXwLbAKcAt5vZoTHHnwAMBxoBTwPPm1nd6L1PgQOBhsDNwBAzax5z7j7RMU2AG4ERZtYo2fjd\nfQBQCNwVVRcdBwwBOpnZFtFnrAN0Bp5KdL0y/h7FzokeHYEdgAbAw9F7B0XPW0RxvJfsZ5DcoEQg\n1dHzZvYdMAF4G7g95r1B7j7H3dcCWwP7A9e4+6/uPh14Ajg75vgp7v6su68B7gM2A/YFcPfh7v6V\nu69z96HAfGDvmHOXAg+4+5ro/Y+BYyrzwdz9a8Iv9FOjXZ2A5e4+Jc5p8f4exboA97n7Anf/CbgO\n6Kx2AUmG/pFIdXSiu79RxntfxLzeBvjW3X+M2beIUI++0fHuvs7MiksPmNnZwBVAy+iQBoRf/8UW\n+4azMi4qPreSBgM9gceBM4H/JDg+3t+j2DaE+IotIvz/3ayiQUruUIlAaprYL+avgEZmtnnMvhbA\n4pjt7YtfRI3L2wFfmVke4Yv4EqCxu28BzAYs5txtzSx2u0V0z4rGW+x5oLWZ7Q4cS6g+qqyvgLyY\n7RbAWmBJGTGI/E6JQGosd/8CmAj8y8w2M7PWQHdCPXyx9mZ2clRFcjnwG/A+UJ/wBbkMwMzOZeNG\n2K2Ay8ysrpmdCuwKvFLOMJcQ6uxj4/4VeJbQZjHJ3T8v5zVL8wzwdzNrFXUvvR0YGlWhLQPWlYxD\npJgSgdR0pxOqdr4CRgI3lqhGeQH4G7CS0NB8clTnPxe4F3iP8GW9B/BuiWt/AOwMLAf6AKe4+4py\nxjcQyDez78zs+Zj9g6N7JqoWStaT0bXGA58BvwKXArj7KkL870Zx7Juie0qWMC1MI1L1zKwF8BGw\ntbv/kOl4JLepRCBSxaK2iiuA/yoJSHWgXkMiVcjM6hOqohYRuo6KZJyqhkREclxaq4bM7O9mNsfM\nZpvZM1HPjkZm9rqZzY+et0xnDCIiEl/aSgRmti1hJGS+u/9iZsMIXe/yCYOA7jCza4Et3f2aeNdq\n0qSJt2zZMi1xiohkqylTpix396aJjkt3G0Ed4A9mtgaoR+jidx1wSPT+YGAcEDcRtGzZksmTJ6cv\nShGRLGRmixIflcaqoWimxHuAz4Gvge/dfQzQLJpvBeAbyhgCb2Y9zGyymU1etmxZusIUEcl5aUsE\nUd3/CUArwjwo9c3szNhjonlcSq2bcvcB7l7g7gVNmyYs2YiISAWls7H4cOAzd18Wzfw4AtgPWFI8\n1W/0vDSNMYiISALpTASfA/uaWb1o4q
7DgHnAKKBrdExXwhQAIiKSIWlrLHb3D8zsWWAqYRbEacAA\nwlS/w8ysO2FQzWnpikFERBJLa68hd7+RsLJTrN8IpQMREakGNNeQiEiOUyIQEamOVqyAyy+H779P\n+62UCEREqhN3GD4c8vOhXz8YPz7tt1QiEBGpLr7+Gk4+GU47DbbfHqZMgeOOS/ttlQhERDLNHZ58\nEnbdFUaPhrvugvffh9atq+T2Wo9ARCSTFiyAHj1g7Fg46CB44gnYeecqDUElAhGRTCgqggcegD32\ngEmT4NFH4a23qjwJgEoEIiJVb+5c6N49VP8cfTT07x/aBDJEJQIRkaqyejXceivsuSfMnw9DhsBL\nL2U0CYBKBCIiVWPy5FAKmDkTOneGvn1hq60yHRWgEoGISHqtWgVXXw377APLl8MLL8Azz1SbJAAq\nEYiIpM/bb8N558Enn8D558Pdd0PDhpmOaiMqEYiIpNoPP0DPnnDIIbBuXegaOmBAtUwCoEQgIpJa\nL78Mu+0WvvivuAJmzYJDD810VHEpEYiIpMLy5XDmmXDsseGX/8SJcO+9UK9epiNLSIlARKQy3OG/\n/w3TQwwbBjfdBFOnhsbhGkKNxSIiFbV4cWgLePFF2HtvGDgQdt8901GVm0oEIiLl5Q6PPx6min7j\njVAFNHFijUwCoBKBiEj5fPpp6Ar61lvQsWNICDvumOmoKkUlAhGRZBQVwX33hUnipkwJvYLGjq3x\nSQBUIhARSWz27DA9xKRJYaGYRx+FbbfNdFQpoxKBiEhZVq8OvYDatYPPPgu9g154IauSAKhEICJS\nukmToFs3mDMHunQJawc0aZLpqNJCJQIRkVirVsE//gEdOsD334dpoocMydokACoRiIis99ZbYZK4\nBQvgwgvhzjvhj3/MdFRppxKBiMj334d1gw89FGrVgnHjQoNwDiQBUCIQkVw3alQYGDZwIFx1FcyY\nAQcfnOmoqpQSgYjkpqVLw0phJ5wAjRvDBx/AXXfViEniUk2JQERyizsUFoZSwMiRYQ3hyZOhoCDT\nkWWMGotFJHd88UWYJO7ll2HffUN1UH5+pqPKuLSVCMzsT2Y2Pebxg5ldbmaNzOx1M5sfPW+ZrhhE\nRICwSlj//mHBmLfeCmMCJkxQEoikLRG4+8fu3tbd2wLtgVXASOBaYKy77wyMjbZFRNJj/vzQG6hn\nz7BGwOzZ0KsX1K6d6ciqjapqIzgM+NTdFwEnAIOj/YOBE6soBhHJJWvXhsXiW7eG6dNDNdCYMdCq\nVaYjq3aqqo2gM/BM9LqZu38dvf4GaFZFMYhIrpgxI0wSN2UKnHgi9OsH22yT6aiqrbSXCMxsE+B4\nYHjJ99zdAS/jvB5mNtnMJi9btizNUYpIVvjtN/i//ws9gL74IiwdOWKEkkACVVE1dBQw1d2XRNtL\nzKw5QPS8tLST3H2Auxe4e0HTpk2rIEwRqdHeew/23BNuuw3OOAPmzoVTTwWzTEdW7VVFIjid9dVC\nAKOArtHrrsALVRCDiGSrn3+Gyy+H/feHn36CV16BwYPDIDFJSloTgZnVB/4CjIjZfQfwFzObDxwe\nbYuIlN8bb4R1gvv2hYsuClNGH3VUpqOqcdLaWOzuPwONS+xbQehFJCJSMStXwpVXwpNPwi67wPjx\ncOCBmY6qxtIUEyJSs4wcGQaCDR4M114beggpCVSKppgQkZphyRK49FIYPhzatg3TRLRrl+mosoJK\nBCJSvbnDU0/BrruG9YL79AnLSCoJpIwSgYhUX59/DkcfDV27hkQwYwb8859Qt27c0woLoWXLsMZM\ny5ahHTnedmFh6ecV70/0Xrx7l3VcIqm6TlLcvdo/2rdv7yKSQ4qK3B9+2L1BA/f69d0feijsS8KQ\nIe716rmHokRyj3r13Hv23Pi8evXC9Uq7ZvF7ie5d2nEV+QwVuQ4w2ZP4jrVwbPVWUFDgkydPznQY\nIl
IVPv44rBs8YQIccQQ89lj4SZykli1h0aLy37Z2bSgq2nh/Xl54Lu2aeXmwcGHie5c8LpFUXcfM\nprh7woUWVDUkItXDmjVwxx3Qpk0YDzBoEIweXa4kAKE2qSJKSwLF1yvrmiX3J3tcIqm6TrKUCEQk\n86ZNC1NEX3cdHHtsmB6ia9cKTQ/RokXFQihrVuoWLcq+Zsn9yR6XSKqukywlAhHJnF9/hd69Ya+9\n4Kuv4Nlnw2PrrSt8yT59yr/scL160KPHxufVqxeuV9o1i99LdO/SjkskVddJWjINCZl+qLFYJAtN\nmOD+pz+FltBzznFfsSJllx4yxD0vz90sPPfsGX+7uBG25HmxjbPx3ot37/I28KbyOqixWESqpR9/\nDF1A+/ULdR0DBoRGYUk5NRaLSJUqLIQmTUK1fslH7drhuZO9xqI/7s66h/vxoF9Kg0WzqXP0EZhB\nnTps8NykyfrrldxXq1bZr9Pe5z4LaYoJEam0wkI499zQ8ac0Ddd9y31cwTkMZh5/5kDeYSL7hzej\n3jpFJZ5XrFh/fmn7ynq9aFGo7wfo0qVinyfXqEQgIpXWu3fZSeBknmMu+ZzJEG6jN3sybX0SSJNV\nq0JMkhyVCESk0krr3741X/Mwl/BXRjCVPenEaGbQNqMxSelUIhCRStuwf7vTlUHMJZ9jeJlruIO9\nmVSlSWDjmCQeJQIRqbQ+fcI8cHks5DWOZBDnMos9aM1M7uIaiqq48iGtfe6zkBKBiFRal85FTOz8\nILPZnQ68x0X04xDGMZ9dgNCbBzYcKFy8r3hEb8nnxo3XLztccp9Z2a/z8kKPVDUUJ09tBCJSOfPm\nwXnnUTBxInTqBI89xiMtWvBIpuOSpKlEICLlUjxeoK6tobf14bf8tqyY+BFn8RRNJr1C4TuqnK9p\nVCIQkaQVjxfYfc1UnqQbbZnBUE7jMh5kKc3gW+jWLRyrqpmaQyUCEUnaLdf9wi1rrmUSe7MVSzmR\nkXRmaEgCkdWr1Ye/plGJQESSM348L35xHrswnyfozpXcw/dsUeqh6sNfs6hEIJLl4s0BlMzjj/YD\n/exiOPhg6rCWw3iD83mizCQA6sNf06hEIJLFEs0BlEgnXuUxLmA7vuR+Lud6bmMV9eOes8km6sNf\n05SrRGBmW5pZ63QFIyKpFW8OoHgasYLBnM2rHM2PbM5+TOQK7k+YBBo3hiefVENxTZOwRGBm44Dj\no2OnAEvN7F13vyLNsYlIJZW/rt45leE8zCVsyUpu4f/oQ29Ws2mZZ5jBunWVClMyLJkSQUN3/wE4\nGXjK3fcBDk9vWCJSGYWF0KABlGfdqeZ8xUhOYhh/YxF5tGcKN3JL3CQAag/IBskkgjpm1hw4DXgp\nzfGISCUVFsLZZ8PPPyd7htONgcwlnyN5jSu5mw68xywS1wJrTp/skEwiuAV4DfjU3T80sx2A+ekN\nS0Qqqnfv5KtqWrGANzicgZzHdNqyB7O4lyvjThJXPEeQ5vTJHlqzWCTL1KoVv0rIDNatKYKHHgpZ\no3ZtuPtuOP/89d/ykhVStmaxme1iZmPNbHa03drMrk8yiC3M7Fkz+8jM5plZBzNrZGavm9n86HnL\nZK4lImWLHSuQ6LfdYVvPgf33h7//HTp2hLlz4YILlARyWDL/5R8HrgPWALj7TKBzktfvC4x29z8D\nbYB5wLXAWHffGRgbbYtIBRWPFYhdt7c0dVnNjbVuYfTSPeGTT8KJL74I221XNYFKtZXMgLJ67j7J\nYicSh7WJTjKzhsBBwDkA7r4aWG1mJwCHRIcNBsYB1yQdsYhsIJmxAgV8yL9rdWf3dbPg9NOhb19o\n2rRqApRqL5kSwXIz2xFwADM7Bfg6ifNaAcuAf5vZNDN7wszqA83cvfj8byBmtqoYZtbDzCab2eRl\ny5YlcTuR3BRvrMAfWMVdXMX77Mvuzb+FUaPg6aeVBGQDySSCi4HH
gD+b2WLgcqBnEufVAdoBj7r7\nnsDPlKgG8tBSXWqNprsPcPcCdy9oqn+0kmUqO/9P7KOsNoGDGccM2nAV9zC0wXkwZw4cd1zVflCp\nERImAndf4O6HA02BP7v7Ae6+MIlrfwl86e4fRNvPEhLDkmhcAtHz0gpFLlJDJVunX1F/5Hse5ULG\n0RHDOaLOm3j/x6Bhw/TcUGq8ZKaYuKHENgDufku889z9GzP7wsz+5O4fA4cBc6NHV+CO6PmFioUu\nUjNVdP6fZBzDS/TnQprzNffwD/o2uoU7Hqynvv4SVzKNxbHjEzcDjiX0/knGpUChmW0CLADOJZRC\nhplZd2ARYcSySM5Ix1z9TVhGX3pxBs8wi935KyP4wPfmytTfSrJQwkTg7vfGbpvZPYSRxgm5+3Sg\ntMEMhyUVnUgWatECFi1K1dWczvyXB7mMhnzPDdzMHVzLNnmbpOoGkgMqMoKkHqCOxyIV1KcP1K1b\n+etsy5eM4nie4QwWsAPtmMqt3IDX2UTz/0i5JNNGMIv1PXtqExqN47YPiEjZiuvre/WqWIOxsY7z\neIK7uYq6rOHv3MeDXMY6atOgAfTvr/l/pHwSzjVkZnkxm2uBJe6ecEBZKmmuIZHIJ5+EOYHGjQvT\nQzz+OOy4Y6ajkmqq0nMNRXMCNQJ+jHn8Avwx2i8iZbjoojB1TzLjAJo0CV1K41q7Fu69F1q3hqlT\nQwIYO1ZJQFIiXtXQFEKVkJXyngM7pCUikRruoovg0UeTP37FCujWLbwutUpn1izo3h0+/BCOPx4e\neQS23TYlsYqApqEWSbk6daCoqPzn5eXBwoUxO377DW6/PTy23DJMG33aaaEYIZKEZKuGkhlHQDRV\n9M6EcQQAuPv4iocnkr0qkgSgxPiCDz4IpYA5c+DMM+H++0MdkkgaJNNr6DygF6HL6HRgX+A94ND0\nhiZSM9WuXbFk0KIFYX3J//s/eOCBUP3z0ktwzDEpj1EkVjLjCHoBewGL3L0jsCfwXVqjEqnBevQo\n/zmbbAJPnPFmaAy+/3648MJQGlASkCqQTNXQr+7+q5lhZpu6+0dm9qe0RyZSQz3ySHju3z/xamEA\nrbb8jjFtrmKnfz0BO+0UuoYefHBaYxSJlUyJ4Esz2wJ4HnjdzF4gzBEkIiUUdxt99NGQBBo0gCFD\nwutSH8+/wILN8tlp/JNw9dUwc6aSgFS5ZOYaOil6eZOZvQU0BEanNSqRGqi0bqM//QTnnBNeb9A1\ndOlSuOwyGDo0VAeNGgUFCTt3iKRFvAFlr5jZmWbWoHifu7/t7qOiZSdFJMaAAaXvX7s2TD0NhGLA\nkCGw664wciTceitMnqwkIBkVr2roMeAY4DMzG2ZmJ0XTSYtIKeL1FPr8c+CLL+DYY+Gss2CXXWDa\nNLj++tTMQCdSCWUmAnd/wd1PB/KA54Czgc/N7N9m9peqClAkk8ozVURZjHX8c8tHYbfdQkPwAw/A\nhAmQn19ln0MknmTaCFYBQ4GhZtYaGExICrXTHJtIRpV3qojS7Mz/GMh5HPjtO3D44aH+qFWr1AQo\nkiIJew2ZWTMzu9TM3iX0HHqNsPawSFYrq84/GbVZy1XcxQzasHe9WfDkkzBmjJKAVEtllgjM7Hzg\ndOBPhKqhq9x9YlUFJpJpFZ0qojUzeJJutGcqIziJkz/pB82bpzY4kRSKVzXUAfgXMNbd11VRPCLV\nRq1asK4c//I34Teu5zau5Q6+pRGnMJzJLf7Kyc01SZxUb2UmAnfvVpWBiFQnCdcHKGFf3mMg3cln\nHoM5myu4jx/qNGbQ7emJTySVKrJmsUjW6907udJAfX7ifi7nXfanPj/TiVc5h8GsbtCYQYO0ZKTU\nDElNQy2SazaYEjqGWUyCeP31MMPcwoVwySXk3X47ozffvKpCFEmZhEtVlvWoyiBFUqWwMEzrn2hM\nQFmTxbVoAaxcGZYUO+II2HRT
eOedsGiMkoDUUMkuVdkCWBm93gL4HFA/OKlRCgvh3HNhzZqKnV+n\nDgw+cSTkXwTLlsF118ENN8BmmyU+WaQaizeyuJW77wC8ARzn7k3cvTFwLDCmqgIUSZXevSueBJrx\nDU+vPZWD+54MW28NkyaFJSSVBCQLJNNYvK+7v1K84e6vAvulLySR9Cir3j8+52wGM5d8juPF8OU/\naRK005hKyR7JJIKvzOx6M2sZPXoDX6U7MJFUa9GinMeziFc5isGcwzx2pX2t6aE6SJPESZZJJhGc\nDjQFRgIjotenpzMokXTo0ye573BjHRfzMHPYjQOYwCU8xIG8w8EX/Dn9QYpkQDKTzn0L9DKz+u7+\ncxXEJJIWxX36e/WCFStKP2YXPmYg3TmAdxnNkVzAY3xZK48LL1i/BKVItklm0rn9zGwuMC/abmNm\n+l9CaqQuXWD58lKWjFy9Br/9X3y8aRsO2HIuDBpEp3WvssjzKCpSEpDslkzV0P3AkcAKAHefARyU\nzMXNbKGZzTKz6WY2OdrXyMxeN7P50fOWFQ1epDwuuih0ATULzxddFL0xbRrssw/8859w3HEwdy50\n7Rp/kQGRLJLUFBPu/kWJXeWZl7Gju7d19+K1+K4lTGS3MzA22hZJq+K1BYpnFC0qgicf/ZXR7f4J\ne+0FX30Fzz0Hw4eH7qEiOSSZRPCFme0HuJnVNbMriaqJKugEwuI2RM8nVuJaIkkpubbA/kxgBm3o\nNO1fcPbZMG8enHxyZoITybBkEsGFwMXAtsBioG20nQwH3jCzKWbWI9rXzN2/jl5/AzQr7UQz62Fm\nk81s8rJly5K8nUjpiksCDfiRh7iECRzIJqzmCF4Li8ZsqRpKyV1xE4GZ1QbOcvcu7t7M3bdy9zPd\nvYw+Fxs5wN3bAkcBF5vZBm0L7u6EZLERdx/g7gXuXtC0adMkbye5KtEcQgBH8Bqz2Z2LeIS+XMYe\nzOLN2kdkNnCRaiBuInD3IuCMil7c3RdHz0sJ4xD2BpaYWXOA6HlpRa8vAuvnECqrS+iWfMsguvIa\nnVhFPQ5gApfTl59pQI8epZ8jkkuSqRqaYGYPm9mBZtau+JHoJDOrb2abF78GjgBmA6OArtFhXYEX\nKhi7CBBvDiHnrzzLPHblDJ7mNnqzJ9N4L5ohpX59dQsVgeTWI2gbPd8Ss8+BQxOc1wwYaaFcXgd4\n2t1Hm9mHwDAz6w4sAk4rX8giGyptDqGt+Zp+XMzJjGQK7TiS15jx+z/lYNWqKgpQpJpLZmRxx4pc\n2N0XAG1K2b8COKwi1xQpTYsWsGhR8ZZzDoO4jyvYjF+5mju5jysoKuWfennnHhLJVsmMLG5mZgPN\n7NVoOz/6NS9SLRTPIdSSzxjDEfybbsxiD9owg7u5utQksMkm4TwRSa6NYBDwGrBNtP0/4PJ0BSRS\nXl06FzGx84PMZnf25X168giHMI757FLq8Y0bhx6jWk9YJEgmETRx92HAOgB3X0v5RhaLpM+8eXDg\ngRT8pxf1jzqYzRfN4VHvyTqvtfF8QtFj+XIlAZFYySSCn82sMVF/fzPbF/g+rVGJJLJmDdx2G0Wt\n2/Lt+x9zJv/BXn2ZJu1aUFiY6eBEapZkeg1dQejyuaOZvUtYj+CUtEYlEs+UKWHx+JkzGWGncbE/\nxDK2AsJYgm7dwmH61S+SnIQlAnefChxMWJ7yAmA3d5+Z7sBENvLLL3DNNbD33rBsGT2ajuQ0H/p7\nEii2enUYWyAiySmzRGBmZc3AtYuZ4e4j0hSTyMbGj4fzzoP588Pz3XfzRKMtyjy8YusTi+SmeFVD\nx0XPWxFKA29G2x2BiYRlK0XS64cf+N/J17LL2EdZQCvO5w3efOIweCL+aRojIJK8MhOBu58LYGZj\ngPziGUOj+YEGVUl0ktteeYWfz7qAnb5dzH38nf/jVlZRP+FpdepojIBIeSTTa2j7mGmjAZYA+r
0l\n6bN8OZx5JhxzDIt//CP7MZF/cF9SSQCgYUM1FIuURzK9hsaa2WvAM9H234A30heS5Cx3GDYMLr0U\nVq6EG26g9S3/5Dc2Lddlvv02TfGJZKlk5hq6xMxOYv06xQPcfWR6w5Kc89VX0LMnjBoFBQXwxhvQ\nujUN+sFvya5+EVH7gEj5xE0E0cI0b0QTz+nLX1LPHQYOhCuvhN9+g3vugV69oE4dCgvhhx/KdznN\nISRSfsksTLPOzBpWUTySSxYsgMMPh/PPh7ZtYdYs+Mc/Qmsv8dYZKJ3mEBKpmGTaCH4CZpnZ68DP\nxTvd/bK0RSXZragIHnwwfNPXqQOPPRbGBtTa8HdJWWMBzGDduiqIUyRHJJMIRqAxA5IiL90xm+1u\n7E7b1ZN4iWO4kP4svmC7MGY9SWoDEEmtZBLBUGCn6PUn7v5rGuORbLV6NTNP/xdHjOjD9zTkdJ7m\nv3QGrFyXURuASOqV2UZgZnXM7C7gS2Aw8BTwhZndZWZ1qypAyQIffgjt29N6xE0M51Tymct/OZ3y\nJgGAzTdXG4BIqsVrLL4baAS0cvf27t4O2BHYArinKoKTGm7VqtAbaN99YeVKjmMUZ1LIcppW+JIa\nIyCSevGqho4FdnF3L97h7j+YWU/gI6BXuoOTGmzcuNAA/OmncMEFDGt/Jy9f0DBa1aLi1D4gknrx\nEoHHJoGYnUVmVsn/nSVrff89XH01DBgAO+4Ib74JHTtydcswZKAy1D4gkh7xqobmmtnZJXea2ZmE\nEoHIhl58EfLz4YknQpXQzJnQsSNQ+WmhNUZAJH3ilQguBkaYWTdgSrSvAPgDcFK6A5MaZNmyMBr4\nmWdgjz3g+edhr71+f7uwMPT9L61EkJcHCxdWXagisrF401AvBvYxs0OB3aLdr7j72CqJTKo/9/Dl\nf9llYS6Im2+Ga68NdTiRwkI499zSB4Cpqkekekhm0rk3Wb8ojUjw5ZdhkriXXoJ99gnzBe2220aH\nxZsmQl1BRaqHZNYjEFlv3bowJUR+PowdC/fdB+++W2oSgPhtA+oKKlI9KBFI8ubPh0MPhQsvZPwv\ne7HDL7OxK/6O1amNGaU+4vUUUldQkepBiUASW7s2TA/dujWrJ03jgtqPc/DaN/iMHSp8SbUPiFQf\nycw1JLls5kzo3h0mT4bjj+fAKY8wafG2lbpkrVrqCipSnahEIKX77Te48UZo3x4WLYKhQ+H55/nw\nq8olAQjVRUoCItVH2hOBmdU2s2lm9lK03cjMXjez+dHzlumOQcrp/fehXTu45Rbo3Jnht8yjyUWn\nYbWs0qOOY17NAAAPRUlEQVSDQW0DItVNVZQIegHzYravBca6+87A2GhbqoOff4YrroD99gvjAl5+\nmcJO/6HLZY1ZUc51g8uitgGR6ieticDMtgOOAZ6I2X0CYVproucT0xmDJGns2DAq+P774cILYc4c\nOProci8XGY+miRCpntLdWPwAcDWwecy+Zu7+dfT6G6BZaSeaWQ+gB0AL1SWkz3ffhXmBBg6EnXeG\nt9+Ggw76/e1k5gjS0pEiNVvaSgRmdiyw1N2nlHVMNLtpqbXO7j7A3QvcvaBp04rPXy9xvPACq1rl\ns3bgIO7gGhp8OgM7+CDq1Alf7rVqJTdjqPK0SM2WzhLB/sDxZnY0sBnwRzMbAiwxs+bu/rWZNQeW\npjEGKc2SJWF+oGHD+MTacC4vMpX2EP2qLyoKz8kkgXr1VOcvUtOlrUTg7te5+3bu3hLoDLzp7mcC\no4Cu0WFdgRfSFYOU4A7/+U+YHuL557l7i9to7x+GJFAOtaJ/NXl5YdkB1fmL1GyZGFB2BzDMzLoD\ni4DTMhBD7vn889AI/Oqr0KEDDBzINbvtWu4Fw8zWlxhEJDtUSSJw93HAuOj1CuCwqrivEFpx+/eH\na64Jr/v2hYsvhtq1adSIcncLVXuASPbRFBPZ7H//C+sGv/
MOHH54qMdp1QoI6wT88EP5LqcxACLZ\nSVNMZKO1a+HOO6F1a5g1K3TeHzPm9yQA8dcJqF07PJut36cxACLZSyWCbDNjBnTrBlOnwkknQb9+\n0Lz5RoeVNT7ALOQREckdKhFki19/heuvh4ICWLwYnn0WRoyg8M3mtGwZvuCLxwc0abLhr/1YagMQ\nyT0qEWSDiRPDVNEffQRdu4ZVwxo1orAQevSAVavCYcW9fcpqINaYAJHcpBJBTfbTT2Fg2AEHhG/7\n0aNh0CBo1AgI7QDFSSCR2rU1JkAkVykR1FRjxsDuu8PDD4fuoLNnw5FHbnBIMvMEFVu3TklAJFcp\nEdQ0K1fCueeGL/3NNoPx4+Ghh2DzzSkshJYtw8jfeO0ApVHbgEjuUhtBTTJiRPj1v2wZXHcd3HBD\nSAawUXtAeQaKqW1AJLepRFATfPMNnHIK/PWvsPXW8OGHcPvtvycBKF97QOPG4WGm+YJERCWC6s0d\nBg8Oq4atWhW+/K+8EurW3ehQrRsgIhWlEkF1tXAhdOoU2gPy82H69FAdFCWBirQHqB1AREqjEkF1\ns25dGA183XXh2/3hh6Fnz/VzP1Ox9gC1A4hIWVQiqE4++igsE1k8NmD27NA4XGvD/0zJtAeYqR1A\nRJKjEkF1sGYN3H033Hwz1K8f2gXOOqvM+p5kxwcsX57CGEUka6lEkGlTp8Lee4ef+ccfD/Pmwdln\nx630T6auX+0BIpIsJYJM+eWX0A6w996he+hzz8Hw4dCsWcJT+/QJdf5lUXuAiJSHEkEmTJgAbdvC\nHXeEX/9z58LJJyd9epcuoc4/L299W4DaA0SkotRGUJV+/DGUAvr1C30/x4yBv/ylQpfq0kVf9iKS\nGioRVJXRo8MkcY88Ar16MfT6WbQ8/y/UqhVyQmHhhmMDWraEiy6Kv11YmMHPIyJZw9w90zEkVFBQ\n4JMnT850GBWzYkUYGfzUU7DrrjBwIIULOmwwDgDCODEzWL06+UvXq6dqIBEpm5lNcfeCRMepRJAu\n7qHxNz8fnn46rB42bRp06FDqOIA1a8qXBCBco3fv1IUsIrlJbQTp8PXXoR7n+eehffvQFtCmze9v\nl2edgERSeS0RyU0qEaSSOzz5ZKgCGj0a7rwT3n9/gyQAqe3jr/ECIlJZSgSp8tlncMQRYe3gNm1g\nxgy4+uqwYnwJpY0DqFsXNtmkfLfUeAERSQUlgsoqKoK+fUOPoA8+gEcfhbfegl12KfOUkuMA8vLg\n3/8OhYnYfT17xt9WQ7GIpIJ6DVXG3LmhBPD++3DUUfDYY7D99pmOSkQEUK+hjfrkV7TPfanXWb0a\nbr0V9twT5s+HIUMoPONlWh64fan3S1UsIiJp4e7V/tG+fXsvjyFD3OvVcw+tt+FRr17YX9nr7L/p\nh/5ti9Zh429/c1+yJO79UhWLiEh5AZM9ie/YrKwaatkSFi3aeH9eXlj4qyLX2YxfuJkb+Qf3sqz2\n1mz93CNwwgkJ7wepiUVEpLwyXjVkZpuZ2SQzm2Fmc8zs5mh/IzN73czmR89bpvreZfWtL2+f++Lj\nD+JtZtKaq7mbgXRn16I5vyeBRPdLVSwiIumSzjaC34BD3b0N0BboZGb7AtcCY919Z2BstJ1SZfWt\nL2+f+/ztfuARevI2h1CLdRzKWC5gAA3ztkj6fqmKRUQkXdKWCKIqqp+izbrRw4ETgMHR/sHAiam+\nd2n99Mvd5/7ll/ng593owQDu5QpaM5O3OLTU68S7X0piERFJp2QaEir6AGoD04GfgDujfd/FvG+x\n2yXO7QFMBia3aNGi3I0kQ4a45+W5m4XnpBtnly1z79IltOrm5/urN72f1HXi3a/CsYiIVALVqbHY\nzLYARgKXAhPcfYuY91a6e9x2gioZR+AOQ4fCpZfCd9+F2dyuuw423TS99xURSZOMNxbHcvfvgLeA\nTsASM2sOED0vrYoY4l
q8GE48EU4/HVq1CusI33STkoCI5IR09hpqGpUEMLM/AH8BPgJGAV2jw7oC\nL6QrhoTc4fHHw1TRr78O99wD770He+yRsZBERKpaOqehbg4MNrPahIQzzN1fMrP3gGFm1h1YBJyW\nxhjK9umncP75YV6gQw4JCWGnnTISiohIJqUtEbj7TGDPUvavAA5L130TKp4k7vrrw5Sfjz0G550X\n5n8QEclBubUwzezZYZK4SZPg2GPDTKHbbZfpqEREMio3fgavXg033wzt2sGCBWHpyFGjlARERMiF\nEsGkSaEUMHs2nHEGPPAANG2a6ahERKqN7C4R3HYbdOgAK1fCiy+G+Z+VBERENpDdiWDHHUPPoDlz\nQpuAiIhsJLurhk4/PTxERKRM2V0iEBGRhJQIRERynBKBiEiOUyIQEclxSgQiIjlOiUBEJMcpEYiI\n5DglAhGRHFclS1VWlpktI6xdkI2aAMszHUQa6fPVfNn+GbP58+W5e8J5dWpEIshmZjY5mTVFayp9\nvpov2z9jtn++ZKhqSEQkxykRiIjkOCWCzBuQ6QDSTJ+v5sv2z5jtny8htRGIiOQ4lQhERHKcEoGI\nSI5TIsgwM7vbzD4ys5lmNtLMtsh0TKlmZqea2RwzW2dmWdNNz8w6mdnHZvaJmV2b6XhSzcyeNLOl\nZjY707Gkg5ltb2Zvmdnc6N9nr0zHlClKBJn3OrC7u7cG/gdcl+F40mE2cDIwPtOBpIqZ1Qb6AUcB\n+cDpZpaf2ahSbhDQKdNBpNFa4B/ung/sC1ychf8Nk6JEkGHuPsbd10ab7wPbZTKedHD3ee7+cabj\nSLG9gU/cfYG7rwb+C5yQ4ZhSyt3HA99mOo50cfev3X1q9PpHYB6wbWajygwlguqlG/BqpoOQpGwL\nfBGz/SU5+iWSDcysJbAn8EFmI8mM7F68vpowszeArUt5q7e7vxAd05tQVC2sythSJZnPKFIdmVkD\n4Dngcnf/IdPxZIISQRVw98PjvW9m5wDHAod5DR3YkegzZqHFwPYx29tF+6QGMbO6hCRQ6O4jMh1P\npqhqKMPMrBNwNXC8u6/KdDyStA+Bnc2slZltAnQGRmU4JikHMzNgIDDP3e/LdDyZpESQeQ8DmwOv\nm9l0M+uf6YBSzcxOMrMvgQ7Ay2b2WqZjqqyogf8S4DVCI+Mwd5+T2ahSy8yeAd4D/mRmX5pZ90zH\nlGL7A2cBh0b/7003s6MzHVQmaIoJEZEcpxKBiEiOUyIQEclxSgQiIjlOiUBEJMcpEYiI5DglAqlS\nZtY4pqveN2a2OHr9nZnNreJY2sZ2FzSz4ys6i6iZLTSzJqmLrlz3PsfMtonZfqJ48rRMxiU1hxKB\nVCl3X+Hubd29LdAfuD963RZYl+r7mVm80fNtgd8TgbuPcvc7Uh1DFTgH+D0RuPt57l6lSVVqNiUC\nqU5qm9nj0dzwY8zsDwBmtqOZjTazKWb2jpn9Odrf0szejNZyGGtmLaL9g8ysv5l9ANxlZvWjufUn\nmdk0MzshGg18C/C3qETyt+iX9cPRNZpF60PMiB77Rfufj+KYY2Y9En0gMzvXzP4X3fvxmOsPMrNT\nYo77KXpuEH2WqWY2y8xOiPms80r+faJrFACF0ef4g5mNK23dBzM7M4pjupk9Zma1o8cgM5sd3e/v\nlfjvJzWUEoFUJzsD/dx9N+A74K/R/gHApe7eHrgSeCTa/xAwOFrLoRB4MOZa2wH7ufsVQG/gTXff\nG+gI3A3UBW4AhkYllKElYnkQeNvd2wDtgOJRw92iOAqAy8yscVkfxsyaAzcTRrAeQFi3IJFfgZPc\nvV0U673RVAil/n3c/VlgMtAl+hy/lBHLrsDfgP2jElgR0IVQKtrW3Xd39z2AfycRo2QZTTon1cln\n7j49ej0FaBnNDLkfMHz99yGbRs8dCAveAPwHuCvmWsPdvSh6fQRwvJldGW1vBrRIEMuh
wNkA0XW+\nj/ZfZmYnRa+3J3w5ryjjGvsA49x9GYCZDQV2SXBfA243s4MIVWXbAs2i9zb6+yS4VqzDgPbAh9Hf\n8Q/AUuBFYAczewh4GRhTjmtKllAikOrkt5jXRYQvq1rAd9Gv2PL4Oea1EX49b7A4jpntU54Lmtkh\nwOFAB3dfZWbjCEmlItYSlcjNrBawSbS/C9AUaO/ua8xsYcw9Svv7JB0+ofS00Qp4ZtYGOBK4EDiN\nsC6G5BBVDUm1Fs0P/5mZnQphxsjoiwtgImHWTwhfoO+UcZnXgEuLq1jMbM9o/4+ECf9KMxboGR1f\n28waAg2BlVES+DNhecN4PgAOjnpK1QVOjXlvIeEXOsDxhKoqonssjZJARyAvwT0SfY7Yz3OKmW0V\nfaZGZpYX9Siq5e7PAdcTqsEkxygRSE3QBehuZjMIdfXFS0JeCpxrZjMJs0iWtfj4rYQv2plmNifa\nBngLyC9uLC5xTi+go5nNIlTD5AOjgTpmNg+4g7C0aJnc/WvgJsIMnu8SZikt9jghScwgVHEVl2AK\ngYLovmcDH8W7R2QQ0L+4sbiMWOYSvujHRH+v14HmhKqncWY2HRhCdq6ZLQlo9lGRKmJhAaICd78k\n07GIxFKJQEQkx6lEICKS41QiEBHJcUoEIiI5TolARCTHKRGIiOQ4JQIRkRz3/1k9DzO9rjhdAAAA\nAElFTkSuQmCC\n",
525 | "text/plain": [
526 | ""
527 | ]
528 | },
529 | "metadata": {},
530 | "output_type": "display_data"
531 | }
532 | ],
533 | "source": [
534 | "_,_ = mrstat.qq_plot(hsb.write,dist='norm',plot=plt)"
535 | ]
536 | },
537 | {
538 | "cell_type": "markdown",
539 | "metadata": {},
540 | "source": [
541 | "На этом графике сравниваются теоретические квантили распределения (красная прямая) и квантили, полученные из фактического распределения переменной (синие точки). Если точки лежат вдоль прямой, то распределение можно считать нормальным. В данном случае точки с прямой не совпадают, в основном из-за тяжелого правого хвоста. Взглянем на КуКу график для science."
542 | ]
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": 11,
547 | "metadata": {
548 | "collapsed": false
549 | },
550 | "outputs": [
551 | {
552 | "data": {
553 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xm81PMex/HXp1NJlrRJaEOujlYdlGtfu7ZwCTdXElGh\nZItc2coe2UopRQeFUtfSItK1plUbsnQiWiVLSJ3P/eP3m5pOZ5lTZ5Zz5v18POYx8/vNb37zmSPz\nme/y+3zN3RERkfRVLtkBiIhIcikRiIikOSUCEZE0p0QgIpLmlAhERNKcEoGISJpTIpAyy8xuN7OR\n2/naS8zsvUKef9PMOuR3rJn9amb7bc/7FjPGqWZ2WbzfR8o+JQJJKWa2xMx+D79MV5jZcDPbNdlx\n5eXu/3D3EQU8t6u7fw0Qxn/39r5PSfw9zKy+mbmZld/eOKRsUyKQVHSGu+8KHAJkAbfmPcAC6fLv\nt8i/h8iOSJf/kaQUcvdlwJtAY9jcFdLXzN4H1gP7mdneZjbezH40sy/N7PI8p6lkZqPM7Bczm2Vm\nzSJPmFkvM/sqfG6hmZ2d57VmZo+b2Toz+8zMToh6osBumfDX9wFm1hloD9wY/qL/r5ndYGav5Dn+\nUTMbUNy/R55zlDOzW80sx8xWmtmzZlYlfHpaeP9TGEfrot5L0osSgaQsM6sDnArMjtr9b6AzsBuQ\nA7wIfAfsDZwL9DOz46OObwu8BFQDngdeNbMK4XNfAUcBVYA7gJFmVjvqtYeHx9QA+gBjzKxarPG7\n+2AgG7g/7C46AxgJtDGzPcLPWB64AHi2qPMV8PeIuCS8HQfsB+wKPB4+d3R4v0cYx4exfgZJD0oE\nkopeNbOfgPeAd4F+Uc8Nd/cF7r4R2Av4O3CTu//h7nOAp4GLo46f6e4vu/tfQH+gEtAKwN1fcvfv\n3T3X3UcBi4HDol67EnjE3f8Kn/8cOG1HPpi7/0DwC/28cFcbYLW7zyzkZYX9PSLaA/3d/Wt3/xW4\nGbhA4wISC/0jkVR0lru/VcBz30Y93hv40d1/idqXQ9CPvs3x7p5rZpHWA2Z2MdATqB8esivBr/+I\nZb51VcacyGt30AigCzAEuAh4rojjC/t7ROxNEF9EDsH/37W2N0hJH2oRSGkT/cX8PVDNzHaL2lcX\nWBa1XSfyIBxc3hf43szqEXwRXwVUd/c9gPmARb12HzOL3q4bvuf2xhvxKtDUzBoDpxN0H+2o74F6\nUdt1gY3AigJiENlMiUBKLXf/FvgAuMfMKplZU6ATQT98REszOyfsIukB/Al8BOxC8AW5CsDMOrLt\nIOyewDVmVsHMzgMaAW8UM8wVBH320XH/AbxMMGYx3d2XFvOc+XkBuNbMGoTTS/sBo8IutFVAbt44\nRCKUCKS0u5Cga+d7YCzQJ083yjjgfGAtwUDzOWGf/0LgIeBDgi/rJsD7ec79MdAQWA30Bc519zXF\njG8okGlmP5nZq1H7R4TvWVS3UKyGheeaBnwD/AFcDeDu6wnifz+Mo1UJvaeUEaaFaUQSz8zqAp8B\ne7n7z8mOR9KbWgQiCRaOVfQEXlQSkFSgWUMiCWRmuxB0ReUQTB0VSTp1DYmIpDl1DYmIpLlS0TVU\no0YNr1+/frLDEBEpVWbOnLna3WsWdVypSAT169dnxowZyQ5DRKRUMbOcoo+Kc9eQmV1rZgvMbL6Z\nvRBe9FPNzCab2eLwvmo8YxARkcLFLRGY2T7ANUCWuzcGMgiqLPYCprh7Q2BKuC0iIkkS78Hi8sDO\n4eX9lQmu/mxLcFUl4f1ZcY5BREQKEbdEEC6i8SCwFPgBWOfuk4BaYSlegOUUUB3RzDqb2Qwzm7Fq\n1ap4hSkikvbi2TVUleDXfwOCErm7mNlF0ceEJX7zvZDB3Qe7e5a7Z9WsWeSgt4iIbKd4dg2dCHzj\n7qvCRUHGAEcAKyKrQIX3K+MYg4iIFC
GeiWAp0MrMKoc13U8AFgHjgQ7hMR0IqkOKiEiSxHOM4GOC\nmuuzgHnhew0G7gVOMrPFBK2Ge+MVg4hIqbVmDfToAevWxf2t4npBmbv3IVj0O9qfBK0DERHJyx1e\nfhmuugp+/BFOOAHOOCOub6laQyIiqeKHH+Ccc6BdO6hTB2bOjHsSACUCEZHkc4dhw6BRI5gwAe6/\nHz76CJo2Tcjbl4paQyIiZdbXX0PnzjBlChx9NDz9NDRsmNAQ1CIQEUmGTZvgkUegSROYPh0GDoR3\n3kl4EgC1CEREEm/hQujUKej+OfVUGDQoGBNIErUIREQSZcMGuOsuaNECFi+GkSPhtdeSmgRALQIR\nkcSYMSNoBXz6KVxwAQwYAHvumeyoALUIRETia/16uPFGOPxwWL0axo2DF15ImSQAahGIiMTPu+/C\nZZfBl1/C5ZfDAw9AlSrJjmobahGIiJS0n3+GLl3g2GMhNzeYGjp4cEomAVAiEBEpWa+/DgcfHHzx\n9+wJ8+bB8ccnO6pCKRGIiJSE1avhoovg9NODX/4ffAAPPQSVKyc7siIpEYiI7Ah3ePHFoDzE6NFw\n++0wa1YwOFxKKBGIiGyvZcugbVu48ELYb78gAfTpAxUr7vCps7Ohfn0oVy64z87e4VMWSIlARKS4\n3GHIEMjMhLfeCrqAPvgAGjcukdNnZwflh3JygrfKyQm245UMlAhERIrjq6+CNQI6d4aWLYPB4J49\nISOjxN6id+/g8oNo69cH++NBiUBEJBabNkH//kGRuJkzg1lBU6bA/vuX+FstXVq8/TtKiUBEpCjz\n58MRR8B118GJJwZF4y6/HMzi8nZ16xZv/45SIhARKciGDcEsoEMOgW++CWYHjRsH++wT17ft23fb\nWaeVKwf740GJQEQkP9OnBwngjjuCpSMXLoTzz49bKyBa+/ZBz1O9esHb1asXbLdvH5/3U60hEZFo\n69fDf/4TLBqz995BmejTTkt4GO3bx++LPy+1CEREIt55JxgM7t8/mBW0YMHmJBDrvP5Ezv8vKWoR\niIisWwc33BBcG3DAATB1KhxzzOanI/P6I1M6I/P6Yetf7bEel2rM3ZMdQ5GysrJ8xowZyQ5DRMqi\n8eODSqHLlwezgm6/fZuR2vr1gy/1vOrVgyVLin9copjZTHfPKuo4dQ2JSHpauTJYKaxtW6heHT7+\nGO6/P98icbHO60/0/P+SokQgIunFPejDycyEsWODNYRnzICsgn84xzqvP9Hz/0uKEoGIpI9vv4Uz\nzgjKRTdsCLNnw623FlkkLtZ5/Yme/19SlAhEpOzLzYVBg4IFY955J5ga+t57QasgBrHO60/0/P+S\nErfBYjP7GzAqatd+wG3As+H++sASoJ27ry3sXBosFpHttnhxUA7i3XeD8hCDB0ODBsmOKiGSPljs\n7p+7e3N3bw60BNYDY4FewBR3bwhMCbdFRErWxo3BYvFNm8KcOTB0KEyalDZJoDgS1TV0AvCVu+cA\nbYER4f4RwFkJikFE0sXcudCqFdx4I7RpE5SHuPTShJSHKI0SlQguAF4IH9dy9x/Cx8uBWvm9wMw6\nm9kMM5uxatWqRMQoIqXdn38G5SGysoKB4dGjYcyYoFSEFCjuicDMKgJnAi/lfc6DAYp8ByncfbC7\nZ7l7Vs2aNeMcpYiUNtGlHGrUgDZVPmRhpRZw9918tN+/aF5xIeXOP4/6DYyuXbcu+5B3O1IGojSW\nhygR7h7XG0FX0KSo7c+B2uHj2sDnRZ2jZcuWLiISMXKke+XK7uBemV/9Ybr7JsxzqONteMODiwVi\nv1Wu7N6ly5ZzRu8fOTLZn3b7ATM8hu/pRHQNXciWbiGA8UCH8HEHYFwCYhCRMiSylOMJvMV8GtOD\nATxJVw5mARP4R7HPt359MJkokctDppK4JgIz2wU4CRgTtfte4CQzWwycGG6LiMTs55y1PE0n3uIk\nNl
CRo5jG1TzOr+y23efctCn//aleHqIkxLX6qLv/BlTPs28NwSwiEZHiGzuWzzK6Um3TKu6hF3fQ\nhz+ptMOnzcjIPxmkenmIkqAri0WkdFixIlgp7JxzKL/vXhxTaTq3cE+JJIHKlYNy0aWxPERJUCIQ\nkdTmDs8+C40aBesF9+1LtcXT6fr0IZtLOVSvHtwiZR26dNm6zENR24MHw5NPls7yECVB6xGISOpa\nuhSuuAImTIAjjuC/Zw3l6icOYunSoMumb9/0+KLeXrGWmNAKZSKSenJzYeBA6NUraBE89hjZVbrS\n+cpypW71r9JAXUMiklo+/zxYJvKqq+CII2D+fLjqKnr/p1zaTu+MNyUCEUkNf/0F994LzZoFi8YP\nHx50CdWvD5Te1b9KAyUCEYmrSNkGs6B0g1lwy8gI7suXhxY2m1kVD4ebb+aVP09nr7ULqXFdB2rU\ntM3HFDScmQ7TO+NNYwQiEjfZ2UE/fqRLJ/rLPDcXduIP/rPpLm7iPlZTg3/yMmP4Z3DAmi3HFnSx\nV7pM74w3tQhEJG4ipSDycwTvM4fm9KYfz/FvMlm4JQnEIJ2md8abWgQiEjf59d/vyi/04xa68QRL\nqcvJTGQyJxfrvGawZEnJxChqEYhInGRnB2MC0U5mIvNpTDee4DGupjHzi50EQOMCJU2JQERKXGRs\nINK3X5UfeYZLmEgb1lOZo/gfPRjAb+xa7HNrXKDkKRGISImLHhs4h1dYSCYXMZK76U0LZvMBf9/c\nWsjIyP8+UjYiep/GBeJDYwQiUuJycmAvfuBxruKfjGEWLWjDBD615uTmJjs6yUstAhEpUdkjnUsY\nzkIyOY3XuYl7OYzpzKW5+vZTlFoEIlJyliyhzuWdeYbJTOMoLuNpFnMgEMz0Ud9+alKLQER23KZN\n8Oij0LgxLf74kK48wbFM3ZwEILiYTH37qUmJQES2kZ0NNWpsKQdR2K2RLeL98kdD9+68+dtRHMwC\nBtKVvEui16uXpA8jRVIiEJGtZGdDx46wZk3hx5XnL26hL3NozkF8xr95llN5g2/ZdiBAUz5Tm8YI\nRGQrvXsHhUAL04JZDONSmjOXUbTjGh5lJbXyPTYjQ1M+U51aBCKylcLKOlfid+6hF9M5jD1ZyVmM\n5QJGFZgEICgupySQ2pQIRGQrBU3xPIppzKUZvbiP4VxCJgsZx1nbfT5JHUoEIrKVvn2hQoUt27vx\nM4/TjWkcQ3k2cgJvcTlPs449ijxXxYoaGygNlAhEZCvt28MzzwTlHdrwJvNpTBcG8jA9aMI83uaE\nmM5TvToMG6ZuodKgWIPFZlYVqOPun8YpHhEpIdnZ0L170bN/8lPD1tDfr+VinuOnfTIp9/IHXNuq\nFdeWfJiSAopsEZjZVDPb3cyqAbOAIWbWP/6hicj2inUK6Lac8xjNAm/EhbzAnfyHBj/OIvurVvEI\nU1JELF1DVdz9Z+Ac4Fl3Pxw4Mb5hiciOiGUKaF61+Z6xnM1ozieHerRkJn24k59+34neveMTp6SG\nWBJBeTOrDbQDXotzPCJSAgqbArot51KGspBMTmEi1/MArfmQeTTdzvNJaRNLIrgTmAh85e6fmNl+\nwOJYTm5me5jZy2b2mZktMrPWZlbNzCab2eLwvuqOfAAR2VasUzYb8DVvcSJDuYw5NKcJ83iI69mU\nZ/hQU0DLtiITgbu/5O5N3b1LuP21u8e6wvQAYIK7HwQ0AxYBvYAp7t4QmBJui0gJyjsFNK9ybKI7\njzCPJhzKJ1zBII7nbb7igG2OVXmIsi+WweIDzWyKmc0Pt5ua2a0xvK4KcDQwFMDdN7j7T0BbYER4\n2AiI4YoUESmW6CmgeWWygPf5O49wLe9wHJksZDBXbFUkLrJ6mFYESw/m7oUfYPYucAPwlLu3CPfN\nd/fGRbyuOTAYWEjQGpgJdAeWufse4TEGrI1s53l9Z6AzQN26dVvm
5OQU86OJyFY2bIB774W774bd\ndw/KRl94YVBCVMokM5vp7llFHRfLGEFld5+eZ9/GGF5XHjgEGBgmkN/I0w3kQRbKNxO5+2B3z3L3\nrJo1a8bwdiISLTsb6tcPft2fWfsT1h6QBX36wLnnwqJF8K9/KQkIEFsiWG1m+xN+YZvZucAPMbzu\nO+A7d/843H6ZIDGsCGchEd6vLHbUIlKo7Gzo3BlW5qznPr+Bsctbsf67H5naczw8/zzox5VEiSUR\ndAOeAg4ys2VAD6BLUS9y9+XAt2b2t3DXCQTdROOBDuG+DsC44gYtIoXr3RsOXT+VuTTjBh7kaS4j\n0xdwyStnJDs0SUFFlphw96+BE81sF6Ccu/9SjPNfDWSbWUXga6AjQfIZbWadgByC6xNEJI+uXWHQ\noGCJx+LYnXXcx01cyVN8yf4cx9tM5TgAftH1AJKPIhOBmd2WZxsAd7+zqNe6+xwgv4GK2KpWiaSp\nrl1h4MDiv+40XmMQV1KbH3iQ67iNO/mdypuf1/UAkp9YuoZ+i7ptAv4B1I9jTCJpb/Dg4h1fg1Vk\n8y9e4wzWUpXWfMgNPLhVEjDT9QCSv1i6hh6K3jazBwmuNBaRONm0KdYjnQt4kUe5hiqs4zbu4F56\n8RcVtz3SdT2A5G971iyuDOxb0oGIyBblygVLPBZmH75jIF04g9f4mMPoxFAWUPDlPfXqlXCQUmbE\nMkYwjy1z/TOAmgT1h0QkDrKzC3/eyOUynuYBbqACf3Et/XmUa8glo8DXaKUwKUwsLYLTox5vBFa4\neywXlInIdujdu+DWwP58yRAu5zim8jbHcTlD+Jr9Cz1f9eowYIC6haRgBSaCcCEagLzTRXc3M9z9\nx/iFJZK+8iv5nMFGejCAB3f+T1BN7qEhHN+pE1/pymApAYW1CGYSdAnl9y/Ngf3iEpFIGsvODmb3\nRF870Jh5DKUTh/EJnHQmPPkk7LNP8oKUMqfARODuDRIZiEi6iywvGekWqsif3EI/bqEfa6nKe1e9\nyJGPtlN9IClxMc0aChePaQhUiuxz92nxCkokHUUvL3kYHzOUTjRmAc9xEXdVfZgvHquR3AClzIpl\nPYLLgGkE1w7cEd7fHt+wREq/7GyoUSP4AR/LLScHKvMbD9GTD2lNFdZxGq9xMc/x5U9KAhI/sVxZ\n3B04FMhx9+OAFsBPcY1KpJSLdPOsWRP7a47jbT6lKT15mEFcycEs4A1OA1QaQuIrlkTwh7v/AWBm\nO7n7Z8DfiniNSFqL7uYpShV+YjCX8zYnkEs5jmEq3XiSX9h98zG6BkDiKZYxgu/MbA/gVWCyma0l\nqBoqIgXIbwpofs5kHAPpQi1WcB83cju38wc7b3OcrgGQeIql1tDZ4cPbzewdoAowIa5RiZRiRV0Z\nDFCTlTzKNVzAKObSlDMZz8x8C/WqNITEX4FdQ2b2hpldZGa7Rva5+7vuPt7dNyQmPJHSJTsbLr64\nsDUEnPaMZBGNOJux3MpdZDGjwCSg0hCSCIWNETwFnAZ8Y2ajzezscIEZESlAYeUh9uVbXuN0RvJv\nvuBAWjCbvtzKRirke3z16jBsmLqFJP4KTATuPs7dLwTqAa8AFwNLzewZMzspUQGKlCb5jQ0YuVzJ\nQBZwMKdVngqPPELrje+x0DNxp8Db6tVKApIYRc4acvf17j4qHCs4GWiOxggkzcR6TUDeLqGGfMFU\njmUgXZnO4TB/PnTvDhkFVwoVSbRYLiirZWZXm9n7BDOHJgKHxD0ykRSxPdcEZLCRG7ifuTSjCfPo\nyDDGXDkJGqhyi6SewgaLLzezt4FZBOUlbnD3/dy9l7vPTViEIklWnGsCAJoyl485nPu5iTf5B5ks\nZOcuHXlyoGoESWoqbPpoa+AeYIq7F7FWkkjZFes1ARX5k1u5m17cy49U41xe4hX+CRhPPhnXEEV2\nSGHVRy9NZCAiqapu3aAOUGFa
8SFD6UQmixjBxfSkPz9SHdBwgKS+WEpMiKS1vn2DtWDyswu/8jA9\neJ+/swu/0YY3uYQRm5MAQOfOCQpUZDspEYgUoX17eOaZYF5/tBOZzDya0IMBPEE3GjOfibTZ/Hy5\nctClC+oWkpQXy1KV+dJSlZIusrODGZ+RWUP7VV3LpCbXsf+0Z+Bvf4On/8fVRx7J1ckNU2S7xbpU\nZV1gbfh4D2ApoHlwUuZFpo5GZg2dxVieXNuVmtNWMf+Mm2k8+jaoVKnwk4ikuMKuLG7g7vsBbwFn\nuHsNd68OnA5MSlSAIskUmTpai+WM5jzGcg7L2YvDmM7pn/ZTEpAyIZYxglbu/kZkw93fBI6IX0gi\nqWNpjnMxI1hIJmfwX26mH4cxndkcEvO0UpFUF0si+N7MbjWz+uGtN/B9LCc3syVmNs/M5pjZjHBf\nNTObbGaLw/uqO/IBRGKRnQ277hr7spFmUM9yeIN/MIJLWEQjmjOHe7l5c5E4rRomZUUsieBCoCYw\nFhgTPr6wGO9xnLs3d/dInd1eBBepNQSmhNsicRMpDf3bb7Edb+TSjcdZwMEcyXtcxWMcxf/4nIM2\nH6Py0FKWxLIwzY9AdzPbxd1j/F+pUG2BY8PHI4CpwE0lcF6RfBVWGjqvA/mcoXTiSN5nAqdwBU+x\nlG1XhlF5aClLYik6d4SZLQQWhdvNzCzWmdEOvGVmM80scllNLXf/IXy8HKhVwPt2NrMZZjZj1apV\nMb6dyLZi6csvz1/04h7m0oxMFtKB4fyDN/NNAqAkIGVLLGsWPwycAowHcPe5ZnZ0jOc/0t2Xmdme\nBOsdfxb9pLu7meW7lpO7DwYGA2RlZRW43pNIYbKz8y8PHa05sxlKJw5hNi9xLlfzGCvYq8DjVTJC\nypqYrix292/z7NoU4+uWhfcrCcYYDgNWmFltgPB+ZczRihRD5BqAgrqFduIP+nILn3Aoe/M95/AK\n7Xip0CQAKhkhZU8sieBbMzsCcDOrYGbXE3YTFcbMdjGz3SKPCRa1mU/QsugQHtYBGLddkYsUobDy\n0X/nPebSjFu4h2e5mEYsYiznFHo+lYyQsiqWrqErgQHAPsAygovJusXwulrAWDOLvM/z7j7BzD4B\nRptZJyAHaLc9gYsUJb+KobvyC/dwM1fxBNSvD09N5NKTT0aldiWdFZoIzCwD+Le7F3tozN2/Bprl\ns38NcEJxzydSHNnZ2+47mYkMpjN1+JZndruGjvP6BhcXiKS5QruG3H0T8K8ExSJSYnr33vK4Kj8y\nnA5MpA3rqcyRvEfFgQOUBERCsXQNvWdmjwOjgM3XEbj7rLhFJbKDgimjzj95hSfoRjV+5G56cze3\n8ieV+EDTP0U2iyURNA/v74za58DxJR+OSGzylobOay9+4Am6cQ5jmckhnMJE5ob/lOvlf2mASNqK\n5cri4xIRiEis8paG3ppzCcPpT08q8Qc3ch/96cmm8J+6SkOIbCuWK4trmdlQM3sz3M4MZ/yIJEVB\n00Lr8w2TOJlnuJR5NKEZc3mAGzcngXLlVBpCJD+xXEcwHJgI7B1ufwH0iFdAIkXJWzKiHJu4mkeZ\nT2Na8RFdeJJjmcpiDtzqOHclAZH8xJIIarj7aCAXwN03EuOVxSLxUC1qEdWDWMT/OIpH6c67HMPB\nLGAQXfB8/mmrbLRI/mJJBL+ZWXWCAWLMrBWwLq5RiRQgOxt+/jkoEtebu5lDc/7G51zEc5zG63xL\n/t/2GhsQKVgss4Z6EpSF2N/M3idYj+DcuEYlUoDevaHJXzMZxqU041NG0Y6reYxV7Fnga6pXhwED\n1C0kUpBYZg3NMrNjgL8RLF7/ubsXUMFFJI5+/50uObdzPQ+yglqcxVjGcRYQVBiNdc0BEdlagYnA\nzAqqwHWgmeHuY+IUk8i2pk3j5/Mv4yYWM4TLuIEHWMcem59W/7/I9iusRXBGeL8nwWL1b4fbxw
Ef\nECxbKRJfP/8MvXrBwIH8VL4BZ/MWb+cpVWWm/n+RHVFgInD3jgBmNgnIjKwqFq4hMDwh0Ul6e+MN\nuOIKWLYMrr2WzIfv4jd22eYwTQsV2TGxzBqqE7W0JMAKKGBqhsh2ys6GGjWCX/c1bDUj7SI47TQW\nfLc7rfwD7OH++SYBUMkIkR0Vy6yhKWY2EXgh3D4feCt+IUm62VIywmnHaB7jaqqylju4jX7cwgZ2\nKvC15curW0hkR5kXtphr5CCzs4HIOsXT3H1sXKPKIysry2fMmJHIt5QEql8fNuR8z0C60JbxfEIW\nnRjKPJoW+VrNFhIpmJnNdPesoo6LZWGat8LCcwn98pc04c5JOUN5gOvZiT+5jgcZQPfN9YFieLmI\n7KBYFqbJNbMqCYpH0snXX8OJJzKEy5lDc5owj/5cF3MSAMjIiGN8Imkilv/jfgXmmdlktl6Y5pq4\nRSVl26ZN8OijwWXC5cvz8aVPccqzl7FhYyxzF7bWuXMc4hNJM7EkgjHomgEpKfPnQ6dOMH06nHYa\nDBrE4fvuy7DjC19oJq9y5YKZpU8+Gd9wRdJBLIlgFHBA+PhLd/8jjvFIGZF3BbEKbOBm7qE3fVlH\nFa7heV58/QKoY1u9Tl/wIolXWImJ8kA/4FIgh6DOUB0zewborXpDUpC8K4hl8QnDuJQmzCebf9GD\nR1hNzXxfm5sLAwcGj5UMRBKjsE7ZB4BqQAN3b+nuhwD7A3sADyYiOCmdIiuI7cx6HuB6PqIVVVnL\nGYznIrILTALRBg9OQKAiAhTeNXQ6cKBHXWjg7j+bWRfgM6B7vIOT0mnpUjiGqTzNZRzAVwziCm7i\nPn4m9slnm7T0kUjCFNYicM/narNwSqlmb0v+1q1j5C5XMJXjADiOt+nCoGIlAdC0UJFEKiwRLDSz\ni/PuNLOLCFoEIlv7738hM5MLfnuah8pdT1M+3ZwQikvTQkUSp7CuoW7AGDO7FJgZ7ssCdgbOjndg\nUoqsWhVMEXrhBWjShHKvvspeXxxK5e7we4zTQSM0a0gk8YqsNWRmxwMHh5sL3X1K3KPKQ7WGUpR7\n8OV/zTXBugG33hqsHVCxYrIjExFirzVU5KWc7v62uz8W3oqdBMwsw8xmm9lr4XY1M5tsZovD+6rF\nPaekgO++gzPPDBYCOOAAmD2b7P1vo/6BFTELqoKaBQXlsrOTHayIFKb41/QXX3dgUdR2L2CKuzcE\npoTbUlrk5sJTT0FmJkyZAv37w/vvkz3nYDp3hpyc4LDIrJ+cnKC/X8lAJHXFNRGY2b7AacDTUbvb\nAiPCxyORISk2AAAQHklEQVQgXH1cUt/ixXD88XDllXDooUG5iGuvhYwMeveG9evzf9n69cG1BSKS\nmuLdIngEuBGIrhhfK2rFs+VArfxeaGadzWyGmc1YtWpVnMNMX9nZQfdNdHdO5L5cuXDbNnK9Pcjv\nBzZl3buzuYwh2NtvYfvvh1lwTKQlUJClSxPycURkO8QtEZjZ6cBKd59Z0DHhdQr5jla7+2B3z3L3\nrJo1i74SVYovO5t8u3Mi9+7QhE/5kNY8yA1M4mQyWchQLiOoOBK7ulrcVCRlxV74vfj+DpxpZqcC\nlYDdzWwksMLMarv7D2ZWG1gZxxikEIV151TkT26hH7fQj7VUpR2jeInzKG4CAKhcWctJiqSyuLUI\n3P1md9/X3esDFwBvu/tFwHigQ3hYB2BcvGKQwhXUXXM4HzGLQ+jDnbzIBTRiES/Rju1JAvXqBXWD\n2rffsVhFJH7i2SIoyL3AaDPrRFDVtF0SYhCC7provv3K/MZd/IcePMIy9uFUXudNTt3u89erB0uW\n7HicIhJfiZg+irtPdffTw8dr3P0Ed2/o7ie6+4+JiEG21bdv0G0DcDxTmEcTevIwg7iSg1mwQ0mg\nYkV1B4mUFsloEUiKaN8eKvz2E7nXXc8Fvw7lCxpyNO/yQc
bRbNoUzAbansXhq1eHAQPUHSRSWigR\npLNx42h3exf4fSXcdBMH9unDtJ13TnZUIpJgSgTpaMWKoD7Q6NHQrFlQNbRly2RHJSJJkpAxAkkR\n7vDcc0F5iFdfhbvvhk8+URIQSXNqEaSLpUuD0hBvvgmtW8PQodCoUbKjEpEUoBZBWZebGxT3P/hg\nePfdYBT3f/9TEhCRzZQIyrIvvmBl5rHQrRuTf23FAX/Mx7pfQ41aGdSogcpFiwigRFA2bdwI993H\npsZNqfD5PDoyjJOZxFe5DQBYsya4gcpFi4gSQdkzdy4cfjj06sXk8qeSyUKG05FYykOoXLRIelIi\nKCv++CNYKjIrC5YtY9o1L/OP38ewnNrFOo3KRYukH80aKgs++AA6dYLPPoMOHXipdX8u6Vltu06l\nctEi6UctgtLs11+DC8OOPDLo15kwAYYP54Z7qhVYXrowKhctkp6UCEqrSZOgcWN4/HHo1i1YNvKU\nU4Ciu3eqVw9uABkZwb3KRYukLyWCFBZZRrJcueC+a1doWmctz1hHOOUUFn9biaN8GjVeeIwaDXbb\nPB20oEJx9eoFz61eHdzcgwlG7kG5aCUBkfSkMYIUFVlGMtLFk5MDyweOYSLdqMkq+nEzd+bexp9U\ngjVbXheZDpqXun1EpCBqEaSo6GUka7GclziXMfyT5ezFoXxCb/oFSSAG6vYRkcKoRZCign5+pwMj\n6E9PKrOem+nHg1zPRirEfB4zrRImIoVTiyBJ8vb/Z2dvva+BLWECbRhORxaSSXPmcC83FysJgKaD\nikjR1CJIgvz6/zt2DH69/7Uhl248wT1+M47RjccZSBd8O3K2xgVEJBZqESRBdP9/xF9/QYMNnzGN\no3mMa3iPI2nMfJ7K6AZWjnr1oEuXoL/fbMsU0OjHoOmgIlJ8ahEkQd55/uX5ixt4gD7cwW/swsWM\n4Dn+DRiWG1SSFhGJF7UIkiC6374Fs5jOYfSjN+M5k0Ys4jkuJlIkTn38IhJvSgRJ0LcvVNv5d/px\nM9M5jL1YTruMV7io4kuspNbm49THLyKJoESQBO3rvcc3VZpzM/fyHBdzyr4LaTviHIYN2zIGoD5+\nEUkUJYISkt900LzP7W6/MGL3q+Coo9i90gaYNImOPoxPv61K+/bBl/6SJcGYgEo+iEiiaLC4BOQ3\nHbRz5y3Pd+4MR62fwFNcQZ1fvuWJ8t2pcevdnH/SrskJWEQkinlBFcpSSFZWls+YMSPZYRSofv3g\nyz+vevVgj01ruPa7nnTgWRbSiE4M5SNaU6+ervgVkfgys5nunlXUcXHrGjKzSmY23czmmtkCM7sj\n3F/NzCab2eLwvmq8YkiU/Ms+O4flvMTE7zL5F89zF7fSgtl8ROtCXiMiknjxHCP4Ezje3ZsBzYE2\nZtYK6AVMcfeGwJRwu1TLO8VzL35gDOcwmnasqFiHLGZwG3exgZ0KfI2ISLLELRF44Ndws0J4c6At\nMCLcPwI4K14xJErfvsFUT3A6MoxFNKINE5h9wX3MH/IRX1ZuttXxmhYqIqkkroPFZpYBzAQOAJ5w\n94/NrJa7/xAeshyiJs6XUu3bwy4rv6H6LZ056o+3+Hino1lx9xDOvP5AWgCeEZSVWLo0aAn07asZ\nQSKSOuI6fdTdN7l7c2Bf4DAza5zneSdoJWzDzDqb2Qwzm7Fq1ap4hlmowqaFAsFKMAMGcNatjTmq\nwscwcCCHr3+HM68/cPMhmhYqIqksIdcRuPtPwDtAG2CFmdUGCO9XFvCawe6e5e5ZNWvWTESY24hM\nC83JCZZzjEwL3ZwMFi4MFo7v0QOOOQYWLIArrwyyhohIKRHPWUM1zWyP8PHOwEnAZ8B4oEN4WAdg\nXLxi2FH5VQldvx5uv2UD3HUXtGgBixfDyJHw+utQp05yAhUR2QHxHCOoDYwIxwnKAaPd/TUz+xAY\nbWadgBygXRxj2CH5Tf
FsyQyGLu0Et30K558Pjz4Ke+6Z+OBEREpI3BKBu38KtMhn/xrghHi9b0mq\nW3fLhWKV+J076MN1PMSqjL3glVehbdvkBigiUgLUmV2IyLTQo3mXT2nKjTzAiIxOTHtygZKAiJQZ\nSgSFaH/Gz8xu3YV3OZZy5HLhnlPYacRg2nXeI9mhiYiUGBWdK8jrr8OVV3Lg999Dz57sf+edvLDL\nLsmOSkSkxKlFkNfq1XDRRXD66bD77vDBB/DQQ6AkICJllBJBhDu8+CI0agSjRkGfPjBrFhx+eLIj\nExGJK3UNASxbBl27wvjxcOihMHQoNGmS7KhERBKizLYIiiwNAUErYMgQyMyEyZPhwQfhww+VBEQk\nrZTJFkFhK4ZtrvPz1Vdw+eXwzjtw7LFBQjjggGSEKyKSVGWyRVBQaYjevQmKxPXvH/zqnzkTnnoK\npkxREhCRtFUmWwQFrf61W858OKITTJ8ezAoaOBD23TexwYmIpJgy2SLIu/pXBTZwG3cwk0Pg66/h\n+eeDgWElARGRspkItqwYBocynZm05A5uZ9kR5wWloy+8EMySG6SISIook11DkQHhH7rdzbXr+rAy\nozZTe/yXYx88PbmBiYikoDKZCCBMBuX2h3cvp/Z991G7SpVkhyQikpLKbCIAgi6gCy9MdhQiIimt\nTI4RiIhI7JQIRETSnBKBiEiaUyIQEUlzSgQiImlOiUBEJM0pEYiIpDklAhGRNGfunuwYimRmq4Cc\nZMcRJzWA1ckOIo70+Uq/sv4Zy/Lnq+fuNYs6qFQkgrLMzGa4e1ay44gXfb7Sr6x/xrL++WKhriER\nkTSnRCAikuaUCJJvcLIDiDN9vtKvrH/Gsv75iqQxAhGRNKcWgYhImlMiEBFJc0oESWZmD5jZZ2b2\nqZmNNbM9kh1TSTOz88xsgZnlmlmZmaZnZm3M7HMz+9LMeiU7npJmZsPMbKWZzU92LPFgZnXM7B0z\nWxj+++ye7JiSRYkg+SYDjd29KfAFcHOS44mH+cA5wLRkB1JSzCwDeAL4B5AJXGhmmcmNqsQNB9ok\nO4g42ghc5+6ZQCugWxn8bxgTJYIkc/dJ7r4x3PwI2DeZ8cSDuy9y98+THUcJOwz40t2/dvcNwItA\n2yTHVKLcfRrwY7LjiBd3/8HdZ4WPfwEWAfskN6rkUCJILZcCbyY7CInJPsC3UdvfkaZfImWBmdUH\nWgAfJzeS5Cjbi9enCDN7C9grn6d6u/u48JjeBE3V7ETGVlJi+YwiqcjMdgVeAXq4+8/JjicZlAgS\nwN1PLOx5M7sEOB04wUvphR1FfcYyaBlQJ2p733CflCJmVoEgCWS7+5hkx5Ms6hpKMjNrA9wInOnu\n65Mdj8TsE6ChmTUws4rABcD4JMckxWBmBgwFFrl7/2THk0xKBMn3OLAbMNnM5pjZoGQHVNLM7Gwz\n+w5oDbxuZhOTHdOOCgf4rwImEgwyjnb3BcmNqmSZ2QvAh8DfzOw7M+uU7JhK2N+BfwPHh//vzTGz\nU5MdVDKoxISISJpTi0BEJM0pEYiIpDklAhGRNKdEICKS5pQIRETSnBKBJJSZVY+aqrfczJaFj38y\ns4UJjqV59HRBMztze6uImtkSM6tRctEV670vMbO9o7afjhRPS2ZcUnooEUhCufsad2/u7s2BQcDD\n4ePmQG5Jv5+ZFXb1fHNgcyJw9/Hufm9Jx5AAlwCbE4G7X+buCU2qUropEUgqyTCzIWFt+ElmtjOA\nme1vZhPMbKaZ/c/MDgr31zezt8O1HKaYWd1w/3AzG2RmHwP3m9kuYW396WY228zahlcD3wmcH7ZI\nzg9/WT8enqNWuD7E3PB2RLj/1TCOBWbWuagPZGYdzeyL8L2HRJ1/uJmdG3Xcr+H9ruFnmWVm88ys\nbdRnXZT37xOeIwvIDj/HzmY2Nb91H8zsojCOOWb2lJllhLfhZjY/fL9rd+C/n5RSSgSS
ShoCT7j7\nwcBPwD/D/YOBq929JXA98GS4/zFgRLiWQzbwaNS59gWOcPeeQG/gbXc/DDgOeACoANwGjApbKKPy\nxPIo8K67NwMOASJXDV8axpEFXGNm1Qv6MGZWG7iD4ArWIwnWLSjKH8DZ7n5IGOtDYSmEfP8+7v4y\nMANoH36O3wuIpRFwPvD3sAW2CWhP0Crax90bu3sT4JkYYpQyRkXnJJV84+5zwsczgfphZcgjgJe2\nfB+yU3jfmmDBG4DngPujzvWSu28KH58MnGlm14fblYC6RcRyPHAxQHiedeH+a8zs7PBxHYIv5zUF\nnONwYKq7rwIws1HAgUW8rwH9zOxogq6yfYBa4XPb/H2KOFe0E4CWwCfh33FnYCXwX2A/M3sMeB2Y\nVIxzShmhRCCp5M+ox5sIvqzKAT+Fv2KL47eox0bw63mrxXHM7PDinNDMjgVOBFq7+3ozm0qQVLbH\nRsIWuZmVAyqG+9sDNYGW7v6XmS2Jeo/8/j4xh0/QetpmBTwzawacAlwJtCNYF0PSiLqGJKWF9eG/\nMbPzIKgYGX5xAXxAUPUTgi/Q/xVwmonA1ZEuFjNrEe7/haDgX36mAF3C4zPMrApQBVgbJoGDCJY3\nLMzHwDHhTKkKwHlRzy0h+IUOcCZBVxXhe6wMk8BxQL0i3qOozxH9ec41sz3Dz1TNzOqFM4rKufsr\nwK0E3WCSZpQIpDRoD3Qys7kEffWRJSGvBjqa2acEVSQLWnz8LoIv2k/NbEG4DfAOkBkZLM7zmu7A\ncWY2j6AbJhOYAJQ3s0XAvQRLixbI3X8Abieo4Pk+QZXSiCEESWIuQRdXpAWTDWSF73sx8Flh7xEa\nDgyKDBYXEMtCgi/6SeHfazJQm6DraaqZzQFGUjbXzJYiqPqoSIJYsABRlrtflexYRKKpRSAikubU\nIhARSXNqEYiIpDklAhGRNKdEICKS5pQIRETSnBKBiEia+z/EXwUC6VC0VgAAAABJRU5ErkJggg==\n",
554 | "text/plain": [
555 | ""
556 | ]
557 | },
558 | "metadata": {},
559 | "output_type": "display_data"
560 | }
561 | ],
562 | "source": [
563 | "_,_ = mrstat.qq_plot(hsb.science,dist='norm',plot=plt)"
564 | ]
565 | },
566 | {
567 | "cell_type": "markdown",
568 | "metadata": {},
569 | "source": [
570 | "Видно, что синие точки на этом графике лежат гораздо ближе к красной линии. Это говорит о том, что распределение science ближе к нормальному. КуКу-график (Q-Q plot) можно строить не только для нормального распределения, но и для любого другого."
571 | ]
572 | },
573 | {
574 | "cell_type": "markdown",
575 | "metadata": {},
576 | "source": [
577 | "#### 2.3 Тест Колмогорова-Смирнова для проверки формы распределения"
578 | ]
579 | },
580 | {
581 | "cell_type": "markdown",
582 | "metadata": {},
583 | "source": [
584 | "Нормальность (и не только) можно проверить также и при помощи этого теста."
585 | ]
586 | },
587 | {
588 | "cell_type": "code",
589 | "execution_count": 12,
590 | "metadata": {
591 | "collapsed": false
592 | },
593 | "outputs": [
594 | {
595 | "data": {
596 | "text/plain": [
597 | "KstestResult(statistic=0.1343270786922024, pvalue=0.0013129370215807512)"
598 | ]
599 | },
600 | "execution_count": 12,
601 | "metadata": {},
602 | "output_type": "execute_result"
603 | }
604 | ],
605 | "source": [
606 | "mrstat.kstest(hsb['write'],'norm',args=(hsb['write'].mean(),\n",
607 | " hsb['write'].std(ddof=1)))"
608 | ]
609 | },
610 | {
611 | "cell_type": "markdown",
612 | "metadata": {},
613 | "source": [
614 | "P-value < 0.05, следовательно, гипотеза о нормальности отвергается. Как вы поняли, в args передаются параметры нормального распределения, с которым мы хотим сравнить распределение выборки. Я передал туда выборочное среднее и стандартное отклонение. Проверим science."
615 | ]
616 | },
617 | {
618 | "cell_type": "code",
619 | "execution_count": 13,
620 | "metadata": {
621 | "collapsed": false
622 | },
623 | "outputs": [
624 | {
625 | "data": {
626 | "text/plain": [
627 | "KstestResult(statistic=0.076233630374236583, pvalue=0.18585568226600646)"
628 | ]
629 | },
630 | "execution_count": 13,
631 | "metadata": {},
632 | "output_type": "execute_result"
633 | }
634 | ],
635 | "source": [
636 | "mrstat.kstest(hsb['science'],'norm',args=(hsb['science'].mean(),\n",
637 | " hsb['science'].std(ddof=1)))"
638 | ]
639 | },
640 | {
641 | "cell_type": "markdown",
642 | "metadata": {},
643 | "source": [
644 | "P-value получился больше 0.05, следовательно, нет оснований отвергать гипотезу о том, что данная выборка взята из нормального распределения. С помощью этого теста можно сравнивать выборки не только с нормальным распределением, но и с другими."
645 | ]
646 | },
647 | {
648 | "cell_type": "markdown",
649 | "metadata": {},
650 | "source": [
651 | "#### 2.4 Двухвыборочный тест Колмогорова-Смирнова"
652 | ]
653 | },
654 | {
655 | "cell_type": "markdown",
656 | "metadata": {},
657 | "source": [
658 | "С помощью этого теста можно проверить, принадлежат ли две выборки к одному распределению."
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": 14,
664 | "metadata": {
665 | "collapsed": false
666 | },
667 | "outputs": [
668 | {
669 | "data": {
670 | "text/plain": [
671 | "Ks_2sampResult(statistic=0.15000000000000002, pvalue=0.01973175474986974)"
672 | ]
673 | },
674 | "execution_count": 14,
675 | "metadata": {},
676 | "output_type": "execute_result"
677 | }
678 | ],
679 | "source": [
680 | "mrstat.ks_2samp(hsb['write'],hsb['science'])"
681 | ]
682 | },
683 | {
684 | "cell_type": "markdown",
685 | "metadata": {},
686 | "source": [
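687 | "Если p-value больше 0.05 - нет оснований отвергать гипотезу о том, что выборки принадлежат к одному распределению. Для write и science p-value меньше 0.05, то есть они принадлежат к разным распределениям. А вот для write и socst гипотеза об одном распределении не отвергается."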
688 | ]
689 | },
690 | {
691 | "cell_type": "code",
692 | "execution_count": 15,
693 | "metadata": {
694 | "collapsed": false
695 | },
696 | "outputs": [
697 | {
698 | "data": {
699 | "text/plain": [
700 | "Ks_2sampResult(statistic=0.125, pvalue=0.080917080808807579)"
701 | ]
702 | },
703 | "execution_count": 15,
704 | "metadata": {},
705 | "output_type": "execute_result"
706 | }
707 | ],
708 | "source": [
709 | "mrstat.ks_2samp(hsb['write'],hsb['socst'])"
710 | ]
711 | },
712 | {
713 | "cell_type": "markdown",
714 | "metadata": {
715 | "collapsed": true
716 | },
717 | "source": [
718 | "### 3. Двухвыборочные ти-тесты"
719 | ]
720 | },
721 | {
722 | "cell_type": "markdown",
723 | "metadata": {},
724 | "source": [
725 | "#### 3.1 Ти-тест для двух независимых выборок"
726 | ]
727 | },
728 | {
729 | "cell_type": "markdown",
730 | "metadata": {},
731 | "source": [
732 | "Данный тест позволяет понять, есть ли значимые различия между средними двух независимых выборок. Сравним оценки по тесту science для мальчиков и девочек."
733 | ]
734 | },
735 | {
736 | "cell_type": "code",
737 | "execution_count": 16,
738 | "metadata": {
739 | "collapsed": false
740 | },
741 | "outputs": [
742 | {
743 | "name": "stdout",
744 | "output_type": "stream",
745 | "text": [
746 | "53.2307692308 50.6972477064\n"
747 | ]
748 | }
749 | ],
750 | "source": [
751 | "s_boys = hsb[hsb['female'] == 0]['science']\n",
752 | "s_girls = hsb[hsb['female'] == 1]['science']\n",
753 | "print s_boys.mean(), s_girls.mean()"
754 | ]
755 | },
756 | {
757 | "cell_type": "markdown",
758 | "metadata": {},
759 | "source": [
760 | "Средняя оценка по научным дисциплинам у мальчиков выше. Проверим, имеет ли это утверждение статистическую значимость."
761 | ]
762 | },
763 | {
764 | "cell_type": "code",
765 | "execution_count": 17,
766 | "metadata": {
767 | "collapsed": false
768 | },
769 | "outputs": [
770 | {
771 | "data": {
772 | "text/plain": [
773 | "Ttest_indResult(statistic=1.7847013349359799, pvalue=0.076026848666140553)"
774 | ]
775 | },
776 | "execution_count": 17,
777 | "metadata": {},
778 | "output_type": "execute_result"
779 | }
780 | ],
781 | "source": [
782 | "mrstat.ttest_ind(s_boys,s_girls,equal_var=False)"
783 | ]
784 | },
785 | {
786 | "cell_type": "markdown",
787 | "metadata": {},
788 | "source": [
789 | "P-value >= 0.05, а значит нельзя утверждать, что средние различны. Теперь проведем такой же тест для write."
790 | ]
791 | },
792 | {
793 | "cell_type": "code",
794 | "execution_count": 18,
795 | "metadata": {
796 | "collapsed": false
797 | },
798 | "outputs": [
799 | {
800 | "name": "stdout",
801 | "output_type": "stream",
802 | "text": [
803 | "50.1208791209 54.9908256881\n"
804 | ]
805 | }
806 | ],
807 | "source": [
808 | "w_boys = hsb[hsb['female'] == 0]['write']\n",
809 | "w_girls = hsb[hsb['female'] == 1]['write']\n",
810 | "print w_boys.mean(), w_girls.mean()"
811 | ]
812 | },
813 | {
814 | "cell_type": "code",
815 | "execution_count": 19,
816 | "metadata": {
817 | "collapsed": false
818 | },
819 | "outputs": [
820 | {
821 | "data": {
822 | "text/plain": [
823 | "Ttest_indResult(statistic=-3.6564080478875276, pvalue=0.00034088493594266187)"
824 | ]
825 | },
826 | "execution_count": 19,
827 | "metadata": {},
828 | "output_type": "execute_result"
829 | }
830 | ],
831 | "source": [
832 | "mrstat.ttest_ind(w_boys,w_girls,equal_var=False)"
833 | ]
834 | },
835 | {
836 | "cell_type": "markdown",
837 | "metadata": {},
838 | "source": [
839 | "Получилось, что средние оценки по тесту write у мальчиков и девочек статистически значимо разные."
840 | ]
841 | },
842 | {
843 | "cell_type": "markdown",
844 | "metadata": {},
845 | "source": [
846 | "#### 3.2 Доверительный интервал разности средних для двух независимых выборок"
847 | ]
848 | },
849 | {
850 | "cell_type": "markdown",
851 | "metadata": {},
852 | "source": [
853 | "Построим 95% доверительный интервал для разности средних оценок science."
854 | ]
855 | },
856 | {
857 | "cell_type": "code",
858 | "execution_count": 20,
859 | "metadata": {
860 | "collapsed": false
861 | },
862 | "outputs": [
863 | {
864 | "data": {
865 | "text/plain": [
866 | "(-0.22316404940007306, 5.2902070980945028)"
867 | ]
868 | },
869 | "execution_count": 20,
870 | "metadata": {},
871 | "output_type": "execute_result"
872 | }
873 | ],
874 | "source": [
875 | "mrstat.mean_diff_confint_ind(s_boys,s_girls)"
876 | ]
877 | },
878 | {
879 | "cell_type": "markdown",
880 | "metadata": {},
881 | "source": [
882 | "Истинное значение разницы для двух средних с 95% вероятностью лежит в этом интервале. Данный интервал содержит 0 - это значит, что разницу двух средних нельзя считать отличной от нуля. Построим такой же интервал для оценок по write."
883 | ]
884 | },
885 | {
886 | "cell_type": "code",
887 | "execution_count": 21,
888 | "metadata": {
889 | "collapsed": false
890 | },
891 | "outputs": [
892 | {
893 | "data": {
894 | "text/plain": [
895 | "(2.2980585638240707, 7.4418345705644668)"
896 | ]
897 | },
898 | "execution_count": 21,
899 | "metadata": {},
900 | "output_type": "execute_result"
901 | }
902 | ],
903 | "source": [
904 | "mrstat.mean_diff_confint_ind(w_girls,w_boys)"
905 | ]
906 | },
907 | {
908 | "cell_type": "markdown",
909 | "metadata": {},
910 | "source": [
911 | "Данный интервал не содержит ноль - значит разницу можно признать отличной от нуля."
912 | ]
913 | },
914 | {
915 | "cell_type": "markdown",
916 | "metadata": {},
917 | "source": [
918 | "### 4. Пропорция"
919 | ]
920 | },
921 | {
922 | "cell_type": "markdown",
923 | "metadata": {},
924 | "source": [
925 | "#### 4.1. Тест для одной доли"
926 | ]
927 | },
928 | {
929 | "cell_type": "markdown",
930 | "metadata": {},
931 | "source": [
932 | "Рассчитаем долю девочек в выборке."
933 | ]
934 | },
935 | {
936 | "cell_type": "code",
937 | "execution_count": 22,
938 | "metadata": {
939 | "collapsed": false
940 | },
941 | "outputs": [
942 | {
943 | "data": {
944 | "text/plain": [
945 | "0.545"
946 | ]
947 | },
948 | "execution_count": 22,
949 | "metadata": {},
950 | "output_type": "execute_result"
951 | }
952 | ],
953 | "source": [
954 | "hsb.female.mean()"
955 | ]
956 | },
957 | {
958 | "cell_type": "markdown",
959 | "metadata": {},
960 | "source": [
961 | "Сравним долю девочек в выборке с гипотетической долей 0.5"
962 | ]
963 | },
964 | {
965 | "cell_type": "code",
966 | "execution_count": 23,
967 | "metadata": {
968 | "collapsed": false
969 | },
970 | "outputs": [
971 | {
972 | "data": {
973 | "text/plain": [
974 | "0.20125699204448533"
975 | ]
976 | },
977 | "execution_count": 23,
978 | "metadata": {},
979 | "output_type": "execute_result"
980 | }
981 | ],
982 | "source": [
983 | "mrstat.prop_test(hsb.female,0.5)"
984 | ]
985 | },
986 | {
987 | "cell_type": "markdown",
988 | "metadata": {},
989 | "source": [
990 | "P-value получился > 0.05, значит, мы не можем утверждать, что истинная доля девочек отлична от 0.5"
991 | ]
992 | },
993 | {
994 | "cell_type": "markdown",
995 | "metadata": {},
996 | "source": [
997 | "#### 4.2 Доверительный интервал для одной доли"
998 | ]
999 | },
1000 | {
1001 | "cell_type": "code",
1002 | "execution_count": 24,
1003 | "metadata": {
1004 | "collapsed": false
1005 | },
1006 | "outputs": [
1007 | {
1008 | "data": {
1009 | "text/plain": [
1010 | "(0.47598602492749653, 0.61401397507250355)"
1011 | ]
1012 | },
1013 | "execution_count": 24,
1014 | "metadata": {},
1015 | "output_type": "execute_result"
1016 | }
1017 | ],
1018 | "source": [
1019 | "mrstat.prop_confint(hsb.female)"
1020 | ]
1021 | },
1022 | {
1023 | "cell_type": "markdown",
1024 | "metadata": {},
1025 | "source": [
1026 | "Истинная доля девочек с 95% вероятностью лежит в этом интервале. Интервал содержит 0.5 - значит нет оснований утверждать, что истинная доля отлична от 0.5. Запишем также в стиле плюс-минус:"
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "execution_count": 25,
1032 | "metadata": {
1033 | "collapsed": false
1034 | },
1035 | "outputs": [
1036 | {
1037 | "name": "stdout",
1038 | "output_type": "stream",
1039 | "text": [
1040 | "0.545 +/- 0.0690139750725\n"
1041 | ]
1042 | }
1043 | ],
1044 | "source": [
1045 | "lb, hb = mrstat.prop_confint(hsb['female'])\n",
1046 | "print hsb.female.mean(), '+/-', (hb-lb)/2."
1047 | ]
1048 | },
1049 | {
1050 | "cell_type": "markdown",
1051 | "metadata": {},
1052 | "source": [
1053 | "#### 4.3 Размер выборки для заданной доли и ширины интервала"
1054 | ]
1055 | },
1056 | {
1057 | "cell_type": "markdown",
1058 | "metadata": {},
1059 | "source": [
1060 | "С помощью данной процедуры можно определить, какой размер выборки потребуется для того, чтобы получить доверительный интервал заданной ширины. Например, нужно понять, сколько наблюдений потребуется, чтобы утверждать, что истинное среднее (доля) равно 0.5 плюс-минус 0.05. Для этого:"
1061 | ]
1062 | },
1063 | {
1064 | "cell_type": "code",
1065 | "execution_count": 26,
1066 | "metadata": {
1067 | "collapsed": false
1068 | },
1069 | "outputs": [
1070 | {
1071 | "data": {
1072 | "text/plain": [
1073 | "384.14588206941266"
1074 | ]
1075 | },
1076 | "execution_count": 26,
1077 | "metadata": {},
1078 | "output_type": "execute_result"
1079 | }
1080 | ],
1081 | "source": [
1082 | "mrstat.samplesize_confint_proportion(0.5,0.05)"
1083 | ]
1084 | },
1085 | {
1086 | "cell_type": "markdown",
1087 | "metadata": {},
1088 | "source": [
1089 | "Потребуется выборка размером не менее 385 наблюдений (полученное значение 384.15 округляется в большую сторону)."
1090 | ]
1091 | },
1092 | {
1093 | "cell_type": "markdown",
1094 | "metadata": {},
1095 | "source": [
1096 | "### 5. Две доли и АБ тестинг"
1097 | ]
1098 | },
1099 | {
1100 | "cell_type": "markdown",
1101 | "metadata": {},
1102 | "source": [
1103 | "#### 5.1 Тест разности двух независимых долей"
1104 | ]
1105 | },
1106 | {
1107 | "cell_type": "markdown",
1108 | "metadata": {},
1109 | "source": [
1110 | "Проверим, различается ли доля тех, кто ходит в частную школу, среди мальчиков и девочек. Для начала построим таблицу сопряженности."
1111 | ]
1112 | },
1113 | {
1114 | "cell_type": "code",
1115 | "execution_count": 27,
1116 | "metadata": {
1117 | "collapsed": false
1118 | },
1119 | "outputs": [
1120 | {
1121 | "data": {
1122 | "text/html": [
1123 | "\n",
1124 | "
\n",
1125 | " \n",
1126 | " \n",
1127 | " female | \n",
1128 | " 0 | \n",
1129 | " 1 | \n",
1130 | "
\n",
1131 | " \n",
1132 | " schtyp | \n",
1133 | " | \n",
1134 | " | \n",
1135 | "
\n",
1136 | " \n",
1137 | " \n",
1138 | " \n",
1139 | " 1 | \n",
1140 | " 77 | \n",
1141 | " 91 | \n",
1142 | "
\n",
1143 | " \n",
1144 | " 2 | \n",
1145 | " 14 | \n",
1146 | " 18 | \n",
1147 | "
\n",
1148 | " \n",
1149 | "
\n",
1150 | "
"
1151 | ],
1152 | "text/plain": [
1153 | "female 0 1\n",
1154 | "schtyp \n",
1155 | "1 77 91\n",
1156 | "2 14 18"
1157 | ]
1158 | },
1159 | "execution_count": 27,
1160 | "metadata": {},
1161 | "output_type": "execute_result"
1162 | }
1163 | ],
1164 | "source": [
1165 | "table = hsb.pivot_table(values=u'id',index=u'schtyp',columns=u'female',aggfunc='count')\n",
1166 | "table"
1167 | ]
1168 | },
1169 | {
1170 | "cell_type": "markdown",
1171 | "metadata": {},
1172 | "source": [
1173 | "Теперь рассчитаем долю учащихся в частных школах для мальчиков и девочек, а также размеры выборок."
1174 | ]
1175 | },
1176 | {
1177 | "cell_type": "code",
1178 | "execution_count": 28,
1179 | "metadata": {
1180 | "collapsed": false
1181 | },
1182 | "outputs": [
1183 | {
1184 | "data": {
1185 | "text/plain": [
1186 | "(0.15384615384615385, 91, 0.16513761467889909, 109)"
1187 | ]
1188 | },
1189 | "execution_count": 28,
1190 | "metadata": {},
1191 | "output_type": "execute_result"
1192 | }
1193 | ],
1194 | "source": [
1195 | "mrstat.get_props_and_lens(table)"
1196 | ]
1197 | },
1198 | {
1199 | "cell_type": "markdown",
1200 | "metadata": {},
1201 | "source": [
1202 | "Доля учащихся частных школ среди мальчиков равна 0.15, а для девочек 0.17. Проверим, значима ли эта разница."
1203 | ]
1204 | },
1205 | {
1206 | "cell_type": "code",
1207 | "execution_count": 29,
1208 | "metadata": {
1209 | "collapsed": false
1210 | },
1211 | "outputs": [
1212 | {
1213 | "data": {
1214 | "text/plain": [
1215 | "0.82828242684033948"
1216 | ]
1217 | },
1218 | "execution_count": 29,
1219 | "metadata": {},
1220 | "output_type": "execute_result"
1221 | }
1222 | ],
1223 | "source": [
1224 | "mrstat.proportions_diff_ind(*mrstat.get_props_and_lens(table))"
1225 | ]
1226 | },
1227 | {
1228 | "cell_type": "markdown",
1229 | "metadata": {},
1230 | "source": [
1231 | "P-value получился гораздо больше 0.05 - следовательно, нельзя отвергнуть гипотезу о том, что доли равны. Делаем вывод, что доля тех, кто учится в частных школах, не зависит от пола."
1232 | ]
1233 | },
1234 | {
1235 | "cell_type": "markdown",
1236 | "metadata": {},
1237 | "source": [
1238 | "#### 5.2 Доверительный интервал для разности двух независимых долей"
1239 | ]
1240 | },
1241 | {
1242 | "cell_type": "markdown",
1243 | "metadata": {},
1244 | "source": [
1245 | "Построим 95% доверительный интервал для разности долей частных школ среди мальчиков и девочек."
1246 | ]
1247 | },
1248 | {
1249 | "cell_type": "code",
1250 | "execution_count": 30,
1251 | "metadata": {
1252 | "collapsed": false
1253 | },
1254 | "outputs": [
1255 | {
1256 | "data": {
1257 | "text/plain": [
1258 | "(-0.11304660507498858, 0.090463683409498105)"
1259 | ]
1260 | },
1261 | "execution_count": 30,
1262 | "metadata": {},
1263 | "output_type": "execute_result"
1264 | }
1265 | ],
1266 | "source": [
1267 | "mrstat.proportions_confint_diff_ind(*mrstat.get_props_and_lens(table))"
1268 | ]
1269 | },
1270 | {
1271 | "cell_type": "markdown",
1272 | "metadata": {},
1273 | "source": [
1274 | "Доверительный интервал получился очень широким, а также он содержит 0. На основе этого тоже можно сделать вывод, что доли учащихся в частных школах среди мальчиков и девочек значимо не отличаются."
1275 | ]
1276 | },
1277 | {
1278 | "cell_type": "markdown",
1279 | "metadata": {},
1280 | "source": [
1281 | "Обратите внимание, что в mrstat есть несколько похожих функций для проверки гипотез о доле и построения доверительных интервалов:\n",
1282 | "\n",
1283 | " mrstat.proportions_confint_diff_ind(p1,n1,p2,n2) - берет на вход доли и размеры выборок\n",
1284 | " mrstat.proportions_confint_diff_ind_samples(sample1,sample2) - берет на вход две выборки из 0 и 1\n",
1285 | " mrstat.proportions_confint_diff_ind_table(contigency_table) - берет на вход таблицу сопряженности"
1286 | ]
1287 | },
1288 | {
1289 | "cell_type": "markdown",
1290 | "metadata": {},
1291 | "source": [
1292 | "#### 5.3 Тест Хи-квадрат"
1293 | ]
1294 | },
1295 | {
1296 | "cell_type": "markdown",
1297 | "metadata": {},
1298 | "source": [
1299 | "С помощью теста хи-квадрат можно проверить, имеются ли статистически значимые различия между долей частных школ среди мальчиков и девочек. Для этого сначала нужно посчитать таблицу сопряженности."
1300 | ]
1301 | },
1302 | {
1303 | "cell_type": "code",
1304 | "execution_count": 31,
1305 | "metadata": {
1306 | "collapsed": false
1307 | },
1308 | "outputs": [
1309 | {
1310 | "data": {
1311 | "text/html": [
1312 | "\n",
1313 | "
\n",
1314 | " \n",
1315 | " \n",
1316 | " female | \n",
1317 | " 0 | \n",
1318 | " 1 | \n",
1319 | "
\n",
1320 | " \n",
1321 | " schtyp | \n",
1322 | " | \n",
1323 | " | \n",
1324 | "
\n",
1325 | " \n",
1326 | " \n",
1327 | " \n",
1328 | " 1 | \n",
1329 | " 77 | \n",
1330 | " 91 | \n",
1331 | "
\n",
1332 | " \n",
1333 | " 2 | \n",
1334 | " 14 | \n",
1335 | " 18 | \n",
1336 | "
\n",
1337 | " \n",
1338 | "
\n",
1339 | "
"
1340 | ],
1341 | "text/plain": [
1342 | "female 0 1\n",
1343 | "schtyp \n",
1344 | "1 77 91\n",
1345 | "2 14 18"
1346 | ]
1347 | },
1348 | "execution_count": 31,
1349 | "metadata": {},
1350 | "output_type": "execute_result"
1351 | }
1352 | ],
1353 | "source": [
1354 | "table"
1355 | ]
1356 | },
1357 | {
1358 | "cell_type": "code",
1359 | "execution_count": 32,
1360 | "metadata": {
1361 | "collapsed": false
1362 | },
1363 | "outputs": [
1364 | {
1365 | "data": {
1366 | "text/plain": [
1367 | "(0.04704775346977183, 0.82828242684033948, 1L, array([[ 76.44, 91.56],\n",
1368 | " [ 14.56, 17.44]]))"
1369 | ]
1370 | },
1371 | "execution_count": 32,
1372 | "metadata": {},
1373 | "output_type": "execute_result"
1374 | }
1375 | ],
1376 | "source": [
1377 | "mrstat.chi2_contingency(table,correction=False)"
1378 | ]
1379 | },
1380 | {
1381 | "cell_type": "markdown",
1382 | "metadata": {},
1383 | "source": [
1384 | "P-value получился равным 0.83, это больше, чем 0.05, следовательно, гипотеза о равенстве долей не отвергается. Обратите внимание, что p-value получился абсолютно таким же, как в тесте для двух независимых долей в пункте 5.1. Это происходит из-за того, что в основе теста для двух долей лежит нормальное распределение, а распределение хи-квадрат с одной степенью свободы - это распределение квадрата стандартной нормальной величины, поэтому для таблицы 2x2 (без поправки на непрерывность) оба теста совпадают."
1385 | ]
1386 | },
1387 | {
1388 | "cell_type": "markdown",
1389 | "metadata": {},
1390 | "source": [
1391 | "#### 5.4 Точный тест Фишера"
1392 | ]
1393 | },
1394 | {
1395 | "cell_type": "markdown",
1396 | "metadata": {},
1397 | "source": [
1398 | "Если в таблице сопряженности есть ячейки, содержащие значения меньше 5, то хи-квадрат использовать нельзя. Но можно использовать точный тест Фишера. На вход он принимает ту же таблицу сопряженности."
1399 | ]
1400 | },
1401 | {
1402 | "cell_type": "code",
1403 | "execution_count": 33,
1404 | "metadata": {
1405 | "collapsed": false
1406 | },
1407 | "outputs": [
1408 | {
1409 | "data": {
1410 | "text/plain": [
1411 | "(1.0879120879120878, 0.84917505847685981)"
1412 | ]
1413 | },
1414 | "execution_count": 33,
1415 | "metadata": {},
1416 | "output_type": "execute_result"
1417 | }
1418 | ],
1419 | "source": [
1420 | "mrstat.fisher_exact(table)"
1421 | ]
1422 | },
1423 | {
1424 | "cell_type": "markdown",
1425 | "metadata": {},
1426 | "source": [
1427 | "Да, кстати всё это примеры АБ тестинга)."
1428 | ]
1429 | },
1430 | {
1431 | "cell_type": "markdown",
1432 | "metadata": {},
1433 | "source": [
1434 | "#### 5.5 Размер выборок для двух пропорций"
1435 | ]
1436 | },
1437 | {
1438 | "cell_type": "markdown",
1439 | "metadata": {},
1440 | "source": [
1441 | "При планировании аб тестирования требуется понимать, какого объема выборка потребуется. Для того, чтобы это сделать нужно знать следующие параметры:\n",
1442 | "\n",
1443 | "- базовый уровень показателя p1, т.е. та цифра, которую необходимо улучшить\n",
1444 | "- желаемый уровень показателя, т.е цифра, при которой тестирование будет считаться успешным. Определяется экспертно.\n",
1445 | "- достигаемый уровень значимости и мощность. Общепринятые стандарты это 0.05 и 0.8\n",
1446 | "- пропорции контрольной и тестовой выборок, если необходимо.\n",
1447 | "\n",
1448 | "Рассмотрим на примере. Допустим, есть какой-либо показатель доли в 10%, и мы хотим его увеличить с помощью новой технологии. Увеличение будет считаться успешным, если показатель вырастет на 20% (относительно базового уровня, т.е. до 12%). Также мы не хотим делать большую контрольную группу, чтобы сразу получить выгоду от внедрения новой технологии - сделаем контрольную группу 20% от всего теста. Уровень значимости и мощность оставим стандартными. Вот что получится:"
1449 | ]
1450 | },
1451 | {
1452 | "cell_type": "code",
1453 | "execution_count": 34,
1454 | "metadata": {
1455 | "collapsed": true
1456 | },
1457 | "outputs": [],
1458 | "source": [
1459 | "p1 = 0.1\n",
1460 | "p2 = 0.1 * 1.2"
1461 | ]
1462 | },
1463 | {
1464 | "cell_type": "code",
1465 | "execution_count": 35,
1466 | "metadata": {
1467 | "collapsed": false
1468 | },
1469 | "outputs": [
1470 | {
1471 | "data": {
1472 | "text/plain": [
1473 | "(2396.5, 9586.0)"
1474 | ]
1475 | },
1476 | "execution_count": 35,
1477 | "metadata": {},
1478 | "output_type": "execute_result"
1479 | }
1480 | ],
1481 | "source": [
1482 | "mrstat.two_proportions_sample_size(p1,p2,frac=0.2)"
1483 | ]
1484 | },
1485 | {
1486 | "cell_type": "markdown",
1487 | "metadata": {},
1488 | "source": [
1489 | "Получается, что для того чтобы считать тестирование состоявшимся необходимо накопить 2396.5 в контроле и 9586.0 в пилоте или 11982.5 для всего тестирования. Данные значения нужно округлять до целого, а еще лучше до круглых чисел в большую сторону."
1490 | ]
1491 | },
1492 | {
1493 | "cell_type": "markdown",
1494 | "metadata": {},
1495 | "source": [
1496 | "### 6. Непараметрические критерии"
1497 | ]
1498 | },
1499 | {
1500 | "cell_type": "markdown",
1501 | "metadata": {},
1502 | "source": [
1503 | "Непараметрические критерии следует использовать с крайне ненормальными и несимметричными распределениями."
1504 | ]
1505 | },
1506 | {
1507 | "cell_type": "markdown",
1508 | "metadata": {},
1509 | "source": [
1510 | "#### 6.1 Критерий знаков"
1511 | ]
1512 | },
1513 | {
1514 | "cell_type": "markdown",
1515 | "metadata": {},
1516 | "source": [
1517 | "Применяется для проверки гипотезы о медиане. Не предъявляет никаких требований к распределению. Проверим гипотезу о том, что медиана оценок по write равна 50."
1518 | ]
1519 | },
1520 | {
1521 | "cell_type": "code",
1522 | "execution_count": 36,
1523 | "metadata": {
1524 | "collapsed": false
1525 | },
1526 | "outputs": [
1527 | {
1528 | "data": {
1529 | "text/plain": [
1530 | "54.0"
1531 | ]
1532 | },
1533 | "execution_count": 36,
1534 | "metadata": {},
1535 | "output_type": "execute_result"
1536 | }
1537 | ],
1538 | "source": [
1539 | "np.median(hsb.write)"
1540 | ]
1541 | },
1542 | {
1543 | "cell_type": "code",
1544 | "execution_count": 37,
1545 | "metadata": {
1546 | "collapsed": false
1547 | },
1548 | "outputs": [
1549 | {
1550 | "data": {
1551 | "text/plain": [
1552 | "(27.0, 0.00015185854094876164)"
1553 | ]
1554 | },
1555 | "execution_count": 37,
1556 | "metadata": {},
1557 | "output_type": "execute_result"
1558 | }
1559 | ],
1560 | "source": [
1561 | "mrstat.sign_test(hsb.write,50)"
1562 | ]
1563 | },
1564 | {
1565 | "cell_type": "markdown",
1566 | "metadata": {},
1567 | "source": [
1568 | "P-value маленький, следовательно, гипотеза о медиане, равной 50, отвергается."
1569 | ]
1570 | },
1571 | {
1572 | "cell_type": "markdown",
1573 | "metadata": {},
1574 | "source": [
1575 | "#### 6.2 Критерий знаковых рангов Вилкоксона"
1576 | ]
1577 | },
1578 | {
1579 | "cell_type": "markdown",
1580 | "metadata": {},
1581 | "source": [
1582 | "Этот критерий использует больше информации, чем просто критерий знаков. Проверим ту же гипотезу."
1583 | ]
1584 | },
1585 | {
1586 | "cell_type": "code",
1587 | "execution_count": 38,
1588 | "metadata": {
1589 | "collapsed": false
1590 | },
1591 | "outputs": [
1592 | {
1593 | "data": {
1594 | "text/plain": [
1595 | "WilcoxonResult(statistic=6524.0, pvalue=3.6917631525880871e-05)"
1596 | ]
1597 | },
1598 | "execution_count": 38,
1599 | "metadata": {},
1600 | "output_type": "execute_result"
1601 | }
1602 | ],
1603 | "source": [
1604 | "mrstat.wilcoxon(hsb.write.values-50)"
1605 | ]
1606 | },
1607 | {
1608 | "cell_type": "markdown",
1609 | "metadata": {},
1610 | "source": [
1611 | "В данном случае гипотеза о медиане, равной 50, отвергается с еще большей силой. Критерий знаков и критерий знаковых рангов Вилкоксона являются альтернативой для одновыборочного ти-теста, а критерий знаковых рангов может еще применяться для проверки гипотезы о равенстве средних для двух зависимых выборок."
1612 | ]
1613 | },
1614 | {
1615 | "cell_type": "markdown",
1616 | "metadata": {},
1617 | "source": [
1618 | "#### 6.3 Критерий Манна-Уитни"
1619 | ]
1620 | },
1621 | {
1622 | "cell_type": "markdown",
1623 | "metadata": {},
1624 | "source": [
1625 | "Критерий Манна-Уитни является альтернативой ти-теста для двух независимых выборок и может применяться при любом типе распределения. Этот критерий проверяет гипотезу о том, что две выборки взяты из одной генеральной совокупности, в частности, насколько значения одной выборки больше чем в другой. Применим критерий Манна-Уитни к случаю с оценкой по тесту write для мальчиков и девочек."
1626 | ]
1627 | },
1628 | {
1629 | "cell_type": "code",
1630 | "execution_count": 39,
1631 | "metadata": {
1632 | "collapsed": false
1633 | },
1634 | "outputs": [
1635 | {
1636 | "data": {
1637 | "text/plain": [
1638 | "MannwhitneyuResult(statistic=3606.0, pvalue=0.00043746993681806757)"
1639 | ]
1640 | },
1641 | "execution_count": 39,
1642 | "metadata": {},
1643 | "output_type": "execute_result"
1644 | }
1645 | ],
1646 | "source": [
1647 | "mrstat.mannwhitneyu(w_boys,w_girls)"
1648 | ]
1649 | },
1650 | {
1651 | "cell_type": "markdown",
1652 | "metadata": {},
1653 | "source": [
1654 | "P-value < 0.05, следовательно, гипотеза о том, что две выборки пришли из одного распределения, отвергается. Результаты критерия Манна-Уитни можно применять наравне с обычным ти-тестом.\n",
1655 | "\n",
1656 | "Кстати, критерий Манна-Уитни является официальным \"сдаточным\" критерием для ряда АБ тестов в Yandex."
1657 | ]
1658 | },
1659 | {
1660 | "cell_type": "markdown",
1661 | "metadata": {},
1662 | "source": [
1663 | "#### 6.4 Бутстреп"
1664 | ]
1665 | },
1666 | {
1667 | "cell_type": "markdown",
1668 | "metadata": {},
1669 | "source": [
1670 | "С помощью бутстрепа можно проверять любой параметр вне зависимости от распределения выборки."
1671 | ]
1672 | },
1673 | {
1674 | "cell_type": "code",
1675 | "execution_count": 40,
1676 | "metadata": {
1677 | "collapsed": false
1678 | },
1679 | "outputs": [
1680 | {
1681 | "name": "stdout",
1682 | "output_type": "stream",
1683 | "text": [
1684 | "52.775\n",
1685 | "54.0\n"
1686 | ]
1687 | }
1688 | ],
1689 | "source": [
1690 | "print np.mean(hsb.write)\n",
1691 | "print np.median(hsb.write)"
1692 | ]
1693 | },
1694 | {
1695 | "cell_type": "code",
1696 | "execution_count": 41,
1697 | "metadata": {
1698 | "collapsed": false
1699 | },
1700 | "outputs": [
1701 | {
1702 | "name": "stdout",
1703 | "output_type": "stream",
1704 | "text": [
1705 | "(p-value for mean = 51) = 0.004\n",
1706 | "(p-value for median = 53) = 0.16\n"
1707 | ]
1708 | }
1709 | ],
1710 | "source": [
1711 | "print '(p-value for mean = 51) = ', mrstat.bootstrap_test(hsb.write.values,51,np.mean)\n",
1712 | "print '(p-value for median = 53) = ', mrstat.bootstrap_test(hsb.write.values,53,np.median)"
1713 | ]
1714 | },
1715 | {
1716 | "cell_type": "markdown",
1717 | "metadata": {},
1718 | "source": [
1719 | "Гипотеза о среднем, равном 51, отвергается (p-value < 0.05), гипотеза о медиане, равной 53, не отвергается (p-value > 0.05)."
1720 | ]
1721 | },
1722 | {
1723 | "cell_type": "markdown",
1724 | "metadata": {},
1725 | "source": [
1726 | "С помощью бутстрепа также можно строить доверительные интервалы."
1727 | ]
1728 | },
1729 | {
1730 | "cell_type": "code",
1731 | "execution_count": 42,
1732 | "metadata": {
1733 | "collapsed": false
1734 | },
1735 | "outputs": [
1736 | {
1737 | "name": "stdout",
1738 | "output_type": "stream",
1739 | "text": [
1740 | "mean interval [ 51.389875 54.105 ]\n",
1741 | "median interval [ 52. 57.]\n"
1742 | ]
1743 | }
1744 | ],
1745 | "source": [
1746 | "print 'mean interval ', mrstat.bootstrap_conf_int(hsb.write.values,np.mean)\n",
1747 | "print 'median interval ', mrstat.bootstrap_conf_int(hsb.write.values,np.median)"
1748 | ]
1749 | },
1750 | {
1751 | "cell_type": "markdown",
1752 | "metadata": {},
1753 | "source": [
1754 | "А также доверительный интервал для разницы двух параметров."
1755 | ]
1756 | },
1757 | {
1758 | "cell_type": "code",
1759 | "execution_count": 43,
1760 | "metadata": {
1761 | "collapsed": false
1762 | },
1763 | "outputs": [
1764 | {
1765 | "name": "stdout",
1766 | "output_type": "stream",
1767 | "text": [
1768 | "[-1.3 2.4405]\n",
1769 | "[ 0. 7.]\n"
1770 | ]
1771 | }
1772 | ],
1773 | "source": [
1774 | "print mrstat.bootstrap_diff_conf_int(hsb.write.values,hsb.read.values,np.mean)\n",
1775 | "print mrstat.bootstrap_diff_conf_int(hsb.write.values,hsb.read.values,np.median)"
1776 | ]
1777 | },
1778 | {
1779 | "cell_type": "markdown",
1780 | "metadata": {},
1781 | "source": [
1782 | "Мы видим, что доверительные интервалы для разницы и средних, и медиан оценок по write и read содержат 0, следовательно, нельзя утверждать, что между этими параметрами существует значимая разница."
1783 | ]
1784 | },
1785 | {
1786 | "cell_type": "markdown",
1787 | "metadata": {},
1788 | "source": [
1789 | "Бутстреп можно использовать тогда, когда нельзя применить другие модели из-за различных ограничений, например, по размеру выборки."
1790 | ]
1791 | },
1792 | {
1793 | "cell_type": "markdown",
1794 | "metadata": {},
1795 | "source": [
1796 | "### 7. Корреляция"
1797 | ]
1798 | },
1799 | {
1800 | "cell_type": "markdown",
1801 | "metadata": {},
1802 | "source": [
1803 | "#### 7.1 Коэффициент корреляции Пирсона"
1804 | ]
1805 | },
1806 | {
1807 | "cell_type": "code",
1808 | "execution_count": 44,
1809 | "metadata": {
1810 | "collapsed": false
1811 | },
1812 | "outputs": [
1813 | {
1814 | "data": {
1815 | "text/plain": [
1816 | ""
1817 | ]
1818 | },
1819 | "execution_count": 44,
1820 | "metadata": {},
1821 | "output_type": "execute_result"
1822 | },
1823 | {
1824 | "data": {
1825 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHwZJREFUeJzt3X+MXeV95/H312AMxRDM2B28sQ2kdumyBTswm0JdEOGX\n0i0CVqloqyBoF9a7UtWm2W3BRKvVJtpWOFVLWmmVLus2chL6w4Llh7wrFmMXQVhCMnaM2RqovQFq\nW8wYBthgEowL3/3jnjEz9tw55/o+5znPc8/nJVl37vGde77nOXfmO+c5z/N9zN0REZH2mtN0ACIi\n0iwlAhGRllMiEBFpOSUCEZGWUyIQEWk5JQIRkZZTIhARaTklAhGRllMiEBFpuRObDqCKhQsX+jnn\nnNN0GCIiWdm2bdsb7r6o7HVZJIJzzjmH0dHRpsMQEcmKmb1a5XXqGhIRaTklAhGRllMiEBFpOSUC\nEZGWUyIQEWk5JQKRxE0cPMRze99m4uChpkNpjba1eRbDR0Xa6uEd+7nzgZ3MnTOHwx9+yFc+eyHX\nr/p402ENtDa2ua4IRBI1cfAQdz6wk/cOf8g7h/6R9w5/yB0P7GzNX6lNaGubKxGIJGrfWz9m7pzp\nP6Jz58xh31s/biiiwdfWNlciEEnUkgWncPjDD6dtO/zhhyxZcEpDEQ2+tra5EoFIoobmz+Mrn72Q\nk+fO4bR5J3Ly3Dl85bMXMjR/XtOhDay2trm5e9MxlBoZGXHVGpK2mjh4iH1v/ZglC04Z+F9IqRiU\nNjezbe4+UvY6jRoSSdzQ/HlZ/zLKUdvaXF1DIiItp0QgItJySgQiIi2nRCAi0nJKBCIiLadEICLZ\nqFIMruw1uRSU2zP+DveP7mXP+Du170vDR0UkC1WKwZW9JpeCcv/xoef5xnf+4cjzWy5dxpdvuKC2\n/emKQESSV6UYXNlrcikot2f8nWlJAOAbz/xDrVcGSgQikrwqxeDKXpNLQbkde9/uaXsISgQikrwq\nxeDKXpNLQblVS8/oaXsISgQikrwqxeDKXpNLQbnlw6dxy6XLpm275dJlLB8+rbZ9quiciGSjSjG4\nstfkUlBuz/g77Nj7NquWnnHcSUBF50Rk4FQpBlf2mlwKyi0fPq3Wq4Cp1DUkIlKDmPMA+qUrAhGR\nwGLPA+iXrghERAJqYh5Av2pLBGZ2npntmPLvh2b2O2Z2ppltNrPdxeOCumIQEYmtiXkA/aotEbj7\nS+6+yt1XARcDPwIeBNYCW9x9BbCleC4iMhCamAfQr1hdQ1cB/9fdXwVuADYU2zcAN0aKQUSkdk3M\nA+hXrJvFvwr8VfH1sLu/Vnw9BgxHikFEJIov33ABt1xyTt/zAGKpPRGY2UnA9cBdR/+fu7uZzTij\nzczWAGsAli1bNtNLRESSFXMeQL9idA39IrDd3ceL5+NmthigeDww0ze5+73uPuLuI4sWLYoQpohI\nO8VIBL/GR91CAI8AtxZf3wo8HCEGERHpotZEYGanAtcA/33K5ruBa8xsN3B18VxERBpS6z0Cd38X\nGDpq2wSdUUQiIpIAzSwWaYFc1ul9aPtebt/wPR7avnfG/w9xHLFqAKnWkIgkI5d1ei/5g82M/fB9\nAB5/4QDrHn2RZ754zZH/D3EcsWoAqdaQiCQjl3V6H9q+90gSmPTaD98/cmUQ4jhi1QBSrSERSUou\n6/Ruen5s1u0hjiNWDSDVGhKRpOSyTu91F5w16/YQxxGrBpBqDYlIUnJZp/fGi5ay+PSTpm1bfPpJ\n3HjRUiDMccSqAZRjrSGtWSzSArms0/vQ9r1sen6M6y4460gSmCrEcYRYCzil/cym6prFSgQiIgOq\naiJQ15CISIJizv3QPAIRkcTEnvuhKwIRkYQ0MfdDiUBEJCFNzP1QIhARSUgTcz+UCERaIJeiczEK\ntcVqi9GXJ/jjx15i9OWJnr5vcs7ES
ScY806cw0knWO1zP3SzWGTA5VJ0LkahtlhtcfP67/DtPZ0E\n8Kdb93DZ8iG+efsllb9/9JU3ef8DBzrD+0dffVM3i0Xk+ORSdC5GobZYbTH68sSRJDDpqT0Tla8M\nmihap0QgMsByKToXo1BbrLZ4cvcbPW0/WhNF65QIRAZYLkXnYhRqi9UWl69Y2NP2ozVRtE6JQGSA\n5VJ0LkahtlhtMXLuEJctn7ZCL5ctH2Lk3KEu3zFdE0XrVGtIpAVyKToXo1BbrLYYfXmCJ3e/weUr\nFlZOAlOFaAsVnRMRaTkVnZNklY3j1pj36drUHmXvEaItqrxHiGM53nkETdA8AomqbBy3xrxP16b2\nKHuPEG1R5T1CHEu/8whi0xWBRFM2jltj3qdrU3uUvUeItqjyHiGOpd95BE1QIpBoysZxa8z7dG1q\nj7L3CNEWVd4jxLH0O4+gCUoEEk3ZOG6NeZ+uTe1R9h4h2qLKe4Q4ln7nETRBiUCiKRvHrTHv07Wp\nPcreI0RbVHmPEMfS7zyCJmj4qERXNo5bY96na1N7lL1HiLao8h4hjqXfeQQhaB6BiEjLaR6BiGQn\nlzkTg0bzCEQkCbnMmRhEuiIQkcblMmdiUNWaCMzsDDO738xeNLMXzOxSMzvTzDab2e7icUGdMYhI\n+nKZMzGo6r4i+BPgUXf/GWAl8AKwFtji7iuALcVzkaAGqa85xLGkXvcm5pwJ1Yg6Vm33CMzsY8Dl\nwK8DuPv7wPtmdgNwRfGyDcATwJ11xSHtM0h9zSGOJYe6N0Pz53HTxUumlXe4aWRJ8OGyqhE1szqv\nCM4FXge+bmbfN7P1ZnYqMOzurxWvGQOGa4xBWmaQ+ppDHEsudW8mDh5i47Z907ZtHN0X9LypRlR3\ndSaCE4GLgK+5+yeBdzmqG8g7kxhmnMhgZmvMbNTMRl9//fUaw5RBMkh9zSGOJZe6NzHOm2pEdVdn\nItgH7HP3Z4vn99NJDONmthigeDww0ze7+73uPuLuI4sWLaoxTBkkudTnqSLEseRS9ybGeVONqO5q\nSwTuPgbsNbPzik1XAbuAR4Bbi223Ag/XFYO0Ty71eaoIcSy51L2Jcd5UI6q7WktMmNkqYD1wEvAD\n4DfoJJ+NwDLgVeAmd39ztvdRiQnpVS71eaoIcSwp1L2pIsZ5a1ONKNUaEhFpOdUaEhGRSpQIRERa\nTolARKTllAhERFpOiUB6klP9lBSEaK8QtXHK4ggRZ4z3yKnuUqyaRiFoPQKpLLf6KU0L0V4hauOU\nxREizhjvkVPdpVg1jULRFYFUkmP9lCaFaK8QtXHK4ggRZ4z3yKnuUqyaRiEpEUglOdZPaVKI9gpR\nG6csjhBxxniPnOouxappFJISgVSSY/2UJoVorxC1ccriCBFnjPfIqe5SrJpGISkRSCU51k9pUoj2\nClEbpyyOEHHGeI+c6i7FqmkUkkpMSE9SqJ+SkxDtFaI2TlkcIeKM8R451V2KVdNoNqo1JCLScqo1\nJCIilSgRtEgqk4ZCSCGOFGKoKkasqUz2KotDi9cfSxPKWiKVSUMhpBBHCjFUFSPWVCZ7lcWhxetn\npiuCFkhl0lAIKcSRQgxVxYg1lcleZXFo8frulAhaIJVJQyGkEEcKMVQVI9ZUJnuVxaHF67tTImiB\nVCYNhZBCHCnEUFWMWFOZ7FUWRy8Tvfrp38/p8zFJiaAFUpk0FEIKcaQQQ1UxYk1lsldZHFUnej28\nYz+r123l5vXPsnrdVh7Zsb9yDFXiSJHmEbRIKpOGQkghjhRiqCpGrKlM9iqLY7aJXhMHD7F63Vbe\nO/zRX/Qnz53D03de2fMxpfD5qDqPYNZRQ2b2PNA1U7j7hccRmzRkaP68vj+QId4jhBTiSCGGqmLE\nGmIfI+f2X/KhLI7lw6d1nek72b//Hh8lgsn+/V6PLafPR9nw0euKx98sHr9ZPH6unnBERPrTz1/i\nO
fbvhzBrInD3VwHM7Bp3/+SU/1prZtuBtXUGJyLSi37H70/2799x1Hvk8pf98ao6oczMbLW7P108\n+Xl0o1lEEjJ1/P5k184dD+xk9fKFPf0iv37Vx1m9fGHj/fsxVU0EtwF/YWYfAwx4C/hXtUUlItKj\ntvbvh1Dpr3p33+buK4GVwIXuvsrdt9cbmqQol5o1MeQSJ5TX10mlDtWWXWPcef9zbNk11vP3Vu3f\nj3Xe+t1PzM9X5VpDZvZLwD8DTjYzANz9yzXFJQnKpWZNDLnECeX1dVKpQ3XtPU/w9+PvAvA3o/s4\nb/hU/tcXrqj8/VX692Odt373E/vzVWkegZn9GfATwKeB9cAvA99199tqi2wKzSNoXsjx1U3uI4Rc\n4oTOlcDV9zx5zPbHv3A5y4dPC3IsId5jy64xbvvGtmO2//ktF3PV+WdVeo+p8czUvx/rvPW7n5Bx\nhl6P4Ofd/RbgLXf/EnAp8NM9RSRZy6VmTQy5xAnl9XVSqUP12K7xnrbPZmj+PFYuPeOYX5qxzlu/\n+2ni81U1EUxG8CMz+yfAYWBxPSFJinKpWRNDLnFCeX2dVOpQXXv+cE/bj0es89bvfpr4fFVNBJvM\n7AzgD4HtwCvAX9UVlKQnl5o1MeQSJ5TX10mlDtVV55/FecOnTtt23vCpPXcL1R1njP008fnqudaQ\nmc0DTnb3/1dPSMfSPYJ05FKzJoZc4oTyhdRTqUO1ZdcYj+0a59rzh4Mmgalinbd+9xMizqCL15vZ\nTwD/Hljm7v/azFYA57n7puOKrkdKBCIivQt9s/jrwCE6N4kB9gP/uUIQr5jZ82a2w8xGi21nmtlm\nM9tdPC6oGEPPUhkbnYsqa7mWtUcq7RVjXdpYx9rP2PrcxDhvsdYs7leK8wh+yt1/xcx+DcDdf2ST\nkwnKfdrdpy4ztBbY4u53m9na4vmd1UOuJpWx0bmospZrWXuk0l4x1qWNdaz9jq3PSYzzFmvN4n7F\n/lmqekXwvpmdQlGS2sx+is4VwvG4AdhQfL0BuPE436erQVqjN4Yqa7mWtUcq7RVjXdpYx7pl19iR\nJDDppfF3B/LKIMZ5i7Vmcb+a+FkqTQTFX/5/BjwKLDWz+4AtwB0V3t+Bx81sm5mtKbYNu/trxddj\nwIzjw8xsjZmNmtno66+/XmFXH0llbHQuqqzlWtYeqbRXjHVpYx1ryLH1qYtx3mKtWdyvJn6WSruG\n3N3N7PeAK4BL6BSd+/xR3T3d/IK77zeznwQ2m9mLM7z3jHer3f1e4F7o3CyusK8jUhkbnYsqa7mW\ntUcq7dXLurTHK9axXnv+MH8zum/G7YMmxnmLsY8QUp5HsB34hLv/D3ffVDEJ4O77i8cDwIPAp4Bx\nM1sMUDwe6D3s2aUyNjoXVdZyLWuPVNqr6rq0/Yh1rDHG1qcixnmLsY8Qkp1HUPwlvxx4FXiXzlWB\nz7ZUpZmdCsxx93eKrzcDXwauAiam3Cw+091n7WY63uGjqYyNzkXZWHMob49U2qvKsfQr1rHGGFuf\nihjnLcY+QkhxHsHZM22fXMGsy/d8gs5VAHS6oP7S3X/fzIaAjcAyOonlJnd/c7b9ax6BiEjvgixe\nP2m2X/izfM8P6KxfcPT2CTpXBSIikgAtNylynFKZPCfSr8oL04jIR1KZPCcSgq4IRHqUyuQ5kVCU\nCER6lMrkOZFQBjoRDFIfbowCelX2kcsi6KMvT/DHj73E6MsTwfcRe8JP2bFUkUsxtxDHWnZuQ+wj\nRBwp6Xk9giYcz/DRQerDjVFAr8o+clkE/eb13+Hbez76Ib9s+RDfvP2SoPt4ZMf+YxZJr+PzVXYs\nVeRSzC3EsZad2xD7CBFHLKHLUGdlkPpwYxTQq7KPsoJdqRT6G315YtoPOsBTeyaO/PUX6rNx/aqP\n8/SdV/Kt23+Op++8spYf8rJjqSKXYm4hjrXs3IbYR4g4UjSQiWC
Q+nBjFNCrso9cFkF/cvfM1U8m\nt4f8bHRbJD2UsmOpIpdibiGOtezchthHiDhSNJCJIJUCaCHEKKBXZR+5LIJ++YqFs27P6bNRdixV\n5FLMLcSxlp3bEPsIEUeKBjIRpFIALYQYBfSq7COXRdBHzh3isuVD07ZdtnyIkXOHgu0jlpFzh/jp\nGYrOTR5LFbkUcys7b1WUndsQ+wgRR4oG9mYxpFMALYQYBfSq7COXRdBHX57gyd1vcPmKhTP+oOfw\n2Zg4eIjV67by3uGP/ro8ee4cnr7zyp5jzqWYW9l5q6Ls3IbYR4g4YghadK5pKjonbfTc3re5ef2z\nvHPoH49sO23eiXzr9p9jZWI19CVNrR41JDIIYvY1pzL/Q5qhWkMiiZrsaz56vkLoboZU5n9Ic9Q1\nJJK4OvuaQ9yHCHkvQ8JS15DIgKhzvkIq8z+kWUoEJXLp94wVZ4z9PLR9L7dv+B4Pbd9b2z5CqFJb\nJ0R71VknKJX5H9Is3SOYRS79nrHijLGfS/5gM2M/fB+Ax184wLpHX+SZL14TdB8hVKmtE6K96q4T\nFOI+RKx7GVIf3SPoIpd+z1hxxtjPQ9v38jsbdx6z/as3XciNFy0Nso8Q9oy/w9X3PHnM9se/cPmR\n8fMh2qvKfkJJZf6HhKV7BH3Kpd8zVpwx9rPp+bGetjelSm2dEO0Vo07QpBD3IequvST1USLoIpd+\nz1hxxtjPdRec1dP2plSprROivULWCYpxbyeX+2lyLCWCLnKpFxIrzhj7ufGipSw+/aRp2xafflJS\n3UJQrbZOiPYKVSfo4R37Wb1uKzevf5bV67byyI79PX1/KvuQ+ugeQYlc+j1jxRljPw9t38um58e4\n7oKzkksCU1WprROivfqp4RPj3k4u99PaqOo9Ao0aKjE0f14WH+ZYccbYz40XLU06AUxaPnxa6S/m\nEO1VZT/dTN6reI+PfklP3qsIdR5j7EPqNdBdQ7n0WcaKM8R49Bhr34agPvGOGPd2crmfJt0N7BWB\n5gBMF2I8eoy1b0OI0aa5fL6G5s9j2Zmn8Pfj7x7ZdvaZYbv1hubPY+TsBdOWgfznZy/Q1UBGBvKK\nIJc1Q2PFGWJN2Rhr34YQo01z+XxBp/b+1CQA8NL4u0HX6d0z/s6MawGn9tmQ7gYyEWgOwHQhxqPH\nHNPejxhtmsvnC+Ks05vLZ0O6G8hEkEufZaw4Q4xHj7H2bQhV27Sf/v1cPl8QZ53e3OY7yLEGMhFo\nDsB0Icajx1j7NoQqbdrvmPdcPl8QZ53enOY7yMwGeh6B5gBMF2JN2Rhr34bQrU1DjnnP5fMFcdbp\nTX2+QxtpHgGaA3C0fsajh3yPGLq1acgx77l8vqBzZVDnQu2Q/nwH6a72riEzO8HMvm9mm4rnZ5rZ\nZjPbXTwuqDsGkUk59e+3ic5Ls2LcI/g88MKU52uBLe6+AthSPBeJIqf+/TbReWlWrfcIzGwJsAH4\nfeDfuft1ZvYScIW7v2Zmi4En3P282d5HaxZLaDn177eJzktYqdwj+CpwBzC143DY3V8rvh4Dhmf6\nRjNbA6wBWLZs2UwvETluOfXvt4nOSzNq6xoys+uAA+6+rdtrvHM5MuMlibvf6+4j7j6yaNGiusIU\nEWm9Ou8RrAauN7NXgL8GrjSzbwHjRZcQxeOBugIIMTllkCa45NIeg9TmMVRpr1yKBUozausacve7\ngLsAzOwK4Hfd/WYz+0PgVuDu4vHhOvYfoihYLoXFqsilPQapzWOo0l65FAuU5jQxs/hu4Boz2w1c\nXTwPKkRRsJwKi5XJpT0Gqc1jqNJeuRQLlGZFSQTu/oS7X1d8PeHuV7n7Cne/2t3fDL2/EEXBcios\nViaX9hikNo+hSnupIJxUMZC1hkJMThmkCS65tMcgtXkMVdorl2KB0qyBTAQhJqcM0gSXXNpjkNo8\nhirtlUuxQGmWis5FeI9U5NI
eg9TmMVRpr1yKBUpYVSeUDXQiEBFps6qJYCC7hgZRm8bWt+lYRVIw\n0GWoB0Wbxta36VhFUqErgsS1aWx9m45VJCVKBIlr09j6Nh2rSEqUCBLXprH1uR2r7mVMp/bIl+4R\nJG5yrPgdR/WbD+KwyqH587jp4iXTSiLcNLIkyWPVvYzp1B550/DRTLRhbH0uC5jnEmcsao90afjo\ngBmaP4+VS88Y6B+sXO4R5BJnLGqP/CkRRKC+02pyuUeQS5yxqD3yp0RQs4d37Gf1uq3cvP5ZVq/b\nyiM79jcdUrJyqTWUS5yxqD3yp3sENVLf6fHJ5X5ILnHGovZITyqL17faZN/pe3yUCCb7TvWD0l0u\nC5jnEmcsao98qWuoRD/9+yH7Ttt0n0FrK4vEpSuCWfQ7NjrUuPg2jdHW2soi8emKoItQ6/xu3LZv\n2raNo/uSWys4FVpbWaQZSgRdpLLOb5vGaOfSXm06J9IOSgRdpLLOb5vGaOfSXm06J9IOSgRdpLLO\nb5vGaOfSXm06J9IOmkdQIpV1fts0RjuX9mrTOZE8ac1iEZGWU9E5ERGpRIlARKTllAhERFpOiUBE\npOWUCEREWk6JIAEqXiYiTVLRuYapeJmINE1XBA1S8TIRSUFticDMTjaz75rZc2b2d2b2pWL7mWa2\n2cx2F48L6oohdSpeJiIpqPOK4BBwpbuvBFYBnzGzS4C1wBZ3XwFsKZ63koqXiUgKaksE3nGweDq3\n+OfADcCGYvsG4Ma6YkidipeJSApqvVlsZicA24DlwH9x92fNbNjdXyteMgYM1xlD6q5f9XFWL1+o\n4mUi0phaE4G7fwCsMrMzgAfN7GeP+n83sxmr3pnZGmANwLJly+oMs3Fa9FtEmhRl1JC7vw38LfAZ\nYNzMFgMUjwe6fM+97j7i7iOLFi2KEaaISCvVOWpoUXElgJmdAlwDvAg8AtxavOxW4OG6YhARkXJ1\ndg0tBjYU9wnmABvdfZOZPQNsNLPbgFeBm2qMQUREStSWCNx9J/DJGbZPAFfVtV8REemNZhbLQFL9\nJpHqVGtIBo7qN4n0RlcEMlBUv0mkd0oEMlBUv0mkd0oEMlBUv0mkd0oEMlBUv0mkd7pZLANH9ZtE\neqNEIANJ9ZtEqlPXkIhIyykRiIi0nBKBiEjLKRGIiLScEoGISMuZ+4wLhCXFzF6nU7J6JguBNyKG\nc7wUZ3i5xKo4w8olTmg+1rPdvXRlrywSwWzMbNTdR5qOo4ziDC+XWBVnWLnECfnEqq4hEZGWUyIQ\nEWm5QUgE9zYdQEWKM7xcYlWcYeUSJ2QSa/b3CEREpD+DcEUgIiJ9yCYRmNnJZvZdM3vOzP7OzL5U\nbD/TzDab2e7icUGicf4nM9tvZjuKf/+iyTgnmdkJZvZ9M9tUPE+qPaeaIdbk2tTMXjGz54t4Rott\nybVplziTa08AMzvDzO43sxfN7AUzuzTRNp0pziTb9GjZJALgEHClu68EVgGfMbNLgLXAFndfAWwp\nnjepW5wA97j7quLf/2wuxGk+D7ww5Xlq7TnV0bFCmm366SKeyWGDqbbp0XFCmu35J8Cj7v4zwEo6\nn4EU23SmOCHNNp0mm0TgHQeLp3OLfw7cAGwotm8AbmwgvCNmiTM5ZrYE+CVg/ZTNSbXnpC6x5iLJ\nNs2BmX0MuBz4cwB3f9/d3yaxNp0lzixkkwjgSNfADuAAsNndnwWG3f214iVjwHBjARa6xAnwW2a2\n08z+IoVLWeCrwB3A1LUdk2vPwkyxQnpt6sDjZrbNzNYU21Js05nihPTa81zgdeDrRbfgejM7lfTa\ntFuckF6bHiOrRODuH7j7KmAJ8Ckz+9mj/t9J4K/vLnF+DfgEne6i14A/ajBEzOw64IC7b+v2mlTa\nc5ZYk2rTwi8U5/4Xgd80s8un/mcqbcrMcabYnicCFwFfc/dPAu9yVDdQIm3aLc4U2/QYWSWCS
cUl\n198CnwHGzWwxQPF4oMnYppoap7uPFwniQ+C/AZ9qNjpWA9eb2SvAXwNXmtm3SLM9Z4w1wTbF3fcX\njweAB+nElFybzhRniu0J7AP2Tbmqvp/OL9zU2nTGOBNt02NkkwjMbJGZnVF8fQpwDfAi8Ahwa/Gy\nW4GHm4mwo1uckx/awr8E/k8T8U1y97vcfYm7nwP8KrDV3W8msfaE7rGm1qZmdqqZnTb5NXBtEVNS\nbdotztTaE8Ddx4C9ZnZesekqYBeJtWm3OFNs05nktGbxYmCDmZ1AJ4FtdPdNZvYMsNHMbqNTofSm\nJoOke5zfNLNVdC5hXwH+TYMxzuZu0mrP2XwlsTYdBh40M+j8bP2luz9qZt8jrTbtFmeqn9HfAu4z\ns5OAHwC/QfGzlVCbwsxx/mmibTqNZhaLiLRcNl1DIiJSDyUCEZGWUyIQEWk5JQIRkZZTIhARaTkl\nApE+FKUEzi++/mLT8YgcDw0fFTlOZnaCu38w5flBd5/fZEwix0NXBCJTmNnvmdlvF1/fY2Zbi6+v\nNLP7zOygmf2RmT0HXGpmT5jZiJndDZxS1Jy/r/iem62zNsUOM/uvxSRDkeQoEYhM9xRwWfH1CDDf\nzOYW254ETgWedfeV7v7tyW9y97XAj4ua858zs38K/Aqwuiju9gHwuZgHIlJVTiUmRGLYBlxsZqfT\nWWRoO52EcBnw23R+oT9Q4X2uAi4GvleUcjiF5gujicxIiUBkCnc/bGYvA78O/G9gJ/BpYDmdFafe\nm3pfYBYGbHD3u+qKVSQUdQ2JHOsp4HfpdAU9Bfxb4PtePrLicNGNBJ3lE3/ZzH4SjqxbfHZdAYv0\nQ4lA5FhP0aki+4y7jwPvFdvK3AvsNLP73H0X8B+Ax8xsJ7C5eE+R5Gj4qIhIy+mKQESk5ZQIRERa\nTolARKTllAhERFpOiUBEpOWUCEREWk6JQESk5ZQIRERa7v8D9IFxdYjzlk0AAAAASUVORK5CYII=\n",
1826 | "text/plain": [
1827 | ""
1828 | ]
1829 | },
1830 | "metadata": {},
1831 | "output_type": "display_data"
1832 | }
1833 | ],
1834 | "source": [
1835 | "hsb.plot(kind='scatter',x='write',y='read')"
1836 | ]
1837 | },
1838 | {
1839 | "cell_type": "code",
1840 | "execution_count": 45,
1841 | "metadata": {
1842 | "collapsed": false
1843 | },
1844 | "outputs": [
1845 | {
1846 | "data": {
1847 | "text/plain": [
1848 | "(0.59677647908804532, 1.1056411255416739e-20)"
1849 | ]
1850 | },
1851 | "execution_count": 45,
1852 | "metadata": {},
1853 | "output_type": "execute_result"
1854 | }
1855 | ],
1856 | "source": [
1857 | "mrstat.pearsonr(hsb.write,hsb.read)"
1858 | ]
1859 | },
1860 | {
1861 | "cell_type": "markdown",
1862 | "metadata": {},
1863 | "source": [
1864 | "Корреляция получилась равной 0.5968, а p-value для нулевой гипотезы об отсутствии корреляции < 0.05, следовательно, можно заключить, что между этими переменными существует статистически значимая корреляция."
1865 | ]
1866 | },
1867 | {
1868 | "cell_type": "markdown",
1869 | "metadata": {},
1870 | "source": [
1871 | "С помощью [этой игры](http://guessthecorrelation.com/) можно научиться определять корреляцию на глаз."
1872 | ]
1873 | },
1874 | {
1875 | "cell_type": "markdown",
1876 | "metadata": {},
1877 | "source": [
1878 | "#### 7.2 Коэффициент корреляции Спирмена"
1879 | ]
1880 | },
1881 | {
1882 | "cell_type": "markdown",
1883 | "metadata": {},
1884 | "source": [
1885 | "Корреляция Спирмена способна уловить нелинейную (монотонную) связь между переменными. Возьмем, например, x и посчитаем корреляцию Пирсона с его экспонентой."
1886 | ]
1887 | },
1888 | {
1889 | "cell_type": "code",
1890 | "execution_count": 46,
1891 | "metadata": {
1892 | "collapsed": false
1893 | },
1894 | "outputs": [
1895 | {
1896 | "data": {
1897 | "text/plain": [
1898 | "(0.25080845773967969, 0.011412991126929171)"
1899 | ]
1900 | },
1901 | "execution_count": 46,
1902 | "metadata": {},
1903 | "output_type": "execute_result"
1904 | }
1905 | ],
1906 | "source": [
1907 | "x = np.linspace(0,100,101)\n",
1908 | "y = np.exp(x)\n",
1909 | "mrstat.pearsonr(x,y)"
1910 | ]
1911 | },
1912 | {
1913 | "cell_type": "markdown",
1914 | "metadata": {},
1915 | "source": [
1916 | "Коэффициент получился очень маленьким, но связь определенно есть - функция монотонно возрастает. Взглянем на график."
1917 | ]
1918 | },
1919 | {
1920 | "cell_type": "code",
1921 | "execution_count": 47,
1922 | "metadata": {
1923 | "collapsed": false
1924 | },
1925 | "outputs": [
1926 | {
1927 | "data": {
1928 | "text/plain": [
1929 | ""
1930 | ]
1931 | },
1932 | "execution_count": 47,
1933 | "metadata": {},
1934 | "output_type": "execute_result"
1935 | },
1936 | {
1937 | "data": {
1938 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEDCAYAAADOc0QpAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEUJJREFUeJzt3X+s3Xddx/Hni+4CVyEU7A1sdy0lZikCAsWbMQeSCZLC\nJGxRjEPxByFZIKBgsIZpgjH+oaQEBQYsC0yYkJE4mtrAsKJOAeMmt93cTwpV1PV2sgJ2Y+4K3Xz7\nx/l2Obvc9p7be8499376fCQn93w/38853/fntH313M/3c843VYUkqS2PG3cBkqThM9wlqUGGuyQ1\nyHCXpAYZ7pLUIMNdkho01nBPck2S+5LcsYzH/HySSjLTbT8zyYEktya5M8mbR1exJK0PGec69yQv\nAx4Erq2q5w3Q/8nA54DHA2+rqtkkj6c3ju8leRJwB3BhVR0ZZe2StJaN9Z17VX0R+E5/W5IfTfJX\nSfYn+VKSZ/ft/kPgPcD/9j3H96vqe93mE3CqSZLWZBBeDfxGVf0E8NvAhwGSvAjYXFWfW/iAJJuT\n3AbcA7zHd+2SznRnjbuAft20yoXAXyQ50fyEJI8D3gf8+mKPq6p7gOcnOQfYk+T6qvrmKpQsSWvS\nmgp3er9JHKuqF/Y3JnkK8Dzg77vQfwawN8lrq2r2RL+qOtKdnP0p4PrVK1uS1pY1NS1TVQ8A30jy\nCwDpeUFV3V9Vm6pqa1VtBW4CXtudUD03yWTX/6nAS4GD4xqDJK0F414KeR3wT8C2JIeTvAn4ZeBN\nSf4FuBO4ZImn+THg5q7/PwDvrarbR1m3JK11Y10KKUkajTU1LSNJGo6xnVDdtGlTbd26dVyHl6R1\naf/+/d+qqqml+o0t3Ldu3crs7OzSHSVJj0ryH4P0c1pGkhpkuEtSgwx3SWqQ4S5JDTLcJalBa+27\nZSSpWXtumWPXvoMcOTbPORsn2bljG5dunx7JsQx3SVoFe26Z44rdtzN//BEA5o7Nc8Xu3jeljCLg\nnZaRpFWwa9/BR4P9hPnjj7Br32i+59Bwl6RVcOTY/LLaV8pwl6RVcM7GyWW1r5ThLkmrYOeObUxO\nbHhM2+TEBnbu2DaS43lCVZJWwYmTpq6WkaTGXLp9emRhvpDTMpLUIMNdkhpkuEtSgwx3SWqQ4S5J\nDTLcJalBS4Z7ks1JbkxyV5I7k7x9kT4XJbk/ya3d7d2jKVeSNIhB1rk/DLyzqg4keTKwP8kXququ\nBf2+VFWvGX6JkqTlWvKde1XdW1UHuvvfBe4GVmcVviTptCxrzj3JVmA7cPMiuy9McluSzyd57kke\nf3mS2SSzR48eXXaxkqTBDBzuSZ4EfAZ4R1U9sGD3AWBLVT0f+CCwZ7HnqKqrq2qmqmampqZOt2ZJ\n0hIGCvckE/SC/VNVtXvh/qp6oKoe7O7fAEwk2TTUSiVJAxtktUyAjwF3V9X7TtLnGV0/kpzfPe+3\nh1moJGlwg6yWeQnwK8DtSW7t2n4X2AJQVVcBrwPekuRhYB64rKpqBPVKkgawZLhX1ZeBLNHnSuDK\nYRUlSVoZP6EqSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1\nyHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMM\nd0lqkOEuSQ0y3CWpQYa7JDXIcJekBi0Z7kk2J7kxyV1J7kzy9kX6JMkHkhxKcluSF42mXEnSIM4a\noM/DwDur6kCSJwP7k3yhqu7q6/Nq4Lzu9mLgI91PSdIYLPnOvaruraoD3f3vAncD0wu6XQJcWz03\nARuTnD30aiVJA1nWnHuSrcB24OYFu6aBe/q2D/OD/wGQ5PIks0lmjx49urxKJUkDGzjckzwJ+Azw\njqp64HQOVlVXV9VMVc1MTU2dzlNIk
gYwULgnmaAX7J+qqt2LdJkDNvdtn9u1SZLGYJDVMgE+Btxd\nVe87Sbe9wK92q2YuAO6vqnuHWKckaRkGWS3zEuBXgNuT3Nq1/S6wBaCqrgJuAC4GDgEPAW8cfqmS\npEEtGe5V9WUgS/Qp4K3DKkqStDJ+QlWSGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLU\nIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y\n3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatCS4Z7kmiT3JbnjJPsvSnJ/klu727uHX6Yk\naTnOGqDPx4ErgWtP0edLVfWaoVQkSVqxJd+5V9UXge+sQi2SpCEZ1pz7hUluS/L5JM8d0nNKkk7T\nINMySzkAbKmqB5NcDOwBzlusY5LLgcsBtmzZMoRDS5IWs+J37lX1QFU92N2/AZhIsukkfa+uqpmq\nmpmamlrpoSVJJ7HicE/yjCTp7p/fPee3V/q8kqTTt+S0TJLrgIuATUkOA78PTABU1VXA64C3JHkY\nmAcuq6oaWcWSpCUtGe5V9fol9l9Jb6mkJGmN8BOqktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGG\nuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhL\nUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatCS4Z7kmiT3JbnjJPuT\n5ANJDiW5LcmLhl+mJGk5zhqgz8eBK4FrT7L/1cB53e3FwEe6n5J0xttzyxy79h3kyLF5ztk4yc4d\n27h0+/TIj7vkO/eq+iLwnVN0uQS4tnpuAjYmOXtYBUrSerXnljmu2H07c8fmKWDu2DxX7L6dPbfM\njfzYw5hznwbu6ds+3LX9gCSXJ5lNMnv06NEhHFqS1q5d+w4yf/yRx7TNH3+EXfsOjvzYq3pCtaqu\nrqqZqpqZmppazUNL0qo7cmx+We3DNIxwnwM2922f27VJ0hntnI2Ty2ofpmGE+17gV7tVMxcA91fV\nvUN4Xkla13bu2MbkxIbHtE1ObGDnjm0jP/aSq2WSXAdcBGxKchj4fWACoKquAm4ALgYOAQ8BbxxV\nsZK0npxYFTOO1TKpqpEfZDEzMzM1Ozs7lmNL0nqVZH9VzSzVz0+oSlKDDHdJapDhLkkNMtwlqUGG\nuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhL\nUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEDhXuS\nVyU5mORQknctsv+iJPcnubW7vXv4pUqSBnXWUh2SbAA+BLwSOAx8JcneqrprQdcvVdVrRlCjJGmZ\nBnnnfj5wqKr+raq+D3wauGS0ZUmSVmKQcJ8G7unbPty1LXRhktuSfD7Jcxd7oiSXJ5lNMnv06NHT\nKFeSNIhhnVA9AGypqucDHwT2LNapqq6uqpmqmpmamhrSoSVJCw0S7nPA5r7tc7u2R1XVA1X1YHf/\nBmAiyaahVSlJWpZBwv0rwHlJnpXk8cBlwN7+DkmekSTd/fO75/32sIuVJA1mydUyVfVwkrcB+4AN\nwDVVdWeSN3f7rwJeB7wlycPAPHBZVdUI65YknULGlcEzMzM1Ozs7lmNL0nqVZH9VzSzVz0+oSlKD\nlpyWkSQtz55b5ti17yBHjs1zzsZJdu7YxqXbF1tBPjqGuyQN0Z5b5rhi9+3MH38EgLlj81yx+3aA\nVQ14p2UkaYh27Tv4aLCfMH/8EXbtO7iqdRjukjRER47NL6t9VAx3SRqiczZOLqt9VAx3SRqinTu2\nM
Tmx4TFtkxMb2Llj26rW4QlVSRqiEydNXS0jSY25dPv0qof5Qk7LSFKDDHdJapDhLkkNMtwlqUGG\nuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg/yEqiQNwVq4QEc/w12SVmitXKCjn9MykrRCa+UCHf0Md0la\nobVygY5+hrskrdBauUBHP8NdklZorVygo58nVCXpNPWvkHnK5ARPnHgcxx467moZSVqvFq6QOTZ/\nnMmJDfzJL75w7BfqAKdlJOm0rMUVMv185y5Jp7Dww0k//ewpbvzqUebW4AqZfgOFe5JXAe8HNgAf\nrao/XrA/3f6LgYeAX6+qA0Ou9QfmtxIend868YIv3Hcm3/d18XXxdVnZ/f9+6DgBqsuguWPzfPKm\n/zxlTo1zhUy/VNWpOyQbgK8BrwQOA18BXl9Vd/X1uRj4DXrh/mLg/VX14lM978zMTM3Ozg5c6ML5\nLUlaayYnNvBHP/fjI51zT7K/qmaW6jfInPv5wKGq+req+j7waeCSBX0uAa6tnpuAjUnOXnbVp7DY\n/JYkrRXTGydHHuzLMci0zDRwT9/2YXrvzpfqMw3c298pyeXA5QBbtmxZVqFrZR5Lkhaa3jjJP77r\n5eMu4zFWdbVMVV1dVTNVNTM1NbWsx66VeSxJ6jfuDyudzCDhPgds7ts+t2tbbp8VWewTYJK0GtL9\nnN44yRsu2ML0xknC2puK6TfItMxXgPOSPIteYF8G/NKCPnuBtyX5NL0pm/ur6l6G6MSL52oZVz/4\nuvi6rPbrMu5Pm56OJcO9qh5O8jZgH72lkNdU1Z1J3tztvwq4gd5KmUP0lkK+cRTFXrp9et29wJI0\nDgOtc6+qG+gFeH/bVX33C3jrcEuTJJ0uv35AkhpkuEtSgwx3SWqQ4S5JDVryu2VGduDkKPAfp/nw\nTcC3hljOeuCYzwyO+cywkjE/s6qW/BTo2MJ9JZLMDvLFOS1xzGcGx3xmWI0xOy0jSQ0y3CWpQes1\n3K8edwFj4JjPDI75zDDyMa/LOXdJ0qmt13fukqRTMNwlqUHrLtyTvCrJwSSHkrxr3PWMQpLNSW5M\ncleSO5O8vWt/WpIvJPl69/Op4651mJJsSHJLks92262Pd2OS65N8NcndSX7yDBjzb3V/p+9Icl2S\nJ7Y25iTXJLkvyR19bScdY5Irujw7mGTHsOpYV+HeXaz7Q8CrgecAr0/ynPFWNRIPA++squcAFwBv\n7cb5LuBvq+o84G+77Za8Hbi7b7v18b4f+KuqejbwAnpjb3bMSaaB3wRmqup59L5C/DLaG/PHgVct\naFt0jN2/68uA53aP+XCXcyu2rsKdwS7Wve5V1b1VdaC7/116/+in6Y31E123TwCXjqfC4UtyLvCz\nwEf7mlse71OAlwEfA6iq71fVMRoec+csYDLJWcAPAUdobMxV9UXgOwuaTzbGS4BPV9X3quob9K6J\ncf4w6lhv4X6yC3E3K8lWYDtwM/D0vitc/Rfw9DGVNQp/CvwO8H99bS2P91nAUeDPuqmojyb5YRoe\nc1XNAe8F/hO4l94V2/6ahsfc52RjHFmmrbdwP6MkeRLwGeAdVfVA/77uAilNrGNN8hrgvqraf7I+\nLY23cxbwIuAjVbUd+B8WTEe0NuZunvkSev+xnQP8cJI39PdpbcyLWa0xrrdwH/mFuNeKJBP0gv1T\nVbW7a/5mkrO7/WcD942rviF7CfDaJP9Ob6rt5Uk+Sbvjhd47tMNVdXO3fT29sG95zD8DfKOqjlbV\ncWA3cCFtj/mEk41xZJm23sL90Yt1J3k8vRMRe8dc09AlCb252Lur6n19u/YCv9bd/zXgL1e7tlGo\nqiuq6tyq2krvz/TvquoNNDpegKr6L+CeJNu6plcAd9HwmOlNx1yQ5Ie6v+OvoHc+qeUxn3CyMe4F\nLkvyhCTPAs4D/nkoR6yqdXWjdyHurwH/CvzeuOsZ0RhfSu/Xttu
AW7vbxcCP0DvT/nXgb4CnjbvW\nEYz9IuCz3f2mxwu8EJjt/pz3AE89A8b8B8BXgTuAPwee0NqYgevonVM4Tu83tDedaozA73V5dhB4\n9bDq8OsHJKlB621aRpI0AMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNej/AeMoV9tL6EFyAAAA\nAElFTkSuQmCC\n",
1939 | "text/plain": [
1940 | ""
1941 | ]
1942 | },
1943 | "metadata": {},
1944 | "output_type": "display_data"
1945 | }
1946 | ],
1947 | "source": [
1948 | "plt.scatter(x,y)"
1949 | ]
1950 | },
1951 | {
1952 | "cell_type": "markdown",
1953 | "metadata": {},
1954 | "source": [
1955 | "Теперь посчитаем корреляцию Спирмена."
1956 | ]
1957 | },
1958 | {
1959 | "cell_type": "code",
1960 | "execution_count": 48,
1961 | "metadata": {
1962 | "collapsed": false
1963 | },
1964 | "outputs": [
1965 | {
1966 | "data": {
1967 | "text/plain": [
1968 | "SpearmanrResult(correlation=0.99999999999999989, pvalue=0.0)"
1969 | ]
1970 | },
1971 | "execution_count": 48,
1972 | "metadata": {},
1973 | "output_type": "execute_result"
1974 | }
1975 | ],
1976 | "source": [
1977 | "mrstat.spearmanr(x,y)"
1978 | ]
1979 | },
1980 | {
1981 | "cell_type": "markdown",
1982 | "metadata": {},
1983 | "source": [
1984 | "Коэффициент Спирмена равен 1! Когда корреляция Спирмена больше, чем корреляция Пирсона, - это знак, что можно сделать нелинейное преобразование и взять, например, логарифм. Это позволит перейти к линейным отношениям, а статистика любит линейные отношения."
1985 | ]
1986 | },
1987 | {
1988 | "cell_type": "markdown",
1989 | "metadata": {},
1990 | "source": [
1991 | "#### 7.3 Коэффициент Крамера"
1992 | ]
1993 | },
1994 | {
1995 | "cell_type": "markdown",
1996 | "metadata": {},
1997 | "source": [
1998 | "Коэффициент Крамера позволяет оценить связь между двумя номинальными переменными. Он основан на статистике хи-квадрат. Посчитаем для связи между типом школы и полом ученика."
1999 | ]
2000 | },
2001 | {
2002 | "cell_type": "code",
2003 | "execution_count": 49,
2004 | "metadata": {
2005 | "collapsed": false
2006 | },
2007 | "outputs": [
2008 | {
2009 | "data": {
2010 | "text/html": [
2011 | "\n",
2012 | "
\n",
2013 | " \n",
2014 | " \n",
2015 | " female | \n",
2016 | " 0 | \n",
2017 | " 1 | \n",
2018 | "
\n",
2019 | " \n",
2020 | " schtyp | \n",
2021 | " | \n",
2022 | " | \n",
2023 | "
\n",
2024 | " \n",
2025 | " \n",
2026 | " \n",
2027 | " 1 | \n",
2028 | " 77 | \n",
2029 | " 91 | \n",
2030 | "
\n",
2031 | " \n",
2032 | " 2 | \n",
2033 | " 14 | \n",
2034 | " 18 | \n",
2035 | "
\n",
2036 | " \n",
2037 | "
\n",
2038 | "
"
2039 | ],
2040 | "text/plain": [
2041 | "female 0 1\n",
2042 | "schtyp \n",
2043 | "1 77 91\n",
2044 | "2 14 18"
2045 | ]
2046 | },
2047 | "execution_count": 49,
2048 | "metadata": {},
2049 | "output_type": "execute_result"
2050 | }
2051 | ],
2052 | "source": [
2053 | "table"
2054 | ]
2055 | },
2056 | {
2057 | "cell_type": "code",
2058 | "execution_count": 50,
2059 | "metadata": {
2060 | "collapsed": false
2061 | },
2062 | "outputs": [
2063 | {
2064 | "data": {
2065 | "text/plain": [
2066 | "(0.01533749547184478, 0.82828242684033948)"
2067 | ]
2068 | },
2069 | "execution_count": 50,
2070 | "metadata": {},
2071 | "output_type": "execute_result"
2072 | }
2073 | ],
2074 | "source": [
2075 | "mrstat.vcramer(table.values)"
2076 | ]
2077 | },
2078 | {
2079 | "cell_type": "markdown",
2080 | "metadata": {},
2081 | "source": [
2082 | "Такое маленькое значение (0.01) говорит о том, что связи нет, а p-value не позволяет отвергнуть нулевую гипотезу о том, что пропорции не отличаются. Коэффициент Крамера принимает значения от 0 до 1, что делает его хорошо интерпретируемым."
2083 | ]
2084 | },
2085 | {
2086 | "cell_type": "markdown",
2087 | "metadata": {},
2088 | "source": [
2089 | "### 8. Связанные выборки"
2090 | ]
2091 | },
2092 | {
2093 | "cell_type": "markdown",
2094 | "metadata": {},
2095 | "source": [
2096 | "#### 8.1 Ти-тест для связанных выборок"
2097 | ]
2098 | },
2099 | {
2100 | "cell_type": "markdown",
2101 | "metadata": {},
2102 | "source": [
2103 | "Тест для связанных выборок используется, например, при клинических испытаниях новых лекарств. Конкретный пример (взят из [курса](https://www.coursera.org/learn/stats-for-data-analysis/home/welcome)):\n",
2104 | "\n",
2105 | "В рамках исследования эффективности препарата метилфенидат 24 пациента с синдромом дефицита внимания и гиперактивности в течение недели принимали либо метилфенидат, либо плацебо. В конце недели каждый пациент проходил тест на способность к подавлению импульсивных поведенческих реакций. На втором этапе плацебо и препарат менялись, и после недельного курса каждый испытуемый проходил второй тест.\n",
2106 | "\n",
2107 | "Необходимо оценить эффективность препарата."
2108 | ]
2109 | },
2110 | {
2111 | "cell_type": "code",
2112 | "execution_count": 51,
2113 | "metadata": {
2114 | "collapsed": false
2115 | },
2116 | "outputs": [
2117 | {
2118 | "data": {
2119 | "text/html": [
2120 | "\n",
2121 | "
\n",
2122 | " \n",
2123 | " \n",
2124 | " | \n",
2125 | " Placebo | \n",
2126 | " Methylphenidate | \n",
2127 | "
\n",
2128 | " \n",
2129 | " \n",
2130 | " \n",
2131 | " 0 | \n",
2132 | " 57 | \n",
2133 | " 62 | \n",
2134 | "
\n",
2135 | " \n",
2136 | " 1 | \n",
2137 | " 27 | \n",
2138 | " 49 | \n",
2139 | "
\n",
2140 | " \n",
2141 | " 2 | \n",
2142 | " 32 | \n",
2143 | " 30 | \n",
2144 | "
\n",
2145 | " \n",
2146 | " 3 | \n",
2147 | " 31 | \n",
2148 | " 34 | \n",
2149 | "
\n",
2150 | " \n",
2151 | " 4 | \n",
2152 | " 34 | \n",
2153 | " 38 | \n",
2154 | "
\n",
2155 | " \n",
2156 | "
\n",
2157 | "
"
2158 | ],
2159 | "text/plain": [
2160 | " Placebo Methylphenidate\n",
2161 | "0 57 62\n",
2162 | "1 27 49\n",
2163 | "2 32 30\n",
2164 | "3 31 34\n",
2165 | "4 34 38"
2166 | ]
2167 | },
2168 | "execution_count": 51,
2169 | "metadata": {},
2170 | "output_type": "execute_result"
2171 | }
2172 | ],
2173 | "source": [
2174 | "data = pd.read_csv('ADHD.txt', sep = ' ', header = 0)\n",
2175 | "data.columns = ['Placebo', 'Methylphenidate']\n",
2176 | "data.head()"
2177 | ]
2178 | },
2179 | {
2180 | "cell_type": "markdown",
2181 | "metadata": {},
2182 | "source": [
2183 | "Эти выборки являются связанными, т.к. каждая строчка содержит результаты одного и того же пациента с лекарством и без. Проверим, есть ли статистически значимый эффект от приема препарата."
2184 | ]
2185 | },
2186 | {
2187 | "cell_type": "code",
2188 | "execution_count": 52,
2189 | "metadata": {
2190 | "collapsed": false
2191 | },
2192 | "outputs": [
2193 | {
2194 | "data": {
2195 | "text/plain": [
2196 | "Ttest_relResult(statistic=3.2223624451230406, pvalue=0.003771488176381471)"
2197 | ]
2198 | },
2199 | "execution_count": 52,
2200 | "metadata": {},
2201 | "output_type": "execute_result"
2202 | }
2203 | ],
2204 | "source": [
2205 | "mrstat.ttest_rel(data.Methylphenidate,data.Placebo)"
2206 | ]
2207 | },
2208 | {
2209 | "cell_type": "markdown",
2210 | "metadata": {},
2211 | "source": [
2212 | "Нулевая гипотеза в данном случае заключается в том, что средние не отличаются. P-value получилось < 0.05, а это значит, что нулевая гипотеза отвергается и можно заключить, что средние отличаются."
2213 | ]
2214 | },
2215 | {
2216 | "cell_type": "markdown",
2217 | "metadata": {},
2218 | "source": [
2219 | "#### 8.2 Тест для разности двух долей - связанные выборки"
2220 | ]
2221 | },
2222 | {
2223 | "cell_type": "markdown",
2224 | "metadata": {},
2225 | "source": [
2226 | "Сгенерируем некоторые выборки и представим, что они связанные. Здесь та же история, что и с ти-тестом - представим, что это доля до и после какого-либо мероприятия. Нужно оценить, повлияло ли мероприятие на долю."
2227 | ]
2228 | },
2229 | {
2230 | "cell_type": "code",
2231 | "execution_count": 53,
2232 | "metadata": {
2233 | "collapsed": false
2234 | },
2235 | "outputs": [],
2236 | "source": [
2237 | "a = [0]*150 + [1]*50\n",
2238 | "b = [0]*170 + [1]*30"
2239 | ]
2240 | },
2241 | {
2242 | "cell_type": "code",
2243 | "execution_count": 54,
2244 | "metadata": {
2245 | "collapsed": false
2246 | },
2247 | "outputs": [
2248 | {
2249 | "name": "stdout",
2250 | "output_type": "stream",
2251 | "text": [
2252 | "0.25\n",
2253 | "0.15\n"
2254 | ]
2255 | }
2256 | ],
2257 | "source": [
2258 | "print np.mean(a)\n",
2259 | "print np.mean(b)"
2260 | ]
2261 | },
2262 | {
2263 | "cell_type": "code",
2264 | "execution_count": 55,
2265 | "metadata": {
2266 | "collapsed": false
2267 | },
2268 | "outputs": [
2269 | {
2270 | "data": {
2271 | "text/plain": [
2272 | "2.4284674728924927e-06"
2273 | ]
2274 | },
2275 | "execution_count": 55,
2276 | "metadata": {},
2277 | "output_type": "execute_result"
2278 | }
2279 | ],
2280 | "source": [
2281 | "mrstat.proportions_diff_rel(a,b)"
2282 | ]
2283 | },
2284 | {
2285 | "cell_type": "markdown",
2286 | "metadata": {},
2287 | "source": [
2288 | "P-value < 0.05 следовательно доли значимо различаются."
2289 | ]
2290 | },
2291 | {
2292 | "cell_type": "markdown",
2293 | "metadata": {},
2294 | "source": [
2295 | "#### 8.3 Доверительный интервал для разности долей "
2296 | ]
2297 | },
2298 | {
2299 | "cell_type": "code",
2300 | "execution_count": 56,
2301 | "metadata": {
2302 | "collapsed": false
2303 | },
2304 | "outputs": [
2305 | {
2306 | "data": {
2307 | "text/plain": [
2308 | "(0.058422885269509665, 0.14157711473049034)"
2309 | ]
2310 | },
2311 | "execution_count": 56,
2312 | "metadata": {},
2313 | "output_type": "execute_result"
2314 | }
2315 | ],
2316 | "source": [
2317 | "mrstat.proportions_confint_diff_rel(a,b)"
2318 | ]
2319 | },
2320 | {
2321 | "cell_type": "markdown",
2322 | "metadata": {},
2323 | "source": [
2324 | "Доверительный интервал не содержит 0 следовательно доли различаются."
2325 | ]
2326 | },
2327 | {
2328 | "cell_type": "markdown",
2329 | "metadata": {
2330 | "collapsed": true
2331 | },
2332 | "source": [
2333 | "#### 8.4 Критерий знаков для связанных выборок"
2334 | ]
2335 | },
2336 | {
2337 | "cell_type": "code",
2338 | "execution_count": 57,
2339 | "metadata": {
2340 | "collapsed": false
2341 | },
2342 | "outputs": [
2343 | {
2344 | "data": {
2345 | "text/plain": [
2346 | "(5.0, 0.063914656639099121)"
2347 | ]
2348 | },
2349 | "execution_count": 57,
2350 | "metadata": {},
2351 | "output_type": "execute_result"
2352 | }
2353 | ],
2354 | "source": [
2355 | "mrstat.sign_test(data.Methylphenidate - data.Placebo)"
2356 | ]
2357 | },
2358 | {
2359 | "cell_type": "markdown",
2360 | "metadata": {},
2361 | "source": [
2362 | "P-value > 0.05 следовательно нельзя заявлять, что медианы выборок отличаются."
2363 | ]
2364 | },
2365 | {
2366 | "cell_type": "markdown",
2367 | "metadata": {},
2368 | "source": [
2369 | "#### 8.5 Критерий знаковых рангов Вилкоксона для связанных выборок"
2370 | ]
2371 | },
2372 | {
2373 | "cell_type": "markdown",
2374 | "metadata": {},
2375 | "source": [
2376 | "Нулевая гипотеза: две связанные выборки взяты из одной генеральной совокупности, т.е. не отличаются."
2377 | ]
2378 | },
2379 | {
2380 | "cell_type": "code",
2381 | "execution_count": 58,
2382 | "metadata": {
2383 | "collapsed": false
2384 | },
2385 | "outputs": [
2386 | {
2387 | "data": {
2388 | "text/plain": [
2389 | "WilcoxonResult(statistic=48.5, pvalue=0.0037070137534509031)"
2390 | ]
2391 | },
2392 | "execution_count": 58,
2393 | "metadata": {},
2394 | "output_type": "execute_result"
2395 | }
2396 | ],
2397 | "source": [
2398 | "mrstat.wilcoxon(data.Methylphenidate,data.Placebo)"
2399 | ]
2400 | },
2401 | {
2402 | "cell_type": "markdown",
2403 | "metadata": {},
2404 | "source": [
2405 | "P-value < 0.05 следовательно нулевая гипотеза отвергается - можно заявлять, что выборки различаются."
2406 | ]
2407 | },
2408 | {
2409 | "cell_type": "markdown",
2410 | "metadata": {},
2411 | "source": [
2412 | "### 9. Дисперсионный анализ"
2413 | ]
2414 | },
2415 | {
2416 | "cell_type": "markdown",
2417 | "metadata": {},
2418 | "source": [
2419 | "#### 9.1 Однофакторная ANOVA"
2420 | ]
2421 | },
2422 | {
2423 | "cell_type": "markdown",
2424 | "metadata": {},
2425 | "source": [
2426 | "Используется для проверки гипотезы о том, что средние для двух и более групп одной вещественной переменной различаются. Проверим различается ли среднее по тесту write для разных типов школьной программы."
2427 | ]
2428 | },
2429 | {
2430 | "cell_type": "code",
2431 | "execution_count": 59,
2432 | "metadata": {
2433 | "collapsed": false
2434 | },
2435 | "outputs": [
2436 | {
2437 | "data": {
2438 | "text/plain": [
2439 | "F_onewayResult(statistic=21.274737826343454, pvalue=4.3101626235711319e-09)"
2440 | ]
2441 | },
2442 | "execution_count": 59,
2443 | "metadata": {},
2444 | "output_type": "execute_result"
2445 | }
2446 | ],
2447 | "source": [
2448 | "stats.f_oneway(hsb[hsb['prog'] == 1]['write'],\n",
2449 | " hsb[hsb['prog'] == 2]['write'],\n",
2450 | " hsb[hsb['prog'] == 3]['write'])"
2451 | ]
2452 | },
2453 | {
2454 | "cell_type": "markdown",
2455 | "metadata": {},
2456 | "source": [
2457 | "P-value < 0.05 следовательно нулевая гипотеза о равенстве средних не принимается. Посмотрим на эти средние."
2458 | ]
2459 | },
2460 | {
2461 | "cell_type": "code",
2462 | "execution_count": 60,
2463 | "metadata": {
2464 | "collapsed": false
2465 | },
2466 | "outputs": [
2467 | {
2468 | "data": {
2469 | "text/html": [
2470 | "\n",
2471 | "
\n",
2472 | " \n",
2473 | " \n",
2474 | " | \n",
2475 | " mean | \n",
2476 | " std | \n",
2477 | " count | \n",
2478 | "
\n",
2479 | " \n",
2480 | " prog | \n",
2481 | " | \n",
2482 | " | \n",
2483 | " | \n",
2484 | "
\n",
2485 | " \n",
2486 | " \n",
2487 | " \n",
2488 | " 1 | \n",
2489 | " 51.333333 | \n",
2490 | " 9.397775 | \n",
2491 | " 45 | \n",
2492 | "
\n",
2493 | " \n",
2494 | " 2 | \n",
2495 | " 56.257143 | \n",
2496 | " 7.943343 | \n",
2497 | " 105 | \n",
2498 | "
\n",
2499 | " \n",
2500 | " 3 | \n",
2501 | " 46.760000 | \n",
2502 | " 9.318754 | \n",
2503 | " 50 | \n",
2504 | "
\n",
2505 | " \n",
2506 | "
\n",
2507 | "
"
2508 | ],
2509 | "text/plain": [
2510 | " mean std count\n",
2511 | "prog \n",
2512 | "1 51.333333 9.397775 45\n",
2513 | "2 56.257143 7.943343 105\n",
2514 | "3 46.760000 9.318754 50"
2515 | ]
2516 | },
2517 | "execution_count": 60,
2518 | "metadata": {},
2519 | "output_type": "execute_result"
2520 | }
2521 | ],
2522 | "source": [
2523 | "hsb.groupby('prog')['write'].agg(['mean','std','count'])"
2524 | ]
2525 | },
2526 | {
2527 | "cell_type": "markdown",
2528 | "metadata": {},
2529 | "source": [
2530 | "Действительно - средние различаются. И ANOVA подтверждает, что эти различия статистически значимые."
2531 | ]
2532 | },
2533 | {
2534 | "cell_type": "markdown",
2535 | "metadata": {},
2536 | "source": [
2537 | "#### 9.2 Критерий Краскела-Уоллиса"
2538 | ]
2539 | },
2540 | {
2541 | "cell_type": "markdown",
2542 | "metadata": {},
2543 | "source": [
2544 | "Это непараметрический аналог ANOVA. Применяется тогда, когда нельзя сказать, что данные в группах распределены нормально."
2545 | ]
2546 | },
2547 | {
2548 | "cell_type": "code",
2549 | "execution_count": 61,
2550 | "metadata": {
2551 | "collapsed": false
2552 | },
2553 | "outputs": [
2554 | {
2555 | "data": {
2556 | "text/plain": [
2557 | "KruskalResult(statistic=34.045178212149644, pvalue=4.0474685563408278e-08)"
2558 | ]
2559 | },
2560 | "execution_count": 61,
2561 | "metadata": {},
2562 | "output_type": "execute_result"
2563 | }
2564 | ],
2565 | "source": [
2566 | "stats.kruskal(hsb[hsb['prog'] == 1]['write'],\n",
2567 | " hsb[hsb['prog'] == 2]['write'],\n",
2568 | " hsb[hsb['prog'] == 3]['write'])"
2569 | ]
2570 | },
2571 | {
2572 | "cell_type": "markdown",
2573 | "metadata": {},
2574 | "source": [
2575 | "Видим, что p-value также намного меньше 0.05, то есть вывод тот же, что и у ANOVA."
2576 | ]
2577 | },
2578 | {
2579 | "cell_type": "markdown",
2580 | "metadata": {},
2581 | "source": [
2582 | "#### 9.3 Двухфакторная ANOVA"
2583 | ]
2584 | },
2585 | {
2586 | "cell_type": "markdown",
2587 | "metadata": {},
2588 | "source": [
2589 | "Применяется, когда нужно понять, влияют ли на вещественную переменную две категориальные переменные. Допустим, хочется понять, зависят ли оценки по тесту write от пола и социально-экономического статуса учеников."
2590 | ]
2591 | },
2592 | {
2593 | "cell_type": "code",
2594 | "execution_count": 62,
2595 | "metadata": {
2596 | "collapsed": true
2597 | },
2598 | "outputs": [],
2599 | "source": [
2600 | "from mrstat import ols\n",
2601 | "from mrstat import anova_lm"
2602 | ]
2603 | },
2604 | {
2605 | "cell_type": "code",
2606 | "execution_count": 63,
2607 | "metadata": {
2608 | "collapsed": false
2609 | },
2610 | "outputs": [],
2611 | "source": [
2612 | "data = hsb[['write','female','ses']]"
2613 | ]
2614 | },
2615 | {
2616 | "cell_type": "code",
2617 | "execution_count": 64,
2618 | "metadata": {
2619 | "collapsed": false
2620 | },
2621 | "outputs": [
2622 | {
2623 | "name": "stderr",
2624 | "output_type": "stream",
2625 | "text": [
2626 | "C:\\Anaconda2\\lib\\site-packages\\scipy\\stats\\_distn_infrastructure.py:875: RuntimeWarning: invalid value encountered in greater\n",
2627 | " return (self.a < x) & (x < self.b)\n",
2628 | "C:\\Anaconda2\\lib\\site-packages\\scipy\\stats\\_distn_infrastructure.py:875: RuntimeWarning: invalid value encountered in less\n",
2629 | " return (self.a < x) & (x < self.b)\n",
2630 | "C:\\Anaconda2\\lib\\site-packages\\scipy\\stats\\_distn_infrastructure.py:1814: RuntimeWarning: invalid value encountered in less_equal\n",
2631 | " cond2 = cond0 & (x <= self.a)\n"
2632 | ]
2633 | }
2634 | ],
2635 | "source": [
2636 | "formula = 'write ~ C(female) + C(ses) + C(female):C(ses)'\n",
2637 | "model = ols(formula, data).fit()\n",
2638 | "aov_table = anova_lm(model, typ=1)"
2639 | ]
2640 | },
2641 | {
2642 | "cell_type": "code",
2643 | "execution_count": 65,
2644 | "metadata": {
2645 | "collapsed": false
2646 | },
2647 | "outputs": [
2648 | {
2649 | "data": {
2650 | "text/html": [
2651 | "\n",
2652 | "
\n",
2653 | " \n",
2654 | " \n",
2655 | " | \n",
2656 | " df | \n",
2657 | " sum_sq | \n",
2658 | " mean_sq | \n",
2659 | " F | \n",
2660 | " PR(>F) | \n",
2661 | "
\n",
2662 | " \n",
2663 | " \n",
2664 | " \n",
2665 | " C(female) | \n",
2666 | " 1.0 | \n",
2667 | " 1176.213845 | \n",
2668 | " 1176.213845 | \n",
2669 | " 14.626683 | \n",
2670 | " 0.000177 | \n",
2671 | "
\n",
2672 | " \n",
2673 | " C(ses) | \n",
2674 | " 2.0 | \n",
2675 | " 1080.599437 | \n",
2676 | " 540.299718 | \n",
2677 | " 6.718840 | \n",
2678 | " 0.001509 | \n",
2679 | "
\n",
2680 | " \n",
2681 | " C(female):C(ses) | \n",
2682 | " 2.0 | \n",
2683 | " 21.430904 | \n",
2684 | " 10.715452 | \n",
2685 | " 0.133251 | \n",
2686 | " 0.875326 | \n",
2687 | "
\n",
2688 | " \n",
2689 | " Residual | \n",
2690 | " 194.0 | \n",
2691 | " 15600.630814 | \n",
2692 | " 80.415623 | \n",
2693 | " NaN | \n",
2694 | " NaN | \n",
2695 | "
\n",
2696 | " \n",
2697 | "
\n",
2698 | "
"
2699 | ],
2700 | "text/plain": [
2701 | " df sum_sq mean_sq F PR(>F)\n",
2702 | "C(female) 1.0 1176.213845 1176.213845 14.626683 0.000177\n",
2703 | "C(ses) 2.0 1080.599437 540.299718 6.718840 0.001509\n",
2704 | "C(female):C(ses) 2.0 21.430904 10.715452 0.133251 0.875326\n",
2705 | "Residual 194.0 15600.630814 80.415623 NaN NaN"
2706 | ]
2707 | },
2708 | "execution_count": 65,
2709 | "metadata": {},
2710 | "output_type": "execute_result"
2711 | }
2712 | ],
2713 | "source": [
2714 | "aov_table"
2715 | ]
2716 | },
2717 | {
2718 | "cell_type": "markdown",
2719 | "metadata": {
2720 | "collapsed": true
2721 | },
2722 | "source": [
2723 | "По данной таблице можно сделать следующие выводы:\n",
2724 | "- write зависит от female (p-value = 0.000177 < 0.05)\n",
2725 | "- write зависит от ses (p-value = 0.001509 < 0.05)\n",
2726 | "- write не зависит от взаимодействия female и ses (p-value = 0.875326 > 0.05)"
2727 | ]
2728 | },
2729 | {
2730 | "cell_type": "markdown",
2731 | "metadata": {},
2732 | "source": [
2733 | "Спасибо за внимание! )"
2734 | ]
2735 | }
2736 | ],
2737 | "metadata": {
2738 | "anaconda-cloud": {},
2739 | "kernelspec": {
2740 | "display_name": "Python 2",
2741 | "language": "python",
2742 | "name": "python2"
2743 | },
2744 | "language_info": {
2745 | "codemirror_mode": {
2746 | "name": "ipython",
2747 | "version": 2
2748 | },
2749 | "file_extension": ".py",
2750 | "mimetype": "text/x-python",
2751 | "name": "python",
2752 | "nbconvert_exporter": "python",
2753 | "pygments_lexer": "ipython2",
2754 | "version": "2.7.13"
2755 | }
2756 | },
2757 | "nbformat": 4,
2758 | "nbformat_minor": 0
2759 | }
2760 |
--------------------------------------------------------------------------------
/mrstat.py:
--------------------------------------------------------------------------------
1 | from scipy.stats import probplot as qq_plot
2 | from scipy.stats import ttest_1samp, shapiro, chi2_contingency, wilcoxon, mannwhitneyu
3 | from scipy.stats import ttest_ind, ttest_rel, fisher_exact
4 | from statsmodels.sandbox.stats.multicomp import multipletests
5 | from statsmodels.stats.weightstats import CompareMeans, DescrStatsW
6 | from statsmodels.stats.weightstats import zconfint
7 | from statsmodels.stats.descriptivestats import sign_test
8 | from statsmodels.stats.proportion import proportion_confint
9 | from statsmodels.stats.proportion import samplesize_confint_proportion
10 | from scipy.stats import pearsonr, spearmanr, kstest, ks_2samp, chisquare
11 | from statsmodels.formula.api import ols
12 | from statsmodels.stats.anova import anova_lm
13 | from scipy import stats
14 | import itertools
15 | import numpy as np
16 | import statsmodels.stats.api as sms
17 |
def get_z(mu,mu_0,sigma,n):
    '''
    z-statistic of an observed mean against a hypothesised mean

    mu    - observed mean
    mu_0  - mean under the null hypothesis
    sigma - standard deviation
    n     - sample size

    returns (mu - mu_0) divided by the standard error sigma / sqrt(n)
    '''
    standard_error = sigma / np.sqrt(n)
    return (mu - mu_0) / standard_error
21 |
def mean_diff_confint_ind(sample1,sample2):
    '''
    confidence interval for the difference of means of two independent samples

    Wraps each sample in DescrStatsW and returns whatever
    CompareMeans.tconfint_diff() yields with its default arguments.
    '''
    first = DescrStatsW(sample1)
    second = DescrStatsW(sample2)
    comparison = CompareMeans(first, second)
    return comparison.tconfint_diff()
25 |
def mean_diff_confint_rel(sample1,sample2):
    '''
    confidence interval for the mean of paired differences

    The samples are subtracted element-wise (sample1 - sample2), so they must
    support that operation; the result of DescrStatsW(...).tconfint_mean()
    with its default arguments is returned.
    '''
    paired_diff = sample1 - sample2
    return DescrStatsW(paired_diff).tconfint_mean()
28 |
def prop_test(sample,p_0,alternative='two-sided'):
    '''
    z-test p-value for a sample proportion against p_0

    sample      - object with a .mean() method and a length (0/1 values)
    p_0         - proportion under the null hypothesis
    alternative - 'two-sided', 'less' or 'greater' (passed to get_norm_p)

    The standard error is computed from the observed proportion.
    '''
    p_hat = sample.mean()
    size = len(sample)
    std_err = np.sqrt(p_hat * (1 - p_hat) / size)
    z_stat = (p_hat - p_0) / std_err
    return get_norm_p(z_stat, alternative=alternative)
35 |
def prop_confint(sample,method='normal'):
    '''
    confidence interval for a proportion from a 0/1 sample

    sum(sample) is used as the number of successes and len(sample) as the
    number of trials; method is forwarded to proportion_confint.
    '''
    successes = sum(sample)
    trials = len(sample)
    return proportion_confint(successes, trials, method=method)
38 |
def get_bootstrap_samples(data, n_samples):
    '''
    draw bootstrap resamples (with replacement) from data

    data      - sequence of observations (list, NumPy array, pandas Series, ...)
    n_samples - number of resamples to draw

    returns an array whose first axis has length n_samples and whose second
    axis has length len(data); each row is one resample
    '''
    # Convert first: plain lists cannot be indexed with an index array, and a
    # pandas Series would be indexed by label rather than by position.
    values = np.asarray(data)
    size = len(values)
    indices = np.random.randint(0, size, (n_samples, size))
    return values[indices]
43 |
def stat_intervals(stat, alpha):
    '''
    percentile interval of a collection of statistics

    stat  - values of the statistic (e.g. bootstrap estimates)
    alpha - significance level; the interval spans the alpha/2 and
            1 - alpha/2 percentiles

    returns the two boundaries as computed by np.percentile
    '''
    lower_pct = 100 * alpha / 2.
    upper_pct = 100 * (1 - alpha / 2.)
    return np.percentile(stat, [lower_pct, upper_pct])
47 |
def bootstrap_conf_int(data,stat_func,alpha=0.05,n_samples=1000):
    '''
    bootstrap percentile confidence interval for a statistic

    data      - observations to resample (see get_bootstrap_samples)
    stat_func - callable applied to each resample, e.g. np.median
    alpha     - significance level of the interval
    n_samples - number of bootstrap resamples

    example:
    a = np.random.normal(size=1000)
    bootstrap_conf_int(a,np.median)
    '''
    resamples = get_bootstrap_samples(data, n_samples)
    scores = list(map(stat_func, resamples))
    return stat_intervals(scores, alpha)
55 |
def bootstrap_test(sample, param, stat_func, n_samples = 1000, alternative = 'two-sided'):
    '''
    bootstrap p-value for the hypothesis stat_func(population) == param

    sample      - observations to resample (see get_bootstrap_samples)
    param       - value of the statistic under the null hypothesis
    stat_func   - callable computing the statistic of a sample
    n_samples   - number of bootstrap resamples
    alternative - 'two-sided', 'less' or 'greater'

    The bootstrap statistics are centred at their own mean to imitate the
    null distribution; the p-value is the share of centred values at least
    as extreme as stat_func(sample) - param.

    raises ValueError for an unknown alternative
    '''
    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError("alternative not recognized\n"
                         "should be 'two-sided', 'less' or 'greater'")

    boot_stats = [stat_func(resample)
                  for resample in get_bootstrap_samples(sample, n_samples)]

    centre = np.mean(boot_stats)
    observed = stat_func(sample) - param

    centred = [value - centre for value in boot_stats]

    if alternative == 'two-sided':
        hits = sum(1 for value in centred if abs(value) >= abs(observed))
    elif alternative == 'less':
        hits = sum(1 for value in centred if value <= observed)
    else:
        hits = sum(1 for value in centred if value >= observed)

    return float(hits) / len(centred)
77 |
def bootstrap_diff_conf_int(a,b,stat_func,alpha=0.05,n_samples=1000):
    '''
    bootstrap percentile confidence interval for stat_func(a) - stat_func(b)

    a, b      - two samples, resampled independently of each other
    stat_func - callable applied to each resample, e.g. np.median
    alpha     - significance level of the interval
    n_samples - number of bootstrap resamples per sample

    example:
    a = np.random.normal(size=1000)
    b = np.random.normal(loc=2,size=1000)
    bootstrap_diff_conf_int(b,a,np.median)
    '''
    # a is resampled before b, so the random stream is consumed in that order
    boot_a = [stat_func(resample) for resample in get_bootstrap_samples(a, n_samples)]
    boot_b = [stat_func(resample) for resample in get_bootstrap_samples(b, n_samples)]
    deltas = [score_a - score_b for score_a, score_b in zip(boot_a, boot_b)]
    return stat_intervals(deltas, alpha)
88 |
def vcramer(table):
    '''
    Cramer's V and the chi-square p-value for a contingency table

    table - 2-D table of counts (NumPy array, nested list, pandas DataFrame)

    returns (V, p) where V = sqrt(chi2 / (n * (min(rows, cols) - 1))) and
    chi2, p come from chi2_contingency without continuity correction
    '''
    # Normalise to an ndarray: DataFrame.sum() would give per-column sums
    # instead of the grand total, and lists have neither .sum() nor .shape.
    counts = np.asarray(table)
    chi, p, _, _ = stats.chi2_contingency(counts, correction=False)
    n = counts.sum()
    r, c = counts.shape
    return np.sqrt(chi / (n * (min(r, c) - 1.))), p
94 |
def mcc(a,b,c,d):
    '''
    Matthews correlation from the four cells of a 2x2 contingency table

    a, b - counts in the first row
    c, d - counts in the second row

    returns (a*d - b*c) / sqrt((a+b)*(a+c)*(b+d)*(c+d))
    '''
    numerator = a*d - b*c
    marginal_product = (a+b)*(a+c)*(b+d)*(c+d)
    return numerator / np.sqrt(marginal_product)
100 |
def get_norm_p(z_stat, alternative = 'two-sided'):
    '''
    p-value of a z-statistic under the standard normal distribution

    z_stat      - value of the statistic
    alternative - 'two-sided', 'less' or 'greater'

    raises ValueError for an unknown alternative
    '''
    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError("alternative not recognized\n"
                         "should be 'two-sided', 'less' or 'greater'")

    if alternative == 'less':
        # lower tail
        return stats.norm.cdf(z_stat)

    if alternative == 'greater':
        # upper tail
        return 1 - stats.norm.cdf(z_stat)

    # two-sided: both tails beyond |z|
    upper_tail = 1 - stats.norm.cdf(np.abs(z_stat))
    return 2 * upper_tail
114 |
def get_t_p(z_stat, n, alternative = 'two-sided'):
    '''
    p-value of a t-statistic under Student's t distribution with n - 1 df

    z_stat      - value of the statistic
    n           - sample size; n - 1 degrees of freedom are used
    alternative - 'two-sided', 'less' or 'greater'

    raises ValueError for an unknown alternative
    '''
    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError("alternative not recognized\n"
                         "should be 'two-sided', 'less' or 'greater'")

    df = n - 1

    if alternative == 'two-sided':
        return 2 * (1 - stats.t.cdf(np.abs(z_stat), df=df))

    # One-sided tails must keep the sign of the statistic; taking the
    # absolute value here made a negative statistic look non-significant
    # for 'less' and significant for 'greater'.
    if alternative == 'less':
        return stats.t.cdf(z_stat, df=df)

    if alternative == 'greater':
        return 1 - stats.t.cdf(z_stat, df=df)
128 |
def proportions_diff_ind(p1,n1,p2,n2,alternative = 'two-sided'):
    '''
    AB test: z-test p-value for the difference of two independent proportions

    p1, n1      - proportion and size of the first sample
    p2, n2      - proportion and size of the second sample
    alternative - 'two-sided', 'less' or 'greater' (passed to get_norm_p)

    The standard error uses the pooled proportion of both samples.
    '''
    pooled = float(p1*n1+p2*n2)/(n1+n2)
    std_err = np.sqrt(pooled*(1-pooled)*(1./n1+1./n2))
    z_stat = (p1-p2)/std_err
    return get_norm_p(z_stat,alternative)
136 |
def proportions_diff_ind_table(table,alternative = 'two-sided'):
    '''
    AB test from a 2x2 contingency table

    table - object whose .ravel() yields the four cells a, b, c, d in
            row-major order (for a pandas DataFrame pass table.values)

    Each column is treated as one group: the column total is the group size
    and the first-row cell divided by it is the group proportion.
    '''
    a, b, c, d = table.ravel()
    n1 = a + c
    n2 = b + d
    p1 = float(a)/n1
    p2 = float(b)/n2
    return proportions_diff_ind(p1,n1,p2,n2,alternative)
146 |
def proportions_diff_ind_samples(sample1,sample2,alternative = 'two-sided'):
    '''
    AB test from two independent 0/1 samples

    The proportion of each sample is sum(sample) / len(sample); the p-value
    is then computed by proportions_diff_ind.
    '''
    n1, n2 = len(sample1), len(sample2)
    p1 = float(sum(sample1)) / n1
    p2 = float(sum(sample2)) / n2
    return proportions_diff_ind(p1,n1,p2,n2,alternative)
156 |
def proportions_confint_diff_ind(p1,n1,p2,n2, alpha = 0.05):
    '''
    confidence interval for proportion difference from ps and ns

    p1, n1 - proportion and size of the first sample
    p2, n2 - proportion and size of the second sample
    alpha  - significance level (interval covers 1 - alpha)

    returns (left_boundary, right_boundary) of the normal-approximation
    interval for p1 - p2
    '''
    z = stats.norm.ppf(1 - alpha / 2.)
    diff = p1 - p2
    half_width = z * np.sqrt(p1 * (1 - p1)/ n1 + p2 * (1 - p2)/ n2)
    return (diff - half_width, diff + half_width)
165 |
def proportions_confint_diff_ind_table(table,alpha = 0.05):
    '''
    confidence interval for proportion difference from contigency table

    table - object whose .ravel() yields the four cells a, b, c, d in
            row-major order
    alpha - significance level

    Each column is treated as one group: the column total is the group size
    and the first-row cell divided by it is the group proportion.
    '''
    a, b, c, d = table.ravel()
    n1 = a + c
    n2 = b + d
    p1 = float(a)/n1
    p2 = float(b)/n2
    return proportions_confint_diff_ind(p1,n1,p2,n2, alpha)
174 |
def proportions_confint_diff_ind_samples(sample1,sample2, alpha = 0.05):
    '''
    confidence interval for proportion difference from samples

    The proportion of each 0/1 sample is sum(sample) / len(sample); the
    interval is then computed by proportions_confint_diff_ind.
    '''
    n1, n2 = len(sample1), len(sample2)
    p1 = float(sum(sample1)) / n1
    p2 = float(sum(sample2)) / n2
    return proportions_confint_diff_ind(p1,n1,p2,n2, alpha)
184 |
def get_props_and_lens(table,invertion=True):
    '''
    proportions and group sizes from a 2x2 contingency table

    table     - object with a .values 2-D array (e.g. a pandas DataFrame)
    invertion - when True (default) the row order is reversed before the
                cells are read, so the last row supplies the numerators;
                when False the rows are used as they are

    Each column is treated as one group: the column total is the group size
    and the cell of the first row (after the optional reversal) divided by
    it is the group proportion.

    returns p1, n1, p2, n2
    '''
    cells = table.values
    # The flag used to be ignored and the rows were always reversed.
    if invertion:
        cells = cells[::-1,:]
    a,b,c,d = cells.ravel()
    n1, n2 = a+c, b+d
    p1, p2 = float(a)/n1, float(b)/n2
    return p1, n1, p2, n2
190 |
191 | #----------------------------------------------------------------
192 |
def proportions_confint_diff_rel(sample1, sample2, alpha = 0.05):
    '''
    confidence interval for the difference of proportions in paired samples

    sample1, sample2 - paired 0/1 observations (paired by position)
    alpha            - significance level (interval covers 1 - alpha)

    Only discordant pairs matter: f counts pairs (1, 0) and g counts pairs
    (0, 1); the interval is centred at (f - g) / n.

    returns (left_boundary, right_boundary)
    '''
    z = stats.norm.ppf(1 - alpha / 2.)
    # zip() is a lazy iterator on Python 3: materialise it so it has a
    # length and can be traversed more than once.
    pairs = list(zip(sample1, sample2))
    n = len(pairs)

    f = sum(1 for first, second in pairs if first == 1 and second == 0)
    g = sum(1 for first, second in pairs if first == 0 and second == 1)

    diff = float(f - g) / n
    half_width = z * np.sqrt(float((f + g)) / n**2 - float((f - g)**2) / n**3)
    return (diff - half_width, diff + half_width)
204 |
205 |
def proportions_diff_rel(sample1, sample2, alternative = 'two-sided'):
    '''
    z-test p-value for the difference of proportions in paired samples

    sample1, sample2 - paired 0/1 observations (paired by position)
    alternative      - 'two-sided', 'less' or 'greater' (passed to get_norm_p)

    Only discordant pairs matter: f counts pairs (1, 0) and g counts pairs
    (0, 1); the statistic is (f - g) / sqrt(f + g - (f - g)^2 / n).
    '''
    # zip() is a lazy iterator on Python 3: materialise it so it has a
    # length and can be traversed more than once.
    pairs = list(zip(sample1, sample2))
    n = len(pairs)

    f = sum(1 for first, second in pairs if first == 1 and second == 0)
    g = sum(1 for first, second in pairs if first == 0 and second == 1)

    z = float(f - g) / np.sqrt(f + g - float((f - g)**2) / n )

    return get_norm_p(z,alternative)
216 |
def two_proportions_sample_size(p1,p2,alpha=0.05,power=0.8,frac=0.5):
    '''
    sample sizes for comparing two proportions with a given power

    p1, p2 - the two proportions, turned into an effect size with
             sms.proportion_effectsize
    alpha  - significance level passed to solve_power
    power  - power passed to solve_power
    frac   - split between the groups; the size ratio is frac / (1 - frac)

    returns (n * ratio, n) where n is the floored value solved by
    sms.NormalIndPower().solve_power

    NOTE(review): np.floor rounds the solved size down - confirm that
    rounding up is not needed to actually reach the requested power.
    '''
    ratio = frac/(1.-frac)
    effect_size = sms.proportion_effectsize(p1, p2)
    solved = sms.NormalIndPower().solve_power(effect_size, power=power, alpha=alpha, ratio=ratio)
    n = np.floor(solved)
    return n*ratio, n
--------------------------------------------------------------------------------