├── ADHD.txt ├── README.md ├── hsb2.csv ├── mrstat.ipynb └── mrstat.py /ADHD.txt: -------------------------------------------------------------------------------- 1 | D0 D60 2 | 57 62 3 | 27 49 4 | 32 30 5 | 31 34 6 | 34 38 7 | 38 36 8 | 71 77 9 | 33 51 10 | 34 45 11 | 53 42 12 | 36 43 13 | 42 57 14 | 26 36 15 | 52 58 16 | 36 35 17 | 55 60 18 | 36 33 19 | 42 49 20 | 36 33 21 | 54 59 22 | 34 35 23 | 29 37 24 | 33 45 25 | 33 29 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mrstat - удобный скрипт для статистического анализа 2 | -------------------------------------------------------------------------------- /hsb2.csv: -------------------------------------------------------------------------------- 1 | "id","female","race","ses","schtyp","prog","read","write","math","science","socst" 2 | 70,0,4,1,1,1,57,52,41,47,57 3 | 121,1,4,2,1,3,68,59,53,63,61 4 | 86,0,4,3,1,1,44,33,54,58,31 5 | 141,0,4,3,1,3,63,44,47,53,56 6 | 172,0,4,2,1,2,47,52,57,53,61 7 | 113,0,4,2,1,2,44,52,51,63,61 8 | 50,0,3,2,1,1,50,59,42,53,61 9 | 11,0,1,2,1,2,34,46,45,39,36 10 | 84,0,4,2,1,1,63,57,54,58,51 11 | 48,0,3,2,1,2,57,55,52,50,51 12 | 75,0,4,2,1,3,60,46,51,53,61 13 | 60,0,4,2,1,2,57,65,51,63,61 14 | 95,0,4,3,1,2,73,60,71,61,71 15 | 104,0,4,3,1,2,54,63,57,55,46 16 | 38,0,3,1,1,2,45,57,50,31,56 17 | 115,0,4,1,1,1,42,49,43,50,56 18 | 76,0,4,3,1,2,47,52,51,50,56 19 | 195,0,4,2,2,1,57,57,60,58,56 20 | 114,0,4,3,1,2,68,65,62,55,61 21 | 85,0,4,2,1,1,55,39,57,53,46 22 | 167,0,4,2,1,1,63,49,35,66,41 23 | 143,0,4,2,1,3,63,63,75,72,66 24 | 41,0,3,2,1,2,50,40,45,55,56 25 | 20,0,1,3,1,2,60,52,57,61,61 26 | 12,0,1,2,1,3,37,44,45,39,46 27 | 53,0,3,2,1,3,34,37,46,39,31 28 | 154,0,4,3,1,2,65,65,66,61,66 29 | 178,0,4,2,2,3,47,57,57,58,46 30 | 196,0,4,3,2,2,44,38,49,39,46 31 | 29,0,2,1,1,1,52,44,49,55,41 32 | 126,0,4,2,1,1,42,31,57,47,51 33 | 103,0,4,3,1,2,76,52,64,64,61 34 | 
192,0,4,3,2,2,65,67,63,66,71 35 | 150,0,4,2,1,3,42,41,57,72,31 36 | 199,0,4,3,2,2,52,59,50,61,61 37 | 144,0,4,3,1,1,60,65,58,61,66 38 | 200,0,4,2,2,2,68,54,75,66,66 39 | 80,0,4,3,1,2,65,62,68,66,66 40 | 16,0,1,1,1,3,47,31,44,36,36 41 | 153,0,4,2,1,3,39,31,40,39,51 42 | 176,0,4,2,2,2,47,47,41,42,51 43 | 177,0,4,2,2,2,55,59,62,58,51 44 | 168,0,4,2,1,2,52,54,57,55,51 45 | 40,0,3,1,1,1,42,41,43,50,41 46 | 62,0,4,3,1,1,65,65,48,63,66 47 | 169,0,4,1,1,1,55,59,63,69,46 48 | 49,0,3,3,1,3,50,40,39,49,47 49 | 136,0,4,2,1,2,65,59,70,63,51 50 | 189,0,4,2,2,2,47,59,63,53,46 51 | 7,0,1,2,1,2,57,54,59,47,51 52 | 27,0,2,2,1,2,53,61,61,57,56 53 | 128,0,4,3,1,2,39,33,38,47,41 54 | 21,0,1,2,1,1,44,44,61,50,46 55 | 183,0,4,2,2,2,63,59,49,55,71 56 | 132,0,4,2,1,2,73,62,73,69,66 57 | 15,0,1,3,1,3,39,39,44,26,42 58 | 67,0,4,1,1,3,37,37,42,33,32 59 | 22,0,1,2,1,3,42,39,39,56,46 60 | 185,0,4,2,2,2,63,57,55,58,41 61 | 9,0,1,2,1,3,48,49,52,44,51 62 | 181,0,4,2,2,2,50,46,45,58,61 63 | 170,0,4,3,1,2,47,62,61,69,66 64 | 134,0,4,1,1,1,44,44,39,34,46 65 | 108,0,4,2,1,1,34,33,41,36,36 66 | 197,0,4,3,2,2,50,42,50,36,61 67 | 140,0,4,2,1,3,44,41,40,50,26 68 | 171,0,4,2,1,2,60,54,60,55,66 69 | 107,0,4,1,1,3,47,39,47,42,26 70 | 81,0,4,1,1,2,63,43,59,65,44 71 | 18,0,1,2,1,3,50,33,49,44,36 72 | 155,0,4,2,1,1,44,44,46,39,51 73 | 97,0,4,3,1,2,60,54,58,58,61 74 | 68,0,4,2,1,2,73,67,71,63,66 75 | 157,0,4,2,1,1,68,59,58,74,66 76 | 56,0,4,2,1,3,55,45,46,58,51 77 | 5,0,1,1,1,2,47,40,43,45,31 78 | 159,0,4,3,1,2,55,61,54,49,61 79 | 123,0,4,3,1,1,68,59,56,63,66 80 | 164,0,4,2,1,3,31,36,46,39,46 81 | 14,0,1,3,1,2,47,41,54,42,56 82 | 127,0,4,3,1,2,63,59,57,55,56 83 | 165,0,4,1,1,3,36,49,54,61,36 84 | 174,0,4,2,2,2,68,59,71,66,56 85 | 3,0,1,1,1,2,63,65,48,63,56 86 | 58,0,4,2,1,3,55,41,40,44,41 87 | 146,0,4,3,1,2,55,62,64,63,66 88 | 102,0,4,3,1,2,52,41,51,53,56 89 | 117,0,4,3,1,3,34,49,39,42,56 90 | 133,0,4,2,1,3,50,31,40,34,31 91 | 94,0,4,3,1,2,55,49,61,61,56 92 | 24,0,2,2,1,2,52,62,66,47,46 93 | 
149,0,4,1,1,1,63,49,49,66,46 94 | 82,1,4,3,1,2,68,62,65,69,61 95 | 8,1,1,1,1,2,39,44,52,44,48 96 | 129,1,4,1,1,1,44,44,46,47,51 97 | 173,1,4,1,1,1,50,62,61,63,51 98 | 57,1,4,2,1,2,71,65,72,66,56 99 | 100,1,4,3,1,2,63,65,71,69,71 100 | 1,1,1,1,1,3,34,44,40,39,41 101 | 194,1,4,3,2,2,63,63,69,61,61 102 | 88,1,4,3,1,2,68,60,64,69,66 103 | 99,1,4,3,1,1,47,59,56,66,61 104 | 47,1,3,1,1,2,47,46,49,33,41 105 | 120,1,4,3,1,2,63,52,54,50,51 106 | 166,1,4,2,1,2,52,59,53,61,51 107 | 65,1,4,2,1,2,55,54,66,42,56 108 | 101,1,4,3,1,2,60,62,67,50,56 109 | 89,1,4,1,1,3,35,35,40,51,33 110 | 54,1,3,1,2,1,47,54,46,50,56 111 | 180,1,4,3,2,2,71,65,69,58,71 112 | 162,1,4,2,1,3,57,52,40,61,56 113 | 4,1,1,1,1,2,44,50,41,39,51 114 | 131,1,4,3,1,2,65,59,57,46,66 115 | 125,1,4,1,1,2,68,65,58,59,56 116 | 34,1,1,3,2,2,73,61,57,55,66 117 | 106,1,4,2,1,3,36,44,37,42,41 118 | 130,1,4,3,1,1,43,54,55,55,46 119 | 93,1,4,3,1,2,73,67,62,58,66 120 | 163,1,4,1,1,2,52,57,64,58,56 121 | 37,1,3,1,1,3,41,47,40,39,51 122 | 35,1,1,1,2,1,60,54,50,50,51 123 | 87,1,4,2,1,1,50,52,46,50,56 124 | 73,1,4,2,1,2,50,52,53,39,56 125 | 151,1,4,2,1,3,47,46,52,48,46 126 | 44,1,3,1,1,3,47,62,45,34,46 127 | 152,1,4,3,1,2,55,57,56,58,61 128 | 105,1,4,2,1,2,50,41,45,44,56 129 | 28,1,2,2,1,1,39,53,54,50,41 130 | 91,1,4,3,1,3,50,49,56,47,46 131 | 45,1,3,1,1,3,34,35,41,29,26 132 | 116,1,4,2,1,2,57,59,54,50,56 133 | 33,1,2,1,1,2,57,65,72,54,56 134 | 66,1,4,2,1,3,68,62,56,50,51 135 | 72,1,4,2,1,3,42,54,47,47,46 136 | 77,1,4,1,1,2,61,59,49,44,66 137 | 61,1,4,3,1,2,76,63,60,67,66 138 | 190,1,4,2,2,2,47,59,54,58,46 139 | 42,1,3,2,1,3,46,52,55,44,56 140 | 2,1,1,2,1,3,39,41,33,42,41 141 | 55,1,3,2,2,2,52,49,49,44,61 142 | 19,1,1,1,1,1,28,46,43,44,51 143 | 90,1,4,3,1,2,42,54,50,50,52 144 | 142,1,4,2,1,3,47,42,52,39,51 145 | 17,1,1,2,1,2,47,57,48,44,41 146 | 122,1,4,2,1,2,52,59,58,53,66 147 | 191,1,4,3,2,2,47,52,43,48,61 148 | 83,1,4,2,1,3,50,62,41,55,31 149 | 182,1,4,2,2,2,44,52,43,44,51 150 | 6,1,1,1,1,2,47,41,46,40,41 151 | 
46,1,3,1,1,2,45,55,44,34,41 152 | 43,1,3,1,1,2,47,37,43,42,46 153 | 96,1,4,3,1,2,65,54,61,58,56 154 | 138,1,4,2,1,3,43,57,40,50,51 155 | 10,1,1,2,1,1,47,54,49,53,61 156 | 71,1,4,2,1,1,57,62,56,58,66 157 | 139,1,4,2,1,2,68,59,61,55,71 158 | 110,1,4,2,1,3,52,55,50,54,61 159 | 148,1,4,2,1,3,42,57,51,47,61 160 | 109,1,4,2,1,1,42,39,42,42,41 161 | 39,1,3,3,1,2,66,67,67,61,66 162 | 147,1,4,1,1,2,47,62,53,53,61 163 | 74,1,4,2,1,2,57,50,50,51,58 164 | 198,1,4,3,2,2,47,61,51,63,31 165 | 161,1,4,1,1,2,57,62,72,61,61 166 | 112,1,4,2,1,2,52,59,48,55,61 167 | 69,1,4,1,1,3,44,44,40,40,31 168 | 156,1,4,2,1,2,50,59,53,61,61 169 | 111,1,4,1,1,1,39,54,39,47,36 170 | 186,1,4,2,2,2,57,62,63,55,41 171 | 98,1,4,1,1,3,57,60,51,53,37 172 | 119,1,4,1,1,1,42,57,45,50,43 173 | 13,1,1,2,1,3,47,46,39,47,61 174 | 51,1,3,3,1,1,42,36,42,31,39 175 | 26,1,2,3,1,2,60,59,62,61,51 176 | 36,1,3,1,1,1,44,49,44,35,51 177 | 135,1,4,1,1,2,63,60,65,54,66 178 | 59,1,4,2,1,2,65,67,63,55,71 179 | 78,1,4,2,1,2,39,54,54,53,41 180 | 64,1,4,3,1,3,50,52,45,58,36 181 | 63,1,4,1,1,1,52,65,60,56,51 182 | 79,1,4,2,1,2,60,62,49,50,51 183 | 193,1,4,2,2,2,44,49,48,39,51 184 | 92,1,4,3,1,1,52,67,57,63,61 185 | 160,1,4,2,1,2,55,65,55,50,61 186 | 32,1,2,3,1,3,50,67,66,66,56 187 | 23,1,2,1,1,2,65,65,64,58,71 188 | 158,1,4,2,1,1,52,54,55,53,51 189 | 25,1,2,2,1,1,47,44,42,42,36 190 | 188,1,4,3,2,2,63,62,56,55,61 191 | 52,1,3,1,1,2,50,46,53,53,66 192 | 124,1,4,1,1,3,42,54,41,42,41 193 | 175,1,4,3,2,1,36,57,42,50,41 194 | 184,1,4,2,2,3,50,52,53,55,56 195 | 30,1,2,3,1,2,41,59,42,34,51 196 | 179,1,4,2,2,2,47,65,60,50,56 197 | 31,1,2,2,2,1,55,59,52,42,56 198 | 145,1,4,2,1,3,42,46,38,36,46 199 | 187,1,4,2,2,1,57,41,57,55,52 200 | 118,1,4,2,1,1,55,62,58,58,61 201 | 137,1,4,3,1,2,63,65,65,53,61 202 | -------------------------------------------------------------------------------- /mrstat.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | 
"metadata": {}, 6 | "source": [ 7 | "# MR STAT \n", 8 | "by glebmikh" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "На данный момент сообщество python не имеет полноценного пакета для статистического анализа:\n", 16 | "\n", 17 | "- реализации статистических тестов разбросаны по разным библиотекам;\n", 18 | "- некоторые из основных тестов не имеют стандартной реализации.\n", 19 | "\n", 20 | "Я решил, что будет удобно собрать в один скрипт все стандартные реализации и дополнить их недостающими методами. Сделав это, больше не придется вспоминать, где что лежит или копировать куски кода из предыдущих проектов." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Содержание" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "- [1. Одновыборочный ти-тест](#1.-Одновыборочный-ти-тест)\n", 35 | "\t- [1.1 Гипотеза о среднием](#1.1-Гипотеза-о-среднием)\n", 36 | "\t- [1.2 Доверительный интервал для среднего](#1.2-Доверительный-интервал-для-среднего)\n", 37 | "- [2. Тесты на распределение](#2.-Тесты-на-распределение)\n", 38 | "\t- [2.1 Критерий Шапиро-Уилка](#2.1-Критерий-Шапиро-Уилка)\n", 39 | "\t- [2.2 КуКу график](#2.2-КуКу-график)\n", 40 | "\t- [2.3 Тест Колмогорова-Смирнова для проверки формы распределения](#2.3-Тест-Колмогорова-Смирнова-для-проверки-формы-распределения)\n", 41 | "\t- [2.4 Двухвыборочный тест Колмогорова-Смирнова](#2.4-Двухвыборочный-тест-Колмогорова-Смирнова)\n", 42 | "- [3. Двухвыборочные ти-тесты](#3.-Двухвыборочные-ти-тесты)\n", 43 | "\t- [3.1 Ти-тест для двух независимых выборок](#3.1-Ти-тест-для-двух-независимых-выборок)\n", 44 | "\t- [3.2 Доверительный интервал разности средних для двух независимых выборок](#3.2-Доверительный-интервал-разности-средних-для-двух-независимых-выборок)\n", 45 | "- [4. Пропорция](#4.-Пропорция)\n", 46 | "\t- [4.1. 
Тест для одной доли](#4.1.-Тест-для-одной-доли)\n", 47 | "\t- [4.2 Доверительная интервал для одной доли](#4.2-Доверительная-интервал-для-одной-доли)\n", 48 | "\t- [4.3 Размер выборки для заданной доли и ширины интервала](#4.3-Размер-выборки-для-заданной-доли-и-ширины-интервала)\n", 49 | "- [5. Две доли и АБ тестинг](#5.-Две-доли-и-АБ-тестинг)\n", 50 | "\t- [5.1 Тест разности двух независимых долей](#5.1-Тест-разности-двух-независимых-долей)\n", 51 | "\t- [5.2 Доверительный интервал для разности двух незавимых долей](#5.2-Доверительный-интервал-для-разности-двух-незавимых-долей)\n", 52 | "\t- [5.3 Тест Хи-квадрат](#5.3-Тест-Хи-квадрат)\n", 53 | "\t- [5.4 Точный тест Фишера](#5.4-Точный-тест-Фишера)\n", 54 | "\t- [5.5 Размер выборок для для двух пропорций](#5.5-Размер-выборок-для-для-двух-пропорций)\n", 55 | "- [6. Непараметрические критерии](#6.-Непараметрические-критерии)\n", 56 | "\t- [6.1 Критерий знаков](#6.1-Критерий-знаков)\n", 57 | "\t- [6.2 Критерий знаковых рангов Вилкоксона](#6.2-Критерий-знаковых-рангов-Вилкоксона)\n", 58 | "\t- [6.3 Критерий Манна-Уитни](#6.3-Критерий-Манна-Уитни)\n", 59 | "\t- [6.4 Бутстреп](#6.4-Бутстреп)\n", 60 | "- [7. Корреляция](#7.-Корреляция)\n", 61 | "\t- [7.1 Коэффициет корреляции Пирсона](#7.1-Коэффициет-корреляции-Пирсона)\n", 62 | "\t- [7.2 Коэффициет корреляции Спирмена](#7.2-Коэффициет-корреляции-Спирмена)\n", 63 | "\t- [7.3 Коэффициент Крамера](#7.3-Коэффициент-Крамера)\n", 64 | "- [8. Связанные выборки](#8.-Связанные-выборки)\n", 65 | "\t- [8.1 Ти-тест для связанных выборок](#8.1-Ти-тест-для-связанных-выборок)\n", 66 | "\t- [8.2 Тест для разности двух долей - связанные выборки](#8.2-Тест-для-разности-двух-долей---связанные-выборки)\n", 67 | "\t- [8.3 Доверительный интервал для разности долей ](#8.3-Доверительный-интервал-для-разности-долей-)\n", 68 | "\t- [8.4 Непараметрические критерии для связанных выборок](#8.4-Непараметрические-критерии-для-связанных-выборок)\n", 69 | "- [9. 
Дисперсионный анализ](#9.-Дисперсионный-анализ)\n", 70 | "\t- [9.1 Однофакторная ANOVA](#9.1-Однофакторная-ANOVA)\n", 71 | "\t- [9.2 Критерий Краскела-Уоллиса](#9.2-Критерий-Краскела-Уоллиса)\n", 72 | "\t- [9.3 Двухфакторная ANOVA](#9.3-Двухфакторная-ANOVA)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "В данном руководстве я буду использовать дата-сет [hsb2](https://github.com/rpruim/OpenIntro/blob/master/data/hsb2.csv) и примеры из статьи [What statistical analysis should I use?](http://www.ats.ucla.edu/stat/stata/whatstat/whatstat.htm), которые дополню некоторыми другими полезными случаями. В данных hsb2 содержится 200 наблюдений об учениках старших классов: социодемографические данные и оценки, полученные на тестах по разным предметам." 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Если импорт выдаст ошибки - просто установите недостающие библиотеки." 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 1, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "import mrstat\n", 98 | "import numpy as np\n", 99 | "from matplotlib import pyplot as plt\n", 100 | "from scipy import stats\n", 101 | "import pandas as pd\n", 102 | "%matplotlib inline\n", 103 | "hsb = pd.read_csv('hsb2.csv')" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 2, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/html": [ 116 | "
\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | "
idfemaleracesesschtypprogreadwritemathsciencesocst
070041115752414757
1121142136859536361
286043114433545831
3141043136344475356
4172042124752575361
\n", 207 | "
" 208 | ], 209 | "text/plain": [ 210 | " id female race ses schtyp prog read write math science socst\n", 211 | "0 70 0 4 1 1 1 57 52 41 47 57\n", 212 | "1 121 1 4 2 1 3 68 59 53 63 61\n", 213 | "2 86 0 4 3 1 1 44 33 54 58 31\n", 214 | "3 141 0 4 3 1 3 63 44 47 53 56\n", 215 | "4 172 0 4 2 1 2 47 52 57 53 61" 216 | ] 217 | }, 218 | "execution_count": 2, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "hsb.head()" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "collapsed": true 231 | }, 232 | "source": [ 233 | "### 1. Одновыборочный ти-тест" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "#### 1.1 Гипотеза о среднием" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Проверим гипотезу, что средняя оценка на тесте по письму (write) равна 50 пунктам." 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 3, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "Ttest_1sampResult(statistic=4.140324966963024, pvalue=5.1209194607163552e-05)" 261 | ] 262 | }, 263 | "execution_count": 3, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "mrstat.ttest_1samp(hsb['write'],50)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "P-value получилось меньше 0.05 - это значит, что гипотеза о том, что среднее равно 50, отвергается." 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "#### 1.2 Доверительный интервал для среднего" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "Чтобы получить представление о среднем для write, построим 95% доверительный интервал." 
291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 4, 296 | "metadata": { 297 | "collapsed": false 298 | }, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "(51.461359138353302, 54.088640861646695)" 304 | ] 305 | }, 306 | "execution_count": 4, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "mrstat.zconfint(hsb['write'])" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "Данный интервал с 95% вероятностью содержит истинное среднее для оценки write. Также доверительный интервал можно записать в следующем виде:" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 5, 325 | "metadata": { 326 | "collapsed": false 327 | }, 328 | "outputs": [ 329 | { 330 | "name": "stdout", 331 | "output_type": "stream", 332 | "text": [ 333 | "52.775 +/- 1.31364086165\n" 334 | ] 335 | } 336 | ], 337 | "source": [ 338 | "lb, hb = mrstat.zconfint(hsb['write'])\n", 339 | "print hsb.write.mean(), '+/-', (hb-lb)/2." 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "### 2. Тесты на распределение" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "#### 2.1 Критерий Шапиро-Уилка" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "Проверяем гипотезу о том, что случайная величина распределена нормально." 
361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 6, 366 | "metadata": { 367 | "collapsed": false 368 | }, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "(0.9470317363739014, 9.865516403806396e-07)" 374 | ] 375 | }, 376 | "execution_count": 6, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "mrstat.shapiro(hsb['write'])" 383 | ] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "metadata": {}, 388 | "source": [ 389 | "P-value < 0.05, следовательно, гипотеза о нормальности распределения оценок по write отвергается. Также всегда не лишним будет взглянуть на гистограмму." 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 7, 395 | "metadata": { 396 | "collapsed": false 397 | }, 398 | "outputs": [ 399 | { 400 | "data": { 401 | "text/plain": [ 402 | "" 403 | ] 404 | }, 405 | "execution_count": 7, 406 | "metadata": {}, 407 | "output_type": "execute_result" 408 | }, 409 | { 410 | "data": { 411 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEZVJREFUeJzt3X+I3HV+x/HX62KKIXskEe2wROleQSziYmyGq8VyzGo9\nclqqQpHKVWK1rH/cBf8IlPT+aBU5CMWcbaEcpGd64dq7JRwnSs4quTR7Ioje7jVmE6N4yMq5xOTS\n0+DY4JG7d/+Yb2Auzux8Z+Y7M9/5+HzAsPP9Md957Wcnr/3uN9/vjCNCAIDx95lRBwAAFINCB4BE\nUOgAkAgKHQASQaEDQCIodABIBIUOAImg0AEgERQ6ACTismE+2ZVXXhlTU1Mtl3300Udav379MOP0\nhJzFG5es5CzWuOSURp91cXHxbERc1XHFiBjabevWrdHOkSNH2i4rE3IWb1yykrNY45IzYvRZJS1E\njo7lkAsAJIJCB4BEUOgAkAgKHQASQaEDQCIodABIBIUOAImg0AEgERQ6ACRiqJf+AyiPqV0/HNlz\nL+++c2TPnTL20AEgERQ6ACSCQgeARFDoAJAICh0AEkGhA0AiOha67cttv2r7NdsnbD+WzX/U9ort\no9ntjsHHBQC0k+c89I8l3RoRddtrJb1k+7+yZU9GxBODiwcAyKtjoWcff1TPJtdmtxhkKABA93Id\nQ7e9xvZRSWckHYqIV7JFO2wfs73P9qaBpQQAdOTGDnjOle2Nkp6WtEPSLySdVWNv/XFJkxHxYIvH\nzEqalaRKpbJ1bm6u5bbr9bomJia6zT905CzeuGRNLefSyrkhpGltevOGsRlPafQ/+5mZmcWIqHZa\nr6tClyTbfy/p/5qPndueknQwIm5Y7bHVajUWFhZaLpufn1etVusqyyiQs3jjkjW1nKN+L5dxGU9p\n9D9727kKPc9ZLldle+ayvU7S7ZLesD3ZtNo9ko73GhYA0L88Z7lMStpve40avwAORMRB29+xvUWN\nQy7Lkh4eXEwAQCd5znI5JummFvPvH0giAEBPuFIUABJBoQNAIih0AEgEhQ4AiaDQASARFDoAJIJC\nB4BEUOgAkAgKHQASQaEDQCIodABIBIUOAImg0AEgERQ6ACSCQgeARFDoAJAICh0AEkGhA0AiKHQA\nSETHQrd9ue1Xbb9m+4Ttx7L5V9g+ZPut7OumwccFALSTZw/9Y0m3RsSNkrZI2mb7Zkm7JB2OiGsl\nHc6mAQAj0rHQo6GeTa7NbiHpLkn7s/n7Jd09kIQAgFxyHUO3vcb2UUlnJB2KiFckVSLiVLbKe5Iq\nA8oIAMjBEZF/ZXujpKcl7ZD0UkRsbFr2fkR84ji67VlJs5JUqVS2zs3Ntdx2vV7XxMREd+lHgJzF\nG5esqeVcWjk3hDTtVdZJp88P9zmnN2/o6XGj/tnPzMwsRkS103qXdbPRiPjA9hFJ2ySdtj0ZEads\nT6qx997qMXsl7ZWkarUatVqt5bbn5+fVblmZkLN445I1tZwP7Prh4MOsYuf0Be1Z6qqC+rb85VpP\njxuXn32es1yuyvbMZXudpNslvSHpWUnbs9W2S3pmUCEBAJ3l+fU4KWm/7TVq/AI4EBEHbb8s6YDt\nhyS9I+neAeYEAHTQsdAj4pikm1rM/19Jtw0iFACge1wpCgCJoNABIBEUOgAkgkIHgERQ6ACQCAod\nABIx3Mu0gJKaynHV5M7pCwO5unJ5952FbxOfTuyhA0AiKHQASASFDgCJoNABIBEUOgAkgkIHgERQ\n6ACQCAodABJBoQNAIih0AEgEhQ4AiaDQASARHQvd9jW2j9h+3fYJ249k8x+1vWL7aHa7Y/BxAQDt\n5Hm3xQuSdkbET21/VtKi7UPZsicj4onBxQMA5NWx0CPilKRT2f0PbZ+UtHnQwQAA3enqGLrtKUk3\nSXolm7XD9jHb+2xvKjgbAKALjoh8K9oTkn4s6esR8QPbFUlnJ
YWkxyVNRsSDLR43K2lWkiqVyta5\nubmW26/X65qYmOjpmxgmchavDFmXVs51XKeyTjp9vvjnnt68odDt5R3PPN/zIA1qPFfT61iP+jU6\nMzOzGBHVTuvlKnTbayUdlPRCRHyjxfIpSQcj4obVtlOtVmNhYaHlsvn5edVqtY5ZRo2cxStD1ryf\nWLRnqfgP+Sr6E4vyjmee73mQBjWeq+l1rEf9GrWdq9DznOViSU9JOtlc5rYnm1a7R9LxXoICAIqR\n59fjLZLul7Rk+2g272uS7rO9RY1DLsuSHh5IQgBALnnOcnlJklsseq74OACAXnGlKAAkgkIHgERQ\n6ACQCAodABJBoQNAIih0AEjEcC/TAoAR6vXq2J3TF/RAn1fWFn1FcCvsoQNAIih0AEgEhQ4AiaDQ\nASARFDoAJIJCB4BEUOgAkAgKHQASQaEDQCIodABIBJf+AyNW9Ic1F3GZOsYTe+gAkIiOhW77GttH\nbL9u+4TtR7L5V9g+ZPut7OumwccFALSTZw/9gqSdEXG9pJslfcX29ZJ2STocEddKOpxNAwBGpGOh\nR8SpiPhpdv9DSSclbZZ0l6T92Wr7Jd09qJAAgM66OoZue0rSTZJekVSJiFPZovckVQpNBgDoiiMi\n34r2hKQfS/p6RPzA9gcRsbFp+fsR8Ynj6LZnJc1KUqVS2To3N9dy+/V6XRMTEz18C8NFzuKVIevS\nyrmO61TWSafPDyFMn8hZvCKyTm/e0PNjZ2ZmFiOi2mm9XIVue62kg5JeiIhvZPPelFSLiFO2JyXN\nR8R1q22nWq3GwsJCy2Xz8/Oq1Wods4waOYtXhqx5Th3cOX1Be5bKf6YvOYtXRNZ+PrHIdq5Cz3OW\niyU9JenkxTLPPCtpe3Z/u6RnegkKAChGnl85t0i6X9KS7aPZvK9J2i3pgO2HJL0j6d7BRAQA5NGx\n0CPiJUlus/i2YuMAAHrFlaIAkAgKHQASQaEDQCIodABIBIUOAImg0AEgEeNxmdanVD8ffNDPhxz0\nc0UbgNFhDx0AEkGhA0AiKHQASASFDgCJoNABIBEUOgAkgkIHgERQ6ACQCAodABJBoQNAIrj0H5/Q\nz1sO9KL5bQp42wGgd+yhA0AiOha67X22z9g+3jTvUdsrto9mtzsGGxMA0EmePfRvS9rWYv6TEbEl\nuz1XbCwAQLc6FnpEvCjpl0PIAgDoQz/H0HfYPpYdktlUWCIAQE8cEZ1XsqckHYyIG7LpiqSzkkLS\n45ImI+LBNo+dlTQrSZVKZevc3FzL56jX65qYmOj+OxiyYeZcWjnX82Mr66TT5wsMM0DNWac3bxhJ\nhjxjPS5jSs7iFZG1n9f2zMzMYkRUO63XU6HnXXaparUaCwsLLZfNz8+rVqt1zDJqw8zZ7ycW7Vka\nj7NSm7OO6rTFPGM9LmNKzuIVkbWf17btXIXe0yEX25NNk/dIOt5uXQDAcHT8lWP7e5Jqkq60/a6k\nf5BUs71FjUMuy5IeHmBGAEAOHQs9Iu5rMfupAWQBAPSBK0UBIBEUOgAkgkIHgERQ6ACQCAodABJB\noQNAIih0AEgEhQ4AiaDQASARFDoAJIJCB4BEUOgAkAgKHQASQaEDQCIodABIBIUOAImg0AEgEePx\nCa341Ojng7GBTzv20AEgER0L3fY+22dsH2+ad4XtQ7bfyr5uGmxMAEAnefbQvy1p2yXzdkk6HBHX\nSjqcTQMARqhjoUfEi5J+ecnsuyTtz+7vl3R3wbkAAF3q9Rh6JSJOZfffk1QpKA8AoEeOiM4r2VOS\nDkbEDdn0BxGxsWn5+xHR8ji67VlJs5JUqVS2zs3NtXyOer2uiYmJbvMP3TBzLq2c6/mxlXXS6fMF\nhhmgcclKzmKNS06pmKzTmzf0/NiZmZnFiKh2Wq/X0xZP256MiFO2JyWdabdiROyVtFeSqtVq1Gq1\nluvNz8+r3bIyGWbOB/o4h
W/n9AXtWRqPs1LHJSs5izUuOaVisi5/uVZMmFX0esjlWUnbs/vbJT1T\nTBwAQK/ynLb4PUkvS7rO9ru2H5K0W9Lttt+S9KfZNABghDr+DRER97VZdFvBWQAAfeBKUQBIBIUO\nAImg0AEgERQ6ACSCQgeARFDoAJCI8bhMS+X54IOd0xf6uoITAAaFPXQASASFDgCJoNABIBEUOgAk\ngkIHgERQ6ACQCAodABJBoQNAIih0AEgEhQ4AiaDQASARFDoAJKKvN+eyvSzpQ0m/lnQhIqpFhAIA\ndK+Id1uciYizBWwHANAHDrkAQCL6LfSQ9CPbi7ZniwgEAOiNI6L3B9ubI2LF9u9KOiRpR0S8eMk6\ns5JmJalSqWydm5trua16va6JiYm2z7W0cq7nnEWqrJNOnx91is7GJac0PlnJWaxxySkVk3V684ae\nHzszM7OY5/8o+yr039qQ/aikekQ80W6darUaCwsLLZfNz8+rVqu13X6ZPrFoz1L5P+hpXHJK45OV\nnMUal5xSMVmXd9/Z82Nt5yr0ng+52F5v+7MX70v6oqTjvW4PANCffn7lVCQ9bfvidr4bEc8XkgoA\n0LWeCz0i3pZ0Y4FZAAB94LRFAEgEhQ4AiaDQASARFDoAJIJCB4BEUOgAkAgKHQASQaEDQCIodABI\nBIUOAImg0AEgERQ6ACSCQgeARFDoAJAICh0AEkGhA0AiKHQASASFDgCJoNABIBF9FbrtbbbftP0z\n27uKCgUA6F7PhW57jaR/lfQlSddLus/29UUFAwB0p5899M9L+llEvB0Rv5I0J+muYmIBALrVT6Fv\nlvTzpul3s3kAgBFwRPT2QPsvJG2LiL/Jpu+X9EcR8dVL1puVNJtNXifpzTabvFLS2Z7CDBc5izcu\nWclZrHHJKY0+6+9FxFWdVrqsjydYkXRN0/TV2bzfEhF7Je3ttDHbCxFR7SPPUJCzeOOSlZzFGpec\n0vhk7eeQy08kXWv7c7Z/R9JfSnq2mFgAgG71vIceERdsf1XSC5LWSNoXEScKSwYA6Eo/h1wUEc9J\neq6gLB0Py5QEOYs3LlnJWaxxySmNSdae/1MUAFAuXPoPAIkYeqHbvtz2q7Zfs33C9mPZ/CtsH7L9\nVvZ107Cz5cz5qO0V20ez2x2jzHmR7TW2/8f2wWy6VOPZrEXW0o2p7WXbS1mehWxeKce0TdYyjulG\n29+3/Ybtk7b/uIxj2iZn6cazlVHsoX8s6daIuFHSFknbbN8saZekwxFxraTD2fQotcspSU9GxJbs\nVtT/IfTrEUknm6bLNp7NLs0qlXNMZ7I8F09XK/OYXppVKt+Y/rOk5yPiDyTdqMZroIxj2iqnVL7x\n/IShF3o01LPJtdkt1HjbgP3Z/P2S7h52tmar5Cwd21dLulPSt5pml2o8L2qTdVyUckzHge0Nkr4g\n6SlJiohfRcQHKtmYrpJzLIzkGHr2J/dRSWckHYqIVyRVIuJUtsp7kiqjyNasTU5J2mH7mO19ZfgT\nUdI/SfpbSb9pmle68cy0yiqVb0xD0o9sL2ZXO0vlHdNWWaVyjennJP1C0r9nh9u+ZXu9yjem7XJK\n5RrPlkZS6BHx64jYosbVpZ+3fcMly0Ml2Btuk/Obkn5fjcMwpyTtGWFE2f4zSWciYrHdOmUZz1Wy\nlmpMM3+S/ey/JOkrtr/QvLAsY5pplbVsY3qZpD+U9M2IuEnSR7rk8EpJxrRdzrKNZ0sjPcsl+1Pm\niKRtkk7bnpSk7OuZUWZr1pwzIk5nRf8bSf+mxrtOjtItkv7c9rIa73h5q+3/UDnHs2XWEo6pImIl\n+3pG0tNqZCrjmLbMWsIxfVfSu01/5X5fjeIs25i2zFnC8WxpFGe5XGV7Y3Z/naTbJb2hxtsGbM9W\n2y7pmWFna9Yu58UXX+YeScdHke+iiPi7iLg6IqbUePuF/46Iv1LJxlNqn7VsY2p7ve3PXrw
v6YtZ\nptKNabusZRvTiHhP0s9tX5fNuk3S6yrZmLbLWbbxbKevK0V7NClpvxsfkPEZSQci4qDtlyUdsP2Q\npHck3TuCbM3a5fyO7S1q/Gm4LOnhEWZczW6VazxX848lG9OKpKdtS41/I9+NiOdt/0TlG9N2Wcv4\nOt0h6T/deO+ntyX9tbJ/WyUb01Y5/6WE4/kJXCkKAIngSlEASASFDgCJoNABIBEUOgAkgkIHgERQ\n6ACQCAodABJBoQNAIv4fx3wV4k2PReUAAAAASUVORK5CYII=\n", 412 | "text/plain": [ 413 | "" 414 | ] 415 | }, 416 | "metadata": {}, 417 | "output_type": "display_data" 418 | } 419 | ], 420 | "source": [ 421 | "hsb['write'].hist()" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "Распределения действительно не симметричное и не очень похоже на нормальное. Попробуем найти нормальное распределение среди оценок." 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 8, 434 | "metadata": { 435 | "collapsed": false 436 | }, 437 | "outputs": [ 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "read (0.9797889590263367, 0.005552584305405617)\n", 443 | "write (0.9470317363739014, 9.865516403806396e-07)\n", 444 | "math (0.976807177066803, 0.002145080827176571)\n", 445 | "science (0.9852479696273804, 0.03476548567414284)\n", 446 | "socst (0.9606784582138062, 2.343731830478646e-05)\n" 447 | ] 448 | } 449 | ], 450 | "source": [ 451 | "for col in hsb.columns[-5:]:\n", 452 | " print col, mrstat.shapiro(hsb[col])" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "Самый большой p-value получился у science. Посмотрим на гистограмму." 
460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 9, 465 | "metadata": { 466 | "collapsed": false 467 | }, 468 | "outputs": [ 469 | { 470 | "data": { 471 | "text/plain": [ 472 | "" 473 | ] 474 | }, 475 | "execution_count": 9, 476 | "metadata": {}, 477 | "output_type": "execute_result" 478 | }, 479 | { 480 | "data": { 481 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEiJJREFUeJzt3XFsnHd9x/H3l7ZTs5ol6dqdrJTNoFWdqnpNFwuYmKZz\nS1FpK1qkCVGVKh1MRhpUnRTEAn9sZQgpfxBgqtCkCDqiUbA6RpUq7UAh1CAmVGaXULe0VRFLR6M0\npqTNcImYAt/94SeZmzrc2b67J/4975dk3fP87jk/32/O97knv7vnLjITSdLa95q6C5Ak9YaBLkmF\nMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSrEuYPc2UUXXZQjIyOD3OWyvfzyy1xwwQV1\nl1ELe29m79Ds/tdC7zMzMy9k5sWdthtooI+MjDA9PT3IXS7b1NQU7Xa77jJqYe/tusuoTZP7Xwu9\nR8Sz3WznlIskFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBVioGeKSmer2UPH\nuH37g3WXMVAHd9xQdwnqsa6P0CPinIj4fkTsrdYvjIh9EfFMdbmxf2VKkjpZzpTLncCTi9a3A/sz\n81Jgf7UuSapJV4EeEZcANwCfWzR8E7C7Wt4N3Nzb0iRJy9HtEfpngA8Dv1401srMw9Xy80Crl4VJ\nkpYnMvM3bxBxI3B9Zv51RLSBD2XmjRHxUmZuWLTdi5n5qnn0iJgAJgBardaWycnJnjbQa/Pz8wwN\nDdVdRi2a3Pvc0WMcOV53FYM1umn9qeUm3/droffx8fGZzBzrtF0373J5C/COiLgeOB/4nYj4InAk\nIoYz83BEDANzS904M3cBuwDGxsbybP/c4bXw2cj90uTe7753Dztnm/Wmr4O3tk8tN/m+L6n3jlMu\nmfmRzLwkM0eAdwPfzMz3AA8AW6vNtgJ7+lalJKmj1ZxYtAO4NiKeAd5arUuSarKs/2Nm5hQwVS3/\nDLim9yVJklbCU/8lqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJA\nl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYXoGOgRcX5EfC8ifhART0TEx6rxuyLiUEQcqH6u73+5\nkqQz6eYbi34JXJ2Z8xFxHvCdiPj36rpPZ+Yn+1eeJKlbHQM9MxOYr1bPq36yn0VJkpavqzn0iDgn\nIg4Ac8C+zHykuuqOiHgsIu6JiI19q1KS1FEsHIB3uXHEBuB+4A7gp8ALLBytfxwYzsz3LnGbCWAC\noNVqbZmcnOxB2f0zPz/P0NBQ3WXUosm9zx09xpHjdVcxWKOb1p9abvJ9vxZ6Hx8fn8nMsU7bLSvQ\nASLi74BfLJ47j4gRYG9mXvGbbjs2NpbT09PL2t+gTU1N0W636y6jFk3u/e5797BztpuXlMpxcMcN\np5abfN+vhd4joqtA7+ZdLhdXR+ZExDrgWuCpiBhetNk7gcdXWqwkafW6OSQZBnZHxDksPAHcl5l7\nI+JfImIzC1MuB4H3969MSVIn3bzL5THgqiXGb+tLRZKkFfFMUUkqhIEuSYUw0CWpEAa6JBXCQJ
ek\nQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSpE\nN98pen5EfC8ifhART0TEx6rxCyNiX0Q8U11u7H+5kqQz6eYI/ZfA1Zl5JbAZuC4i3gxsB/Zn5qXA\n/mpdklSTjoGeC+ar1fOqnwRuAnZX47uBm/tSoSSpK5GZnTeKOAeYAf4Q+Gxm/m1EvJSZG6rrA3jx\n5Pppt50AJgBardaWycnJXtbfc/Pz8wwNDdVdRi2a3Pvc0WMcOV53FYM1umn9qeUm3/droffx8fGZ\nzBzrtN253fyyzPwVsDkiNgD3R8QVp12fEbHkM0Nm7gJ2AYyNjWW73e5ml7WZmpribK+xX5rc+933\n7mHnbFcPh2IcvLV9arnJ931JvS/rXS6Z+RLwMHAdcCQihgGqy7nelydJ6lY373K5uDoyJyLWAdcC\nTwEPAFurzbYCe/pVpCSps27+jzkM7K7m0V8D3JeZeyPiu8B9EfE+4FngXX2sU5LUQcdAz8zHgKuW\nGP8ZcE0/ipIkLZ9nikpSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiGa9fFy\nkk4Z2f7gqeVtoye4fdF6vx3cccPA9tUkHqFLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQnTzFXSv\ni4iHI+KHEfFERNxZjd8VEYci4kD1c33/y5UknUk370M/AWzLzEcj4rXATETsq677dGZ+sn/lSZK6\n1c1X0B0GDlfLP4+IJ4FN/S5MkrQ8y5pDj4gRFr5f9JFq6I6IeCwi7omIjT2uTZK0DJGZ3W0YMQR8\nC/hEZn41IlrAC0ACHweGM/O9S9xuApgAaLVaWyYnJ3tVe1/Mz88zNDRUdxm1aHLvc0ePceR43VXU\np7WOgfY/umn94HbWwVr4ux8fH5/JzLFO23UV6BFxHrAX+HpmfmqJ60eAvZl5xW/6PWNjYzk9Pd1x\nf3Wampqi3W7XXUYtmtz73ffuYedscz/aaNvoiYH2fzZ9lsta+LuPiK4CvZt3uQTweeDJxWEeEcOL\nNnsn8PhKCpUk9UY3T8lvAW4DZiPiQDX2UeCWiNjMwpTLQeD9falQktSVbt7l8h0glrjqod6XI0la\nqeZOGuqMRgb4udinO5vmVqW1xlP/JakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw\n0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKkQ3X0H3uoh4OCJ+GBFPRMSd1fiFEbEvIp6p\nLjf2v1xJ0pl0c4R+AtiWmZcDbwY+EBGXA9uB/Zl5KbC/Wpck1aRjoGfm4cx8tFr+OfAksAm4Cdhd\nbbYbuLlfRUqSOlvWHHpEjABXAY8Arcw8XF31PNDqaWWSpGWJzOxuw4gh4FvAJzLzqxHxUmZuWHT9\ni5n5qnn0iJgAJgBardaWycnJ3lTeJ/Pz8wwNDdVdRi1O9j576FhtNYxuWl/LfueOHuPI8Vp2fVZo\nrWOg/dd1Py9lLTzmx8fHZzJzrNN2XX1JdEScB/wbcG9mfrUaPhIRw5l5OCKGgbmlbpuZu4BdAGNj\nY9lut7vZZW2mpqY422vsl5O9317nl0Tf2q5lv3ffu4eds839zvRtoycG2n9d9/NSSnrMd/MulwA+\nDzyZmZ9adNUDwNZqeSuwp/flSZK61c1T8luA24DZiDhQjX0U2AHcFxHvA54F3tWfEiVJ3egY6Jn5\nHSDOcPU1vS1HkrRSzZ00XANGBjyXvW30RK3z55JWx1P/JakQBrokFcJAl6RCGOiSVAgDXZIKYaBL\nUiEMdEkqhIEuSYUw0CWpEJ4pKmngBn0W9EkHd9xQy34HxSN0SSqEgS5JhTDQJakQBrokFcIXRXVW\nqevFsm2jtexW6qluvoLunoiYi4jHF43dFRGHIuJA9XN9f8
uUJHXSzZTLF4Drlhj/dGZurn4e6m1Z\nkqTl6hjomflt4OgAapEkrcJqXhS9IyIeq6ZkNvasIknSikRmdt4oYgTYm5lXVOst4AUggY8Dw5n5\n3jPcdgKYAGi1WlsmJyd7Uni/zM/PMzQ0VHcZAMweOjbQ/bXWwZHjA93lWaPJvUNz+h/dtP5VY2fT\nY/5MxsfHZzJzrNN2Kwr0bq873djYWE5PT3fcX52mpqZot9t1lwHU8yXRO2eb+canJvcOzel/qVP/\nz6bH/JlERFeBvqIpl4gYXrT6TuDxM20rSRqMjk/JEfFloA1cFBHPAX8PtCNiMwtTLgeB9/exRklS\nFzoGembessTw5/tQiyRpFTz1X5IKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12S\nCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgrRMdAj4p6ImIuIxxeNXRgR+yLi\nmepyY3/LlCR10s0R+heA604b2w7sz8xLgf3VuiSpRh0DPTO/DRw9bfgmYHe1vBu4ucd1SZKWKTKz\n80YRI8DezLyiWn8pMzdUywG8eHJ9idtOABMArVZry+TkZG8q75P5+XmGhoZeMTZ76FhN1QxWax0c\nOV53FfVocu/QnP5HN61/1dhSj/mzzfj4+ExmjnXa7tzV7igzMyLO+KyQmbuAXQBjY2PZbrdXu8u+\nmpqa4vQab9/+YD3FDNi20RPsnF31n8Sa1OTeoTn9H7y1/aqxpR7za9VK3+VyJCKGAarLud6VJEla\niZUG+gPA1mp5K7CnN+VIklaqm7ctfhn4LnBZRDwXEe8DdgDXRsQzwFurdUlSjTpOmmXmLWe46poe\n1yJJWgXPFJWkQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgph\noEtSIQx0SSqEgS5JhTDQJakQBrokFWJV3wobEQeBnwO/Ak50863UkqT+6MXXfI9n5gs9+D2SpFVw\nykWSCrHaQE/gGxExExETvShIkrQykZkrv3HEpsw8FBG/B+wD7sjMb5+2zQQwAdBqtbZMTk6upt6+\nm5+fZ2ho6BVjs4eO1VTNYLXWwZHjdVdRjyb3Ds3pf3TT+leNLfWYP9uMj4/PdPMa5aoC/RW/KOIu\nYD4zP3mmbcbGxnJ6eron++uXqakp2u32K8ZGtj9YTzEDtm30BDtne/GyytrT5N6hOf0f3HHDq8aW\nesyfbSKiq0Bf8ZRLRFwQEa89uQy8DXh8pb9PkrQ6q3lKbgH3R8TJ3/OlzPxaT6qSJC3bigM9M38M\nXNnDWiRJq+DbFiWpEAa6JBXCQJekQhjoklQIA12SClH+mQSSVFnqJMFtoye4fQAnDy51UlOveYQu\nSYUw0CWpEAa6JBXCQJekQqyZF0UH9YmHg3qBRJJ6zSN0SSqEgS5JhTDQJakQBrokFcJAl6RCrCrQ\nI+K6iHg6In4UEdt7VZQkaflW852i5wCfBd4OXA7cEhGX96owSdLyrOYI/Y3AjzLzx5n5v8AkcFNv\nypIkLddqAn0T8JNF689VY5KkGkRmruyGEX8BXJeZf1Wt3wa8KTM/eNp2E8BEtXoZ8PTKyx2Ii4AX\n6i6iJvbeXE3ufy30/geZeXGnjVZz6v8h4HWL1i+pxl4hM3cBu1axn4GKiOnMHKu7jjrYezN7h2b3\nX1Lvq5ly+U/g0oh4fUT8FvBu4IHelCVJWq4VH6Fn5omI+CDwdeAc4J7MfKJnlUmSlmVVn7aYmQ8B\nD/WolrPFmpke6gN7b64m919M7yt+UVSSdHbx1H9JKkRjAz0izo+I70XEDyLiiYj4WDV+YUTsi4hn\nqsuNddfaLxFxTkR8PyL2VutN6v1gRMxGxIGImK7GGtF/RGyIiK9ExFMR8WRE/GmDer+sus9P/vxP\nRPxNKf03NtCBXwJXZ+
aVwGbguoh4M7Ad2J+ZlwL7q/VS3Qk8uWi9Sb0DjGfm5kVvWWtK//8IfC0z\n/wi4koW/gUb0nplPV/f5ZmAL8AvgfkrpPzMb/wP8NvAo8CYWTnwarsaHgafrrq9PPV/Cwh/u1cDe\naqwRvVf9HQQuOm2s+P6B9cB/Ub1+1qTel/i3eBvwHyX13+Qj9JNTDgeAOWBfZj4CtDLzcLXJ80Cr\ntgL76zPAh4FfLxprSu8ACXwjImaqs5mhGf2/Hvgp8M/VdNvnIuICmtH76d4NfLlaLqL/Rgd6Zv4q\nF/7rdQnwxoi44rTrk4UHflEi4kZgLjNnzrRNqb0v8mfVff924AMR8eeLryy4/3OBPwH+KTOvAl7m\ntOmFgns/pToZ8h3Av55+3Vruv9GBflJmvgQ8DFwHHImIYYDqcq7O2vrkLcA7IuIgC5+SeXVEfJFm\n9A5AZh6qLudYmEN9I83o/znguep/owBfYSHgm9D7Ym8HHs3MI9V6Ef03NtAj4uKI2FAtrwOuBZ5i\n4eMLtlabbQX21FNh/2TmRzLzkswcYeG/nd/MzPfQgN4BIuKCiHjtyWUW5lIfpwH9Z+bzwE8i4rJq\n6BrghzSg99Pcwv9Pt0Ah/Tf2xKKI+GNgNwsfW/Aa4L7M/IeI+F3gPuD3gWeBd2Xm0foq7a+IaAMf\nyswbm9J7RLyBhaNyWJiC+FJmfqJB/W8GPgf8FvBj4C+pHgMU3jucehL/b+ANmXmsGivivm9soEtS\naRo75SJJpTHQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqxP8BiBp5J9CujDUAAAAASUVO\nRK5CYII=\n", 482 | "text/plain": [ 483 | "" 484 | ] 485 | }, 486 | "metadata": {}, 487 | "output_type": "display_data" 488 | } 489 | ], 490 | "source": [ 491 | "hsb.science.hist()" 492 | ] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "metadata": {}, 497 | "source": [ 498 | "Распределение действительно более симметричное и напоминает нормальное." 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "#### 2.2 КуКу график" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": {}, 511 | "source": [ 512 | "Это визуальный спобоб проверить распределение на нормальность. Посмотрим на write." 
513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 10, 518 | "metadata": { 519 | "collapsed": false 520 | }, 521 | "outputs": [ 522 | { 523 | "data": { 524 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xmc1XP7x/HX1YK7ckeLZGnKdt8GlRpL9nCTfblxR4gi\nsuV2W+/87LnthEjkLnfDXVFkS0SSkPaVO1IIbbKGarp+f3y+o9M0c86ZmXPmzJzzfj4e53HO93u+\ny3VGznU+u7k7IiKSu2plOgAREcksJQIRkRynRCAikuOUCEREcpwSgYhIjlMiEBHJcUoEkrXM7CYz\nG1LBc88xswlx3n/VzLqWdqyZ/WRmO1TkvuWMcZyZnZfu+0j2UyKQasXMFprZL9GX6RIzG2RmDTId\nV0nufpS7Dy7jvQbuvgAgiv+2it4nFX8PM2tpZm5mdSoah2Q3JQKpjo5z9wZAO6AAuL7kARbkyr/f\nhH8PkcrIlf+RpAZy98XAq8Du8HtVSB8zexdYBexgZtuY2Sgz+9bMPjGz80tcZjMzG2pmP5rZVDNr\nU/yGmV1rZp9G7801s5NKnGtm9rCZfW9mH5nZYTFvlFktE/363snMegBdgKujX/QvmtlVZvZcieMf\nNLO+5f17lLhGLTO73swWmdlSM3vKzBpGb4+Pnr+L4uiQ6F6SW5QIpNoys+2Bo4FpMbvPAnoAmwOL\ngP8CXwLbAKcAt5vZoTHHnwAMBxoBTwPPm1nd6L1PgQOBhsDNwBAzax5z7j7RMU2AG4ERZtYo2fjd\nfQBQCNwVVRcdBwwBOpnZFtFnrAN0Bp5KdL0y/h7FzokeHYEdgAbAw9F7B0XPW0RxvJfsZ5DcoEQg\n1dHzZvYdMAF4G7g95r1B7j7H3dcCWwP7A9e4+6/uPh14Ajg75vgp7v6su68B7gM2A/YFcPfh7v6V\nu69z96HAfGDvmHOXAg+4+5ro/Y+BYyrzwdz9a8Iv9FOjXZ2A5e4+Jc5p8f4exboA97n7Anf/CbgO\n6Kx2AUmG/pFIdXSiu79RxntfxLzeBvjW3X+M2beIUI++0fHuvs7MiksPmNnZwBVAy+iQBoRf/8UW\n+4azMi4qPreSBgM9gceBM4H/JDg+3t+j2DaE+IotIvz/3ayiQUruUIlAaprYL+avgEZmtnnMvhbA\n4pjt7YtfRI3L2wFfmVke4Yv4EqCxu28BzAYs5txtzSx2u0V0z4rGW+x5oLWZ7Q4cS6g+qqyvgLyY\n7RbAWmBJGTGI/E6JQGosd/8CmAj8y8w2M7PWQHdCPXyx9mZ2clRFcjnwG/A+UJ/wBbkMwMzOZeNG\n2K2Ay8ysrpmdCuwKvFLOMJcQ6uxj4/4VeJbQZjHJ3T8v5zVL8wzwdzNrFXUvvR0YGlWhLQPWlYxD\npJgSgdR0pxOqdr4CRgI3lqhGeQH4G7CS0NB8clTnPxe4F3iP8GW9B/BuiWt/AOwMLAf6AKe4+4py\nxjcQyDez78zs+Zj9g6N7JqoWStaT0bXGA58BvwKXArj7KkL870Zx7Juie0qWMC1MI1L1zKwF8BGw\ntbv/kOl4JLepRCBSxaK2iiuA/yoJSHWgXkMiVcjM6hOqohYRuo6KZJyqhkREclxaq4bM7O9mNsfM\nZpvZM1HPjkZm9rqZzY+et0xnDCIiEl/aSgRmti1hJGS+u/9iZsMIXe/yCYOA7jCza4Et3f2aeNdq\n0qSJt2zZMi1xiohkqylTpix396aJjkt3G0Ed4A9mtgaoR+jidx1wSPT+YGAcEDcRtGzZksmTJ6cv\nShGRLGRm
ixIflcaqoWimxHuAz4Gvge/dfQzQLJpvBeAbyhgCb2Y9zGyymU1etmxZusIUEcl5aUsE\nUd3/CUArwjwo9c3szNhjonlcSq2bcvcB7l7g7gVNmyYs2YiISAWls7H4cOAzd18Wzfw4AtgPWFI8\n1W/0vDSNMYiISALpTASfA/uaWb1o4q7DgHnAKKBrdExXwhQAIiKSIWlrLHb3D8zsWWAqYRbEacAA\nwlS/w8ysO2FQzWnpikFERBJLa68hd7+RsLJTrN8IpQMREakGNNeQiEiOUyIQEamOVqyAyy+H779P\n+62UCEREqhN3GD4c8vOhXz8YPz7tt1QiEBGpLr7+Gk4+GU47DbbfHqZMgeOOS/ttlQhERDLNHZ58\nEnbdFUaPhrvugvffh9atq+T2Wo9ARCSTFiyAHj1g7Fg46CB44gnYeecqDUElAhGRTCgqggcegD32\ngEmT4NFH4a23qjwJgEoEIiJVb+5c6N49VP8cfTT07x/aBDJEJQIRkaqyejXceivsuSfMnw9DhsBL\nL2U0CYBKBCIiVWPy5FAKmDkTOneGvn1hq60yHRWgEoGISHqtWgVXXw377APLl8MLL8Azz1SbJAAq\nEYiIpM/bb8N558Enn8D558Pdd0PDhpmOaiMqEYiIpNoPP0DPnnDIIbBuXegaOmBAtUwCoEQgIpJa\nL78Mu+0WvvivuAJmzYJDD810VHEpEYiIpMLy5XDmmXDsseGX/8SJcO+9UK9epiNLSIlARKQy3OG/\n/w3TQwwbBjfdBFOnhsbhGkKNxSIiFbV4cWgLePFF2HtvGDgQdt8901GVm0oEIiLl5Q6PPx6min7j\njVAFNHFijUwCoBKBiEj5fPpp6Ar61lvQsWNICDvumOmoKkUlAhGRZBQVwX33hUnipkwJvYLGjq3x\nSQBUIhARSWz27DA9xKRJYaGYRx+FbbfNdFQpoxKBiEhZVq8OvYDatYPPPgu9g154IauSAKhEICJS\nukmToFs3mDMHunQJawc0aZLpqNJCJQIRkVirVsE//gEdOsD334dpoocMydokACoRiIis99ZbYZK4\nBQvgwgvhzjvhj3/MdFRppxKBiMj334d1gw89FGrVgnHjQoNwDiQBUCIQkVw3alQYGDZwIFx1FcyY\nAQcfnOmoqpQSgYjkpqVLw0phJ5wAjRvDBx/AXXfViEniUk2JQERyizsUFoZSwMiRYQ3hyZOhoCDT\nkWWMGotFJHd88UWYJO7ll2HffUN1UH5+pqPKuLSVCMzsT2Y2Pebxg5ldbmaNzOx1M5sfPW+ZrhhE\nRICwSlj//mHBmLfeCmMCJkxQEoikLRG4+8fu3tbd2wLtgVXASOBaYKy77wyMjbZFRNJj/vzQG6hn\nz7BGwOzZ0KsX1K6d6ciqjapqIzgM+NTdFwEnAIOj/YOBE6soBhHJJWvXhsXiW7eG6dNDNdCYMdCq\nVaYjq3aqqo2gM/BM9LqZu38dvf4GaFZFMYhIrpgxI0wSN2UKnHgi9OsH22yT6aiqrbSXCMxsE+B4\nYHjJ99zdAS/jvB5mNtnMJi9btizNUYpIVvjtN/i//ws9gL74IiwdOWKEkkACVVE1dBQw1d2XRNtL\nzKw5QPS8tLST3H2Auxe4e0HTpk2rIEwRqdHeew/23BNuuw3OOAPmzoVTTwWzTEdW7VVFIjid9dVC\nAKOArtHrrsALVRCDiGSrn3+Gyy+H/feHn36CV16BwYPDIDFJSloTgZnVB/4CjIjZfQfwFzObDxwe\nbYuIlN8bb4R1gvv2hYsuClNGH3VUpqOqcdLaWOzuPwONS+xbQehFJCJSMStXwpVXwpNPwi67wPjx\ncOCBmY6qxtIUEyJSs4wcGQaCDR4M114beggpCVSKppgQkZphyRK49FIYPhzatg3TRLRrl+mosoJK\nBCJSvbnDU0/BrruG9YL79AnLSCoJpIwSgYhUX59/DkcfDV27hkQwYwb885
9Qt27c0woLoWXLsMZM\ny5ahHTnedmFh6ecV70/0Xrx7l3VcIqm6TlLcvdo/2rdv7yKSQ4qK3B9+2L1BA/f69d0feijsS8KQ\nIe716rmHokRyj3r13Hv23Pi8evXC9Uq7ZvF7ie5d2nEV+QwVuQ4w2ZP4jrVwbPVWUFDgkydPznQY\nIlIVPv44rBs8YQIccQQ89lj4SZykli1h0aLy37Z2bSgq2nh/Xl54Lu2aeXmwcGHie5c8LpFUXcfM\nprh7woUWVDUkItXDmjVwxx3Qpk0YDzBoEIweXa4kAKE2qSJKSwLF1yvrmiX3J3tcIqm6TrKUCEQk\n86ZNC1NEX3cdHHtsmB6ia9cKTQ/RokXFQihrVuoWLcq+Zsn9yR6XSKqukywlAhHJnF9/hd69Ya+9\n4Kuv4Nlnw2PrrSt8yT59yr/scL160KPHxufVqxeuV9o1i99LdO/SjkskVddJWjINCZl+qLFYJAtN\nmOD+pz+FltBzznFfsSJllx4yxD0vz90sPPfsGX+7uBG25HmxjbPx3ot37/I28KbyOqixWESqpR9/\nDF1A+/ULdR0DBoRGYUk5NRaLSJUqLIQmTUK1fslH7drhuZO9xqI/7s66h/vxoF9Kg0WzqXP0EZhB\nnTps8NykyfrrldxXq1bZr9Pe5z4LaYoJEam0wkI499zQ8ac0Ddd9y31cwTkMZh5/5kDeYSL7hzej\n3jpFJZ5XrFh/fmn7ynq9aFGo7wfo0qVinyfXqEQgIpXWu3fZSeBknmMu+ZzJEG6jN3sybX0SSJNV\nq0JMkhyVCESk0krr3741X/Mwl/BXRjCVPenEaGbQNqMxSelUIhCRStuwf7vTlUHMJZ9jeJlruIO9\nmVSlSWDjmCQeJQIRqbQ+fcI8cHks5DWOZBDnMos9aM1M7uIaiqq48iGtfe6zkBKBiFRal85FTOz8\nILPZnQ68x0X04xDGMZ9dgNCbBzYcKFy8r3hEb8nnxo3XLztccp9Z2a/z8kKPVDUUJ09tBCJSOfPm\nwXnnUTBxInTqBI89xiMtWvBIpuOSpKlEICLlUjxeoK6tobf14bf8tqyY+BFn8RRNJr1C4TuqnK9p\nVCIQkaQVjxfYfc1UnqQbbZnBUE7jMh5kKc3gW+jWLRyrqpmaQyUCEUnaLdf9wi1rrmUSe7MVSzmR\nkXRmaEgCkdWr1Ye/plGJQESSM348L35xHrswnyfozpXcw/dsUeqh6sNfs6hEIJLl4s0BlMzjj/YD\n/exiOPhg6rCWw3iD83mizCQA6sNf06hEIJLFEs0BlEgnXuUxLmA7vuR+Lud6bmMV9eOes8km6sNf\n05SrRGBmW5pZ63QFIyKpFW8OoHgasYLBnM2rHM2PbM5+TOQK7k+YBBo3hiefVENxTZOwRGBm44Dj\no2OnAEvN7F13vyLNsYlIJZW/rt45leE8zCVsyUpu4f/oQ29Ws2mZZ5jBunWVClMyLJkSQUN3/wE4\nGXjK3fcBDk9vWCJSGYWF0KABlGfdqeZ8xUhOYhh/YxF5tGcKN3JL3CQAag/IBskkgjpm1hw4DXgp\nzfGISCUVFsLZZ8PPPyd7htONgcwlnyN5jSu5mw68xywS1wJrTp/skEwiuAV4DfjU3T80sx2A+ekN\nS0Qqqnfv5KtqWrGANzicgZzHdNqyB7O4lyvjThJXPEeQ5vTJHlqzWCTL1KoVv0rIDNatKYKHHgpZ\no3ZtuPtuOP/89d/ykhVStmaxme1iZmPNbHa03drMrk8yiC3M7Fkz+8jM5plZBzNrZGavm9n86HnL\nZK4lImWLHSuQ6LfdYVvPgf33h7//HTp2hLlz4YILlARyWDL/5R8HrgPWALj7TKBzktfvC4x29z8D\nbYB5wLXAWHffGRgbbYtIBRWPFYhdt7c0dVnNjbVuYfTSPeGTT8KJL74I221XNYFKtZXMgLJ67j7J\nYicSh7WJTjKzhsBBwDkA7r4aWG1mJw
CHRIcNBsYB1yQdsYhsIJmxAgV8yL9rdWf3dbPg9NOhb19o\n2rRqApRqL5kSwXIz2xFwADM7Bfg6ifNaAcuAf5vZNDN7wszqA83cvfj8byBmtqoYZtbDzCab2eRl\ny5YlcTuR3BRvrMAfWMVdXMX77Mvuzb+FUaPg6aeVBGQDySSCi4HHgD+b2WLgcqBnEufVAdoBj7r7\nnsDPlKgG8tBSXWqNprsPcPcCdy9oqn+0kmUqO/9P7KOsNoGDGccM2nAV9zC0wXkwZw4cd1zVflCp\nERImAndf4O6HA02BP7v7Ae6+MIlrfwl86e4fRNvPEhLDkmhcAtHz0gpFLlJDJVunX1F/5Hse5ULG\n0RHDOaLOm3j/x6Bhw/TcUGq8ZKaYuKHENgDufku889z9GzP7wsz+5O4fA4cBc6NHV+CO6PmFioUu\nUjNVdP6fZBzDS/TnQprzNffwD/o2uoU7Hqynvv4SVzKNxbHjEzcDjiX0/knGpUChmW0CLADOJZRC\nhplZd2ARYcSySM5Ix1z9TVhGX3pxBs8wi935KyP4wPfmytTfSrJQwkTg7vfGbpvZPYSRxgm5+3Sg\ntMEMhyUVnUgWatECFi1K1dWczvyXB7mMhnzPDdzMHVzLNnmbpOoGkgMqMoKkHqCOxyIV1KcP1K1b\n+etsy5eM4nie4QwWsAPtmMqt3IDX2UTz/0i5JNNGMIv1PXtqExqN47YPiEjZiuvre/WqWIOxsY7z\neIK7uYq6rOHv3MeDXMY6atOgAfTvr/l/pHwSzjVkZnkxm2uBJe6ecEBZKmmuIZHIJ5+EOYHGjQvT\nQzz+OOy4Y6ajkmqq0nMNRXMCNQJ+jHn8Avwx2i8iZbjoojB1TzLjAJo0CV1K41q7Fu69F1q3hqlT\nQwIYO1ZJQFIiXtXQFEKVkJXyngM7pCUikRruoovg0UeTP37FCujWLbwutUpn1izo3h0+/BCOPx4e\neQS23TYlsYqApqEWSbk6daCoqPzn5eXBwoUxO377DW6/PTy23DJMG33aaaEYIZKEZKuGkhlHQDRV\n9M6EcQQAuPv4iocnkr0qkgSgxPiCDz4IpYA5c+DMM+H++0MdkkgaJNNr6DygF6HL6HRgX+A94ND0\nhiZSM9WuXbFk0KIFYX3J//s/eOCBUP3z0ktwzDEpj1EkVjLjCHoBewGL3L0jsCfwXVqjEqnBevQo\n/zmbbAJPnPFmaAy+/3648MJQGlASkCqQTNXQr+7+q5lhZpu6+0dm9qe0RyZSQz3ySHju3z/xamEA\nrbb8jjFtrmKnfz0BO+0UuoYefHBaYxSJlUyJ4Esz2wJ4HnjdzF4gzBEkIiUUdxt99NGQBBo0gCFD\nwutSH8+/wILN8tlp/JNw9dUwc6aSgFS5ZOYaOil6eZOZvQU0BEanNSqRGqi0bqM//QTnnBNeb9A1\ndOlSuOwyGDo0VAeNGgUFCTt3iKRFvAFlr5jZmWbWoHifu7/t7qOiZSdFJMaAAaXvX7s2TD0NhGLA\nkCGw664wciTceitMnqwkIBkVr2roMeAY4DMzG2ZmJ0XTSYtIKeL1FPr8c+CLL+DYY+Gss2CXXWDa\nNLj++tTMQCdSCWUmAnd/wd1PB/KA54Czgc/N7N9m9peqClAkk8ozVURZjHX8c8tHYbfdQkPwAw/A\nhAmQn19ln0MknmTaCFYBQ4GhZtYaGExICrXTHJtIRpV3qojS7Mz/GMh5HPjtO3D44aH+qFWr1AQo\nkiIJew2ZWTMzu9TM3iX0HHqNsPawSFYrq84/GbVZy1XcxQzasHe9WfDkkzBmjJKAVEtllgjM7Hzg\ndOBPhKqhq9x9YlUFJpJpFZ0qojUzeJJutGcqIziJkz/pB82bpzY4kRSKVzXUAfgXMNbd11VRPCLV\nRq1asK4c//I34Teu5zau5Q6+pRGnMJzJLf7Kyc01SZxUb2UmAnfvVpWBiFQnCdcHKGFf3mMg3cln\nHo
M5myu4jx/qNGbQ7emJTySVKrJmsUjW6907udJAfX7ifi7nXfanPj/TiVc5h8GsbtCYQYO0ZKTU\nDElNQy2SazaYEjqGWUyCeP31MMPcwoVwySXk3X47ozffvKpCFEmZhEtVlvWoyiBFUqWwMEzrn2hM\nQFmTxbVoAaxcGZYUO+II2HRTeOedsGiMkoDUUMkuVdkCWBm93gL4HFA/OKlRCgvh3HNhzZqKnV+n\nDgw+cSTkXwTLlsF118ENN8BmmyU+WaQaizeyuJW77wC8ARzn7k3cvTFwLDCmqgIUSZXevSueBJrx\nDU+vPZWD+54MW28NkyaFJSSVBCQLJNNYvK+7v1K84e6vAvulLySR9Cir3j8+52wGM5d8juPF8OU/\naRK005hKyR7JJIKvzOx6M2sZPXoDX6U7MJFUa9GinMeziFc5isGcwzx2pX2t6aE6SJPESZZJJhGc\nDjQFRgIjotenpzMokXTo0ye573BjHRfzMHPYjQOYwCU8xIG8w8EX/Dn9QYpkQDKTzn0L9DKz+u7+\ncxXEJJIWxX36e/WCFStKP2YXPmYg3TmAdxnNkVzAY3xZK48LL1i/BKVItklm0rn9zGwuMC/abmNm\n+l9CaqQuXWD58lKWjFy9Br/9X3y8aRsO2HIuDBpEp3WvssjzKCpSEpDslkzV0P3AkcAKAHefARyU\nzMXNbKGZzTKz6WY2OdrXyMxeN7P50fOWFQ1epDwuuih0ATULzxddFL0xbRrssw/8859w3HEwdy50\n7Rp/kQGRLJLUFBPu/kWJXeWZl7Gju7d19+K1+K4lTGS3MzA22hZJq+K1BYpnFC0qgicf/ZXR7f4J\ne+0FX30Fzz0Hw4eH7qEiOSSZRPCFme0HuJnVNbMriaqJKugEwuI2RM8nVuJaIkkpubbA/kxgBm3o\nNO1fcPbZMG8enHxyZoITybBkEsGFwMXAtsBioG20nQwH3jCzKWbWI9rXzN2/jl5/AzQr7UQz62Fm\nk81s8rJly5K8nUjpiksCDfiRh7iECRzIJqzmCF4Li8ZsqRpKyV1xE4GZ1QbOcvcu7t7M3bdy9zPd\nvYw+Fxs5wN3bAkcBF5vZBm0L7u6EZLERdx/g7gXuXtC0adMkbye5KtEcQgBH8Bqz2Z2LeIS+XMYe\nzOLN2kdkNnCRaiBuInD3IuCMil7c3RdHz0sJ4xD2BpaYWXOA6HlpRa8vAuvnECqrS+iWfMsguvIa\nnVhFPQ5gApfTl59pQI8epZ8jkkuSqRqaYGYPm9mBZtau+JHoJDOrb2abF78GjgBmA6OArtFhXYEX\nKhi7CBBvDiHnrzzLPHblDJ7mNnqzJ9N4L5ohpX59dQsVgeTWI2gbPd8Ss8+BQxOc1wwYaaFcXgd4\n2t1Hm9mHwDAz6w4sAk4rX8giGyptDqGt+Zp+XMzJjGQK7TiS15jx+z/lYNWqKgpQpJpLZmRxx4pc\n2N0XAG1K2b8COKwi1xQpTYsWsGhR8ZZzDoO4jyvYjF+5mju5jysoKuWfennnHhLJVsmMLG5mZgPN\n7NVoOz/6NS9SLRTPIdSSzxjDEfybbsxiD9owg7u5utQksMkm4TwRSa6NYBDwGrBNtP0/4PJ0BSRS\nXl06FzGx84PMZnf25X168giHMI757FLq8Y0bhx6jWk9YJEgmETRx92HAOgB3X0v5RhaLpM+8eXDg\ngRT8pxf1jzqYzRfN4VHvyTqvtfF8QtFj+XIlAZFYySSCn82sMVF/fzPbF/g+rVGJJLJmDdx2G0Wt\n2/Lt+x9zJv/BXn2ZJu1aUFiY6eBEapZkeg1dQejyuaOZvUtYj+CUtEYlEs+UKWHx+JkzGWGncbE/\nxDK2AsJYgm7dwmH61S+SnIQlAnefChxMWJ7yAmA3d5+Z7sBENvLLL3DNNbD33rBsGT2ajuQ0H/p7\nEii2enUYWyAiySmzRGBmZc3AtYuZ4e4j0hSTyMbGj4fzzoP588Pz
3XfzRKMtyjy8YusTi+SmeFVD\nx0XPWxFKA29G2x2BiYRlK0XS64cf+N/J17LL2EdZQCvO5w3efOIweCL+aRojIJK8MhOBu58LYGZj\ngPziGUOj+YEGVUl0ktteeYWfz7qAnb5dzH38nf/jVlZRP+FpdepojIBIeSTTa2j7mGmjAZYA+r0l\n6bN8OZx5JhxzDIt//CP7MZF/cF9SSQCgYUM1FIuURzK9hsaa2WvAM9H234A30heS5Cx3GDYMLr0U\nVq6EG26g9S3/5Dc2Lddlvv02TfGJZKlk5hq6xMxOYv06xQPcfWR6w5Kc89VX0LMnjBoFBQXwxhvQ\nujUN+sFvya5+EVH7gEj5xE0E0cI0b0QTz+nLX1LPHQYOhCuvhN9+g3vugV69oE4dCgvhhx/KdznN\nISRSfsksTLPOzBpWUTySSxYsgMMPh/PPh7ZtYdYs+Mc/Qmsv8dYZKJ3mEBKpmGTaCH4CZpnZ68DP\nxTvd/bK0RSXZragIHnwwfNPXqQOPPRbGBtTa8HdJWWMBzGDduiqIUyRHJJMIRqAxA5IiL90xm+1u\n7E7b1ZN4iWO4kP4svmC7MGY9SWoDEEmtZBLBUGCn6PUn7v5rGuORbLV6NTNP/xdHjOjD9zTkdJ7m\nv3QGrFyXURuASOqV2UZgZnXM7C7gS2Aw8BTwhZndZWZ1qypAyQIffgjt29N6xE0M51Tymct/OZ3y\nJgGAzTdXG4BIqsVrLL4baAS0cvf27t4O2BHYArinKoKTGm7VqtAbaN99YeVKjmMUZ1LIcppW+JIa\nIyCSevGqho4FdnF3L97h7j+YWU/gI6BXuoOTGmzcuNAA/OmncMEFDGt/Jy9f0DBa1aLi1D4gknrx\nEoHHJoGYnUVmVsn/nSVrff89XH01DBgAO+4Ib74JHTtydcswZKAy1D4gkh7xqobmmtnZJXea2ZmE\nEoHIhl58EfLz4YknQpXQzJnQsSNQ+WmhNUZAJH3ilQguBkaYWTdgSrSvAPgDcFK6A5MaZNmyMBr4\nmWdgjz3g+edhr71+f7uwMPT9L61EkJcHCxdWXagisrF401AvBvYxs0OB3aLdr7j72CqJTKo/9/Dl\nf9llYS6Im2+Ga68NdTiRwkI499zSB4Cpqkekekhm0rk3Wb8ojUjw5ZdhkriXXoJ99gnzBe2220aH\nxZsmQl1BRaqHZNYjEFlv3bowJUR+PowdC/fdB+++W2oSgPhtA+oKKlI9KBFI8ubPh0MPhQsvZPwv\ne7HDL7OxK/6O1amNGaU+4vUUUldQkepBiUASW7s2TA/dujWrJ03jgtqPc/DaN/iMHSp8SbUPiFQf\nycw1JLls5kzo3h0mT4bjj+fAKY8wafG2lbpkrVrqCipSnahEIKX77Te48UZo3x4WLYKhQ+H55/nw\nq8olAQjVRUoCItVH2hOBmdU2s2lm9lK03cjMXjez+dHzlumOQcrp/fehXTu45Rbo3Jnht8yjyUWn\nYbWs0qOOY17NAAAPRUlEQVSDQW0DItVNVZQIegHzYravBca6+87A2GhbqoOff4YrroD99gvjAl5+\nmcJO/6HLZY1ZUc51g8uitgGR6ieticDMtgOOAZ6I2X0CYVproucT0xmDJGns2DAq+P774cILYc4c\nOProci8XGY+miRCpntLdWPwAcDWwecy+Zu7+dfT6G6BZaSeaWQ+gB0AL1SWkz3ffhXmBBg6EnXeG\nt9+Ggw76/e1k5gjS0pEiNVvaSgRmdiyw1N2nlHVMNLtpqbXO7j7A3QvcvaBp04rPXy9xvPACq1rl\ns3bgIO7gGhp8OgM7+CDq1Alf7rVqJTdjqPK0SM2WzhLB/sDxZnY0sBnwRzMbAiwxs+bu/rWZNQeW\npjEGKc2SJWF+oGHD+MTacC4vMpX2EP2qLyoKz8kkgXr1VOcvUtOlrUTg7te5+3bu3hLoDLzp7mcC\no4Cu0WFdgRfSFYOU4A7/+U+Y
HuL557l7i9to7x+GJFAOtaJ/NXl5YdkB1fmL1GyZGFB2BzDMzLoD\ni4DTMhBD7vn889AI/Oqr0KEDDBzINbvtWu4Fw8zWlxhEJDtUSSJw93HAuOj1CuCwqrivEFpx+/eH\na64Jr/v2hYsvhtq1adSIcncLVXuASPbRFBPZ7H//C+sGv/MOHH54qMdp1QoI6wT88EP5LqcxACLZ\nSVNMZKO1a+HOO6F1a5g1K3TeHzPm9yQA8dcJqF07PJut36cxACLZSyWCbDNjBnTrBlOnwkknQb9+\n0Lz5RoeVNT7ALOQREckdKhFki19/heuvh4ICWLwYnn0WRoyg8M3mtGwZvuCLxwc0abLhr/1YagMQ\nyT0qEWSDiRPDVNEffQRdu4ZVwxo1orAQevSAVavCYcW9fcpqINaYAJHcpBJBTfbTT2Fg2AEHhG/7\n0aNh0CBo1AgI7QDFSSCR2rU1JkAkVykR1FRjxsDuu8PDD4fuoLNnw5FHbnBIMvMEFVu3TklAJFcp\nEdQ0K1fCueeGL/3NNoPx4+Ghh2DzzSkshJYtw8jfeO0ApVHbgEjuUhtBTTJiRPj1v2wZXHcd3HBD\nSAawUXtAeQaKqW1AJLepRFATfPMNnHIK/PWvsPXW8OGHcPvtvycBKF97QOPG4WGm+YJERCWC6s0d\nBg8Oq4atWhW+/K+8EurW3ehQrRsgIhWlEkF1tXAhdOoU2gPy82H69FAdFCWBirQHqB1AREqjEkF1\ns25dGA183XXh2/3hh6Fnz/VzP1Ox9gC1A4hIWVQiqE4++igsE1k8NmD27NA4XGvD/0zJtAeYqR1A\nRJKjEkF1sGYN3H033Hwz1K8f2gXOOqvM+p5kxwcsX57CGEUka6lEkGlTp8Lee4ef+ccfD/Pmwdln\nx630T6auX+0BIpIsJYJM+eWX0A6w996he+hzz8Hw4dCsWcJT+/QJdf5lUXuAiJSHEkEmTJgAbdvC\nHXeEX/9z58LJJyd9epcuoc4/L299W4DaA0SkotRGUJV+/DGUAvr1C30/x4yBv/ylQpfq0kVf9iKS\nGioRVJXRo8MkcY88Ar16MfT6WbQ8/y/UqhVyQmHhhmMDWraEiy6Kv11YmMHPIyJZw9w90zEkVFBQ\n4JMnT850GBWzYkUYGfzUU7DrrjBwIIULOmwwDgDCODEzWL06+UvXq6dqIBEpm5lNcfeCRMepRJAu\n7qHxNz8fnn46rB42bRp06FDqOIA1a8qXBCBco3fv1IUsIrlJbQTp8PXXoR7n+eehffvQFtCmze9v\nl2edgERSeS0RyU0qEaSSOzz5ZKgCGj0a7rwT3n9/gyQAqe3jr/ECIlJZSgSp8tlncMQRYe3gNm1g\nxgy4+uqwYnwJpY0DqFsXNtmkfLfUeAERSQUlgsoqKoK+fUOPoA8+gEcfhbfegl12KfOUkuMA8vLg\n3/8OhYnYfT17xt9WQ7GIpIJ6DVXG3LmhBPD++3DUUfDYY7D99pmOSkQEUK+hjfrkV7TPfanXWb0a\nbr0V9twT5s+HIUMoPONlWh64fan3S1UsIiJp4e7V/tG+fXsvjyFD3OvVcw+tt+FRr17YX9nr7L/p\nh/5ti9Zh429/c1+yJO79UhWLiEh5AZM9ie/YrKwaatkSFi3aeH9eXlj4qyLX2YxfuJkb+Qf3sqz2\n1mz93CNwwgkJ7wepiUVEpLwyXjVkZpuZ2SQzm2Fmc8zs5mh/IzN73czmR89bpvreZfWtL2+f++Lj\nD+JtZtKaq7mbgXRn16I5vyeBRPdLVSwiIumSzjaC34BD3b0N0BboZGb7AtcCY919Z2BstJ1SZfWt\nL2+f+/ztfuARevI2h1CLdRzKWC5gAA3ztkj6fqmKRUQkXdKWCKIqqp+izbrRw4ETgMHR/sHAiam+\nd2n99Mvd5/7ll/ng593owQDu5QpaM5O3OLTU68S7X0piERFJp2QaEir6AGoD04GfgDujfd/FvG
+x\n2yXO7QFMBia3aNGi3I0kQ4a45+W5m4XnpBtnly1z79IltOrm5/urN72f1HXi3a/CsYiIVALVqbHY\nzLYARgKXAhPcfYuY91a6e9x2gioZR+AOQ4fCpZfCd9+F2dyuuw423TS99xURSZOMNxbHcvfvgLeA\nTsASM2sOED0vrYoY4lq8GE48EU4/HVq1CusI33STkoCI5IR09hpqGpUEMLM/AH8BPgJGAV2jw7oC\nL6QrhoTc4fHHw1TRr78O99wD770He+yRsZBERKpaOqehbg4MNrPahIQzzN1fMrP3gGFm1h1YBJyW\nxhjK9umncP75YV6gQw4JCWGnnTISiohIJqUtEbj7TGDPUvavAA5L130TKp4k7vrrw5Sfjz0G550X\n5n8QEclBubUwzezZYZK4SZPg2GPDTKHbbZfpqEREMio3fgavXg033wzt2sGCBWHpyFGjlARERMiF\nEsGkSaEUMHs2nHEGPPAANG2a6ahERKqN7C4R3HYbdOgAK1fCiy+G+Z+VBERENpDdiWDHHUPPoDlz\nQpuAiIhsJLurhk4/PTxERKRM2V0iEBGRhJQIRERynBKBiEiOUyIQEclxSgQiIjlOiUBEJMcpEYiI\n5DglAhGRHFclS1VWlpktI6xdkI2aAMszHUQa6fPVfNn+GbP58+W5e8J5dWpEIshmZjY5mTVFayp9\nvpov2z9jtn++ZKhqSEQkxykRiIjkOCWCzBuQ6QDSTJ+v5sv2z5jtny8htRGIiOQ4lQhERHKcEoGI\nSI5TIsgwM7vbzD4ys5lmNtLMtsh0TKlmZqea2RwzW2dmWdNNz8w6mdnHZvaJmV2b6XhSzcyeNLOl\nZjY707Gkg5ltb2Zvmdnc6N9nr0zHlClKBJn3OrC7u7cG/gdcl+F40mE2cDIwPtOBpIqZ1Qb6AUcB\n+cDpZpaf2ahSbhDQKdNBpNFa4B/ung/sC1ychf8Nk6JEkGHuPsbd10ab7wPbZTKedHD3ee7+cabj\nSLG9gU/cfYG7rwb+C5yQ4ZhSyt3HA99mOo50cfev3X1q9PpHYB6wbWajygwlguqlG/BqpoOQpGwL\nfBGz/SU5+iWSDcysJbAn8EFmI8mM7F68vpowszeArUt5q7e7vxAd05tQVC2sythSJZnPKFIdmVkD\n4Dngcnf/IdPxZIISQRVw98PjvW9m5wDHAod5DR3YkegzZqHFwPYx29tF+6QGMbO6hCRQ6O4jMh1P\npqhqKMPMrBNwNXC8u6/KdDyStA+Bnc2slZltAnQGRmU4JikHMzNgIDDP3e/LdDyZpESQeQ8DmwOv\nm9l0M+uf6YBSzcxOMrMvgQ7Ay2b2WqZjqqyogf8S4DVCI+Mwd5+T2ahSy8yeAd4D/mRmX5pZ90zH\nlGL7A2cBh0b/7003s6MzHVQmaIoJEZEcpxKBiEiOUyIQEclxSgQiIjlOiUBEJMcpEYiI5DglAqlS\nZtY4pqveN2a2OHr9nZnNreJY2sZ2FzSz4ys6i6iZLTSzJqmLrlz3PsfMtonZfqJ48rRMxiU1hxKB\nVCl3X+Hubd29LdAfuD963RZYl+r7mVm80fNtgd8TgbuPcvc7Uh1DFTgH+D0RuPt57l6lSVVqNiUC\nqU5qm9nj0dzwY8zsDwBmtqOZjTazKWb2jpn9Odrf0szejNZyGGtmLaL9g8ysv5l9ANxlZvWjufUn\nmdk0MzshGg18C/C3qETyt+iX9cPRNZpF60PMiB77Rfufj+KYY2Y9En0gMzvXzP4X3fvxmOsPMrNT\nYo77KXpuEH2WqWY2y8xOiPms80r+faJrFACF0ef4g5mNK23dBzM7M4pjupk9Zma1o8cgM5sd3e/v\nlfjvJzWUEoFUJzsD/dx9N+A74K/R/gHApe7eHrgSeCTa/xAwOFrLoRB4MOZa2wH7ufsVQG/gTXff\nG+gI3A3UBW4AhkYllKElYnkQeNvd2wDtgOJRw92iOAqAy8
yscVkfxsyaAzcTRrAeQFi3IJFfgZPc\nvV0U673RVAil/n3c/VlgMtAl+hy/lBHLrsDfgP2jElgR0IVQKtrW3Xd39z2AfycRo2QZTTon1cln\n7j49ej0FaBnNDLkfMHz99yGbRs8dCAveAPwHuCvmWsPdvSh6fQRwvJldGW1vBrRIEMuhwNkA0XW+\nj/ZfZmYnRa+3J3w5ryjjGvsA49x9GYCZDQV2SXBfA243s4MIVWXbAs2i9zb6+yS4VqzDgPbAh9Hf\n8Q/AUuBFYAczewh4GRhTjmtKllAikOrkt5jXRYQvq1rAd9Gv2PL4Oea1EX49b7A4jpntU54Lmtkh\nwOFAB3dfZWbjCEmlItYSlcjNrBawSbS/C9AUaO/ua8xsYcw9Svv7JB0+ofS00Qp4ZtYGOBK4EDiN\nsC6G5BBVDUm1Fs0P/5mZnQphxsjoiwtgImHWTwhfoO+UcZnXgEuLq1jMbM9o/4+ECf9KMxboGR1f\n28waAg2BlVES+DNhecN4PgAOjnpK1QVOjXlvIeEXOsDxhKoqonssjZJARyAvwT0SfY7Yz3OKmW0V\nfaZGZpYX9Siq5e7PAdcTqsEkxygRSE3QBehuZjMIdfXFS0JeCpxrZjMJs0iWtfj4rYQv2plmNifa\nBngLyC9uLC5xTi+go5nNIlTD5AOjgTpmNg+4g7C0aJnc/WvgJsIMnu8SZikt9jghScwgVHEVl2AK\ngYLovmcDH8W7R2QQ0L+4sbiMWOYSvujHRH+v14HmhKqncWY2HRhCdq6ZLQlo9lGRKmJhAaICd78k\n07GIxFKJQEQkx6lEICKS41QiEBHJcUoEIiI5TolARCTHKRGIiOQ4JQIRkRz3/1k9DzO9rjhdAAAA\nAElFTkSuQmCC\n", 525 | "text/plain": [ 526 | "" 527 | ] 528 | }, 529 | "metadata": {}, 530 | "output_type": "display_data" 531 | } 532 | ], 533 | "source": [ 534 | "_,_ = mrstat.qq_plot(hsb.write,dist='norm',plot=plt)" 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": {}, 540 | "source": [ 541 | "На этом графике сравниваются теотические квантили распределения (красная прямая) и квантили полученные из фактического распределения переменной (синии точки). Если точки лежат вдоль прямой - то распределение можно считать нормальным. В данном случае точки с прямой не совпадают, в основном из-за тяжелого правого хвоста. Взглянем на куку график для science." 
542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 11, 547 | "metadata": { 548 | "collapsed": false 549 | }, 550 | "outputs": [ 551 | { 552 | "data": { 553 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xm81PMex/HXp1NJlrRJaEOujlYdlGtfu7ZwCTdXElGh\nZItc2coe2UopRQeFUtfSItK1plUbsnQiWiVLSJ3P/eP3m5pOZ5lTZ5Zz5v18POYx8/vNb37zmSPz\nme/y+3zN3RERkfRVLtkBiIhIcikRiIikOSUCEZE0p0QgIpLmlAhERNKcEoGISJpTIpAyy8xuN7OR\n2/naS8zsvUKef9PMOuR3rJn9amb7bc/7FjPGqWZ2WbzfR8o+JQJJKWa2xMx+D79MV5jZcDPbNdlx\n5eXu/3D3EQU8t6u7fw0Qxn/39r5PSfw9zKy+mbmZld/eOKRsUyKQVHSGu+8KHAJkAbfmPcAC6fLv\nt8i/h8iOSJf/kaQUcvdlwJtAY9jcFdLXzN4H1gP7mdneZjbezH40sy/N7PI8p6lkZqPM7Bczm2Vm\nzSJPmFkvM/sqfG6hmZ2d57VmZo+b2Toz+8zMToh6osBumfDX9wFm1hloD9wY/qL/r5ndYGav5Dn+\nUTMbUNy/R55zlDOzW80sx8xWmtmzZlYlfHpaeP9TGEfrot5L0osSgaQsM6sDnArMjtr9b6AzsBuQ\nA7wIfAfsDZwL9DOz46OObwu8BFQDngdeNbMK4XNfAUcBVYA7gJFmVjvqtYeHx9QA+gBjzKxarPG7\n+2AgG7g/7C46AxgJtDGzPcLPWB64AHi2qPMV8PeIuCS8HQfsB+wKPB4+d3R4v0cYx4exfgZJD0oE\nkopeNbOfgPeAd4F+Uc8Nd/cF7r4R2Av4O3CTu//h7nOAp4GLo46f6e4vu/tfQH+gEtAKwN1fcvfv\n3T3X3UcBi4HDol67EnjE3f8Kn/8cOG1HPpi7/0DwC/28cFcbYLW7zyzkZYX9PSLaA/3d/Wt3/xW4\nGbhA4wISC/0jkVR0lru/VcBz30Y93hv40d1/idqXQ9CPvs3x7p5rZpHWA2Z2MdATqB8esivBr/+I\nZb51VcacyGt30AigCzAEuAh4rojjC/t7ROxNEF9EDsH/37W2N0hJH2oRSGkT/cX8PVDNzHaL2lcX\nWBa1XSfyIBxc3hf43szqEXwRXwVUd/c9gPmARb12HzOL3q4bvuf2xhvxKtDUzBoDpxN0H+2o74F6\nUdt1gY3AigJiENlMiUBKLXf/FvgAuMfMKplZU6ATQT98REszOyfsIukB/Al8BOxC8AW5CsDMOrLt\nIOyewDVmVsHMzgMaAW8UM8wVBH320XH/AbxMMGYx3d2XFvOc+XkBuNbMGoTTS/sBo8IutFVAbt44\nRCKUCKS0u5Cga+d7YCzQJ083yjjgfGAtwUDzOWGf/0LgIeBDgi/rJsD7ec79MdAQWA30Bc519zXF\njG8okGlmP5nZq1H7R4TvWVS3UKyGheeaBnwD/AFcDeDu6wnifz+Mo1UJvaeUEaaFaUQSz8zqAp8B\ne7n7z8mOR9KbWgQiCRaOVfQEXlQSkFSgWUMiCWRmuxB0ReUQTB0VSTp1DYmIpDl1DYmIpLlS0TVU\no0YNr1+/frLDEBEpVWbOnLna3WsWdVypSAT169dnxowZyQ5DRKRUMbOcoo+Kc9eQmV1rZgvMbL6Z\nvRBe9FPNzCab2eLwvmo8YxARkcLFLRGY2T7ANUCWuzcGMgiqLPYCprh7Q2BKuC0iIkkS78Hi8sDO\n4eX9lQmu
/mxLcFUl4f1ZcY5BREQKEbdEEC6i8SCwFPgBWOfuk4BaYSlegOUUUB3RzDqb2Qwzm7Fq\n1ap4hSkikvbi2TVUleDXfwOCErm7mNlF0ceEJX7zvZDB3Qe7e5a7Z9WsWeSgt4iIbKd4dg2dCHzj\n7qvCRUHGAEcAKyKrQIX3K+MYg4iIFCGeiWAp0MrMKoc13U8AFgHjgQ7hMR0IqkOKiEiSxHOM4GOC\nmuuzgHnhew0G7gVOMrPFBK2Ge+MVg4hIqbVmDfToAevWxf2t4npBmbv3IVj0O9qfBK0DERHJyx1e\nfhmuugp+/BFOOAHOOCOub6laQyIiqeKHH+Ccc6BdO6hTB2bOjHsSACUCEZHkc4dhw6BRI5gwAe6/\nHz76CJo2Tcjbl4paQyIiZdbXX0PnzjBlChx9NDz9NDRsmNAQ1CIQEUmGTZvgkUegSROYPh0GDoR3\n3kl4EgC1CEREEm/hQujUKej+OfVUGDQoGBNIErUIREQSZcMGuOsuaNECFi+GkSPhtdeSmgRALQIR\nkcSYMSNoBXz6KVxwAQwYAHvumeyoALUIRETia/16uPFGOPxwWL0axo2DF15ImSQAahGIiMTPu+/C\nZZfBl1/C5ZfDAw9AlSrJjmobahGIiJS0n3+GLl3g2GMhNzeYGjp4cEomAVAiEBEpWa+/DgcfHHzx\n9+wJ8+bB8ccnO6pCKRGIiJSE1avhoovg9NODX/4ffAAPPQSVKyc7siIpEYiI7Ah3ePHFoDzE6NFw\n++0wa1YwOFxKKBGIiGyvZcugbVu48ELYb78gAfTpAxUr7vCps7Ohfn0oVy64z87e4VMWSIlARKS4\n3GHIEMjMhLfeCrqAPvgAGjcukdNnZwflh3JygrfKyQm245UMlAhERIrjq6+CNQI6d4aWLYPB4J49\nISOjxN6id+/g8oNo69cH++NBiUBEJBabNkH//kGRuJkzg1lBU6bA/vuX+FstXVq8/TtKiUBEpCjz\n58MRR8B118GJJwZF4y6/HMzi8nZ16xZv/45SIhARKciGDcEsoEMOgW++CWYHjRsH++wT17ft23fb\nWaeVKwf740GJQEQkP9OnBwngjjuCpSMXLoTzz49bKyBa+/ZBz1O9esHb1asXbLdvH5/3U60hEZFo\n69fDf/4TLBqz995BmejTTkt4GO3bx++LPy+1CEREIt55JxgM7t8/mBW0YMHmJBDrvP5Ezv8vKWoR\niIisWwc33BBcG3DAATB1KhxzzOanI/P6I1M6I/P6Yetf7bEel2rM3ZMdQ5GysrJ8xowZyQ5DRMqi\n8eODSqHLlwezgm6/fZuR2vr1gy/1vOrVgyVLin9copjZTHfPKuo4dQ2JSHpauTJYKaxtW6heHT7+\nGO6/P98icbHO60/0/P+SokQgIunFPejDycyEsWODNYRnzICsgn84xzqvP9Hz/0uKEoGIpI9vv4Uz\nzgjKRTdsCLNnw623FlkkLtZ5/Yme/19SlAhEpOzLzYVBg4IFY955J5ga+t57QasgBrHO60/0/P+S\nErfBYjP7GzAqatd+wG3As+H++sASoJ27ry3sXBosFpHttnhxUA7i3XeD8hCDB0ODBsmOKiGSPljs\n7p+7e3N3bw60BNYDY4FewBR3bwhMCbdFRErWxo3BYvFNm8KcOTB0KEyalDZJoDgS1TV0AvCVu+cA\nbYER4f4RwFkJikFE0sXcudCqFdx4I7RpE5SHuPTShJSHKI0SlQguAF4IH9dy9x/Cx8uBWvm9wMw6\nm9kMM5uxatWqRMQoIqXdn38G5SGysoKB4dGjYcyYoFSEFCjuicDMKgJnAi/lfc6DAYp8ByncfbC7\nZ7l7Vs2aNeMcpYiUNtGlHGrUgDZVPmRhpRZw9918tN+/aF5xIeXOP4/6DYyuXbcu+5B3O1IGojSW\nhygR7h7XG0FX0KSo7c+B2uHj2sDnRZ2jZcuWLiISMXKke+XK7uBemV/9Yb
r7JsxzqONteMODiwVi\nv1Wu7N6ly5ZzRu8fOTLZn3b7ATM8hu/pRHQNXciWbiGA8UCH8HEHYFwCYhCRMiSylOMJvMV8GtOD\nATxJVw5mARP4R7HPt359MJkokctDppK4JgIz2wU4CRgTtfte4CQzWwycGG6LiMTs55y1PE0n3uIk\nNlCRo5jG1TzOr+y23efctCn//aleHqIkxLX6qLv/BlTPs28NwSwiEZHiGzuWzzK6Um3TKu6hF3fQ\nhz+ptMOnzcjIPxmkenmIkqAri0WkdFixIlgp7JxzKL/vXhxTaTq3cE+JJIHKlYNy0aWxPERJUCIQ\nkdTmDs8+C40aBesF9+1LtcXT6fr0IZtLOVSvHtwiZR26dNm6zENR24MHw5NPls7yECVB6xGISOpa\nuhSuuAImTIAjjuC/Zw3l6icOYunSoMumb9/0+KLeXrGWmNAKZSKSenJzYeBA6NUraBE89hjZVbrS\n+cpypW71r9JAXUMiklo+/zxYJvKqq+CII2D+fLjqKnr/p1zaTu+MNyUCEUkNf/0F994LzZoFi8YP\nHx50CdWvD5Te1b9KAyUCEYmrSNkGs6B0g1lwy8gI7suXhxY2m1kVD4ebb+aVP09nr7ULqXFdB2rU\ntM3HFDScmQ7TO+NNYwQiEjfZ2UE/fqRLJ/rLPDcXduIP/rPpLm7iPlZTg3/yMmP4Z3DAmi3HFnSx\nV7pM74w3tQhEJG4ipSDycwTvM4fm9KYfz/FvMlm4JQnEIJ2md8abWgQiEjf59d/vyi/04xa68QRL\nqcvJTGQyJxfrvGawZEnJxChqEYhInGRnB2MC0U5mIvNpTDee4DGupjHzi50EQOMCJU2JQERKXGRs\nINK3X5UfeYZLmEgb1lOZo/gfPRjAb+xa7HNrXKDkKRGISImLHhs4h1dYSCYXMZK76U0LZvMBf9/c\nWsjIyP8+UjYiep/GBeJDYwQiUuJycmAvfuBxruKfjGEWLWjDBD615uTmJjs6yUstAhEpUdkjnUsY\nzkIyOY3XuYl7OYzpzKW5+vZTlFoEIlJyliyhzuWdeYbJTOMoLuNpFnMgEMz0Ud9+alKLQER23KZN\n8Oij0LgxLf74kK48wbFM3ZwEILiYTH37qUmJQES2kZ0NNWpsKQdR2K2RLeL98kdD9+68+dtRHMwC\nBtKVvEui16uXpA8jRVIiEJGtZGdDx46wZk3hx5XnL26hL3NozkF8xr95llN5g2/ZdiBAUz5Tm8YI\nRGQrvXsHhUAL04JZDONSmjOXUbTjGh5lJbXyPTYjQ1M+U51aBCKylcLKOlfid+6hF9M5jD1ZyVmM\n5QJGFZgEICgupySQ2pQIRGQrBU3xPIppzKUZvbiP4VxCJgsZx1nbfT5JHUoEIrKVvn2hQoUt27vx\nM4/TjWkcQ3k2cgJvcTlPs449ijxXxYoaGygNlAhEZCvt28MzzwTlHdrwJvNpTBcG8jA9aMI83uaE\nmM5TvToMG6ZuodKgWIPFZlYVqOPun8YpHhEpIdnZ0L170bN/8lPD1tDfr+VinuOnfTIp9/IHXNuq\nFdeWfJiSAopsEZjZVDPb3cyqAbOAIWbWP/6hicj2inUK6Lac8xjNAm/EhbzAnfyHBj/OIvurVvEI\nU1JELF1DVdz9Z+Ac4Fl3Pxw4Mb5hiciOiGUKaF61+Z6xnM1ozieHerRkJn24k59+34neveMTp6SG\nWBJBeTOrDbQDXotzPCJSAgqbArot51KGspBMTmEi1/MArfmQeTTdzvNJaRNLIrgTmAh85e6fmNl+\nwOJYTm5me5jZy2b2mZktMrPWZlbNzCab2eLwvuqOfAAR2VasUzYb8DVvcSJDuYw5NKcJ83iI69mU\nZ/hQU0DLtiITgbu/5O5N3b1LuP21u8e6wvQAYIK7HwQ0AxYBvYAp7t4QmBJui0gJyjsFNK9ybKI7\njzCPJhzKJ1zBII7nbb7igG2OVXmIsi
+WweIDzWyKmc0Pt5ua2a0xvK4KcDQwFMDdN7j7T0BbYER4\n2AiI4YoUESmW6CmgeWWygPf5O49wLe9wHJksZDBXbFUkLrJ6mFYESw/m7oUfYPYucAPwlLu3CPfN\nd/fGRbyuOTAYWEjQGpgJdAeWufse4TEGrI1s53l9Z6AzQN26dVvm5OQU86OJyFY2bIB774W774bd\ndw/KRl94YVBCVMokM5vp7llFHRfLGEFld5+eZ9/GGF5XHjgEGBgmkN/I0w3kQRbKNxO5+2B3z3L3\nrJo1a8bwdiISLTsb6tcPft2fWfsT1h6QBX36wLnnwqJF8K9/KQkIEFsiWG1m+xN+YZvZucAPMbzu\nO+A7d/843H6ZIDGsCGchEd6vLHbUIlKo7Gzo3BlW5qznPr+Bsctbsf67H5naczw8/zzox5VEiSUR\ndAOeAg4ys2VAD6BLUS9y9+XAt2b2t3DXCQTdROOBDuG+DsC44gYtIoXr3RsOXT+VuTTjBh7kaS4j\n0xdwyStnJDs0SUFFlphw96+BE81sF6Ccu/9SjPNfDWSbWUXga6AjQfIZbWadgByC6xNEJI+uXWHQ\noGCJx+LYnXXcx01cyVN8yf4cx9tM5TgAftH1AJKPIhOBmd2WZxsAd7+zqNe6+xwgv4GK2KpWiaSp\nrl1h4MDiv+40XmMQV1KbH3iQ67iNO/mdypuf1/UAkp9YuoZ+i7ptAv4B1I9jTCJpb/Dg4h1fg1Vk\n8y9e4wzWUpXWfMgNPLhVEjDT9QCSv1i6hh6K3jazBwmuNBaRONm0KdYjnQt4kUe5hiqs4zbu4F56\n8RcVtz3SdT2A5G971iyuDOxb0oGIyBblygVLPBZmH75jIF04g9f4mMPoxFAWUPDlPfXqlXCQUmbE\nMkYwjy1z/TOAmgT1h0QkDrKzC3/eyOUynuYBbqACf3Et/XmUa8glo8DXaKUwKUwsLYLTox5vBFa4\neywXlInIdujdu+DWwP58yRAu5zim8jbHcTlD+Jr9Cz1f9eowYIC6haRgBSaCcCEagLzTRXc3M9z9\nx/iFJZK+8iv5nMFGejCAB3f+T1BN7qEhHN+pE1/pymApAYW1CGYSdAnl9y/Ngf3iEpFIGsvODmb3\nRF870Jh5DKUTh/EJnHQmPPkk7LNP8oKUMqfARODuDRIZiEi6iywvGekWqsif3EI/bqEfa6nKe1e9\nyJGPtlN9IClxMc0aChePaQhUiuxz92nxCkokHUUvL3kYHzOUTjRmAc9xEXdVfZgvHquR3AClzIpl\nPYLLgGkE1w7cEd7fHt+wREq/7GyoUSP4AR/LLScHKvMbD9GTD2lNFdZxGq9xMc/x5U9KAhI/sVxZ\n3B04FMhx9+OAFsBPcY1KpJSLdPOsWRP7a47jbT6lKT15mEFcycEs4A1OA1QaQuIrlkTwh7v/AWBm\nO7n7Z8DfiniNSFqL7uYpShV+YjCX8zYnkEs5jmEq3XiSX9h98zG6BkDiKZYxgu/MbA/gVWCyma0l\nqBoqIgXIbwpofs5kHAPpQi1WcB83cju38wc7b3OcrgGQeIql1tDZ4cPbzewdoAowIa5RiZRiRV0Z\nDFCTlTzKNVzAKObSlDMZz8x8C/WqNITEX4FdQ2b2hpldZGa7Rva5+7vuPt7dNyQmPJHSJTsbLr64\nsDUEnPaMZBGNOJux3MpdZDGjwCSg0hCSCIWNETwFnAZ8Y2ajzezscIEZESlAYeUh9uVbXuN0RvJv\nvuBAWjCbvtzKRirke3z16jBsmLqFJP4KTATuPs7dLwTqAa8AFwNLzewZMzspUQGKlCb5jQ0YuVzJ\nQBZwMKdVngqPPELrje+x0DNxp8Db6tVKApIYRc4acvf17j4qHCs4GWiOxggkzcR6TUDeLqGGfMFU\njmUgXZnO4TB/PnTvDhkFVwoVSbRYLiirZWZXm9n7BDOHJgKHxD0ykRSxPdcEZLCRG7ifuTSjCfPo\nyD
DGXDkJGqhyi6SewgaLLzezt4FZBOUlbnD3/dy9l7vPTViEIklWnGsCAJoyl485nPu5iTf5B5ks\nZOcuHXlyoGoESWoqbPpoa+AeYIq7F7FWkkjZFes1ARX5k1u5m17cy49U41xe4hX+CRhPPhnXEEV2\nSGHVRy9NZCAiqapu3aAOUGFa8SFD6UQmixjBxfSkPz9SHdBwgKS+WEpMiKS1vn2DtWDyswu/8jA9\neJ+/swu/0YY3uYQRm5MAQOfOCQpUZDspEYgUoX17eOaZYF5/tBOZzDya0IMBPEE3GjOfibTZ/Hy5\nctClC+oWkpQXy1KV+dJSlZIusrODGZ+RWUP7VV3LpCbXsf+0Z+Bvf4On/8fVRx7J1ckNU2S7xbpU\nZV1gbfh4D2ApoHlwUuZFpo5GZg2dxVieXNuVmtNWMf+Mm2k8+jaoVKnwk4ikuMKuLG7g7vsBbwFn\nuHsNd68OnA5MSlSAIskUmTpai+WM5jzGcg7L2YvDmM7pn/ZTEpAyIZYxglbu/kZkw93fBI6IX0gi\nqWNpjnMxI1hIJmfwX26mH4cxndkcEvO0UpFUF0si+N7MbjWz+uGtN/B9LCc3syVmNs/M5pjZjHBf\nNTObbGaLw/uqO/IBRGKRnQ277hr7spFmUM9yeIN/MIJLWEQjmjOHe7l5c5E4rRomZUUsieBCoCYw\nFhgTPr6wGO9xnLs3d/dInd1eBBepNQSmhNsicRMpDf3bb7Edb+TSjcdZwMEcyXtcxWMcxf/4nIM2\nH6Py0FKWxLIwzY9AdzPbxd1j/F+pUG2BY8PHI4CpwE0lcF6RfBVWGjqvA/mcoXTiSN5nAqdwBU+x\nlG1XhlF5aClLYik6d4SZLQQWhdvNzCzWmdEOvGVmM80scllNLXf/IXy8HKhVwPt2NrMZZjZj1apV\nMb6dyLZi6csvz1/04h7m0oxMFtKB4fyDN/NNAqAkIGVLLGsWPwycAowHcPe5ZnZ0jOc/0t2Xmdme\nBOsdfxb9pLu7meW7lpO7DwYGA2RlZRW43pNIYbKz8y8PHa05sxlKJw5hNi9xLlfzGCvYq8DjVTJC\nypqYrix292/z7NoU4+uWhfcrCcYYDgNWmFltgPB+ZczRihRD5BqAgrqFduIP+nILn3Aoe/M95/AK\n7Xip0CQAKhkhZU8sieBbMzsCcDOrYGbXE3YTFcbMdjGz3SKPCRa1mU/QsugQHtYBGLddkYsUobDy\n0X/nPebSjFu4h2e5mEYsYiznFHo+lYyQsiqWrqErgQHAPsAygovJusXwulrAWDOLvM/z7j7BzD4B\nRptZJyAHaLc9gYsUJb+KobvyC/dwM1fxBNSvD09N5NKTT0aldiWdFZoIzCwD+Le7F3tozN2/Bprl\ns38NcEJxzydSHNnZ2+47mYkMpjN1+JZndruGjvP6BhcXiKS5QruG3H0T8K8ExSJSYnr33vK4Kj8y\nnA5MpA3rqcyRvEfFgQOUBERCsXQNvWdmjwOjgM3XEbj7rLhFJbKDgimjzj95hSfoRjV+5G56cze3\n8ieV+EDTP0U2iyURNA/v74za58DxJR+OSGzylobOay9+4Am6cQ5jmckhnMJE5ob/lOvlf2mASNqK\n5cri4xIRiEis8paG3ppzCcPpT08q8Qc3ch/96cmm8J+6SkOIbCuWK4trmdlQM3sz3M4MZ/yIJEVB\n00Lr8w2TOJlnuJR5NKEZc3mAGzcngXLlVBpCJD+xXEcwHJgI7B1ufwH0iFdAIkXJWzKiHJu4mkeZ\nT2Na8RFdeJJjmcpiDtzqOHclAZH8xJIIarj7aCAXwN03EuOVxSLxUC1qEdWDWMT/OIpH6c67HMPB\nLGAQXfB8/mmrbLRI/mJJBL+ZWXWCAWLMrBWwLq5RiRQgOxt+/jkoEtebu5lDc/7G51zEc5zG63xL\n/t/2GhsQKVgss4Z6EpSF2N/M3idYj+DcuEYlUoDevaHJXzMZxqU0
41NG0Y6reYxV7Fnga6pXhwED\n1C0kUpBYZg3NMrNjgL8RLF7/ubsXUMFFJI5+/50uObdzPQ+yglqcxVjGcRYQVBiNdc0BEdlagYnA\nzAqqwHWgmeHuY+IUk8i2pk3j5/Mv4yYWM4TLuIEHWMcem59W/7/I9iusRXBGeL8nwWL1b4fbxwEf\nECxbKRJfP/8MvXrBwIH8VL4BZ/MWb+cpVWWm/n+RHVFgInD3jgBmNgnIjKwqFq4hMDwh0Ul6e+MN\nuOIKWLYMrr2WzIfv4jd22eYwTQsV2TGxzBqqE7W0JMAKKGBqhsh2ys6GGjWCX/c1bDUj7SI47TQW\nfLc7rfwD7OH++SYBUMkIkR0Vy6yhKWY2EXgh3D4feCt+IUm62VIywmnHaB7jaqqylju4jX7cwgZ2\nKvC15curW0hkR5kXtphr5CCzs4HIOsXT3H1sXKPKIysry2fMmJHIt5QEql8fNuR8z0C60JbxfEIW\nnRjKPJoW+VrNFhIpmJnNdPesoo6LZWGat8LCcwn98pc04c5JOUN5gOvZiT+5jgcZQPfN9YFieLmI\n7KBYFqbJNbMqCYpH0snXX8OJJzKEy5lDc5owj/5cF3MSAMjIiGN8Imkilv/jfgXmmdlktl6Y5pq4\nRSVl26ZN8OijwWXC5cvz8aVPccqzl7FhYyxzF7bWuXMc4hNJM7EkgjHomgEpKfPnQ6dOMH06nHYa\nDBrE4fvuy7DjC19oJq9y5YKZpU8+Gd9wRdJBLIlgFHBA+PhLd/8jjvFIGZF3BbEKbOBm7qE3fVlH\nFa7heV58/QKoY1u9Tl/wIolXWImJ8kA/4FIgh6DOUB0zewborXpDUpC8K4hl8QnDuJQmzCebf9GD\nR1hNzXxfm5sLAwcGj5UMRBKjsE7ZB4BqQAN3b+nuhwD7A3sADyYiOCmdIiuI7cx6HuB6PqIVVVnL\nGYznIrILTALRBg9OQKAiAhTeNXQ6cKBHXWjg7j+bWRfgM6B7vIOT0mnpUjiGqTzNZRzAVwziCm7i\nPn4m9slnm7T0kUjCFNYicM/narNwSqlmb0v+1q1j5C5XMJXjADiOt+nCoGIlAdC0UJFEKiwRLDSz\ni/PuNLOLCFoEIlv7738hM5MLfnuah8pdT1M+3ZwQikvTQkUSp7CuoW7AGDO7FJgZ7ssCdgbOjndg\nUoqsWhVMEXrhBWjShHKvvspeXxxK5e7we4zTQSM0a0gk8YqsNWRmxwMHh5sL3X1K3KPKQ7WGUpR7\n8OV/zTXBugG33hqsHVCxYrIjExFirzVU5KWc7v62uz8W3oqdBMwsw8xmm9lr4XY1M5tsZovD+6rF\nPaekgO++gzPPDBYCOOAAmD2b7P1vo/6BFTELqoKaBQXlsrOTHayIFKb41/QXX3dgUdR2L2CKuzcE\npoTbUlrk5sJTT0FmJkyZAv37w/vvkz3nYDp3hpyc4LDIrJ+cnKC/X8lAJHXFNRGY2b7AacDTUbvb\nAiPCxyORISk2AAAQHklEQVQgXH1cUt/ixXD88XDllXDooUG5iGuvhYwMeveG9evzf9n69cG1BSKS\nmuLdIngEuBGIrhhfK2rFs+VArfxeaGadzWyGmc1YtWpVnMNMX9nZQfdNdHdO5L5cuXDbNnK9Pcjv\nBzZl3buzuYwh2NtvYfvvh1lwTKQlUJClSxPycURkO8QtEZjZ6cBKd59Z0DHhdQr5jla7+2B3z3L3\nrJo1i74SVYovO5t8u3Mi9+7QhE/5kNY8yA1M4mQyWchQLiOoOBK7ulrcVCRlxV74vfj+DpxpZqcC\nlYDdzWwksMLMarv7D2ZWG1gZxxikEIV151TkT26hH7fQj7VUpR2jeInzKG4CAKhcWctJiqSyuLUI\n3P1md9/X3esDFwBvu/tFwHigQ3hYB2BcvGKQwhXUXXM4HzGLQ+jDnbzIBTRiES/Rju1JAvXqBXWD\n2rffsVhFJH7i2SIoyL3AaDPr
RFDVtF0SYhCC7provv3K/MZd/IcePMIy9uFUXudNTt3u89erB0uW\n7HicIhJfiZg+irtPdffTw8dr3P0Ed2/o7ie6+4+JiEG21bdv0G0DcDxTmEcTevIwg7iSg1mwQ0mg\nYkV1B4mUFsloEUiKaN8eKvz2E7nXXc8Fvw7lCxpyNO/yQcbRbNoUzAbansXhq1eHAQPUHSRSWigR\npLNx42h3exf4fSXcdBMH9unDtJ13TnZUIpJgSgTpaMWKoD7Q6NHQrFlQNbRly2RHJSJJkpAxAkkR\n7vDcc0F5iFdfhbvvhk8+URIQSXNqEaSLpUuD0hBvvgmtW8PQodCoUbKjEpEUoBZBWZebGxT3P/hg\nePfdYBT3f/9TEhCRzZQIyrIvvmBl5rHQrRuTf23FAX/Mx7pfQ41aGdSogcpFiwigRFA2bdwI993H\npsZNqfD5PDoyjJOZxFe5DQBYsya4gcpFi4gSQdkzdy4cfjj06sXk8qeSyUKG05FYykOoXLRIelIi\nKCv++CNYKjIrC5YtY9o1L/OP38ewnNrFOo3KRYukH80aKgs++AA6dYLPPoMOHXipdX8u6Vltu06l\nctEi6UctgtLs11+DC8OOPDLo15kwAYYP54Z7qhVYXrowKhctkp6UCEqrSZOgcWN4/HHo1i1YNvKU\nU4Ciu3eqVw9uABkZwb3KRYukLyWCFBZZRrJcueC+a1doWmctz1hHOOUUFn9biaN8GjVeeIwaDXbb\nPB20oEJx9eoFz61eHdzcgwlG7kG5aCUBkfSkMYIUFVlGMtLFk5MDyweOYSLdqMkq+nEzd+bexp9U\ngjVbXheZDpqXun1EpCBqEaSo6GUka7GclziXMfyT5ezFoXxCb/oFSSAG6vYRkcKoRZCign5+pwMj\n6E9PKrOem+nHg1zPRirEfB4zrRImIoVTiyBJ8vb/Z2dvva+BLWECbRhORxaSSXPmcC83FysJgKaD\nikjR1CJIgvz6/zt2DH69/7Uhl248wT1+M47RjccZSBd8O3K2xgVEJBZqESRBdP9/xF9/QYMNnzGN\no3mMa3iPI2nMfJ7K6AZWjnr1oEuXoL/fbMsU0OjHoOmgIlJ8ahEkQd55/uX5ixt4gD7cwW/swsWM\n4Dn+DRiWG1SSFhGJF7UIkiC6374Fs5jOYfSjN+M5k0Ys4jkuJlIkTn38IhJvSgRJ0LcvVNv5d/px\nM9M5jL1YTruMV7io4kuspNbm49THLyKJoESQBO3rvcc3VZpzM/fyHBdzyr4LaTviHIYN2zIGoD5+\nEUkUJYISkt900LzP7W6/MGL3q+Coo9i90gaYNImOPoxPv61K+/bBl/6SJcGYgEo+iEiiaLC4BOQ3\nHbRz5y3Pd+4MR62fwFNcQZ1fvuWJ8t2pcevdnH/SrskJWEQkinlBFcpSSFZWls+YMSPZYRSofv3g\nyz+vevVgj01ruPa7nnTgWRbSiE4M5SNaU6+ervgVkfgys5nunlXUcXHrGjKzSmY23czmmtkCM7sj\n3F/NzCab2eLwvmq8YkiU/Ms+O4flvMTE7zL5F89zF7fSgtl8ROtCXiMiknjxHCP4Ezje3ZsBzYE2\nZtYK6AVMcfeGwJRwu1TLO8VzL35gDOcwmnasqFiHLGZwG3exgZ0KfI2ISLLELRF44Ndws0J4c6At\nMCLcPwI4K14xJErfvsFUT3A6MoxFNKINE5h9wX3MH/IRX1ZuttXxmhYqIqkkroPFZpYBzAQOAJ5w\n94/NrJa7/xAeshyiJs6XUu3bwy4rv6H6LZ056o+3+Hino1lx9xDOvP5AWgCeEZSVWLo0aAn07asZ\nQSKSOuI6fdTdN7l7c2Bf4DAza5zneSdoJWzDzDqb2Qwzm7Fq1ap4hlmowqaFAsFKMAMGcNatjTmq\nwscwcCCHr3+HM68/cPMhmhYqIqksIdcRuPtPwDtAG2CFmdUGCO9XFvCawe6e5e5ZNWvWTESY24
hM\nC83JCZZzjEwL3ZwMFi4MFo7v0QOOOQYWLIArrwyyhohIKRHPWUM1zWyP8PHOwEnAZ8B4oEN4WAdg\nXLxi2FH5VQldvx5uv2UD3HUXtGgBixfDyJHw+utQp05yAhUR2QHxHCOoDYwIxwnKAaPd/TUz+xAY\nbWadgBygXRxj2CH5TfFsyQyGLu0Et30K558Pjz4Ke+6Z+OBEREpI3BKBu38KtMhn/xrghHi9b0mq\nW3fLhWKV+J076MN1PMSqjL3glVehbdvkBigiUgLUmV2IyLTQo3mXT2nKjTzAiIxOTHtygZKAiJQZ\nSgSFaH/Gz8xu3YV3OZZy5HLhnlPYacRg2nXeI9mhiYiUGBWdK8jrr8OVV3Lg999Dz57sf+edvLDL\nLsmOSkSkxKlFkNfq1XDRRXD66bD77vDBB/DQQ6AkICJllBJBhDu8+CI0agSjRkGfPjBrFhx+eLIj\nExGJK3UNASxbBl27wvjxcOihMHQoNGmS7KhERBKizLYIiiwNAUErYMgQyMyEyZPhwQfhww+VBEQk\nrZTJFkFhK4ZtrvPz1Vdw+eXwzjtw7LFBQjjggGSEKyKSVGWyRVBQaYjevQmKxPXvH/zqnzkTnnoK\npkxREhCRtFUmWwQFrf61W858OKITTJ8ezAoaOBD23TexwYmIpJgy2SLIu/pXBTZwG3cwk0Pg66/h\n+eeDgWElARGRspkItqwYBocynZm05A5uZ9kR5wWloy+8EMySG6SISIook11DkQHhH7rdzbXr+rAy\nozZTe/yXYx88PbmBiYikoDKZCCBMBuX2h3cvp/Z991G7SpVkhyQikpLKbCIAgi6gCy9MdhQiIimt\nTI4RiIhI7JQIRETSnBKBiEiaUyIQEUlzSgQiImlOiUBEJM0pEYiIpDklAhGRNGfunuwYimRmq4Cc\nZMcRJzWA1ckOIo70+Uq/sv4Zy/Lnq+fuNYs6qFQkgrLMzGa4e1ay44gXfb7Sr6x/xrL++WKhriER\nkTSnRCAikuaUCJJvcLIDiDN9vtKvrH/Gsv75iqQxAhGRNKcWgYhImlMiEBFJc0oESWZmD5jZZ2b2\nqZmNNbM9kh1TSTOz88xsgZnlmlmZmaZnZm3M7HMz+9LMeiU7npJmZsPMbKWZzU92LPFgZnXM7B0z\nWxj+++ye7JiSRYkg+SYDjd29KfAFcHOS44mH+cA5wLRkB1JSzCwDeAL4B5AJXGhmmcmNqsQNB9ok\nO4g42ghc5+6ZQCugWxn8bxgTJYIkc/dJ7r4x3PwI2DeZ8cSDuy9y98+THUcJOwz40t2/dvcNwItA\n2yTHVKLcfRrwY7LjiBd3/8HdZ4WPfwEWAfskN6rkUCJILZcCbyY7CInJPsC3UdvfkaZfImWBmdUH\nWgAfJzeS5Cjbi9enCDN7C9grn6d6u/u48JjeBE3V7ETGVlJi+YwiqcjMdgVeAXq4+8/JjicZlAgS\nwN1PLOx5M7sEOB04wUvphR1FfcYyaBlQJ2p733CflCJmVoEgCWS7+5hkx5Ms6hpKMjNrA9wInOnu\n65Mdj8TsE6ChmTUws4rABcD4JMckxWBmBgwFFrl7/2THk0xKBMn3OLAbMNnM5pjZoGQHVNLM7Gwz\n+w5oDbxuZhOTHdOOCgf4rwImEgwyjnb3BcmNqmSZ2QvAh8DfzOw7M+uU7JhK2N+BfwPHh//vzTGz\nU5MdVDKoxISISJpTi0BEJM0pEYiIpDklAhGRNKdEICKS5pQIRETSnBKBJJSZVY+aqrfczJaFj38y\ns4UJjqV59HRBMztze6uImtkSM6tRctEV670vMbO9o7afjhRPS2ZcUnooEUhCufsad2/u7s2BQcDD\n4ePmQG5Jv5+ZFXb1fHNgcyJw9/Hufm9Jx5AAlwCbE4G7X+buCU2qUropEUgqyTCzIWFt+ElmtjOA\nme1vZhPMbKaZ/c/MDgr31zezt8O1HKaYWd1w/3AzG2RmHw
P3m9kuYW396WY228zahlcD3wmcH7ZI\nzg9/WT8enqNWuD7E3PB2RLj/1TCOBWbWuagPZGYdzeyL8L2HRJ1/uJmdG3Xcr+H9ruFnmWVm88ys\nbdRnXZT37xOeIwvIDj/HzmY2Nb91H8zsojCOOWb2lJllhLfhZjY/fL9rd+C/n5RSSgSSShoCT7j7\nwcBPwD/D/YOBq929JXA98GS4/zFgRLiWQzbwaNS59gWOcPeeQG/gbXc/DDgOeACoANwGjApbKKPy\nxPIo8K67NwMOASJXDV8axpEFXGNm1Qv6MGZWG7iD4ArWIwnWLSjKH8DZ7n5IGOtDYSmEfP8+7v4y\nMANoH36O3wuIpRFwPvD3sAW2CWhP0Crax90bu3sT4JkYYpQyRkXnJJV84+5zwsczgfphZcgjgJe2\nfB+yU3jfmmDBG4DngPujzvWSu28KH58MnGlm14fblYC6RcRyPHAxQHiedeH+a8zs7PBxHYIv5zUF\nnONwYKq7rwIws1HAgUW8rwH9zOxogq6yfYBa4XPb/H2KOFe0E4CWwCfh33FnYCXwX2A/M3sMeB2Y\nVIxzShmhRCCp5M+ox5sIvqzKAT+Fv2KL47eox0bw63mrxXHM7PDinNDMjgVOBFq7+3ozm0qQVLbH\nRsIWuZmVAyqG+9sDNYGW7v6XmS2Jeo/8/j4xh0/QetpmBTwzawacAlwJtCNYF0PSiLqGJKWF9eG/\nMbPzIKgYGX5xAXxAUPUTgi/Q/xVwmonA1ZEuFjNrEe7/haDgX36mAF3C4zPMrApQBVgbJoGDCJY3\nLMzHwDHhTKkKwHlRzy0h+IUOcCZBVxXhe6wMk8BxQL0i3qOozxH9ec41sz3Dz1TNzOqFM4rKufsr\nwK0E3WCSZpQIpDRoD3Qys7kEffWRJSGvBjqa2acEVSQLWnz8LoIv2k/NbEG4DfAOkBkZLM7zmu7A\ncWY2j6AbJhOYAJQ3s0XAvQRLixbI3X8Abieo4Pk+QZXSiCEESWIuQRdXpAWTDWSF73sx8Flh7xEa\nDgyKDBYXEMtCgi/6SeHfazJQm6DraaqZzQFGUjbXzJYiqPqoSIJYsABRlrtflexYRKKpRSAikubU\nIhARSXNqEYiIpDklAhGRNKdEICKS5pQIRETSnBKBiEia+z/EXwUC6VC0VgAAAABJRU5ErkJggg==\n", 554 | "text/plain": [ 555 | "" 556 | ] 557 | }, 558 | "metadata": {}, 559 | "output_type": "display_data" 560 | } 561 | ], 562 | "source": [ 563 | "_,_ = mrstat.qq_plot(hsb.science,dist='norm',plot=plt)" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "Видно, что синие точки на этом графике гораздо сильнее привязаны к красной линии. Это говорит о том, что science распределен более нормально. КуКу график можно строить не только для нормального распределения, но и для любого другого." 
571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "#### 2.3 Тест Колмогорова-Смирнова для проверки формы распределения" 578 | ] 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "metadata": {}, 583 | "source": [ 584 | "Нормальность (и не только) можно проверить также и при помощи этого теста." 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": 12, 590 | "metadata": { 591 | "collapsed": false 592 | }, 593 | "outputs": [ 594 | { 595 | "data": { 596 | "text/plain": [ 597 | "KstestResult(statistic=0.1343270786922024, pvalue=0.0013129370215807512)" 598 | ] 599 | }, 600 | "execution_count": 12, 601 | "metadata": {}, 602 | "output_type": "execute_result" 603 | } 604 | ], 605 | "source": [ 606 | "mrstat.kstest(hsb['write'],'norm',args=(hsb['write'].mean(),\n", 607 | " hsb['write'].std(ddof=1)))" 608 | ] 609 | }, 610 | { 611 | "cell_type": "markdown", 612 | "metadata": {}, 613 | "source": [ 614 | "P-value < 0.05, следовательно, гипотеза о нормальности отвергается. Как вы поняли, в args передаются параметры нормального распределения, с которым мы хотим сравнить распределение выборки. Я передал туда выборочное среднее и стандартное отклонение. Проверим science." 
615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 13, 620 | "metadata": { 621 | "collapsed": false 622 | }, 623 | "outputs": [ 624 | { 625 | "data": { 626 | "text/plain": [ 627 | "KstestResult(statistic=0.076233630374236583, pvalue=0.18585568226600646)" 628 | ] 629 | }, 630 | "execution_count": 13, 631 | "metadata": {}, 632 | "output_type": "execute_result" 633 | } 634 | ], 635 | "source": [ 636 | "mrstat.kstest(hsb['science'],'norm',args=(hsb['science'].mean(),\n", 637 | " hsb['science'].std(ddof=1)))" 638 | ] 639 | }, 640 | { 641 | "cell_type": "markdown", 642 | "metadata": {}, 643 | "source": [ 644 | "P-value получился больше 0.05 следовательно можно заключить, что данная выборка принадлежит к нормальному распределению. С помощью этого теста можно сравнивать выборки не только с нормальным распределением, но и с другими." 645 | ] 646 | }, 647 | { 648 | "cell_type": "markdown", 649 | "metadata": {}, 650 | "source": [ 651 | "#### 2.4 Двухвыборочный тест Колмогорова-Смирнова" 652 | ] 653 | }, 654 | { 655 | "cell_type": "markdown", 656 | "metadata": {}, 657 | "source": [ 658 | "С помощью этого теста можно проверить принадлежат ли две выборки к одному распределению." 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 14, 664 | "metadata": { 665 | "collapsed": false 666 | }, 667 | "outputs": [ 668 | { 669 | "data": { 670 | "text/plain": [ 671 | "Ks_2sampResult(statistic=0.15000000000000002, pvalue=0.01973175474986974)" 672 | ] 673 | }, 674 | "execution_count": 14, 675 | "metadata": {}, 676 | "output_type": "execute_result" 677 | } 678 | ], 679 | "source": [ 680 | "mrstat.ks_2samp(hsb['write'],hsb['science'])" 681 | ] 682 | }, 683 | { 684 | "cell_type": "markdown", 685 | "metadata": {}, 686 | "source": [ 687 | "Если p-value больше 0.05 - можно заключить, что выборки принадлежат к одному распределению. Write и science принадлежат к разным. А вот write и socst к одному." 
688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 15, 693 | "metadata": { 694 | "collapsed": false 695 | }, 696 | "outputs": [ 697 | { 698 | "data": { 699 | "text/plain": [ 700 | "Ks_2sampResult(statistic=0.125, pvalue=0.080917080808807579)" 701 | ] 702 | }, 703 | "execution_count": 15, 704 | "metadata": {}, 705 | "output_type": "execute_result" 706 | } 707 | ], 708 | "source": [ 709 | "mrstat.ks_2samp(hsb['write'],hsb['socst'])" 710 | ] 711 | }, 712 | { 713 | "cell_type": "markdown", 714 | "metadata": { 715 | "collapsed": true 716 | }, 717 | "source": [ 718 | "### 3. Двухвыборочные ти-тесты" 719 | ] 720 | }, 721 | { 722 | "cell_type": "markdown", 723 | "metadata": {}, 724 | "source": [ 725 | "#### 3.1 Ти-тест для двух независимых выборок" 726 | ] 727 | }, 728 | { 729 | "cell_type": "markdown", 730 | "metadata": {}, 731 | "source": [ 732 | "Данный тест позволяет понять, есть ли значимые различия между средними двух независимых выборок. Сравним оценки по тесту science для мальчиков и девочек." 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": 16, 738 | "metadata": { 739 | "collapsed": false 740 | }, 741 | "outputs": [ 742 | { 743 | "name": "stdout", 744 | "output_type": "stream", 745 | "text": [ 746 | "53.2307692308 50.6972477064\n" 747 | ] 748 | } 749 | ], 750 | "source": [ 751 | "s_boys = hsb[hsb['female'] == 0]['science']\n", 752 | "s_girls = hsb[hsb['female'] == 1]['science']\n", 753 | "print s_boys.mean(), s_girls.mean()" 754 | ] 755 | }, 756 | { 757 | "cell_type": "markdown", 758 | "metadata": {}, 759 | "source": [ 760 | "Средняя оценка по научным дисциплинам у мальчиков выше. Проверим, имеет ли это утверждение статистическую значимость." 
761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 17, 766 | "metadata": { 767 | "collapsed": false 768 | }, 769 | "outputs": [ 770 | { 771 | "data": { 772 | "text/plain": [ 773 | "Ttest_indResult(statistic=1.7847013349359799, pvalue=0.076026848666140553)" 774 | ] 775 | }, 776 | "execution_count": 17, 777 | "metadata": {}, 778 | "output_type": "execute_result" 779 | } 780 | ], 781 | "source": [ 782 | "mrstat.ttest_ind(s_boys,s_girls,equal_var=False)" 783 | ] 784 | }, 785 | { 786 | "cell_type": "markdown", 787 | "metadata": {}, 788 | "source": [ 789 | "P-value >= 0.05, а значит нельзя утверждать, что средние различны. Теперь проведем такой же тест для write." 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": 18, 795 | "metadata": { 796 | "collapsed": false 797 | }, 798 | "outputs": [ 799 | { 800 | "name": "stdout", 801 | "output_type": "stream", 802 | "text": [ 803 | "50.1208791209 54.9908256881\n" 804 | ] 805 | } 806 | ], 807 | "source": [ 808 | "w_boys = hsb[hsb['female'] == 0]['write']\n", 809 | "w_girls = hsb[hsb['female'] == 1]['write']\n", 810 | "print w_boys.mean(), w_girls.mean()" 811 | ] 812 | }, 813 | { 814 | "cell_type": "code", 815 | "execution_count": 19, 816 | "metadata": { 817 | "collapsed": false 818 | }, 819 | "outputs": [ 820 | { 821 | "data": { 822 | "text/plain": [ 823 | "Ttest_indResult(statistic=-3.6564080478875276, pvalue=0.00034088493594266187)" 824 | ] 825 | }, 826 | "execution_count": 19, 827 | "metadata": {}, 828 | "output_type": "execute_result" 829 | } 830 | ], 831 | "source": [ 832 | "mrstat.ttest_ind(w_boys,w_girls,equal_var=False)" 833 | ] 834 | }, 835 | { 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "Получилось, что средние оценки по тесту write у мальчиков и девочек статистически значимо разные." 
840 | ] 841 | }, 842 | { 843 | "cell_type": "markdown", 844 | "metadata": {}, 845 | "source": [ 846 | "#### 3.2 Доверительный интервал разности средних для двух независимых выборок" 847 | ] 848 | }, 849 | { 850 | "cell_type": "markdown", 851 | "metadata": {}, 852 | "source": [ 853 | "Построим 95% доверительный интервал для разности средних оценок science." 854 | ] 855 | }, 856 | { 857 | "cell_type": "code", 858 | "execution_count": 20, 859 | "metadata": { 860 | "collapsed": false 861 | }, 862 | "outputs": [ 863 | { 864 | "data": { 865 | "text/plain": [ 866 | "(-0.22316404940007306, 5.2902070980945028)" 867 | ] 868 | }, 869 | "execution_count": 20, 870 | "metadata": {}, 871 | "output_type": "execute_result" 872 | } 873 | ], 874 | "source": [ 875 | "mrstat.mean_diff_confint_ind(s_boys,s_girls)" 876 | ] 877 | }, 878 | { 879 | "cell_type": "markdown", 880 | "metadata": {}, 881 | "source": [ 882 | "Истинное значение разницы для двух средних с 95% вероятностью лежит в этом интервале. Данный интервал содержит 0 - это значит, что разницу двух средних нельзя считать отличной от нуля. Построим такой же интервал для оценок по write." 883 | ] 884 | }, 885 | { 886 | "cell_type": "code", 887 | "execution_count": 21, 888 | "metadata": { 889 | "collapsed": false 890 | }, 891 | "outputs": [ 892 | { 893 | "data": { 894 | "text/plain": [ 895 | "(2.2980585638240707, 7.4418345705644668)" 896 | ] 897 | }, 898 | "execution_count": 21, 899 | "metadata": {}, 900 | "output_type": "execute_result" 901 | } 902 | ], 903 | "source": [ 904 | "mrstat.mean_diff_confint_ind(w_girls,w_boys)" 905 | ] 906 | }, 907 | { 908 | "cell_type": "markdown", 909 | "metadata": {}, 910 | "source": [ 911 | "Данный интервал не содержит ноль - значит разницу можно признать отличной от нуля." 912 | ] 913 | }, 914 | { 915 | "cell_type": "markdown", 916 | "metadata": {}, 917 | "source": [ 918 | "### 4. 
Пропорция" 919 | ] 920 | }, 921 | { 922 | "cell_type": "markdown", 923 | "metadata": {}, 924 | "source": [ 925 | "#### 4.1. Тест для одной доли" 926 | ] 927 | }, 928 | { 929 | "cell_type": "markdown", 930 | "metadata": {}, 931 | "source": [ 932 | "Рассчитаем долю девочек в выборке." 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": 22, 938 | "metadata": { 939 | "collapsed": false 940 | }, 941 | "outputs": [ 942 | { 943 | "data": { 944 | "text/plain": [ 945 | "0.545" 946 | ] 947 | }, 948 | "execution_count": 22, 949 | "metadata": {}, 950 | "output_type": "execute_result" 951 | } 952 | ], 953 | "source": [ 954 | "hsb.female.mean()" 955 | ] 956 | }, 957 | { 958 | "cell_type": "markdown", 959 | "metadata": {}, 960 | "source": [ 961 | "Сравним долю девочек в выборке с гипотетической долей 0.5" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": 23, 967 | "metadata": { 968 | "collapsed": false 969 | }, 970 | "outputs": [ 971 | { 972 | "data": { 973 | "text/plain": [ 974 | "0.20125699204448533" 975 | ] 976 | }, 977 | "execution_count": 23, 978 | "metadata": {}, 979 | "output_type": "execute_result" 980 | } 981 | ], 982 | "source": [ 983 | "mrstat.prop_test(hsb.female,0.5)" 984 | ] 985 | }, 986 | { 987 | "cell_type": "markdown", 988 | "metadata": {}, 989 | "source": [ 990 | "P-value получился > 0.05 значит мы не можем утверждать, что истинная доля девочек отлична от 0.5" 991 | ] 992 | }, 993 | { 994 | "cell_type": "markdown", 995 | "metadata": {}, 996 | "source": [ 997 | "#### 4.2 Доверительный интервал для одной доли" 998 | ] 999 | }, 1000 | { 1001 | "cell_type": "code", 1002 | "execution_count": 24, 1003 | "metadata": { 1004 | "collapsed": false 1005 | }, 1006 | "outputs": [ 1007 | { 1008 | "data": { 1009 | "text/plain": [ 1010 | "(0.47598602492749653, 0.61401397507250355)" 1011 | ] 1012 | }, 1013 | "execution_count": 24, 1014 | "metadata": {}, 1015 | "output_type": "execute_result" 1016 | } 1017 | ], 1018 | "source": [ 
1019 | "mrstat.prop_confint(hsb.female)" 1020 | ] 1021 | }, 1022 | { 1023 | "cell_type": "markdown", 1024 | "metadata": {}, 1025 | "source": [ 1026 | "Истинная доля девочек с 95% вероятностью лежит в этом интервале. Интервал содержит 0.5 - значит нет оснований утверждать, что истинная доля отлична от 0.5. Запишем так же в стиле плюс-минус:" 1027 | ] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": 25, 1032 | "metadata": { 1033 | "collapsed": false 1034 | }, 1035 | "outputs": [ 1036 | { 1037 | "name": "stdout", 1038 | "output_type": "stream", 1039 | "text": [ 1040 | "0.545 +/- 0.0690139750725\n" 1041 | ] 1042 | } 1043 | ], 1044 | "source": [ 1045 | "lb, hb = mrstat.prop_confint(hsb['female'])\n", 1046 | "print hsb.female.mean(), '+/-', (hb-lb)/2." 1047 | ] 1048 | }, 1049 | { 1050 | "cell_type": "markdown", 1051 | "metadata": {}, 1052 | "source": [ 1053 | "#### 4.3 Размер выборки для заданной доли и ширины интервала" 1054 | ] 1055 | }, 1056 | { 1057 | "cell_type": "markdown", 1058 | "metadata": {}, 1059 | "source": [ 1060 | "С помощью данной процедуры можно определить, какой размер выборки потребуется для того, чтобы получить доверительный интервал заданной ширины. Например, нужно понять, сколько наблюдений потребуется, чтобы утверждать, что истинное среднее (доля) равно 0.5 плюс-минус 0.05. Для этого:" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "code", 1065 | "execution_count": 26, 1066 | "metadata": { 1067 | "collapsed": false 1068 | }, 1069 | "outputs": [ 1070 | { 1071 | "data": { 1072 | "text/plain": [ 1073 | "384.14588206941266" 1074 | ] 1075 | }, 1076 | "execution_count": 26, 1077 | "metadata": {}, 1078 | "output_type": "execute_result" 1079 | } 1080 | ], 1081 | "source": [ 1082 | "mrstat.samplesize_confint_proportion(0.5,0.05)" 1083 | ] 1084 | }, 1085 | { 1086 | "cell_type": "markdown", 1087 | "metadata": {}, 1088 | "source": [ 1089 | "Потребуется выборка размером 385 (384.15 с округлением в большую сторону)." 
1090 | ] 1091 | }, 1092 | { 1093 | "cell_type": "markdown", 1094 | "metadata": {}, 1095 | "source": [ 1096 | "### 5. Две доли и АБ тестинг" 1097 | ] 1098 | }, 1099 | { 1100 | "cell_type": "markdown", 1101 | "metadata": {}, 1102 | "source": [ 1103 | "#### 5.1 Тест разности двух независимых долей" 1104 | ] 1105 | }, 1106 | { 1107 | "cell_type": "markdown", 1108 | "metadata": {}, 1109 | "source": [ 1110 | "Проверим различается ли доля тех, кто ходит в частную школу среди мальчиков и девочек. Для начала построим таблицу сопряженности." 1111 | ] 1112 | }, 1113 | { 1114 | "cell_type": "code", 1115 | "execution_count": 27, 1116 | "metadata": { 1117 | "collapsed": false 1118 | }, 1119 | "outputs": [ 1120 | { 1121 | "data": { 1122 | "text/html": [ 1123 | "
\n", 1124 | "\n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | "
female01
schtyp
17791
21418
\n", 1150 | "
" 1151 | ], 1152 | "text/plain": [ 1153 | "female 0 1\n", 1154 | "schtyp \n", 1155 | "1 77 91\n", 1156 | "2 14 18" 1157 | ] 1158 | }, 1159 | "execution_count": 27, 1160 | "metadata": {}, 1161 | "output_type": "execute_result" 1162 | } 1163 | ], 1164 | "source": [ 1165 | "table = hsb.pivot_table(values=u'id',index=u'schtyp',columns=u'female',aggfunc='count')\n", 1166 | "table" 1167 | ] 1168 | }, 1169 | { 1170 | "cell_type": "markdown", 1171 | "metadata": {}, 1172 | "source": [ 1173 | "Теперь рассчитаем долю учащихся в частных школах для мальчиков и девочек, а так же размер выборок." 1174 | ] 1175 | }, 1176 | { 1177 | "cell_type": "code", 1178 | "execution_count": 28, 1179 | "metadata": { 1180 | "collapsed": false 1181 | }, 1182 | "outputs": [ 1183 | { 1184 | "data": { 1185 | "text/plain": [ 1186 | "(0.15384615384615385, 91, 0.16513761467889909, 109)" 1187 | ] 1188 | }, 1189 | "execution_count": 28, 1190 | "metadata": {}, 1191 | "output_type": "execute_result" 1192 | } 1193 | ], 1194 | "source": [ 1195 | "mrstat.get_props_and_lens(table)" 1196 | ] 1197 | }, 1198 | { 1199 | "cell_type": "markdown", 1200 | "metadata": {}, 1201 | "source": [ 1202 | "Доля учащихся частных школ среди мальчиков равна 0.15, а для девочек 0.17. Проверим значима ли эта разница." 1203 | ] 1204 | }, 1205 | { 1206 | "cell_type": "code", 1207 | "execution_count": 29, 1208 | "metadata": { 1209 | "collapsed": false 1210 | }, 1211 | "outputs": [ 1212 | { 1213 | "data": { 1214 | "text/plain": [ 1215 | "0.82828242684033948" 1216 | ] 1217 | }, 1218 | "execution_count": 29, 1219 | "metadata": {}, 1220 | "output_type": "execute_result" 1221 | } 1222 | ], 1223 | "source": [ 1224 | "mrstat.proportions_diff_ind(*mrstat.get_props_and_lens(table))" 1225 | ] 1226 | }, 1227 | { 1228 | "cell_type": "markdown", 1229 | "metadata": {}, 1230 | "source": [ 1231 | "P-value получился гораздо больше 0.05 - следовательно нельзя отвергнуть гипотезу о том, что доли равны. 
Делаем вывод, что доля тех, кто учится в частных школах, не зависит от пола." 1232 | ] 1233 | }, 1234 | { 1235 | "cell_type": "markdown", 1236 | "metadata": {}, 1237 | "source": [ 1238 | "#### 5.2 Доверительный интервал для разности двух независимых долей" 1239 | ] 1240 | }, 1241 | { 1242 | "cell_type": "markdown", 1243 | "metadata": {}, 1244 | "source": [ 1245 | "Построим 95% доверительный интервал для разности долей частных школ среди мальчиков и девочек." 1246 | ] 1247 | }, 1248 | { 1249 | "cell_type": "code", 1250 | "execution_count": 30, 1251 | "metadata": { 1252 | "collapsed": false 1253 | }, 1254 | "outputs": [ 1255 | { 1256 | "data": { 1257 | "text/plain": [ 1258 | "(-0.11304660507498858, 0.090463683409498105)" 1259 | ] 1260 | }, 1261 | "execution_count": 30, 1262 | "metadata": {}, 1263 | "output_type": "execute_result" 1264 | } 1265 | ], 1266 | "source": [ 1267 | "mrstat.proportions_confint_diff_ind(*mrstat.get_props_and_lens(table))" 1268 | ] 1269 | }, 1270 | { 1271 | "cell_type": "markdown", 1272 | "metadata": {}, 1273 | "source": [ 1274 | "Доверительный интервал получился очень широким, а также он содержит 0. На основе этого тоже можно сделать вывод, что доли учащихся в частных школах среди мальчиков и девочек значимо не отличаются." 
1275 | ] 1276 | }, 1277 | { 1278 | "cell_type": "markdown", 1279 | "metadata": {}, 1280 | "source": [ 1281 | "Обратите внимание, что в mrstat есть несколько похожих функций для проверки гипотез о доле и построения доверительных интрервалов:\n", 1282 | "\n", 1283 | " mrstat.proportions_confint_diff_ind(p1,n1,p2,n2) - берет на вход доли и размеры выборок\n", 1284 | " mrstat.proportions_confint_diff_ind_samples(sample1,sample2) - берет на вход две выборки из 0 и 1\n", 1285 | " mrstat.proportions_confint_diff_ind_table(contigency_table) - берет на вход таблицу сопряженности" 1286 | ] 1287 | }, 1288 | { 1289 | "cell_type": "markdown", 1290 | "metadata": {}, 1291 | "source": [ 1292 | "#### 5.3 Тест Хи-квадрат" 1293 | ] 1294 | }, 1295 | { 1296 | "cell_type": "markdown", 1297 | "metadata": {}, 1298 | "source": [ 1299 | "С помощью теста хи-квадрат можно проверить имеются ли статистически значимые различия между долей частных школ среди мальчиков и девочек. Для этого сначала нужно посчитать таблицу сопряженности." 1300 | ] 1301 | }, 1302 | { 1303 | "cell_type": "code", 1304 | "execution_count": 31, 1305 | "metadata": { 1306 | "collapsed": false 1307 | }, 1308 | "outputs": [ 1309 | { 1310 | "data": { 1311 | "text/html": [ 1312 | "
\n", 1313 | "\n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | "
female01
schtyp
17791
21418
\n", 1339 | "
" 1340 | ], 1341 | "text/plain": [ 1342 | "female 0 1\n", 1343 | "schtyp \n", 1344 | "1 77 91\n", 1345 | "2 14 18" 1346 | ] 1347 | }, 1348 | "execution_count": 31, 1349 | "metadata": {}, 1350 | "output_type": "execute_result" 1351 | } 1352 | ], 1353 | "source": [ 1354 | "table" 1355 | ] 1356 | }, 1357 | { 1358 | "cell_type": "code", 1359 | "execution_count": 32, 1360 | "metadata": { 1361 | "collapsed": false 1362 | }, 1363 | "outputs": [ 1364 | { 1365 | "data": { 1366 | "text/plain": [ 1367 | "(0.04704775346977183, 0.82828242684033948, 1L, array([[ 76.44, 91.56],\n", 1368 | " [ 14.56, 17.44]]))" 1369 | ] 1370 | }, 1371 | "execution_count": 32, 1372 | "metadata": {}, 1373 | "output_type": "execute_result" 1374 | } 1375 | ], 1376 | "source": [ 1377 | "mrstat.chi2_contingency(table,correction=False)" 1378 | ] 1379 | }, 1380 | { 1381 | "cell_type": "markdown", 1382 | "metadata": {}, 1383 | "source": [ 1384 | "P-value получился равным 0.83, это больше чем 0.05 следовательно гипотеза о том, что доли различаются, отвергается. Обратите внимание, что p-value получилось абсолютно такой же, как в тесте для двух независимых долей в пункте 5.1. Это происходит из-за того, что в основе теста для двух долей лежит нормальное распределение, а хи-квадрат - это распределение, которое является производной от нормального распределениея, и для данного случая они совпадают." 1385 | ] 1386 | }, 1387 | { 1388 | "cell_type": "markdown", 1389 | "metadata": {}, 1390 | "source": [ 1391 | "#### 5.4 Точный тест Фишера" 1392 | ] 1393 | }, 1394 | { 1395 | "cell_type": "markdown", 1396 | "metadata": {}, 1397 | "source": [ 1398 | "Если в таблице сопряженности есть ячейки содержащие значения меньше 5, то хи-квадрат использовать нельзя. Но можно использовать точный тест Фишера. На вход он принимает ту же таблицу сопряженности." 
1399 | ] 1400 | }, 1401 | { 1402 | "cell_type": "code", 1403 | "execution_count": 33, 1404 | "metadata": { 1405 | "collapsed": false 1406 | }, 1407 | "outputs": [ 1408 | { 1409 | "data": { 1410 | "text/plain": [ 1411 | "(1.0879120879120878, 0.84917505847685981)" 1412 | ] 1413 | }, 1414 | "execution_count": 33, 1415 | "metadata": {}, 1416 | "output_type": "execute_result" 1417 | } 1418 | ], 1419 | "source": [ 1420 | "mrstat.fisher_exact(table)" 1421 | ] 1422 | }, 1423 | { 1424 | "cell_type": "markdown", 1425 | "metadata": {}, 1426 | "source": [ 1427 | "Да, кстати, всё это примеры АБ тестинга)." 1428 | ] 1429 | }, 1430 | { 1431 | "cell_type": "markdown", 1432 | "metadata": {}, 1433 | "source": [ 1434 | "#### 5.5 Размер выборок для двух пропорций" 1435 | ] 1436 | }, 1437 | { 1438 | "cell_type": "markdown", 1439 | "metadata": {}, 1440 | "source": [ 1441 | "При планировании аб тестирования требуется понимать, какого объема выборка потребуется. Для того, чтобы это сделать, нужно знать следующие параметры:\n", 1442 | "\n", 1443 | "- базовый уровень показателя p1, т.е. это та цифра, которую необходимо улучшить\n", 1444 | "- желаемый уровень показателя, т.е. цифра, при которой тестирование будет считаться успешным. Определяется экспертно.\n", 1445 | "- достигаемый уровень значимости и мощность. Общепринятые стандарты это 0.05 и 0.8\n", 1446 | "- пропорции контрольной и тестовой выборок, если необходимо.\n", 1447 | "\n", 1448 | "Рассмотрим на примере. Допустим, есть какой-либо показатель доли в 10% и мы хотим его увеличить с помощью новой технологии. Увеличение будет считаться успешным, если показатель вырастет на 20%. Также мы не хотим делать большую контрольную группу, чтобы сразу получить выгоду от внедрения новой технологии - сделаем контрольную группу 20% от всего теста. Уровень значимости и мощность оставим стандартными. 
Вот что получится:" 1449 | ] 1450 | }, 1451 | { 1452 | "cell_type": "code", 1453 | "execution_count": 34, 1454 | "metadata": { 1455 | "collapsed": true 1456 | }, 1457 | "outputs": [], 1458 | "source": [ 1459 | "p1 = 0.1\n", 1460 | "p2 = 0.1 * 1.2" 1461 | ] 1462 | }, 1463 | { 1464 | "cell_type": "code", 1465 | "execution_count": 35, 1466 | "metadata": { 1467 | "collapsed": false 1468 | }, 1469 | "outputs": [ 1470 | { 1471 | "data": { 1472 | "text/plain": [ 1473 | "(2396.5, 9586.0)" 1474 | ] 1475 | }, 1476 | "execution_count": 35, 1477 | "metadata": {}, 1478 | "output_type": "execute_result" 1479 | } 1480 | ], 1481 | "source": [ 1482 | "mrstat.two_proportions_sample_size(p1,p2,frac=0.2)" 1483 | ] 1484 | }, 1485 | { 1486 | "cell_type": "markdown", 1487 | "metadata": {}, 1488 | "source": [ 1489 | "Получается, что для того чтобы считать тестирование состоявшимся необходимо накопить 2396.5 в контроле и 9586.0 в пилоте или 11982.5 для всего тестирования. Данные значения нужно округлять до целого, а еще лучше до круглых чисел в большую сторону." 1490 | ] 1491 | }, 1492 | { 1493 | "cell_type": "markdown", 1494 | "metadata": {}, 1495 | "source": [ 1496 | "### 6. Непараметрические критерии" 1497 | ] 1498 | }, 1499 | { 1500 | "cell_type": "markdown", 1501 | "metadata": {}, 1502 | "source": [ 1503 | "Непараметрические критерии следует использовать с крайне ненормальными и несимметричными распределениями." 1504 | ] 1505 | }, 1506 | { 1507 | "cell_type": "markdown", 1508 | "metadata": {}, 1509 | "source": [ 1510 | "#### 6.1 Критерий знаков" 1511 | ] 1512 | }, 1513 | { 1514 | "cell_type": "markdown", 1515 | "metadata": {}, 1516 | "source": [ 1517 | "Применяется для проверки гипотезы о медиане. Не предъявляет никаких требований к распределению. Проверим гипотезу о том, что медиана оценок по write равна 50." 
1518 | ] 1519 | }, 1520 | { 1521 | "cell_type": "code", 1522 | "execution_count": 36, 1523 | "metadata": { 1524 | "collapsed": false 1525 | }, 1526 | "outputs": [ 1527 | { 1528 | "data": { 1529 | "text/plain": [ 1530 | "54.0" 1531 | ] 1532 | }, 1533 | "execution_count": 36, 1534 | "metadata": {}, 1535 | "output_type": "execute_result" 1536 | } 1537 | ], 1538 | "source": [ 1539 | "np.median(hsb.write)" 1540 | ] 1541 | }, 1542 | { 1543 | "cell_type": "code", 1544 | "execution_count": 37, 1545 | "metadata": { 1546 | "collapsed": false 1547 | }, 1548 | "outputs": [ 1549 | { 1550 | "data": { 1551 | "text/plain": [ 1552 | "(27.0, 0.00015185854094876164)" 1553 | ] 1554 | }, 1555 | "execution_count": 37, 1556 | "metadata": {}, 1557 | "output_type": "execute_result" 1558 | } 1559 | ], 1560 | "source": [ 1561 | "mrstat.sign_test(hsb.write,50)" 1562 | ] 1563 | }, 1564 | { 1565 | "cell_type": "markdown", 1566 | "metadata": {}, 1567 | "source": [ 1568 | "P-value маленький, следовательно, гипотеза о медиане, равной 50, отвергается." 1569 | ] 1570 | }, 1571 | { 1572 | "cell_type": "markdown", 1573 | "metadata": {}, 1574 | "source": [ 1575 | "#### 6.2 Критерий знаковых рангов Вилкоксона" 1576 | ] 1577 | }, 1578 | { 1579 | "cell_type": "markdown", 1580 | "metadata": {}, 1581 | "source": [ 1582 | "Этот критерий использует больше информации, чем просто критерий знаков. Проверим ту же гипотезу." 
1583 | ] 1584 | }, 1585 | { 1586 | "cell_type": "code", 1587 | "execution_count": 38, 1588 | "metadata": { 1589 | "collapsed": false 1590 | }, 1591 | "outputs": [ 1592 | { 1593 | "data": { 1594 | "text/plain": [ 1595 | "WilcoxonResult(statistic=6524.0, pvalue=3.6917631525880871e-05)" 1596 | ] 1597 | }, 1598 | "execution_count": 38, 1599 | "metadata": {}, 1600 | "output_type": "execute_result" 1601 | } 1602 | ], 1603 | "source": [ 1604 | "mrstat.wilcoxon(hsb.write.values-50)" 1605 | ] 1606 | }, 1607 | { 1608 | "cell_type": "markdown", 1609 | "metadata": {}, 1610 | "source": [ 1611 | "В данном случае гипотеза о медиане равной 50 отвергается с еще большей силой. Критерий знаков и критерий знаковых рангов Вилкоксона являются альтернативой для одновыборочного ти-теста, а критерий знаковых рангов может еще применяться для проверки гипотезы о равенстве средних для двух зависимых выборок." 1612 | ] 1613 | }, 1614 | { 1615 | "cell_type": "markdown", 1616 | "metadata": {}, 1617 | "source": [ 1618 | "#### 6.3 Критерий Манна-Уитни" 1619 | ] 1620 | }, 1621 | { 1622 | "cell_type": "markdown", 1623 | "metadata": {}, 1624 | "source": [ 1625 | "Критерий Манна-Уитни является альтернативой ти-теста для двух независимых выборок и может применяться при любом типе распределения. Этот критерий проверяет гипотезу о том, что две выборки взяты из одной генеральной совокупности, в частности, насколько значения одной выборки больше чем в другой. Применим критерий Манна-Уитни к случаю с оценкой по тесту write для мальчиков и девочек." 
1626 | ] 1627 | }, 1628 | { 1629 | "cell_type": "code", 1630 | "execution_count": 39, 1631 | "metadata": { 1632 | "collapsed": false 1633 | }, 1634 | "outputs": [ 1635 | { 1636 | "data": { 1637 | "text/plain": [ 1638 | "MannwhitneyuResult(statistic=3606.0, pvalue=0.00043746993681806757)" 1639 | ] 1640 | }, 1641 | "execution_count": 39, 1642 | "metadata": {}, 1643 | "output_type": "execute_result" 1644 | } 1645 | ], 1646 | "source": [ 1647 | "mrstat.mannwhitneyu(w_boys,w_girls)" 1648 | ] 1649 | }, 1650 | { 1651 | "cell_type": "markdown", 1652 | "metadata": {}, 1653 | "source": [ 1654 | "P-value < 0.05, следовательно, гипотеза о том, что две выборки пришли из одного распределения, отвергается. Результаты критерия Манна-Уитни можно применять наравне с обычным ти-тестом.\n", 1655 | "\n", 1656 | "Кстати, критерий Манна-Уитни является официальным \"сдаточным\" критерием для ряда АБ тестов в Yandex." 1657 | ] 1658 | }, 1659 | { 1660 | "cell_type": "markdown", 1661 | "metadata": {}, 1662 | "source": [ 1663 | "#### 6.4 Бутстреп" 1664 | ] 1665 | }, 1666 | { 1667 | "cell_type": "markdown", 1668 | "metadata": {}, 1669 | "source": [ 1670 | "С помощью бутстрепа можно проверять любой параметр вне зависимости от распределения выборки." 
1671 | ] 1672 | }, 1673 | { 1674 | "cell_type": "code", 1675 | "execution_count": 40, 1676 | "metadata": { 1677 | "collapsed": false 1678 | }, 1679 | "outputs": [ 1680 | { 1681 | "name": "stdout", 1682 | "output_type": "stream", 1683 | "text": [ 1684 | "52.775\n", 1685 | "54.0\n" 1686 | ] 1687 | } 1688 | ], 1689 | "source": [ 1690 | "print np.mean(hsb.write)\n", 1691 | "print np.median(hsb.write)" 1692 | ] 1693 | }, 1694 | { 1695 | "cell_type": "code", 1696 | "execution_count": 41, 1697 | "metadata": { 1698 | "collapsed": false 1699 | }, 1700 | "outputs": [ 1701 | { 1702 | "name": "stdout", 1703 | "output_type": "stream", 1704 | "text": [ 1705 | "(p-value for mean = 51) = 0.004\n", 1706 | "(p-value for median = 53) = 0.16\n" 1707 | ] 1708 | } 1709 | ], 1710 | "source": [ 1711 | "print '(p-value for mean = 51) = ', mrstat.bootstrap_test(hsb.write.values,51,np.mean)\n", 1712 | "print '(p-value for median = 53) = ', mrstat.bootstrap_test(hsb.write.values,53,np.median)" 1713 | ] 1714 | }, 1715 | { 1716 | "cell_type": "markdown", 1717 | "metadata": {}, 1718 | "source": [ 1719 | "Гипотеза о среднем равном 51 отвергается (p-value < 0.05), гипотеза о медиане равной 53 принимается (p-value > 0.05)." 1720 | ] 1721 | }, 1722 | { 1723 | "cell_type": "markdown", 1724 | "metadata": {}, 1725 | "source": [ 1726 | "С помощью бутстрепа так же можно строить доверительные интервалы." 1727 | ] 1728 | }, 1729 | { 1730 | "cell_type": "code", 1731 | "execution_count": 42, 1732 | "metadata": { 1733 | "collapsed": false 1734 | }, 1735 | "outputs": [ 1736 | { 1737 | "name": "stdout", 1738 | "output_type": "stream", 1739 | "text": [ 1740 | "mean interval [ 51.389875 54.105 ]\n", 1741 | "median interval [ 52. 
57.]\n" 1742 | ] 1743 | } 1744 | ], 1745 | "source": [ 1746 | "print 'mean interval ', mrstat.bootstrap_conf_int(hsb.write.values,np.mean)\n", 1747 | "print 'median interval ', mrstat.bootstrap_conf_int(hsb.write.values,np.median)" 1748 | ] 1749 | }, 1750 | { 1751 | "cell_type": "markdown", 1752 | "metadata": {}, 1753 | "source": [ 1754 | "А также доверительный интервал для разницы двух параметров." 1755 | ] 1756 | }, 1757 | { 1758 | "cell_type": "code", 1759 | "execution_count": 43, 1760 | "metadata": { 1761 | "collapsed": false 1762 | }, 1763 | "outputs": [ 1764 | { 1765 | "name": "stdout", 1766 | "output_type": "stream", 1767 | "text": [ 1768 | "[-1.3 2.4405]\n", 1769 | "[ 0. 7.]\n" 1770 | ] 1771 | } 1772 | ], 1773 | "source": [ 1774 | "print mrstat.bootstrap_diff_conf_int(hsb.write.values,hsb.read.values,np.mean)\n", 1775 | "print mrstat.bootstrap_diff_conf_int(hsb.write.values,hsb.read.values,np.median)" 1776 | ] 1777 | }, 1778 | { 1779 | "cell_type": "markdown", 1780 | "metadata": {}, 1781 | "source": [ 1782 | "Мы видим, что доверительные интервалы для разницы и средних, и медиан между оценками по write и read содержат 0, следовательно, нельзя утверждать, что между этими параметрами существует значимая разница." 1783 | ] 1784 | }, 1785 | { 1786 | "cell_type": "markdown", 1787 | "metadata": {}, 1788 | "source": [ 1789 | "Бутстреп можно использовать тогда, когда нельзя применить другие модели из-за различных ограничений, например, по размеру выборки." 1790 | ] 1791 | }, 1792 | { 1793 | "cell_type": "markdown", 1794 | "metadata": {}, 1795 | "source": [ 1796 | "### 7. 
Корреляция" 1797 | ] 1798 | }, 1799 | { 1800 | "cell_type": "markdown", 1801 | "metadata": {}, 1802 | "source": [ 1803 | "#### 7.1 Коэффициет корреляции Пирсона" 1804 | ] 1805 | }, 1806 | { 1807 | "cell_type": "code", 1808 | "execution_count": 44, 1809 | "metadata": { 1810 | "collapsed": false 1811 | }, 1812 | "outputs": [ 1813 | { 1814 | "data": { 1815 | "text/plain": [ 1816 | "" 1817 | ] 1818 | }, 1819 | "execution_count": 44, 1820 | "metadata": {}, 1821 | "output_type": "execute_result" 1822 | }, 1823 | { 1824 | "data": { 1825 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHwZJREFUeJzt3X+MXeV95/H312AMxRDM2B28sQ2kdumyBTswm0JdEOGX\n0i0CVqloqyBoF9a7UtWm2W3BRKvVJtpWOFVLWmmVLus2chL6w4Llh7wrFmMXQVhCMnaM2RqovQFq\nW8wYBthgEowL3/3jnjEz9tw55/o+5znPc8/nJVl37vGde77nOXfmO+c5z/N9zN0REZH2mtN0ACIi\n0iwlAhGRllMiEBFpOSUCEZGWUyIQEWk5JQIRkZZTIhARaTklAhGRllMiEBFpuRObDqCKhQsX+jnn\nnNN0GCIiWdm2bdsb7r6o7HVZJIJzzjmH0dHRpsMQEcmKmb1a5XXqGhIRaTklAhGRllMiEBFpOSUC\nEZGWUyIQEWk5JQKRxE0cPMRze99m4uChpkNpjba1eRbDR0Xa6uEd+7nzgZ3MnTOHwx9+yFc+eyHX\nr/p402ENtDa2ua4IRBI1cfAQdz6wk/cOf8g7h/6R9w5/yB0P7GzNX6lNaGubKxGIJGrfWz9m7pzp\nP6Jz58xh31s/biiiwdfWNlciEEnUkgWncPjDD6dtO/zhhyxZcEpDEQ2+tra5EoFIoobmz+Mrn72Q\nk+fO4bR5J3Ly3Dl85bMXMjR/XtOhDay2trm5e9MxlBoZGXHVGpK2mjh4iH1v/ZglC04Z+F9IqRiU\nNjezbe4+UvY6jRoSSdzQ/HlZ/zLKUdvaXF1DIiItp0QgItJySgQiIi2nRCAi0nJKBCIiLadEICLZ\nqFIMruw1uRSU2zP+DveP7mXP+Du170vDR0UkC1WKwZW9JpeCcv/xoef5xnf+4cjzWy5dxpdvuKC2\n/emKQESSV6UYXNlrcikot2f8nWlJAOAbz/xDrVcGSgQikrwqxeDKXpNLQbkde9/uaXsISgQikrwq\nxeDKXpNLQblVS8/oaXsISgQikrwqxeDKXpNLQbnlw6dxy6XLpm275dJlLB8+rbZ9quiciGSjSjG4\nstfkUlBuz/g77Nj7NquWnnHcSUBF50Rk4FQpBlf2mlwKyi0fPq3Wq4Cp1DUkIlKDmPMA+qUrAhGR\nwGLPA+iXrghERAJqYh5Av2pLBGZ2npntmPLvh2b2O2Z2ppltNrPdxeOCumIQEYmtiXkA/aotEbj7\nS+6+yt1XARcDPwIeBNYCW9x9BbCleC4iMhCamAfQr1hdQ1cB/9fdXwVuADYU2zcAN0aKQUSkdk3M\nA+hXrJvFvwr8VfH1sLu/Vnw9BgxHikFEJIov33ABt1xyTt/zAGKpPRGY2UnA9cBdR/+fu7uZzTij\nzczWAGsAli1bNtNLRESSFXMeQL9idA39IrDd3ceL5+Nm
thigeDww0ze5+73uPuLuI4sWLYoQpohI\nO8VIBL/GR91CAI8AtxZf3wo8HCEGERHpotZEYGanAtcA/33K5ruBa8xsN3B18VxERBpS6z0Cd38X\nGDpq2wSdUUQiIpIAzSwWaYFc1ul9aPtebt/wPR7avnfG/w9xHLFqAKnWkIgkI5d1ei/5g82M/fB9\nAB5/4QDrHn2RZ754zZH/D3EcsWoAqdaQiCQjl3V6H9q+90gSmPTaD98/cmUQ4jhi1QBSrSERSUou\n6/Ruen5s1u0hjiNWDSDVGhKRpOSyTu91F5w16/YQxxGrBpBqDYlIUnJZp/fGi5ay+PSTpm1bfPpJ\n3HjRUiDMccSqAZRjrSGtWSzSArms0/vQ9r1sen6M6y4460gSmCrEcYRYCzil/cym6prFSgQiIgOq\naiJQ15CISIJizv3QPAIRkcTEnvuhKwIRkYQ0MfdDiUBEJCFNzP1QIhARSUgTcz+UCERaIJeiczEK\ntcVqi9GXJ/jjx15i9OWJnr5vcs7ESScY806cw0knWO1zP3SzWGTA5VJ0LkahtlhtcfP67/DtPZ0E\n8Kdb93DZ8iG+efsllb9/9JU3ef8DBzrD+0dffVM3i0Xk+ORSdC5GobZYbTH68sSRJDDpqT0Tla8M\nmihap0QgMsByKToXo1BbrLZ4cvcbPW0/WhNF65QIRAZYLkXnYhRqi9UWl69Y2NP2ozVRtE6JQGSA\n5VJ0LkahtlhtMXLuEJctn7ZCL5ctH2Lk3KEu3zFdE0XrVGtIpAVyKToXo1BbrLYYfXmCJ3e/weUr\nFlZOAlOFaAsVnRMRaTkVnZNklY3j1pj36drUHmXvEaItqrxHiGM53nkETdA8AomqbBy3xrxP16b2\nKHuPEG1R5T1CHEu/8whi0xWBRFM2jltj3qdrU3uUvUeItqjyHiGOpd95BE1QIpBoysZxa8z7dG1q\nj7L3CNEWVd4jxLH0O4+gCUoEEk3ZOG6NeZ+uTe1R9h4h2qLKe4Q4ln7nETRBiUCiKRvHrTHv07Wp\nPcreI0RbVHmPEMfS7zyCJmj4qERXNo5bY96na1N7lL1HiLao8h4hjqXfeQQhaB6BiEjLaR6BiGQn\nlzkTg0bzCEQkCbnMmRhEuiIQkcblMmdiUNWaCMzsDDO738xeNLMXzOxSMzvTzDab2e7icUGdMYhI\n+nKZMzGo6r4i+BPgUXf/GWAl8AKwFtji7iuALcVzkaAGqa85xLGkXvcm5pwJ1Yg6Vm33CMzsY8Dl\nwK8DuPv7wPtmdgNwRfGyDcATwJ11xSHtM0h9zSGOJYe6N0Pz53HTxUumlXe4aWRJ8OGyqhE1szqv\nCM4FXge+bmbfN7P1ZnYqMOzurxWvGQOGa4xBWmaQ+ppDHEsudW8mDh5i47Z907ZtHN0X9LypRlR3\ndSaCE4GLgK+5+yeBdzmqG8g7kxhmnMhgZmvMbNTMRl9//fUaw5RBMkh9zSGOJZe6NzHOm2pEdVdn\nItgH7HP3Z4vn99NJDONmthigeDww0ze7+73uPuLuI4sWLaoxTBkkudTnqSLEseRS9ybGeVONqO5q\nSwTuPgbsNbPzik1XAbuAR4Bbi223Ag/XFYO0Ty71eaoIcSy51L2Jcd5UI6q7WktMmNkqYD1wEvAD\n4DfoJJ+NwDLgVeAmd39ztvdRiQnpVS71eaoIcSwp1L2pIsZ5a1ONKNUaEhFpOdUaEhGRSpQIRERa\nTolARKTllAhERFpOiUB6klP9lBSEaK8QtXHK4ggRZ4z3yKnuUqyaRiFoPQKpLLf6KU0L0V4hauOU\nxREizhjvkVPdpVg1jULRFYFUkmP9lCaFaK8QtXHK4ggRZ4z3yKnuUqyaRiEpEUglOdZPaVKI9gpR\nG6csjhBxxniPnOouxappFJISgVSSY/2UJoVorxC1ccriCBFnjPfIqe5SrJpGISkRSCU51k9pUoj2\nClEbpyyOEHHGeI+c
6i7FqmkUkkpMSE9SqJ+SkxDtFaI2TlkcIeKM8R451V2KVdNoNqo1JCLScqo1\nJCIilSgRtEgqk4ZCSCGOFGKoKkasqUz2KotDi9cfSxPKWiKVSUMhpBBHCjFUFSPWVCZ7lcWhxetn\npiuCFkhl0lAIKcSRQgxVxYg1lcleZXFo8frulAhaIJVJQyGkEEcKMVQVI9ZUJnuVxaHF67tTImiB\nVCYNhZBCHCnEUFWMWFOZ7FUWRy8Tvfrp38/p8zFJiaAFUpk0FEIKcaQQQ1UxYk1lsldZHFUnej28\nYz+r123l5vXPsnrdVh7Zsb9yDFXiSJHmEbRIKpOGQkghjhRiqCpGrKlM9iqLY7aJXhMHD7F63Vbe\nO/zRX/Qnz53D03de2fMxpfD5qDqPYNZRQ2b2PNA1U7j7hccRmzRkaP68vj+QId4jhBTiSCGGqmLE\nGmIfI+f2X/KhLI7lw6d1nek72b//Hh8lgsn+/V6PLafPR9nw0euKx98sHr9ZPH6unnBERPrTz1/i\nOfbvhzBrInD3VwHM7Bp3/+SU/1prZtuBtXUGJyLSi37H70/2799x1Hvk8pf98ao6oczMbLW7P108\n+Xl0o1lEEjJ1/P5k184dD+xk9fKFPf0iv37Vx1m9fGHj/fsxVU0EtwF/YWYfAwx4C/hXtUUlItKj\ntvbvh1Dpr3p33+buK4GVwIXuvsrdt9cbmqQol5o1MeQSJ5TX10mlDtWWXWPcef9zbNk11vP3Vu3f\nj3Xe+t1PzM9X5VpDZvZLwD8DTjYzANz9yzXFJQnKpWZNDLnECeX1dVKpQ3XtPU/w9+PvAvA3o/s4\nb/hU/tcXrqj8/VX692Odt373E/vzVWkegZn9GfATwKeB9cAvA99199tqi2wKzSNoXsjx1U3uI4Rc\n4oTOlcDV9zx5zPbHv3A5y4dPC3IsId5jy64xbvvGtmO2//ktF3PV+WdVeo+p8czUvx/rvPW7n5Bx\nhl6P4Ofd/RbgLXf/EnAp8NM9RSRZy6VmTQy5xAnl9XVSqUP12K7xnrbPZmj+PFYuPeOYX5qxzlu/\n+2ni81U1EUxG8CMz+yfAYWBxPSFJinKpWRNDLnFCeX2dVOpQXXv+cE/bj0es89bvfpr4fFVNBJvM\n7AzgD4HtwCvAX9UVlKQnl5o1MeQSJ5TX10mlDtVV55/FecOnTtt23vCpPXcL1R1njP008fnqudaQ\nmc0DTnb3/1dPSMfSPYJ05FKzJoZc4oTyhdRTqUO1ZdcYj+0a59rzh4Mmgalinbd+9xMizqCL15vZ\nTwD/Hljm7v/azFYA57n7puOKrkdKBCIivQt9s/jrwCE6N4kB9gP/uUIQr5jZ82a2w8xGi21nmtlm\nM9tdPC6oGEPPUhkbnYsqa7mWtUcq7RVjXdpYx9rP2PrcxDhvsdYs7leK8wh+yt1/xcx+DcDdf2ST\nkwnKfdrdpy4ztBbY4u53m9na4vmd1UOuJpWx0bmospZrWXuk0l4x1qWNdaz9jq3PSYzzFmvN4n7F\n/lmqekXwvpmdQlGS2sx+is4VwvG4AdhQfL0BuPE436erQVqjN4Yqa7mWtUcq7RVjXdpYx7pl19iR\nJDDppfF3B/LKIMZ5i7Vmcb+a+FkqTQTFX/5/BjwKLDWz+4AtwB0V3t+Bx81sm5mtKbYNu/trxddj\nwIzjw8xsjZmNmtno66+/XmFXH0llbHQuqqzlWtYeqbRXjHVpYx1ryLH1qYtx3mKtWdyvJn6WSruG\n3N3N7PeAK4BL6BSd+/xR3T3d/IK77zeznwQ2m9mLM7z3jHer3f1e4F7o3CyusK8jUhkbnYsqa7mW\ntUcq7dXLurTHK9axXnv+MH8zum/G7YMmxnmLsY8QUp5HsB34hLv/D3ffVDEJ4O77i8cDwIPAp4Bx\nM1sMUDwe6D3s2aUyNjoXVdZyLWuPVNqr6rq0/Yh1rDHG1qcixnmLsY8Qkp1HUPwlvx
x4FXiXzlWB\nz7ZUpZmdCsxx93eKrzcDXwauAiam3Cw+091n7WY63uGjqYyNzkXZWHMob49U2qvKsfQr1rHGGFuf\nihjnLcY+QkhxHsHZM22fXMGsy/d8gs5VAHS6oP7S3X/fzIaAjcAyOonlJnd/c7b9ax6BiEjvgixe\nP2m2X/izfM8P6KxfcPT2CTpXBSIikgAtNylynFKZPCfSr8oL04jIR1KZPCcSgq4IRHqUyuQ5kVCU\nCER6lMrkOZFQBjoRDFIfbowCelX2kcsi6KMvT/DHj73E6MsTwfcRe8JP2bFUkUsxtxDHWnZuQ+wj\nRBwp6Xk9giYcz/DRQerDjVFAr8o+clkE/eb13+Hbez76Ib9s+RDfvP2SoPt4ZMf+YxZJr+PzVXYs\nVeRSzC3EsZad2xD7CBFHLKHLUGdlkPpwYxTQq7KPsoJdqRT6G315YtoPOsBTeyaO/PUX6rNx/aqP\n8/SdV/Kt23+Op++8spYf8rJjqSKXYm4hjrXs3IbYR4g4UjSQiWCQ+nBjFNCrso9cFkF/cvfM1U8m\nt4f8bHRbJD2UsmOpIpdibiGOtezchthHiDhSNJCJIJUCaCHEKKBXZR+5LIJ++YqFs27P6bNRdixV\n5FLMLcSxlp3bEPsIEUeKBjIRpFIALYQYBfSq7COXRdBHzh3isuVD07ZdtnyIkXOHgu0jlpFzh/jp\nGYrOTR5LFbkUcys7b1WUndsQ+wgRR4oG9mYxpFMALYQYBfSq7COXRdBHX57gyd1vcPmKhTP+oOfw\n2Zg4eIjV67by3uGP/ro8ee4cnr7zyp5jzqWYW9l5q6Ls3IbYR4g4YghadK5pKjonbfTc3re5ef2z\nvHPoH49sO23eiXzr9p9jZWI19CVNrR41JDIIYvY1pzL/Q5qhWkMiiZrsaz56vkLoboZU5n9Ic9Q1\nJJK4OvuaQ9yHCHkvQ8JS15DIgKhzvkIq8z+kWUoEJXLp94wVZ4z9PLR9L7dv+B4Pbd9b2z5CqFJb\nJ0R71VknKJX5H9Is3SOYRS79nrHijLGfS/5gM2M/fB+Ax184wLpHX+SZL14TdB8hVKmtE6K96q4T\nFOI+RKx7GVIf3SPoIpd+z1hxxtjPQ9v38jsbdx6z/as3XciNFy0Nso8Q9oy/w9X3PHnM9se/cPmR\n8fMh2qvKfkJJZf6HhKV7BH3Kpd8zVpwx9rPp+bGetjelSm2dEO0Vo07QpBD3IequvST1USLoIpd+\nz1hxxtjPdRec1dP2plSprROivULWCYpxbyeX+2lyLCWCLnKpFxIrzhj7ufGipSw+/aRp2xafflJS\n3UJQrbZOiPYKVSfo4R37Wb1uKzevf5bV67byyI79PX1/KvuQ+ugeQYlc+j1jxRljPw9t38um58e4\n7oKzkksCU1WprROivfqp4RPj3k4u99PaqOo9Ao0aKjE0f14WH+ZYccbYz40XLU06AUxaPnxa6S/m\nEO1VZT/dTN6reI+PfklP3qsIdR5j7EPqNdBdQ7n0WcaKM8R49Bhr34agPvGOGPd2crmfJt0N7BWB\n5gBMF2I8eoy1b0OI0aa5fL6G5s9j2Zmn8Pfj7x7ZdvaZYbv1hubPY+TsBdOWgfznZy/Q1UBGBvKK\nIJc1Q2PFGWJN2Rhr34YQo01z+XxBp/b+1CQA8NL4u0HX6d0z/s6MawGn9tmQ7gYyEWgOwHQhxqPH\nHNPejxhtmsvnC+Ks05vLZ0O6G8hEkEufZaw4Q4xHj7H2bQhV27Sf/v1cPl8QZ53e3OY7yLEGMhFo\nDsB0Icajx1j7NoQqbdrvmPdcPl8QZ53enOY7yMwGeh6B5gBMF2JN2Rhr34bQrU1DjnnP5fMFcdbp\nTX2+QxtpHgGaA3C0fsajh3yPGLq1acgx77l8vqBzZVDnQu2Q/nwH6a72riEzO8HMvm9mm4rnZ5rZ\nZjPbXTwuqDsGkUk59e+3ic5Ls2LcI/g88MKU52
uBLe6+AthSPBeJIqf+/TbReWlWrfcIzGwJsAH4\nfeDfuft1ZvYScIW7v2Zmi4En3P282d5HaxZLaDn177eJzktYqdwj+CpwBzC143DY3V8rvh4Dhmf6\nRjNbA6wBWLZs2UwvETluOfXvt4nOSzNq6xoys+uAA+6+rdtrvHM5MuMlibvf6+4j7j6yaNGiusIU\nEWm9Ou8RrAauN7NXgL8GrjSzbwHjRZcQxeOBugIIMTllkCa45NIeg9TmMVRpr1yKBUozausacve7\ngLsAzOwK4Hfd/WYz+0PgVuDu4vHhOvYfoihYLoXFqsilPQapzWOo0l65FAuU5jQxs/hu4Boz2w1c\nXTwPKkRRsJwKi5XJpT0Gqc1jqNJeuRQLlGZFSQTu/oS7X1d8PeHuV7n7Cne/2t3fDL2/EEXBcios\nViaX9hikNo+hSnupIJxUMZC1hkJMThmkCS65tMcgtXkMVdorl2KB0qyBTAQhJqcM0gSXXNpjkNo8\nhirtlUuxQGmWis5FeI9U5NIeg9TmMVRpr1yKBUpYVSeUDXQiEBFps6qJYCC7hgZRm8bWt+lYRVIw\n0GWoB0Wbxta36VhFUqErgsS1aWx9m45VJCVKBIlr09j6Nh2rSEqUCBLXprH1uR2r7mVMp/bIl+4R\nJG5yrPgdR/WbD+KwyqH587jp4iXTSiLcNLIkyWPVvYzp1B550/DRTLRhbH0uC5jnEmcsao90afjo\ngBmaP4+VS88Y6B+sXO4R5BJnLGqP/CkRRKC+02pyuUeQS5yxqD3yp0RQs4d37Gf1uq3cvP5ZVq/b\nyiM79jcdUrJyqTWUS5yxqD3yp3sENVLf6fHJ5X5ILnHGovZITyqL17faZN/pe3yUCCb7TvWD0l0u\nC5jnEmcsao98qWuoRD/9+yH7Ttt0n0FrK4vEpSuCWfQ7NjrUuPg2jdHW2soi8emKoItQ6/xu3LZv\n2raNo/uSWys4FVpbWaQZSgRdpLLOb5vGaOfSXm06J9IOSgRdpLLOb5vGaOfSXm06J9IOSgRdpLLO\nb5vGaOfSXm06J9IOmkdQIpV1fts0RjuX9mrTOZE8ac1iEZGWU9E5ERGpRIlARKTllAhERFpOiUBE\npOWUCEREWk6JIAEqXiYiTVLRuYapeJmINE1XBA1S8TIRSUFticDMTjaz75rZc2b2d2b2pWL7mWa2\n2cx2F48L6oohdSpeJiIpqPOK4BBwpbuvBFYBnzGzS4C1wBZ3XwFsKZ63koqXiUgKaksE3nGweDq3\n+OfADcCGYvsG4Ma6YkidipeJSApqvVlsZicA24DlwH9x92fNbNjdXyteMgYM1xlD6q5f9XFWL1+o\n4mUi0phaE4G7fwCsMrMzgAfN7GeP+n83sxmr3pnZGmANwLJly+oMs3Fa9FtEmhRl1JC7vw38LfAZ\nYNzMFgMUjwe6fM+97j7i7iOLFi2KEaaISCvVOWpoUXElgJmdAlwDvAg8AtxavOxW4OG6YhARkXJ1\ndg0tBjYU9wnmABvdfZOZPQNsNLPbgFeBm2qMQUREStSWCNx9J/DJGbZPAFfVtV8REemNZhbLQFL9\nJpHqVGtIBo7qN4n0RlcEMlBUv0mkd0oEMlBUv0mkd0oEMlBUv0mkd0oEMlBUv0mkd7pZLANH9ZtE\neqNEIANJ9ZtEqlPXkIhIyykRiIi0nBKBiEjLKRGIiLScEoGISMuZ+4wLhCXFzF6nU7J6JguBNyKG\nc7wUZ3i5xKo4w8olTmg+1rPdvXRlrywSwWzMbNTdR5qOo4ziDC+XWBVnWLnECfnEqq4hEZGWUyIQ\nEWm5QUgE9zYdQEWKM7xcYlWcYeUSJ2QSa/b3CEREpD+DcEUgIiJ9yCYRmNnJZvZdM3vOzP7OzL5U\nbD/TzDab2e7icUGicf4nM9tvZjuKf/+iyTgnmdkJZvZ9M9tUPE+qPaeaIdbk2tTMXjGz54t4Rott\nybVplziTa0
8AMzvDzO43sxfN7AUzuzTRNp0pziTb9GjZJALgEHClu68EVgGfMbNLgLXAFndfAWwp\nnjepW5wA97j7quLf/2wuxGk+D7ww5Xlq7TnV0bFCmm366SKeyWGDqbbp0XFCmu35J8Cj7v4zwEo6\nn4EU23SmOCHNNp0mm0TgHQeLp3OLfw7cAGwotm8AbmwgvCNmiTM5ZrYE+CVg/ZTNSbXnpC6x5iLJ\nNs2BmX0MuBz4cwB3f9/d3yaxNp0lzixkkwjgSNfADuAAsNndnwWG3f214iVjwHBjARa6xAnwW2a2\n08z+IoVLWeCrwB3A1LUdk2vPwkyxQnpt6sDjZrbNzNYU21Js05nihPTa81zgdeDrRbfgejM7lfTa\ntFuckF6bHiOrRODuH7j7KmAJ8Ckz+9mj/t9J4K/vLnF+DfgEne6i14A/ajBEzOw64IC7b+v2mlTa\nc5ZYk2rTwi8U5/4Xgd80s8un/mcqbcrMcabYnicCFwFfc/dPAu9yVDdQIm3aLc4U2/QYWSWCScUl\n198CnwHGzWwxQPF4oMnYppoap7uPFwniQ+C/AZ9qNjpWA9eb2SvAXwNXmtm3SLM9Z4w1wTbF3fcX\njweAB+nElFybzhRniu0J7AP2Tbmqvp/OL9zU2nTGOBNt02NkkwjMbJGZnVF8fQpwDfAi8Ahwa/Gy\nW4GHm4mwo1uckx/awr8E/k8T8U1y97vcfYm7nwP8KrDV3W8msfaE7rGm1qZmdqqZnTb5NXBtEVNS\nbdotztTaE8Ddx4C9ZnZesekqYBeJtWm3OFNs05nktGbxYmCDmZ1AJ4FtdPdNZvYMsNHMbqNTofSm\nJoOke5zfNLNVdC5hXwH+TYMxzuZu0mrP2XwlsTYdBh40M+j8bP2luz9qZt8jrTbtFmeqn9HfAu4z\ns5OAHwC/QfGzlVCbwsxx/mmibTqNZhaLiLRcNl1DIiJSDyUCEZGWUyIQEWk5JQIRkZZTIhARaTkl\nApE+FKUEzi++/mLT8YgcDw0fFTlOZnaCu38w5flBd5/fZEwix0NXBCJTmNnvmdlvF1/fY2Zbi6+v\nNLP7zOygmf2RmT0HXGpmT5jZiJndDZxS1Jy/r/iem62zNsUOM/uvxSRDkeQoEYhM9xRwWfH1CDDf\nzOYW254ETgWedfeV7v7tyW9y97XAj4ua858zs38K/Aqwuiju9gHwuZgHIlJVTiUmRGLYBlxsZqfT\nWWRoO52EcBnw23R+oT9Q4X2uAi4GvleUcjiF5gujicxIiUBkCnc/bGYvA78O/G9gJ/BpYDmdFafe\nm3pfYBYGbHD3u+qKVSQUdQ2JHOsp4HfpdAU9Bfxb4PtePrLicNGNBJ3lE3/ZzH4SjqxbfHZdAYv0\nQ4lA5FhP0aki+4y7jwPvFdvK3AvsNLP73H0X8B+Ax8xsJ7C5eE+R5Gj4qIhIy+mKQESk5ZQIRERa\nTolARKTllAhERFpOiUBEpOWUCEREWk6JQESk5ZQIRERa7v8D9IFxdYjzlk0AAAAASUVORK5CYII=\n", 1826 | "text/plain": [ 1827 | "" 1828 | ] 1829 | }, 1830 | "metadata": {}, 1831 | "output_type": "display_data" 1832 | } 1833 | ], 1834 | "source": [ 1835 | "hsb.plot(kind='scatter',x='write',y='read')" 1836 | ] 1837 | }, 1838 | { 1839 | "cell_type": "code", 1840 | "execution_count": 45, 1841 | "metadata": { 1842 | "collapsed": false 1843 | }, 1844 | "outputs": [ 1845 | { 1846 | "data": { 1847 | "text/plain": [ 1848 | "(0.59677647908804532, 
1.1056411255416739e-20)" 1849 | ] 1850 | }, 1851 | "execution_count": 45, 1852 | "metadata": {}, 1853 | "output_type": "execute_result" 1854 | } 1855 | ], 1856 | "source": [ 1857 | "mrstat.pearsonr(hsb.write,hsb.read)" 1858 | ] 1859 | }, 1860 | { 1861 | "cell_type": "markdown", 1862 | "metadata": {}, 1863 | "source": [ 1864 | "Корреляция получилась равной 0.5968, а p-value для нулевой гипотезы об отсутствии корреляции < 0.05, следовательно, можно заключить, что между этими переменными существует статистически значимая корреляция." 1865 | ] 1866 | }, 1867 | { 1868 | "cell_type": "markdown", 1869 | "metadata": {}, 1870 | "source": [ 1871 | "С помощью [этой игры](http://guessthecorrelation.com/) можно научиться определять корреляцию на глаз." 1872 | ] 1873 | }, 1874 | { 1875 | "cell_type": "markdown", 1876 | "metadata": {}, 1877 | "source": [ 1878 | "#### 7.2 Коэффициент корреляции Спирмена" 1879 | ] 1880 | }, 1881 | { 1882 | "cell_type": "markdown", 1883 | "metadata": {}, 1884 | "source": [ 1885 | "Корреляция Спирмена способна уловить нелинейную, но монотонную связь между переменными. Возьмем, например, x и посчитаем корреляцию Пирсона с его экспонентой." 1886 | ] 1887 | }, 1888 | { 1889 | "cell_type": "code", 1890 | "execution_count": 46, 1891 | "metadata": { 1892 | "collapsed": false 1893 | }, 1894 | "outputs": [ 1895 | { 1896 | "data": { 1897 | "text/plain": [ 1898 | "(0.25080845773967969, 0.011412991126929171)" 1899 | ] 1900 | }, 1901 | "execution_count": 46, 1902 | "metadata": {}, 1903 | "output_type": "execute_result" 1904 | } 1905 | ], 1906 | "source": [ 1907 | "x = np.linspace(0,100,101)\n", 1908 | "y = np.exp(x)\n", 1909 | "mrstat.pearsonr(x,y)" 1910 | ] 1911 | }, 1912 | { 1913 | "cell_type": "markdown", 1914 | "metadata": {}, 1915 | "source": [ 1916 | "Коэффициент получился очень маленьким, но связь определенно есть - функция монотонно возрастает. Взглянем на график." 
1917 | ] 1918 | }, 1919 | { 1920 | "cell_type": "code", 1921 | "execution_count": 47, 1922 | "metadata": { 1923 | "collapsed": false 1924 | }, 1925 | "outputs": [ 1926 | { 1927 | "data": { 1928 | "text/plain": [ 1929 | "" 1930 | ] 1931 | }, 1932 | "execution_count": 47, 1933 | "metadata": {}, 1934 | "output_type": "execute_result" 1935 | }, 1936 | { 1937 | "data": { 1938 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEDCAYAAADOc0QpAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEUJJREFUeJzt3X+s3Xddx/Hni+4CVyEU7A1sdy0lZikCAsWbMQeSCZLC\nJGxRjEPxByFZIKBgsIZpgjH+oaQEBQYsC0yYkJE4mtrAsKJOAeMmt93cTwpV1PV2sgJ2Y+4K3Xz7\nx/l2Obvc9p7be8499376fCQn93w/38853/fntH313M/3c843VYUkqS2PG3cBkqThM9wlqUGGuyQ1\nyHCXpAYZ7pLUIMNdkho01nBPck2S+5LcsYzH/HySSjLTbT8zyYEktya5M8mbR1exJK0PGec69yQv\nAx4Erq2q5w3Q/8nA54DHA2+rqtkkj6c3ju8leRJwB3BhVR0ZZe2StJaN9Z17VX0R+E5/W5IfTfJX\nSfYn+VKSZ/ft/kPgPcD/9j3H96vqe93mE3CqSZLWZBBeDfxGVf0E8NvAhwGSvAjYXFWfW/iAJJuT\n3AbcA7zHd+2SznRnjbuAft20yoXAXyQ50fyEJI8D3gf8+mKPq6p7gOcnOQfYk+T6qvrmKpQsSWvS\nmgp3er9JHKuqF/Y3JnkK8Dzg77vQfwawN8lrq2r2RL+qOtKdnP0p4PrVK1uS1pY1NS1TVQ8A30jy\nCwDpeUFV3V9Vm6pqa1VtBW4CXtudUD03yWTX/6nAS4GD4xqDJK0F414KeR3wT8C2JIeTvAn4ZeBN\nSf4FuBO4ZImn+THg5q7/PwDvrarbR1m3JK11Y10KKUkajTU1LSNJGo6xnVDdtGlTbd26dVyHl6R1\naf/+/d+qqqml+o0t3Ldu3crs7OzSHSVJj0ryH4P0c1pGkhpkuEtSgwx3SWqQ4S5JDTLcJalBa+27\nZSSpWXtumWPXvoMcOTbPORsn2bljG5dunx7JsQx3SVoFe26Z44rdtzN//BEA5o7Nc8Xu3jeljCLg\nnZaRpFWwa9/BR4P9hPnjj7Br32i+59Bwl6RVcOTY/LLaV8pwl6RVcM7GyWW1r5ThLkmrYOeObUxO\nbHhM2+TEBnbu2DaS43lCVZJWwYmTpq6WkaTGXLp9emRhvpDTMpLUIMNdkhpkuEtSgwx3SWqQ4S5J\nDTLcJalBS4Z7ks1JbkxyV5I7k7x9kT4XJbk/ya3d7d2jKVeSNIhB1rk/DLyzqg4keTKwP8kXququ\nBf2+VFWvGX6JkqTlWvKde1XdW1UHuvvfBe4GVmcVviTptCxrzj3JVmA7cPMiuy9McluSzyd57kke\nf3mS2SSzR48eXXaxkqTBDBzuSZ4EfAZ4R1U9sGD3AWBLVT0f+CCwZ7HnqKqrq2qmqmampqZOt2ZJ\n0hIGCvckE/SC/VNVtXvh/qp6oKoe7O7fAEwk2TTUSiVJAxtktUyAjwF3V9X7TtLnGV0/kpzfPe+3\nh1moJGlwg6yWeQnwK8DtSW7t2n4X2AJQVVcBrwPekuRhYB64rKpqBPVKkgawZLhX1ZeBLNHnSuDK\nYRUlSVoZP6EqSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1
KDDHdJapDhLkkNMtwlqUGGuyQ1\nyHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMM\nd0lqkOEuSQ0y3CWpQYa7JDXIcJekBi0Z7kk2J7kxyV1J7kzy9kX6JMkHkhxKcluSF42mXEnSIM4a\noM/DwDur6kCSJwP7k3yhqu7q6/Nq4Lzu9mLgI91PSdIYLPnOvaruraoD3f3vAncD0wu6XQJcWz03\nARuTnD30aiVJA1nWnHuSrcB24OYFu6aBe/q2D/OD/wGQ5PIks0lmjx49urxKJUkDGzjckzwJ+Azw\njqp64HQOVlVXV9VMVc1MTU2dzlNIkgYwULgnmaAX7J+qqt2LdJkDNvdtn9u1SZLGYJDVMgE+Btxd\nVe87Sbe9wK92q2YuAO6vqnuHWKckaRkGWS3zEuBXgNuT3Nq1/S6wBaCqrgJuAC4GDgEPAW8cfqmS\npEEtGe5V9WUgS/Qp4K3DKkqStDJ+QlWSGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLU\nIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y\n3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatCS4Z7kmiT3JbnjJPsvSnJ/klu727uHX6Yk\naTnOGqDPx4ErgWtP0edLVfWaoVQkSVqxJd+5V9UXge+sQi2SpCEZ1pz7hUluS/L5JM8d0nNKkk7T\nINMySzkAbKmqB5NcDOwBzlusY5LLgcsBtmzZMoRDS5IWs+J37lX1QFU92N2/AZhIsukkfa+uqpmq\nmpmamlrpoSVJJ7HicE/yjCTp7p/fPee3V/q8kqTTt+S0TJLrgIuATUkOA78PTABU1VXA64C3JHkY\nmAcuq6oaWcWSpCUtGe5V9fol9l9Jb6mkJGmN8BOqktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGG\nuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhL\nUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatCS4Z7kmiT3JbnjJPuT\n5ANJDiW5LcmLhl+mJGk5zhqgz8eBK4FrT7L/1cB53e3FwEe6n5J0xttzyxy79h3kyLF5ztk4yc4d\n27h0+/TIj7vkO/eq+iLwnVN0uQS4tnpuAjYmOXtYBUrSerXnljmu2H07c8fmKWDu2DxX7L6dPbfM\njfzYw5hznwbu6ds+3LX9gCSXJ5lNMnv06NEhHFqS1q5d+w4yf/yRx7TNH3+EXfsOjvzYq3pCtaqu\nrqqZqpqZmppazUNL0qo7cmx+We3DNIxwnwM2922f27VJ0hntnI2Ty2ofpmGE+17gV7tVMxcA91fV\nvUN4Xkla13bu2MbkxIbHtE1ObGDnjm0jP/aSq2WSXAdcBGxKchj4fWACoKquAm4ALgYOAQ8BbxxV\nsZK0npxYFTOO1TKpqpEfZDEzMzM1Ozs7lmNL0nqVZH9VzSzVz0+oSlKDDHdJapDhLkkNMtwlqUGG\nuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhL\nUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEDhXuS\nVyU5mORQknctsv+iJPcnubW7vXv4pUqSBnXWUh2SbAA+BLwSOAx8JcneqrprQdcvVdVrRlCjJGmZ\nBnnnfj5wqKr+raq+D3wauGS0ZUmSVmKQcJ8G7unbPty1LXRhktuSfD7Jcxd7oiSXJ5lNMnv06NHT\nKFeSNIhhnVA9AGypqucDHw
T2LNapqq6uqpmqmpmamhrSoSVJCw0S7nPA5r7tc7u2R1XVA1X1YHf/\nBmAiyaahVSlJWpZBwv0rwHlJnpXk8cBlwN7+DkmekSTd/fO75/32sIuVJA1mydUyVfVwkrcB+4AN\nwDVVdWeSN3f7rwJeB7wlycPAPHBZVdUI65YknULGlcEzMzM1Ozs7lmNL0nqVZH9VzSzVz0+oSlKD\nlpyWkSQtz55b5ti17yBHjs1zzsZJdu7YxqXbF1tBPjqGuyQN0Z5b5rhi9+3MH38EgLlj81yx+3aA\nVQ14p2UkaYh27Tv4aLCfMH/8EXbtO7iqdRjukjRER47NL6t9VAx3SRqiczZOLqt9VAx3SRqinTu2\nMTmx4TFtkxMb2Llj26rW4QlVSRqiEydNXS0jSY25dPv0qof5Qk7LSFKDDHdJapDhLkkNMtwlqUGG\nuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSg/yEqiQNwVq4QEc/w12SVmitXKCjn9MykrRCa+UCHf0Md0la\nobVygY5+hrskrdBauUBHP8NdklZorVygo58nVCXpNPWvkHnK5ARPnHgcxx467moZSVqvFq6QOTZ/\nnMmJDfzJL75w7BfqAKdlJOm0rMUVMv185y5Jp7Dww0k//ewpbvzqUebW4AqZfgOFe5JXAe8HNgAf\nrao/XrA/3f6LgYeAX6+qA0Ou9QfmtxIend868YIv3Hcm3/d18XXxdVnZ/f9+6DgBqsuguWPzfPKm\n/zxlTo1zhUy/VNWpOyQbgK8BrwQOA18BXl9Vd/X1uRj4DXrh/mLg/VX14lM978zMTM3Ozg5c6ML5\nLUlaayYnNvBHP/fjI51zT7K/qmaW6jfInPv5wKGq+req+j7waeCSBX0uAa6tnpuAjUnOXnbVp7DY\n/JYkrRXTGydHHuzLMci0zDRwT9/2YXrvzpfqMw3c298pyeXA5QBbtmxZVqFrZR5Lkhaa3jjJP77r\n5eMu4zFWdbVMVV1dVTNVNTM1NbWsx66VeSxJ6jfuDyudzCDhPgds7ts+t2tbbp8VWewTYJK0GtL9\nnN44yRsu2ML0xknC2puK6TfItMxXgPOSPIteYF8G/NKCPnuBtyX5NL0pm/ur6l6G6MSL52oZVz/4\nuvi6rPbrMu5Pm56OJcO9qh5O8jZgH72lkNdU1Z1J3tztvwq4gd5KmUP0lkK+cRTFXrp9et29wJI0\nDgOtc6+qG+gFeH/bVX33C3jrcEuTJJ0uv35AkhpkuEtSgwx3SWqQ4S5JDVryu2VGduDkKPAfp/nw\nTcC3hljOeuCYzwyO+cywkjE/s6qW/BTo2MJ9JZLMDvLFOS1xzGcGx3xmWI0xOy0jSQ0y3CWpQes1\n3K8edwFj4JjPDI75zDDyMa/LOXdJ0qmt13fukqRTMNwlqUHrLtyTvCrJwSSHkrxr3PWMQpLNSW5M\ncleSO5O8vWt/WpIvJPl69/Op4651mJJsSHJLks92262Pd2OS65N8NcndSX7yDBjzb3V/p+9Icl2S\nJ7Y25iTXJLkvyR19bScdY5Irujw7mGTHsOpYV+HeXaz7Q8CrgecAr0/ynPFWNRIPA++squcAFwBv\n7cb5LuBvq+o84G+77Za8Hbi7b7v18b4f+KuqejbwAnpjb3bMSaaB3wRmqup59L5C/DLaG/PHgVct\naFt0jN2/68uA53aP+XCXcyu2rsKdwS7Wve5V1b1VdaC7/116/+in6Y31E123TwCXjqfC4UtyLvCz\nwEf7mlse71OAlwEfA6iq71fVMRoec+csYDLJWcAPAUdobMxV9UXgOwuaTzbGS4BPV9X3quob9K6J\ncf4w6lhv4X6yC3E3K8lWYDtwM/D0vitc/Rfw9DGVNQp/CvwO8H99bS2P91nAUeDPuqmojyb5YRoe\nc1XNAe8F/hO4l94V2/6ahsfc52RjHFmmrbdwP6MkeRLwGeAdVfVA/77uAilNrGNN8hrgvqra
f7I+\nLY23cxbwIuAjVbUd+B8WTEe0NuZunvkSev+xnQP8cJI39PdpbcyLWa0xrrdwH/mFuNeKJBP0gv1T\nVbW7a/5mkrO7/WcD942rviF7CfDaJP9Ob6rt5Uk+Sbvjhd47tMNVdXO3fT29sG95zD8DfKOqjlbV\ncWA3cCFtj/mEk41xZJm23sL90Yt1J3k8vRMRe8dc09AlCb252Lur6n19u/YCv9bd/zXgL1e7tlGo\nqiuq6tyq2krvz/TvquoNNDpegKr6L+CeJNu6plcAd9HwmOlNx1yQ5Ie6v+OvoHc+qeUxn3CyMe4F\nLkvyhCTPAs4D/nkoR6yqdXWjdyHurwH/CvzeuOsZ0RhfSu/XttuAW7vbxcCP0DvT/nXgb4CnjbvW\nEYz9IuCz3f2mxwu8EJjt/pz3AE89A8b8B8BXgTuAPwee0NqYgevonVM4Tu83tDedaozA73V5dhB4\n9bDq8OsHJKlB621aRpI0AMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNej/AeMoV9tL6EFyAAAA\nAElFTkSuQmCC\n", 1939 | "text/plain": [ 1940 | "" 1941 | ] 1942 | }, 1943 | "metadata": {}, 1944 | "output_type": "display_data" 1945 | } 1946 | ], 1947 | "source": [ 1948 | "plt.scatter(x,y)" 1949 | ] 1950 | }, 1951 | { 1952 | "cell_type": "markdown", 1953 | "metadata": {}, 1954 | "source": [ 1955 | "Теперь посчитаем корреляцию Спирмена." 1956 | ] 1957 | }, 1958 | { 1959 | "cell_type": "code", 1960 | "execution_count": 48, 1961 | "metadata": { 1962 | "collapsed": false 1963 | }, 1964 | "outputs": [ 1965 | { 1966 | "data": { 1967 | "text/plain": [ 1968 | "SpearmanrResult(correlation=0.99999999999999989, pvalue=0.0)" 1969 | ] 1970 | }, 1971 | "execution_count": 48, 1972 | "metadata": {}, 1973 | "output_type": "execute_result" 1974 | } 1975 | ], 1976 | "source": [ 1977 | "mrstat.spearmanr(x,y)" 1978 | ] 1979 | }, 1980 | { 1981 | "cell_type": "markdown", 1982 | "metadata": {}, 1983 | "source": [ 1984 | "Коэффициент Спирмена равен 1! Когда корреляция Спирмена больше, чем корреляция Пирсона - это знак, что можно сделать нелинейное преобразование и взять например логарифм. Это позволит перейти к линейным отношениям, а статистика любит линейные отношения." 
1985 | ] 1986 | }, 1987 | { 1988 | "cell_type": "markdown", 1989 | "metadata": {}, 1990 | "source": [ 1991 | "#### 7.3 Коэффициент Крамера" 1992 | ] 1993 | }, 1994 | { 1995 | "cell_type": "markdown", 1996 | "metadata": {}, 1997 | "source": [ 1998 | "Коэффициент Крамера позволяет оценить связь между двумя номинальными переменными. Он основан на статистике хи-квадрат. Посчитаем для связи между типом школы и полом ученика." 1999 | ] 2000 | }, 2001 | { 2002 | "cell_type": "code", 2003 | "execution_count": 49, 2004 | "metadata": { 2005 | "collapsed": false 2006 | }, 2007 | "outputs": [ 2008 | { 2009 | "data": { 2010 | "text/html": [ 2011 | "
\n", 2012 | "\n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | "
female01
schtyp
17791
21418
\n", 2038 | "
" 2039 | ], 2040 | "text/plain": [ 2041 | "female 0 1\n", 2042 | "schtyp \n", 2043 | "1 77 91\n", 2044 | "2 14 18" 2045 | ] 2046 | }, 2047 | "execution_count": 49, 2048 | "metadata": {}, 2049 | "output_type": "execute_result" 2050 | } 2051 | ], 2052 | "source": [ 2053 | "table" 2054 | ] 2055 | }, 2056 | { 2057 | "cell_type": "code", 2058 | "execution_count": 50, 2059 | "metadata": { 2060 | "collapsed": false 2061 | }, 2062 | "outputs": [ 2063 | { 2064 | "data": { 2065 | "text/plain": [ 2066 | "(0.01533749547184478, 0.82828242684033948)" 2067 | ] 2068 | }, 2069 | "execution_count": 50, 2070 | "metadata": {}, 2071 | "output_type": "execute_result" 2072 | } 2073 | ], 2074 | "source": [ 2075 | "mrstat.vcramer(table.values)" 2076 | ] 2077 | }, 2078 | { 2079 | "cell_type": "markdown", 2080 | "metadata": {}, 2081 | "source": [ 2082 | "Такое маленькое значение (0.01) говорит о том, что связи нет, а p-value не позволяет отвергнуть нулевую гипотезу о том, что пропорции не отличаются. Коэффициент Крамера принимает значения от 0 до 1, что делает его хорошо интерпретируемым." 2083 | ] 2084 | }, 2085 | { 2086 | "cell_type": "markdown", 2087 | "metadata": {}, 2088 | "source": [ 2089 | "### 8. Связанные выборки" 2090 | ] 2091 | }, 2092 | { 2093 | "cell_type": "markdown", 2094 | "metadata": {}, 2095 | "source": [ 2096 | "#### 8.1 Ти-тест для связанных выборок" 2097 | ] 2098 | }, 2099 | { 2100 | "cell_type": "markdown", 2101 | "metadata": {}, 2102 | "source": [ 2103 | "Тест для связанных выборок используется например при клинических испытаниях новых лекарств. Конкретный пример (взято из [курса](https://www.coursera.org/learn/stats-for-data-analysis/home/welcome)) : \n", 2104 | "\n", 2105 | "В рамках исследования эффективности препарата метилфенидат 24 пациента с синдромом дефицита внимания и гиперактивности в течение недели принимали либо метилфенидат, либо плацебо. В конце недели каждый пациент проходили тест на способность к подавлению импульсивных поведенческих реакций. 
На втором этапе плацебо и препарат менялись, и после недельного курса каждый испытуемый проходил второй тест.\n", 2106 | "\n", 2107 | "Необходимо оценить эффективность препарата." 2108 | ] 2109 | }, 2110 | { 2111 | "cell_type": "code", 2112 | "execution_count": 51, 2113 | "metadata": { 2114 | "collapsed": false 2115 | }, 2116 | "outputs": [ 2117 | { 2118 | "data": { 2119 | "text/html": [ 2120 | "
\n", 2121 | "\n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | "
PlaceboMethylphenidate
05762
12749
23230
33134
43438
\n", 2157 | "
" 2158 | ], 2159 | "text/plain": [ 2160 | " Placebo Methylphenidate\n", 2161 | "0 57 62\n", 2162 | "1 27 49\n", 2163 | "2 32 30\n", 2164 | "3 31 34\n", 2165 | "4 34 38" 2166 | ] 2167 | }, 2168 | "execution_count": 51, 2169 | "metadata": {}, 2170 | "output_type": "execute_result" 2171 | } 2172 | ], 2173 | "source": [ 2174 | "data = pd.read_csv('ADHD.txt', sep = ' ', header = 0)\n", 2175 | "data.columns = ['Placebo', 'Methylphenidate']\n", 2176 | "data.head()" 2177 | ] 2178 | }, 2179 | { 2180 | "cell_type": "markdown", 2181 | "metadata": {}, 2182 | "source": [ 2183 | "Эти выборки являются свзянными, т.к. каждая строчка содержит результаты одного и того же пациена с лекарством и без. Проверим, есть ли статистически значимый эффект от приема препарата." 2184 | ] 2185 | }, 2186 | { 2187 | "cell_type": "code", 2188 | "execution_count": 52, 2189 | "metadata": { 2190 | "collapsed": false 2191 | }, 2192 | "outputs": [ 2193 | { 2194 | "data": { 2195 | "text/plain": [ 2196 | "Ttest_relResult(statistic=3.2223624451230406, pvalue=0.003771488176381471)" 2197 | ] 2198 | }, 2199 | "execution_count": 52, 2200 | "metadata": {}, 2201 | "output_type": "execute_result" 2202 | } 2203 | ], 2204 | "source": [ 2205 | "mrstat.ttest_rel(data.Methylphenidate,data.Placebo)" 2206 | ] 2207 | }, 2208 | { 2209 | "cell_type": "markdown", 2210 | "metadata": {}, 2211 | "source": [ 2212 | "Нулевая гипотеза в данном случае заключается в том, что средние не отличаются. P-value получилось < 0.05, а это значит, что нулевая гипотеза отвергается и можно заключить, что средние отличаются." 2213 | ] 2214 | }, 2215 | { 2216 | "cell_type": "markdown", 2217 | "metadata": {}, 2218 | "source": [ 2219 | "#### 8.2 Тест для разности двух долей - связанные выборки" 2220 | ] 2221 | }, 2222 | { 2223 | "cell_type": "markdown", 2224 | "metadata": {}, 2225 | "source": [ 2226 | "Сгенерируем некоторые выборки и представим, что они связанные. 
Здесь та же история, что и с ти-тестом - представим, что это доля до и после какого-либо мероприятия. Нужно оценить повлияло ли мероприятие на долю." 2227 | ] 2228 | }, 2229 | { 2230 | "cell_type": "code", 2231 | "execution_count": 53, 2232 | "metadata": { 2233 | "collapsed": false 2234 | }, 2235 | "outputs": [], 2236 | "source": [ 2237 | "a = [0]*150 + [1]*50\n", 2238 | "b = [0]*170 + [1]*30" 2239 | ] 2240 | }, 2241 | { 2242 | "cell_type": "code", 2243 | "execution_count": 54, 2244 | "metadata": { 2245 | "collapsed": false 2246 | }, 2247 | "outputs": [ 2248 | { 2249 | "name": "stdout", 2250 | "output_type": "stream", 2251 | "text": [ 2252 | "0.25\n", 2253 | "0.15\n" 2254 | ] 2255 | } 2256 | ], 2257 | "source": [ 2258 | "print np.mean(a)\n", 2259 | "print np.mean(b)" 2260 | ] 2261 | }, 2262 | { 2263 | "cell_type": "code", 2264 | "execution_count": 55, 2265 | "metadata": { 2266 | "collapsed": false 2267 | }, 2268 | "outputs": [ 2269 | { 2270 | "data": { 2271 | "text/plain": [ 2272 | "2.4284674728924927e-06" 2273 | ] 2274 | }, 2275 | "execution_count": 55, 2276 | "metadata": {}, 2277 | "output_type": "execute_result" 2278 | } 2279 | ], 2280 | "source": [ 2281 | "mrstat.proportions_diff_rel(a,b)" 2282 | ] 2283 | }, 2284 | { 2285 | "cell_type": "markdown", 2286 | "metadata": {}, 2287 | "source": [ 2288 | "P-value < 0.05 следовательно доли значимо различаются." 
2289 | ] 2290 | }, 2291 | { 2292 | "cell_type": "markdown", 2293 | "metadata": {}, 2294 | "source": [ 2295 | "#### 8.3 Доверительный интервал для разности долей " 2296 | ] 2297 | }, 2298 | { 2299 | "cell_type": "code", 2300 | "execution_count": 56, 2301 | "metadata": { 2302 | "collapsed": false 2303 | }, 2304 | "outputs": [ 2305 | { 2306 | "data": { 2307 | "text/plain": [ 2308 | "(0.058422885269509665, 0.14157711473049034)" 2309 | ] 2310 | }, 2311 | "execution_count": 56, 2312 | "metadata": {}, 2313 | "output_type": "execute_result" 2314 | } 2315 | ], 2316 | "source": [ 2317 | "mrstat.proportions_confint_diff_rel(a,b)" 2318 | ] 2319 | }, 2320 | { 2321 | "cell_type": "markdown", 2322 | "metadata": {}, 2323 | "source": [ 2324 | "Доверительный интервал не содержит 0 следовательно доли различаются." 2325 | ] 2326 | }, 2327 | { 2328 | "cell_type": "markdown", 2329 | "metadata": { 2330 | "collapsed": true 2331 | }, 2332 | "source": [ 2333 | "#### 8.4 Критерий знаков для связанных выборок" 2334 | ] 2335 | }, 2336 | { 2337 | "cell_type": "code", 2338 | "execution_count": 57, 2339 | "metadata": { 2340 | "collapsed": false 2341 | }, 2342 | "outputs": [ 2343 | { 2344 | "data": { 2345 | "text/plain": [ 2346 | "(5.0, 0.063914656639099121)" 2347 | ] 2348 | }, 2349 | "execution_count": 57, 2350 | "metadata": {}, 2351 | "output_type": "execute_result" 2352 | } 2353 | ], 2354 | "source": [ 2355 | "mrstat.sign_test(data.Methylphenidate - data.Placebo)" 2356 | ] 2357 | }, 2358 | { 2359 | "cell_type": "markdown", 2360 | "metadata": {}, 2361 | "source": [ 2362 | "P-value > 0.05 следовательно нельзя заявлять, что медианы выборок отличаются." 
2363 | ] 2364 | }, 2365 | { 2366 | "cell_type": "markdown", 2367 | "metadata": {}, 2368 | "source": [ 2369 | "#### 8.5 Критерий знаковых рангов Вилкоксона для связанных выборок" 2370 | ] 2371 | }, 2372 | { 2373 | "cell_type": "markdown", 2374 | "metadata": {}, 2375 | "source": [ 2376 | "Нулевая гипотеза: две связанные выборки взяты из одной генеральной совокупности, т.е. не отличаются." 2377 | ] 2378 | }, 2379 | { 2380 | "cell_type": "code", 2381 | "execution_count": 58, 2382 | "metadata": { 2383 | "collapsed": false 2384 | }, 2385 | "outputs": [ 2386 | { 2387 | "data": { 2388 | "text/plain": [ 2389 | "WilcoxonResult(statistic=48.5, pvalue=0.0037070137534509031)" 2390 | ] 2391 | }, 2392 | "execution_count": 58, 2393 | "metadata": {}, 2394 | "output_type": "execute_result" 2395 | } 2396 | ], 2397 | "source": [ 2398 | "mrstat.wilcoxon(data.Methylphenidate,data.Placebo)" 2399 | ] 2400 | }, 2401 | { 2402 | "cell_type": "markdown", 2403 | "metadata": {}, 2404 | "source": [ 2405 | "P-value < 0.05 следовательно нулевая гипотеза отвергается - можно заявлять, что выборки различаются." 2406 | ] 2407 | }, 2408 | { 2409 | "cell_type": "markdown", 2410 | "metadata": {}, 2411 | "source": [ 2412 | "### 9. Дисперсионный анализ" 2413 | ] 2414 | }, 2415 | { 2416 | "cell_type": "markdown", 2417 | "metadata": {}, 2418 | "source": [ 2419 | "#### 9.1 Однофакторная ANOVA" 2420 | ] 2421 | }, 2422 | { 2423 | "cell_type": "markdown", 2424 | "metadata": {}, 2425 | "source": [ 2426 | "Используется для проверки гипотезы о том, что средние для двух и более групп одной вещественной переменной различаются. Проверим различается ли среднее по тесту write для разных типов школьной программы." 
2427 | ] 2428 | }, 2429 | { 2430 | "cell_type": "code", 2431 | "execution_count": 59, 2432 | "metadata": { 2433 | "collapsed": false 2434 | }, 2435 | "outputs": [ 2436 | { 2437 | "data": { 2438 | "text/plain": [ 2439 | "F_onewayResult(statistic=21.274737826343454, pvalue=4.3101626235711319e-09)" 2440 | ] 2441 | }, 2442 | "execution_count": 59, 2443 | "metadata": {}, 2444 | "output_type": "execute_result" 2445 | } 2446 | ], 2447 | "source": [ 2448 | "stats.f_oneway(hsb[hsb['prog'] == 1]['write'],\n", 2449 | " hsb[hsb['prog'] == 2]['write'],\n", 2450 | " hsb[hsb['prog'] == 3]['write'])" 2451 | ] 2452 | }, 2453 | { 2454 | "cell_type": "markdown", 2455 | "metadata": {}, 2456 | "source": [ 2457 | "P-value < 0.05 следовательно нулевая гипотеза о равенстве средних не принимается. Посмотрим на эти средние." 2458 | ] 2459 | }, 2460 | { 2461 | "cell_type": "code", 2462 | "execution_count": 60, 2463 | "metadata": { 2464 | "collapsed": false 2465 | }, 2466 | "outputs": [ 2467 | { 2468 | "data": { 2469 | "text/html": [ 2470 | "
\n", 2471 | "\n", 2472 | " \n", 2473 | " \n", 2474 | " \n", 2475 | " \n", 2476 | " \n", 2477 | " \n", 2478 | " \n", 2479 | " \n", 2480 | " \n", 2481 | " \n", 2482 | " \n", 2483 | " \n", 2484 | " \n", 2485 | " \n", 2486 | " \n", 2487 | " \n", 2488 | " \n", 2489 | " \n", 2490 | " \n", 2491 | " \n", 2492 | " \n", 2493 | " \n", 2494 | " \n", 2495 | " \n", 2496 | " \n", 2497 | " \n", 2498 | " \n", 2499 | " \n", 2500 | " \n", 2501 | " \n", 2502 | " \n", 2503 | " \n", 2504 | " \n", 2505 | " \n", 2506 | "
meanstdcount
prog
151.3333339.39777545
256.2571437.943343105
346.7600009.31875450
\n", 2507 | "
" 2508 | ], 2509 | "text/plain": [ 2510 | " mean std count\n", 2511 | "prog \n", 2512 | "1 51.333333 9.397775 45\n", 2513 | "2 56.257143 7.943343 105\n", 2514 | "3 46.760000 9.318754 50" 2515 | ] 2516 | }, 2517 | "execution_count": 60, 2518 | "metadata": {}, 2519 | "output_type": "execute_result" 2520 | } 2521 | ], 2522 | "source": [ 2523 | "hsb.groupby('prog')['write'].agg(['mean','std','count'])" 2524 | ] 2525 | }, 2526 | { 2527 | "cell_type": "markdown", 2528 | "metadata": {}, 2529 | "source": [ 2530 | "Действительно - средние различаются. И ANOVA подтверждает, что эти различия статистически значимые." 2531 | ] 2532 | }, 2533 | { 2534 | "cell_type": "markdown", 2535 | "metadata": {}, 2536 | "source": [ 2537 | "#### 9.2 Критерий Краскела-Уоллиса" 2538 | ] 2539 | }, 2540 | { 2541 | "cell_type": "markdown", 2542 | "metadata": {}, 2543 | "source": [ 2544 | "Это непараметрический аналог ANOVA. Применяется тогда, когда нельзя сказать, что данные в группах распределены нормально." 2545 | ] 2546 | }, 2547 | { 2548 | "cell_type": "code", 2549 | "execution_count": 61, 2550 | "metadata": { 2551 | "collapsed": false 2552 | }, 2553 | "outputs": [ 2554 | { 2555 | "data": { 2556 | "text/plain": [ 2557 | "KruskalResult(statistic=34.045178212149644, pvalue=4.0474685563408278e-08)" 2558 | ] 2559 | }, 2560 | "execution_count": 61, 2561 | "metadata": {}, 2562 | "output_type": "execute_result" 2563 | } 2564 | ], 2565 | "source": [ 2566 | "stats.kruskal(hsb[hsb['prog'] == 1]['write'],\n", 2567 | " hsb[hsb['prog'] == 2]['write'],\n", 2568 | " hsb[hsb['prog'] == 3]['write'])" 2569 | ] 2570 | }, 2571 | { 2572 | "cell_type": "markdown", 2573 | "metadata": {}, 2574 | "source": [ 2575 | "Видим, что p-value практически такой же, как у ANOVA." 
2576 | ] 2577 | }, 2578 | { 2579 | "cell_type": "markdown", 2580 | "metadata": {}, 2581 | "source": [ 2582 | "#### 9.3 Двухфакторная ANOVA" 2583 | ] 2584 | }, 2585 | { 2586 | "cell_type": "markdown", 2587 | "metadata": {}, 2588 | "source": [ 2589 | "Применяется, когда нужно понять, влияют ли на вещественную переменную две категориальные переменные. Допустим, хочется понять, зависят ли оценки по тесту write от пола и социально-экономического статуса учеников." 2590 | ] 2591 | }, 2592 | { 2593 | "cell_type": "code", 2594 | "execution_count": 62, 2595 | "metadata": { 2596 | "collapsed": true 2597 | }, 2598 | "outputs": [], 2599 | "source": [ 2600 | "from mrstat import ols\n", 2601 | "from mrstat import anova_lm" 2602 | ] 2603 | }, 2604 | { 2605 | "cell_type": "code", 2606 | "execution_count": 63, 2607 | "metadata": { 2608 | "collapsed": false 2609 | }, 2610 | "outputs": [], 2611 | "source": [ 2612 | "data = hsb[['write','female','ses']]" 2613 | ] 2614 | }, 2615 | { 2616 | "cell_type": "code", 2617 | "execution_count": 64, 2618 | "metadata": { 2619 | "collapsed": false 2620 | }, 2621 | "outputs": [ 2622 | { 2623 | "name": "stderr", 2624 | "output_type": "stream", 2625 | "text": [ 2626 | "C:\\Anaconda2\\lib\\site-packages\\scipy\\stats\\_distn_infrastructure.py:875: RuntimeWarning: invalid value encountered in greater\n", 2627 | " return (self.a < x) & (x < self.b)\n", 2628 | "C:\\Anaconda2\\lib\\site-packages\\scipy\\stats\\_distn_infrastructure.py:875: RuntimeWarning: invalid value encountered in less\n", 2629 | " return (self.a < x) & (x < self.b)\n", 2630 | "C:\\Anaconda2\\lib\\site-packages\\scipy\\stats\\_distn_infrastructure.py:1814: RuntimeWarning: invalid value encountered in less_equal\n", 2631 | " cond2 = cond0 & (x <= self.a)\n" 2632 | ] 2633 | } 2634 | ], 2635 | "source": [ 2636 | "formula = 'write ~ C(female) + C(ses) + C(female):C(ses)'\n", 2637 | "model = ols(formula, data).fit()\n", 2638 | "aov_table = anova_lm(model, typ=1)" 2639 | ] 2640 | }, 2641 | { 
2642 | "cell_type": "code", 2643 | "execution_count": 65, 2644 | "metadata": { 2645 | "collapsed": false 2646 | }, 2647 | "outputs": [ 2648 | { 2649 | "data": { 2650 | "text/html": [ 2651 | "
\n", 2652 | "\n", 2653 | " \n", 2654 | " \n", 2655 | " \n", 2656 | " \n", 2657 | " \n", 2658 | " \n", 2659 | " \n", 2660 | " \n", 2661 | " \n", 2662 | " \n", 2663 | " \n", 2664 | " \n", 2665 | " \n", 2666 | " \n", 2667 | " \n", 2668 | " \n", 2669 | " \n", 2670 | " \n", 2671 | " \n", 2672 | " \n", 2673 | " \n", 2674 | " \n", 2675 | " \n", 2676 | " \n", 2677 | " \n", 2678 | " \n", 2679 | " \n", 2680 | " \n", 2681 | " \n", 2682 | " \n", 2683 | " \n", 2684 | " \n", 2685 | " \n", 2686 | " \n", 2687 | " \n", 2688 | " \n", 2689 | " \n", 2690 | " \n", 2691 | " \n", 2692 | " \n", 2693 | " \n", 2694 | " \n", 2695 | " \n", 2696 | " \n", 2697 | "
dfsum_sqmean_sqFPR(>F)
C(female)1.01176.2138451176.21384514.6266830.000177
C(ses)2.01080.599437540.2997186.7188400.001509
C(female):C(ses)2.021.43090410.7154520.1332510.875326
Residual194.015600.63081480.415623NaNNaN
\n", 2698 | "
" 2699 | ], 2700 | "text/plain": [ 2701 | " df sum_sq mean_sq F PR(>F)\n", 2702 | "C(female) 1.0 1176.213845 1176.213845 14.626683 0.000177\n", 2703 | "C(ses) 2.0 1080.599437 540.299718 6.718840 0.001509\n", 2704 | "C(female):C(ses) 2.0 21.430904 10.715452 0.133251 0.875326\n", 2705 | "Residual 194.0 15600.630814 80.415623 NaN NaN" 2706 | ] 2707 | }, 2708 | "execution_count": 65, 2709 | "metadata": {}, 2710 | "output_type": "execute_result" 2711 | } 2712 | ], 2713 | "source": [ 2714 | "aov_table" 2715 | ] 2716 | }, 2717 | { 2718 | "cell_type": "markdown", 2719 | "metadata": { 2720 | "collapsed": true 2721 | }, 2722 | "source": [ 2723 | "По данной таблице можно сделать следующие выводы:\n", 2724 | "- write зависит от female (p-value = 0.000177 < 0.05)\n", 2725 | "- write зависит от ses (p-value = 0.001509 < 0.05)\n", 2726 | "- write не зависит от взаимодействия female и ses (p-value = 0.875326 > 0.05)" 2727 | ] 2728 | }, 2729 | { 2730 | "cell_type": "markdown", 2731 | "metadata": {}, 2732 | "source": [ 2733 | "Спасибо за внимание! 
)" 2734 | ] 2735 | } 2736 | ], 2737 | "metadata": { 2738 | "anaconda-cloud": {}, 2739 | "kernelspec": { 2740 | "display_name": "Python 2", 2741 | "language": "python", 2742 | "name": "python2" 2743 | }, 2744 | "language_info": { 2745 | "codemirror_mode": { 2746 | "name": "ipython", 2747 | "version": 2 2748 | }, 2749 | "file_extension": ".py", 2750 | "mimetype": "text/x-python", 2751 | "name": "python", 2752 | "nbconvert_exporter": "python", 2753 | "pygments_lexer": "ipython2", 2754 | "version": "2.7.13" 2755 | } 2756 | }, 2757 | "nbformat": 4, 2758 | "nbformat_minor": 0 2759 | } 2760 | -------------------------------------------------------------------------------- /mrstat.py: -------------------------------------------------------------------------------- 1 | from scipy.stats import probplot as qq_plot 2 | from scipy.stats import ttest_1samp, shapiro, chi2_contingency, wilcoxon, mannwhitneyu 3 | from scipy.stats import ttest_ind, ttest_rel, fisher_exact 4 | from statsmodels.sandbox.stats.multicomp import multipletests 5 | from statsmodels.stats.weightstats import CompareMeans, DescrStatsW 6 | from statsmodels.stats.weightstats import zconfint 7 | from statsmodels.stats.descriptivestats import sign_test 8 | from statsmodels.stats.proportion import proportion_confint 9 | from statsmodels.stats.proportion import samplesize_confint_proportion 10 | from scipy.stats import pearsonr, spearmanr, kstest, ks_2samp, chisquare 11 | from statsmodels.formula.api import ols 12 | from statsmodels.stats.anova import anova_lm 13 | from scipy import stats 14 | import itertools 15 | import numpy as np 16 | import statsmodels.stats.api as sms 17 | 18 | def get_z(mu,mu_0,sigma,n): 19 | z = (mu-mu_0)/(sigma/np.sqrt(n)) 20 | return z 21 | 22 | def mean_diff_confint_ind(sample1,sample2): 23 | cm = CompareMeans(DescrStatsW(sample1), DescrStatsW(sample2)) 24 | return cm.tconfint_diff() 25 | 26 | def mean_diff_confint_rel(sample1,sample2): 27 | return DescrStatsW(sample1 - 
sample2).tconfint_mean() 28 | 29 | def prop_test(sample,p_0,alternative='two-sided'): 30 | p = sample.mean() 31 | n = len(sample) 32 | se = np.sqrt(p*(1-p)/n) 33 | z = (p - p_0)/se 34 | return get_norm_p(z,alternative=alternative) 35 | 36 | def prop_confint(sample,method='normal'): 37 | return proportion_confint(sum(sample),len(sample),method=method) 38 | 39 | def get_bootstrap_samples(data, n_samples): 40 | indices = np.random.randint(0, len(data), (n_samples, len(data))) 41 | samples = data[indices] 42 | return samples 43 | 44 | def stat_intervals(stat, alpha): 45 | boundaries = np.percentile(stat, [100 * alpha / 2., 100 * (1 - alpha / 2.)]) 46 | return boundaries 47 | 48 | def bootstrap_conf_int(data,stat_func,alpha=0.05,n_samples=1000): 49 | ''' 50 | a = np.random.normal(size=1000) 51 | conf_int(a,np.median) 52 | ''' 53 | scores = [stat_func(sample) for sample in get_bootstrap_samples(data,n_samples)] 54 | return stat_intervals(scores, alpha) 55 | 56 | def bootstrap_test(sample, param, stat_func, n_samples = 1000, alternative = 'two-sided'): 57 | 58 | if alternative not in ('two-sided', 'less', 'greater'): 59 | raise ValueError("alternative not recognized\n" 60 | "should be 'two-sided', 'less' or 'greater'") 61 | 62 | param_d = [stat_func(i) for i in get_bootstrap_samples(sample,n_samples)] 63 | 64 | mean_p = np.mean(param_d) 65 | t_stat = stat_func(sample) - param 66 | 67 | zero_dist = [(mm - mean_p) for mm in param_d] 68 | 69 | if alternative == 'two-sided': 70 | return sum([1. if abs(x) >= abs(t_stat) else 0. for x in zero_dist]) / len(zero_dist) 71 | 72 | if alternative == 'less': 73 | return sum([1. if x <= t_stat else 0. for x in zero_dist]) / len(zero_dist) 74 | 75 | if alternative == 'greater': 76 | return sum([1. if x >= t_stat else 0. 
for x in zero_dist]) / len(zero_dist) 77 | 78 | def bootstrap_diff_conf_int(a,b,stat_func,alpha=0.05,n_samples=1000): 79 | ''' 80 | a = np.random.normal(size=1000) 81 | b = np.random.normal(loc=2,size=1000) 82 | diff_conf_int(b,a,np.median) 83 | ''' 84 | scores_a = [stat_func(sample) for sample in get_bootstrap_samples(a,n_samples)] 85 | scores_b = [stat_func(sample) for sample in get_bootstrap_samples(b,n_samples)] 86 | delta_scores = [x[0] - x[1] for x in zip(scores_a,scores_b)] 87 | return stat_intervals(delta_scores, alpha) 88 | 89 | def vcramer(table): 90 | chi, p, _, _ = stats.chi2_contingency(table,correction=False) 91 | n = table.sum() 92 | r,c = table.shape 93 | return np.sqrt(chi/(n*(min(r,c)-1.))), p 94 | 95 | def mcc(a,b,c,d): 96 | ''' 97 | Matthews correlation from contigency table 98 | ''' 99 | return (a*d - b*c) / np.sqrt((a+b)*(a+c)*(b+d)*(c+d)) 100 | 101 | def get_norm_p(z_stat, alternative = 'two-sided'): 102 | if alternative not in ('two-sided', 'less', 'greater'): 103 | raise ValueError("alternative not recognized\n" 104 | "should be 'two-sided', 'less' or 'greater'") 105 | 106 | if alternative == 'two-sided': 107 | return 2 * (1 - stats.norm.cdf(np.abs(z_stat))) 108 | 109 | if alternative == 'less': 110 | return stats.norm.cdf(z_stat) 111 | 112 | if alternative == 'greater': 113 | return 1 - stats.norm.cdf(z_stat) 114 | 115 | def get_t_p(z_stat, n, alternative = 'two-sided'): 116 | if alternative not in ('two-sided', 'less', 'greater'): 117 | raise ValueError("alternative not recognized\n" 118 | "should be 'two-sided', 'less' or 'greater'") 119 | 120 | if alternative == 'two-sided': 121 | return 2 * (1 - stats.t.cdf(np.abs(z_stat),df=(n-1))) 122 | 123 | if alternative == 'less': 124 | return stats.t.cdf(np.abs(z_stat),df=(n-1)) 125 | 126 | if alternative == 'greater': 127 | return 1 - stats.t.cdf(np.abs(z_stat),df=(n-1)) 128 | 129 | def proportions_diff_ind(p1,n1,p2,n2,alternative = 'two-sided'): 130 | ''' 131 | AB test 132 | ''' 133 | P = 
float(p1*n1+p2*n2)/(n1+n2) 134 | z = (p1-p2)/np.sqrt(P*(1-P)*(1./n1+1./n2)) 135 | return get_norm_p(z,alternative) 136 | 137 | def proportions_diff_ind_table(table,alternative = 'two-sided'): 138 | ''' 139 | AB test from contigency table 140 | a, b, c, d = tables.values.ravel() 141 | ''' 142 | a,b,c,d = table.ravel() 143 | n1, n2 = a+c, b+d 144 | p1, p2 = float(a)/n1, float(b)/n2 145 | return proportions_diff_ind(p1,n1,p2,n2,alternative) 146 | 147 | def proportions_diff_ind_samples(sample1,sample2,alternative = 'two-sided'): 148 | ''' 149 | AB test from samples 150 | ''' 151 | n1 = len(sample1) 152 | n2 = len(sample2) 153 | p1 = float(sum(sample1)) / n1 154 | p2 = float(sum(sample2)) / n2 155 | return proportions_diff_ind(p1,n1,p2,n2,alternative) 156 | 157 | def proportions_confint_diff_ind(p1,n1,p2,n2, alpha = 0.05): 158 | ''' 159 | confidence interval for proportion difference from ps and ns 160 | ''' 161 | z = stats.norm.ppf(1 - alpha / 2.) 162 | left_boundary = (p1 - p2) - z * np.sqrt(p1 * (1 - p1)/ n1 + p2 * (1 - p2)/ n2) 163 | right_boundary = (p1 - p2) + z * np.sqrt(p1 * (1 - p1)/ n1 + p2 * (1 - p2)/ n2) 164 | return (left_boundary, right_boundary) 165 | 166 | def proportions_confint_diff_ind_table(table,alpha = 0.05): 167 | ''' 168 | confidence interval for proportion difference from contigency table 169 | ''' 170 | a,b,c,d = table.ravel() 171 | n1, n2 = a+c, b+d 172 | p1, p2 = float(a)/n1, float(b)/n2 173 | return proportions_confint_diff_ind(p1,n1,p2,n2, alpha) 174 | 175 | def proportions_confint_diff_ind_samples(sample1,sample2, alpha = 0.05): 176 | ''' 177 | confidence interval for proportion difference from samples 178 | ''' 179 | n1 = len(sample1) 180 | n2 = len(sample2) 181 | p1 = float(sum(sample1)) / n1 182 | p2 = float(sum(sample2)) / n2 183 | return proportions_confint_diff_ind(p1,n1,p2,n2, alpha) 184 | 185 | def get_props_and_lens(table,invertion=True): 186 | a,b,c,d = table.values[::-1,:].ravel() 187 | n1, n2 = a+c, b+d 188 | p1, p2 = 
float(a)/n1, float(b)/n2 189 | return p1, n1, p2, n2 190 | 191 | #---------------------------------------------------------------- 192 | 193 | def proportions_confint_diff_rel(sample1, sample2, alpha = 0.05): 194 | z = stats.norm.ppf(1 - alpha / 2.) 195 | sample = zip(sample1, sample2) 196 | n = len(sample) 197 | 198 | f = sum([1 if (x[0] == 1 and x[1] == 0) else 0 for x in sample]) 199 | g = sum([1 if (x[0] == 0 and x[1] == 1) else 0 for x in sample]) 200 | 201 | left_boundary = float(f - g) / n - z * np.sqrt(float((f + g)) / n**2 - float((f - g)**2) / n**3) 202 | right_boundary = float(f - g) / n + z * np.sqrt(float((f + g)) / n**2 - float((f - g)**2) / n**3) 203 | return (left_boundary, right_boundary) 204 | 205 | 206 | def proportions_diff_rel(sample1, sample2, alternative = 'two-sided'): 207 | sample = zip(sample1, sample2) 208 | n = len(sample) 209 | 210 | f = sum([1 if (x[0] == 1 and x[1] == 0) else 0 for x in sample]) 211 | g = sum([1 if (x[0] == 0 and x[1] == 1) else 0 for x in sample]) 212 | 213 | z = float(f - g) / np.sqrt(f + g - float((f - g)**2) / n ) 214 | 215 | return get_norm_p(z,alternative) 216 | 217 | def two_proportions_sample_size(p1,p2,alpha=0.05,power=0.8,frac=0.5): 218 | ratio = frac/(1.-frac) 219 | es = sms.proportion_effectsize(p1, p2) 220 | n = np.floor(sms.NormalIndPower().solve_power(es, power=power, alpha=alpha, ratio=ratio)) 221 | n1,n2 = n*ratio, n 222 | return n1,n2 --------------------------------------------------------------------------------