├── .gitignore ├── .pylintrc ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── Makefile ├── README.md ├── cheatsheets ├── linear_tests_cheat_sheet.pages ├── linear_tests_cheat_sheet.pdf └── linear_tests_cheat_sheet.png ├── index.html ├── requirements-dev.txt ├── requirements.txt ├── scripts ├── process-html.sh └── test.sh ├── tests-as-linear.ipynb └── tests_as_linear ├── __init__.py ├── plots.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | .dmypy.json 121 | dmypy.json 122 | 123 | # Pyre type checker 124 | .pyre/ 125 | 126 | # OS-generated files 127 | .DS_Store 128 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | # Use multiple processes to speed up Pylint. 
3 | jobs=1 4 | 5 | init-hook='import sys; sys.path.append("knead/")' 6 | 7 | # Allow loading of arbitrary C extensions. Extensions are imported into the 8 | # active Python interpreter and may run arbitrary code. 9 | unsafe-load-any-extension=no 10 | 11 | # Allow optimization of some AST trees. This will activate a peephole AST 12 | # optimizer, which will apply various small optimizations. For instance, it can 13 | # be used to obtain the result of joining multiple strings with the addition 14 | # operator. Joining a lot of strings can lead to a maximum recursion error in 15 | # Pylint and this flag can prevent that. It has one side effect, the resulting 16 | # AST will be different than the one from reality. 17 | optimize-ast=no 18 | 19 | [MESSAGES CONTROL] 20 | 21 | # Only show warnings with the listed confidence levels. Leave empty to show 22 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 23 | confidence= 24 | 25 | # Disable the message, report, category or checker with the given id(s). You 26 | # can either give multiple identifiers separated by comma (,) or put this 27 | # option multiple times (only on the command line, not in the configuration 28 | # file where it should appear only once). You can also use "--disable=all" to 29 | # disable everything first and then reenable specific checks. For example, if 30 | # you want to run only the similarities checker, you can use "--disable=all 31 | # --enable=similarities". If you want to run only the classes checker, but have 32 | # no Warning level messages displayed, use "--disable=all --enable=classes 33 | # --disable=W" 34 | # 35 | # Disable warnings, missing-docstring errors and wrong indentation errors 36 | disable=W,C0111,C0330 37 | 38 | # Enable the message, report, category or checker with the given id(s). You can 39 | # either give multiple identifiers separated by comma (,) or put this option 40 | # multiple times. See also the "--disable" option for examples. 
41 | enable=import-error, 42 | import-self, 43 | reimported, 44 | wildcard-import, 45 | misplaced-future, 46 | relative-import, 47 | deprecated-module, 48 | unpacking-non-sequence, 49 | invalid-all-object, 50 | undefined-all-variable, 51 | used-before-assignment, 52 | cell-var-from-loop, 53 | global-variable-undefined, 54 | dangerous-default-value, 55 | redefined-builtin, 56 | redefine-in-handler, 57 | unused-import, 58 | unused-wildcard-import, 59 | global-variable-not-assigned, 60 | undefined-loop-variable, 61 | global-statement, 62 | global-at-module-level, 63 | bad-open-mode, 64 | redundant-unittest-assert, 65 | boolean-datetime, 66 | unused-variable 67 | 68 | 69 | [REPORTS] 70 | 71 | # Set the output format. Available formats are text, parseable, colorized, msvs 72 | # (visual studio) and html. You can also give a reporter class, eg 73 | # mypackage.mymodule.MyReporterClass. 74 | output-format=colorized 75 | 76 | # Put messages in a separate file for each module / package specified on the 77 | # command line instead of printing them on stdout. Reports (if any) will be 78 | # written in a file name "pylint_global.[txt|html]". 79 | files-output=no 80 | 81 | # Tells whether to display a full report or only the messages 82 | reports=no 83 | 84 | # Python expression which should return a note less than 10 (10 is the highest 85 | # note). You have access to the variables errors warning, statement which 86 | # respectively contain the number of errors / warnings messages and the total 87 | # number of statements analyzed. This is used by the global evaluation report 88 | # (RP0004). 
89 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 90 | 91 | [BASIC] 92 | 93 | # List of builtins function names that should not be used, separated by a comma 94 | bad-functions=map,filter,input 95 | 96 | # Good variable names which should always be accepted, separated by a comma 97 | good-names=a,b,c,d,f,i,j,k,df,x,y,y2,_,fig,ax 98 | 99 | # Bad variable names which should always be refused, separated by a comma 100 | bad-names=foo,bar,baz,toto,tutu,tata 101 | 102 | # Colon-delimited sets of names that determine each other's naming style when 103 | # the name regexes allow several styles. 104 | name-group= 105 | 106 | # Include a hint for the correct naming format with invalid-name 107 | include-naming-hint=yes 108 | 109 | # Regular expression matching correct method names 110 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 111 | 112 | # Naming hint for method names 113 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 114 | 115 | # Regular expression matching correct function names 116 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 117 | 118 | # Naming hint for function names 119 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 120 | 121 | # Regular expression matching correct module names 122 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 123 | 124 | # Naming hint for module names 125 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 126 | 127 | # Regular expression matching correct attribute names 128 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 129 | 130 | # Naming hint for attribute names 131 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 132 | 133 | # Regular expression matching correct class attribute names 134 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 135 | 136 | # Naming hint for class attribute names 137 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 138 | 139 | # Regular expression matching correct constant names 140 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 141 | 142 | # Naming hint for constant names 
143 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 144 | 145 | # Regular expression matching correct class names 146 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 147 | 148 | # Naming hint for class names 149 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 150 | 151 | # Regular expression matching correct argument names 152 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 153 | 154 | # Naming hint for argument names 155 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 156 | 157 | # Regular expression matching correct inline iteration names 158 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 159 | 160 | # Naming hint for inline iteration names 161 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 162 | 163 | # Regular expression matching correct variable names 164 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 165 | 166 | # Naming hint for variable names 167 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 168 | 169 | # Regular expression which should only match function or class names that do 170 | # not require a docstring. 171 | no-docstring-rgx=^_ 172 | 173 | # Minimum line length for functions/classes that require docstrings, shorter 174 | # ones are exempt. 175 | docstring-min-length=-1 176 | 177 | 178 | [ELIF] 179 | 180 | # Maximum number of nested blocks for function / method body 181 | max-nested-blocks=5 182 | 183 | 184 | [FORMAT] 185 | 186 | # Maximum number of characters on a single line. 187 | max-line-length=100 188 | 189 | # Regexp for a line that is allowed to be longer than the limit. 190 | ignore-long-lines=^\s*(# )??$ 191 | 192 | # Allow the body of an if to be on the same line as the test if there is no 193 | # else. 194 | single-line-if-stmt=no 195 | 196 | # List of optional constructs for which whitespace checking is disabled. `dict- 197 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 198 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 199 | # `empty-line` allows space-only lines. 
200 | no-space-check=trailing-comma,dict-separator 201 | 202 | # Maximum number of lines in a module 203 | max-module-lines=1000 204 | 205 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 206 | # tab). 207 | indent-string=' ' 208 | 209 | # Number of spaces of indent required inside a hanging or continued line. 210 | indent-after-paren=4 211 | 212 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 213 | expected-line-ending-format= 214 | 215 | 216 | [LOGGING] 217 | 218 | # Logging modules to check that the string format arguments are in logging 219 | # function parameter format 220 | logging-modules=logging 221 | 222 | 223 | [MISCELLANEOUS] 224 | 225 | # List of note tags to take in consideration, separated by a comma. 226 | notes=FIXME,XXX,TODO 227 | 228 | 229 | [SIMILARITIES] 230 | 231 | # Minimum lines number of a similarity. 232 | min-similarity-lines=4 233 | 234 | # Ignore comments when computing similarities. 235 | ignore-comments=yes 236 | 237 | # Ignore docstrings when computing similarities. 238 | ignore-docstrings=yes 239 | 240 | # Ignore imports when computing similarities. 241 | ignore-imports=no 242 | 243 | 244 | [SPELLING] 245 | 246 | # Spelling dictionary name. Available dictionaries: none. To make it working 247 | # install python-enchant package. 248 | spelling-dict= 249 | 250 | # List of comma separated words that should not be checked. 251 | spelling-ignore-words= 252 | 253 | # A path to a file that contains private dictionary; one word per line. 254 | spelling-private-dict-file= 255 | 256 | # Tells whether to store unknown words to indicated private dictionary in 257 | # --spelling-private-dict-file option instead of raising a message. 258 | spelling-store-unknown-words=no 259 | 260 | 261 | [TYPECHECK] 262 | 263 | # Tells whether missing members accessed in mixin class should be ignored. A 264 | # mixin class is detected if its name ends with "mixin" (case insensitive). 
265 | ignore-mixin-members=yes 266 | 267 | # List of module names for which member attributes should not be checked 268 | # (useful for modules/projects where namespaces are manipulated during runtime 269 | # and thus existing member attributes cannot be deduced by static analysis). It 270 | # supports qualified module names, as well as Unix pattern matching. 271 | ignored-modules= 272 | 273 | # List of class names for which member attributes should not be checked 274 | # (useful for classes with attributes dynamically set). This supports 275 | # qualified names. 276 | ignored-classes= 277 | 278 | # List of members which are set dynamically and missed by pylint inference 279 | # system, and so shouldn't trigger E1101 when accessed. Python regular 280 | # expressions are accepted. 281 | generated-members= 282 | 283 | 284 | [VARIABLES] 285 | 286 | # Tells whether we should check for unused imports in __init__ files. 287 | init-import=no 288 | 289 | # A regular expression matching the name of dummy variables (i.e. expectedly 290 | # not used). 291 | dummy-variables-rgx=_$|dummy 292 | 293 | # List of additional names supposed to be defined in builtins. Remember that 294 | # you should avoid defining new builtins when possible. 295 | additional-builtins= 296 | 297 | # List of strings which can identify a callback function by name. A callback 298 | # name must start or end with one of those strings. 299 | callbacks=cb_,_cb 300 | 301 | 302 | [CLASSES] 303 | 304 | # List of method names used to declare (i.e. assign) instance attributes. 305 | defining-attr-methods=__init__,__new__,setUp 306 | 307 | # List of valid names for the first argument in a class method. 308 | valid-classmethod-first-arg=cls 309 | 310 | # List of valid names for the first argument in a metaclass class method. 311 | valid-metaclass-classmethod-first-arg=mcs 312 | 313 | # List of member names, which should be excluded from the protected access 314 | # warning. 
315 | exclude-protected=_asdict,_fields,_replace,_source,_make 316 | 317 | 318 | [DESIGN] 319 | 320 | # Maximum number of arguments for function / method 321 | max-args=5 322 | 323 | # Argument names that match this expression will be ignored. Default to name 324 | # with leading underscore 325 | ignored-argument-names=_.* 326 | 327 | # Maximum number of locals for function / method body 328 | max-locals=15 329 | 330 | # Maximum number of return / yield for function / method body 331 | max-returns=6 332 | 333 | # Maximum number of branch for function / method body 334 | max-branches=12 335 | 336 | # Maximum number of statements in function / method body 337 | max-statements=50 338 | 339 | # Maximum number of parents for a class (see R0901). 340 | max-parents=7 341 | 342 | # Maximum number of attributes for a class (see R0902). 343 | max-attributes=7 344 | 345 | # Minimum number of public methods for a class (see R0903). 346 | min-public-methods=2 347 | 348 | # Maximum number of public methods for a class (see R0904). 349 | max-public-methods=20 350 | 351 | # Maximum number of boolean expressions in a if statement 352 | max-bool-expr=5 353 | 354 | 355 | [IMPORTS] 356 | 357 | # Deprecated modules which should not be used, separated by a comma 358 | deprecated-modules=optparse 359 | 360 | # Create a graph of every (i.e. internal and external) dependencies in the 361 | # given file (report RP0402 must not be disabled) 362 | import-graph= 363 | 364 | # Create a graph of external dependencies in the given file (report RP0402 must 365 | # not be disabled) 366 | ext-import-graph= 367 | 368 | # Create a graph of internal dependencies in the given file (report RP0402 must 369 | # not be disabled) 370 | int-import-graph= 371 | 372 | 373 | [EXCEPTIONS] 374 | 375 | # Exceptions that will emit a warning when being caught. 
Defaults to 376 | # "Exception" 377 | overgeneral-exceptions=Exception 378 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | python: 3.6 4 | cache: pip 5 | 6 | install: 7 | - make venv 8 | - pip freeze 9 | 10 | script: 11 | - make lint 12 | - make test 13 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at george@eigenfoo.xyz. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | [![GitHub Issues](https://img.shields.io/github/issues/eigenfoo/tests-as-linear.svg)](https://github.com/eigenfoo/tests-as-linear/issues) 4 | [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/eigenfoo/tests-as-linear.svg)](https://github.com/eigenfoo/tests-as-linear/pulls) 5 | [![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) 6 | [![Contributor 
Covenant](https://img.shields.io/badge/Contributor%20Covenant-v1.4%20adopted-ff69b4.svg)](https://github.com/eigenfoo/tests-as-linear/blob/master/CODE_OF_CONDUCT.md) 7 | 8 | Contributions are always welcome! Check out the GitHub issue trackers for both 9 | [this Python port](https://github.com/eigenfoo/tests-as-linear/issues) and 10 | the [original R post](https://github.com/lindeloev/tests-as-linear/issues) for 11 | some ideas on how to contribute. 12 | 13 | - Raise issues if you have ideas. 14 | - Submit pull requests if you want to help improve these resources. 15 | - Star this repo if you want to follow updates. 16 | - Fork this repo if you want to put your own spin on it! 17 | 18 | ## Project structure 19 | 20 | ```bash 21 | . 22 | ├── CODE_OF_CONDUCT.md 23 | ├── CONTRIBUTING.md 24 | ├── LICENSE.txt 25 | ├── Makefile 26 | ├── README.md 27 | ├── cheatsheets 28 | │   └── ... 29 | ├── index.html # Published HTML. 30 | ├── requirements-dev.txt # Dependencies for development. 31 | ├── requirements.txt # Dependencies. 32 | ├── scripts # Shell scripts for development, testing and deployment. 33 | │   └── ... 34 | ├── tests-as-linear.ipynb # Main notebook. 35 | └── tests_as_linear # Supporting Python source code. 36 | ├── __init__.py 37 | ├── plots.py # Functions for large plots in main notebook. 38 | └── utils.py # Utility functions used in main notebook. 39 | ``` 40 | 41 | ## Development instructions 42 | 43 | ```bash 44 | git clone git@github.com:YOUR_USERNAME/tests-as-linear.git 45 | cd tests-as-linear/ 46 | make venv 47 | source venv/bin/activate 48 | # Do your work... 49 | make publish 50 | make check 51 | deactivate 52 | ``` 53 | 54 | Please make sure that `make publish` and `make check` successfully complete 55 | before committing and pushing: these commands generate publishable HTML files, 56 | lint the Python modules and run various test scripts. 
57 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. 
This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. 
Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. 
Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. 
For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 
188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. 
indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. 
automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. 
No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public 379 | licenses. Notwithstanding, Creative Commons may elect to apply one of 380 | its public licenses to material it publishes and in those instances 381 | will be considered the “Licensor.” The text of the Creative Commons 382 | public licenses is dedicated to the public domain under the CC0 Public 383 | Domain Dedication. Except for the limited purpose of indicating that 384 | material is shared under a Creative Commons public license or as 385 | otherwise permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the 393 | public licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. 
396 | 397 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help venv lint-black lint-pylint lint test check black publish clean 2 | .DEFAULT_GOAL = help 3 | 4 | PYTHON = python3 5 | SHELL = bash 6 | VENV_PATH = venv 7 | 8 | help: 9 | @echo "Usage:" 10 | @grep -E '^[a-zA-Z_-]+:.*?# .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?# "}; {printf "\033[1;34mmake %-10s\033[0m%s\n", $$1, $$2}' 11 | 12 | venv: # Set up Python virtual environment. 13 | @printf "Creating Python virtual environment...\n" 14 | rm -rf ${VENV_PATH} 15 | ( \ 16 | ${PYTHON} -m venv ${VENV_PATH} && \ 17 | source ${VENV_PATH}/bin/activate && \ 18 | pip install -U pip && \ 19 | pip install -r requirements-dev.txt && \ 20 | deactivate; \ 21 | ) 22 | @printf "\n\nVirtual environment created! \033[1;34mRun \`source ${VENV_PATH}/bin/activate\` to activate it.\033[0m\n\n\n" 23 | 24 | lint-black: 25 | @printf "Checking code style with black...\n" 26 | black tests_as_linear/ --check --target-version=py36 27 | @printf "\033[1;34mBlack passes!\033[0m\n\n" 28 | 29 | lint-pylint: 30 | @printf "Checking code style with pylint...\n" 31 | pylint tests_as_linear/ --rcfile=.pylintrc 32 | @printf "\033[1;34mPylint passes!\033[0m\n\n" 33 | 34 | lint: lint-black lint-pylint # Check code style with black and pylint. 35 | 36 | test: clean # Run test scripts. 37 | @printf "Running test script...\n" 38 | ${SHELL} scripts/test.sh 39 | @printf "\033[1;34mTests pass!\033[0m\n\n" 40 | 41 | check: clean lint test # Alias for `make clean lint test`. 42 | 43 | black: # Format code in-place with black. 44 | black tests_as_linear/ --target-version=py36 45 | 46 | publish: # Run notebook in-place and generate HTML files.
47 | jupyter nbconvert --to notebook --inplace --execute tests-as-linear.ipynb 48 | jupyter nbconvert --to html tests-as-linear.ipynb 49 | sh scripts/process-html.sh 50 | rm tests-as-linear.html 51 | 52 | clean: # Clean directory. 53 | rm -rf _site/ __pycache__/ 54 | find tests_as_linear/ -type d -name "__pycache__" -exec rm -rf {} + 55 | find tests_as_linear/ -type d -name "__pycache__" -delete 56 | find tests_as_linear/ -type f -name "*.pyc" -delete 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # _Common statistical tests are linear models_: Python port 2 | 3 | [![Build Status](https://travis-ci.com/eigenfoo/tests-as-linear.svg?branch=master)](https://travis-ci.com/eigenfoo/tests-as-linear) 4 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/eigenfoo/tests-as-linear/master?filepath=tests-as-linear.ipynb) 5 | 6 | A Python port of Jonas Kristoffer Lindeløv's post [_Common statistical tests are 7 | linear models (or: how to teach 8 | stats)_](https://lindeloev.github.io/tests-as-linear/), which originally had 9 | accompanying code in R. 10 | 11 | ## Notes on Python port 12 | 13 | The original post used R's built-in functions to verify that the linear models 14 | were indeed equivalent to the statistical tests (by showing that the p-values, 15 | t-values, and other such statistics, were the same in either case). In this 16 | Python port, we instead verify that `scipy.stats` functions and `smf.ols` output 17 | agree. 18 | 19 | The original R post had [four 20 | appendices](https://github.com/lindeloev/tests-as-linear/tree/master/simulations), 21 | each of which demonstrated (through numerical simulation) that a common 22 | statistical test was well-approximated by a linear model. These simulations have 23 | not been ported to Python (yet!). 
This is [an outstanding 24 | issue](https://github.com/eigenfoo/tests-as-linear/issues/14). In the meantime, 25 | please refer to the [original appendices 26 | upstream](https://github.com/lindeloev/tests-as-linear/tree/master/simulations) 27 | for the simulations. 28 | 29 | Finally, certain statistical functions and tests are supported by neither 30 | `statsmodels` nor `scipy`, the two statistical Python libraries used in this 31 | port. R code with no simple Python equivalent (mainly in sections 6 and 7, 32 | "Three or more means" and "Proportions: Chi-square is a log-linear model") has 33 | not been ported. Such code has been noted in yellow warning boxes. 34 | 35 | ## Contributing 36 | 37 | Please refer to [the contributing 38 | guide](https://github.com/eigenfoo/tests-as-linear/blob/master/CONTRIBUTING.md) 39 | for project structure information and development instructions. 40 | 41 | ## License 42 | 43 | This work is licensed under a Creative Commons Attribution 4.0 International 44 | License. Please refer to 45 | [`LICENSE.txt`](https://github.com/eigenfoo/tests-as-linear/blob/master/LICENSE.txt) 46 | for more details.
47 | -------------------------------------------------------------------------------- /cheatsheets/linear_tests_cheat_sheet.pages: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eigenfoo/tests-as-linear/7c8c51ec4f75f688666f1ec7210183ab5fe15778/cheatsheets/linear_tests_cheat_sheet.pages -------------------------------------------------------------------------------- /cheatsheets/linear_tests_cheat_sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eigenfoo/tests-as-linear/7c8c51ec4f75f688666f1ec7210183ab5fe15778/cheatsheets/linear_tests_cheat_sheet.pdf -------------------------------------------------------------------------------- /cheatsheets/linear_tests_cheat_sheet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eigenfoo/tests-as-linear/7c8c51ec4f75f688666f1ec7210183ab5fe15778/cheatsheets/linear_tests_cheat_sheet.png -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | black==19.3b0 3 | nbdime==1.0.6 4 | pylint==2.3.1 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter==1.0.0 2 | matplotlib==3.1.0 3 | numpy==1.16.4 4 | pandas==0.24.2 5 | patsy==0.5.1 6 | scipy==1.3.0 7 | statsmodels==0.10.0 8 | -------------------------------------------------------------------------------- /scripts/process-html.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Embed Clicky web analytics and Twitter card. 
4 | LINE=9 5 | head -n $LINE tests-as-linear.html > index.html 6 | echo '' >> index.html 7 | echo '' >> index.html 8 | echo '' >> index.html 9 | echo '' >> index.html 10 | echo '' >> index.html 11 | echo '' >> index.html 12 | echo '' >> index.html 13 | echo '' >> index.html 14 | tail -n +$((LINE + 1)) tests-as-linear.html >> index.html # Resume after line $LINE so it is not written twice. 15 | 16 | # Change title. 17 | sed -i.bak "s/<title>tests-as-linear<\/title>/<title>Common statistical tests are linear models: Python port | Eigenfoo<\/title>/" "index.html" 18 | rm index.html.bak 19 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | jupyter nbconvert --to notebook --execute tests-as-linear.ipynb 6 | nbdiff --ignore-output tests-as-linear.ipynb tests-as-linear.nbconvert.ipynb > diff.txt 7 | 8 | if [ -s diff.txt ] 9 | then 10 | echo "Notebook not executed in order. Rerun \`tests-as-linear.ipynb\`." 11 | cat diff.txt 12 | rm -rf diff.txt tests-as-linear.nbconvert.ipynb # Clean up 13 | exit 1 14 | else 15 | echo "Tests passed!"
16 | rm -rf diff.txt tests-as-linear.nbconvert.ipynb # Clean up 17 | exit 0 18 | fi 19 | -------------------------------------------------------------------------------- /tests_as_linear/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eigenfoo/tests-as-linear/7c8c51ec4f75f688666f1ec7210183ab5fe15778/tests_as_linear/__init__.py -------------------------------------------------------------------------------- /tests_as_linear/plots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import statsmodels.formula.api as smf 4 | import matplotlib.pyplot as plt 5 | from .utils import signed_rank, format_decimals_factory 6 | 7 | plt.style.use("seaborn-whitegrid") 8 | 9 | 10 | def linear_regression_plot(): 11 | # Construct data as a pd.DataFrame 12 | x = np.random.normal(0, 2, 30) 13 | y = 0.8 * x + 0.2 * 5 * np.random.randn(30) 14 | df = pd.DataFrame() 15 | df["x"], df["y"] = x, y 16 | 17 | # Linear regression 18 | res = smf.ols("y ~ 1 + x", df).fit() 19 | intercept, slope = res.params 20 | 21 | # Plot 22 | fig, ax = plt.subplots(figsize=[10, 8]) 23 | ax.scatter(x, y, color="k") 24 | ax.axhline(intercept, color="b", label=r"$\beta_0$ (Intercept)") 25 | ax.plot( 26 | ax.get_xlim(), 27 | [slope * x + intercept for x in ax.get_xlim()], 28 | color="r", 29 | label=r"$\beta_1$ (Slope)", 30 | ) 31 | ax.legend() 32 | 33 | return fig, ax 34 | 35 | 36 | # pylint: disable=R0914 37 | def pearson_spearman_plot(): 38 | # Construct data as pd.DataFrames 39 | x = np.random.normal(0, 2, 30) 40 | y = 0.8 * x + 0.2 * 5 * np.random.randn(30) 41 | data_pearson = pd.DataFrame() 42 | data_pearson["x"], data_pearson["y"] = x, y 43 | data_spearman = data_pearson.rank() 44 | 45 | # Pearson equivalent linear model 46 | res_pearson = smf.ols("y ~ 1 + x", data_pearson).fit() 47 | intercept_pearson, slope_pearson = res_pearson.params 48 | 49 | 
# Spearman equivalent linear model 50 | res_spearman = smf.ols("y ~ 1 + x", data_spearman).fit() 51 | intercept_spearman, slope_spearman = res_spearman.params 52 | 53 | # Plot 54 | fig, axarr = plt.subplots(ncols=2, figsize=[18, 8]) 55 | 56 | for ax, dataset, to_str, title, a, b in zip( 57 | axarr, 58 | [data_pearson, data_spearman], 59 | [format_decimals_factory(), format_decimals_factory(0)], 60 | ["Pearson", "Spearman"], 61 | [slope_pearson, slope_spearman], 62 | [intercept_pearson, intercept_spearman], 63 | ): 64 | ax.scatter(dataset["x"], dataset["y"], color="k") 65 | 66 | annotations = ( 67 | "(" + dataset["x"].apply(to_str) + ", " + dataset["y"].apply(to_str) + ")" 68 | ) 69 | for i, annot in enumerate(annotations): 70 | ax.annotate(annot, (dataset["x"][i], dataset["y"][i]), color="grey") 71 | 72 | ax.axhline(b, color="b", label=r"$\beta_0$ (Intercept)") # b is the intercept; a is the slope 73 | ax.plot( 74 | ax.get_xlim(), 75 | [a * x + b for x in ax.get_xlim()], 76 | color="r", 77 | label=r"$\beta_1$ (Slope)", 78 | ) 79 | 80 | ax.set_title(title) 81 | ax.legend(fontsize="large") 82 | 83 | return fig, axarr 84 | 85 | 86 | def ttest_wilcoxon_plot(): 87 | # Construct data as a pd.DataFrame 88 | y = pd.DataFrame(data=np.random.normal(1, 1, 20), columns=["y"]) 89 | 90 | # t-test equivalent linear model 91 | res = smf.ols(formula="y ~ 1", data=y).fit() 92 | intercept_ttest = res.params.Intercept 93 | 94 | # Wilcoxon equivalent linear model 95 | res = smf.ols(formula="y ~ 1", data=signed_rank(y)).fit() 96 | intercept_wilcoxon = res.params.Intercept 97 | 98 | # Plot 99 | fig, axarr = plt.subplots(ncols=2, figsize=[18, 8]) 100 | 101 | for ax, dataset, to_str, title, b in zip( 102 | axarr, 103 | [y, signed_rank(y)], 104 | [format_decimals_factory(), format_decimals_factory(0)], 105 | ["$t$-test", "Wilcoxon"], 106 | [intercept_ttest, intercept_wilcoxon], 107 | ): 108 | ax.scatter(np.ones_like(dataset), dataset, color="k") 109 | 110 | annotations = dataset.y.apply(to_str) 111 | for i, annot in
enumerate(annotations): 112 | ax.annotate(annot, (1, dataset.y[i]), color="grey") 113 | 114 | ax.axhline(b, color="b", label=r"$\beta_0$ (Intercept)") 115 | 116 | ax.set_title(title) 117 | ax.legend(fontsize="large") 118 | 119 | return fig, axarr 120 | 121 | 122 | def pairs_wilcoxon_plot(): 123 | # Construct data as a pd.DataFrame 124 | y = np.random.normal(2, 1, 20) 125 | y2 = y + np.random.randn(20) 126 | df = pd.DataFrame() 127 | df["y"], df["y2"], df["y_sub_y2"] = y, y2, y - y2 128 | 129 | # Wilcoxon equivalent linear model 130 | res = smf.ols(formula="y_sub_y2 ~ 1", data=df).fit() 131 | intercept_wilcoxon = res.params.Intercept 132 | 133 | # Plot 134 | fig, axarr = plt.subplots(ncols=2, figsize=[18, 8]) 135 | 136 | # Left hand figure 137 | axarr[0].scatter(np.zeros_like(df.y), df.y.values, color="k") 138 | axarr[0].scatter(np.ones_like(df.y2), df.y2.values, color="k") 139 | 140 | for i, j in zip(df.y, df.y2): 141 | axarr[0].plot([0, 1], [i, j], color="k") 142 | 143 | axarr[0].set_title("Pairs") 144 | 145 | # Right hand figure 146 | axarr[1].scatter(np.zeros_like(df.y_sub_y2), df.y_sub_y2.values, color="k") 147 | 148 | annotations = df.y_sub_y2.apply(format_decimals_factory()) 149 | for i, annot in enumerate(annotations): 150 | axarr[1].annotate(annot, (0, df.y_sub_y2[i]), color="grey") 151 | 152 | axarr[1].axhline(intercept_wilcoxon, color="b", label=r"$\beta_0$ (Intercept)") 153 | 154 | axarr[1].set_title("$t$-test") 155 | axarr[1].legend(fontsize="large") 156 | 157 | return fig, axarr 158 | 159 | 160 | def dummy_coding_plot(): 161 | # Construct data as a pd.DataFrame 162 | num_points = 20 163 | data1 = np.random.multivariate_normal([0, 0], np.identity(2), num_points) 164 | data2 = np.random.multivariate_normal([4, 4], np.identity(2), num_points) 165 | df = pd.DataFrame(data=np.concatenate([data1, data2]), columns=["x", "y"]) 166 | df["dummy"] = np.concatenate([np.zeros(num_points), np.ones(num_points)]) 167 | 168 | # Linear regression 169 | res = 
smf.ols(formula="y ~ 1 + dummy", data=df).fit() 170 | beta0, beta1 = res.params 171 | 172 | # Plot 173 | fig, ax = plt.subplots(figsize=[10, 8]) 174 | ax.scatter(*data1.T, color="k") 175 | ax.scatter(*data2.T, color="k") 176 | ax.axhline(beta0, color="c", label=r"$\beta_0$ (group 1 mean)") 177 | ax.plot( 178 | [beta0, beta1], 179 | [beta0, beta1], 180 | color="r", 181 | label=r"$\beta_1$ (slope = difference)", 182 | ) 183 | ax.axhline(beta1, color="b", label=r"$\beta_0 + \beta_1$ (group 2 mean)") 184 | ax.legend(fontsize="large") 185 | 186 | return fig, ax 187 | 188 | 189 | def one_way_anova_plot(): 190 | # Construct data as a pd.DataFrame 191 | a = np.random.normal(0, 1, 20) 192 | b = np.random.normal(-2, 1, 20) 193 | c = np.random.normal(3, 1, 20) 194 | d = np.random.normal(1.5, 1, 20) 195 | 196 | df = pd.DataFrame() 197 | df["y"] = np.concatenate([a, b, c, d]) 198 | df["group_2"] = np.concatenate( 199 | [np.zeros_like(b)] + [np.ones_like(b)] + 2 * [np.zeros_like(b)] 200 | ) 201 | df["group_3"] = np.concatenate( 202 | 2 * [np.zeros_like(c)] + [np.ones_like(c)] + [np.zeros_like(c)] 203 | ) 204 | df["group_4"] = np.concatenate(3 * [np.zeros_like(d)] + [np.ones_like(d)]) 205 | 206 | # ANOVA equivalent linear model 207 | res = smf.ols("y ~ 1 + group_2 + group_3 + group_4", df).fit() 208 | beta0, beta1, beta2, beta3 = res.params 209 | 210 | # Plot 211 | fig, ax = plt.subplots(figsize=[10, 8]) 212 | ax.scatter(0 * np.ones_like(a), a, color="k") 213 | ax.scatter(1 * np.ones_like(b), b, color="k") 214 | ax.scatter(2 * np.ones_like(c), c, color="k") 215 | ax.scatter(3 * np.ones_like(d), d, color="k") 216 | 217 | # Group 1 (baseline) 218 | ax.axhline(beta0, color="b", label=r"$\beta_0$ (group 1 mean)") 219 | 220 | # Group 2 221 | ax.plot([0.7, 1.3], 2 * [beta0 + beta1], color="navy") 222 | ax.plot( 223 | [0, 1], 224 | [beta0, beta0 + beta1], 225 | color="r", 226 | label=r"$\beta_1, \beta_2, ...$ (slopes/differences to $\beta_0$)", 227 | ) 228 | 229 | # Group 3 230 | 
ax.plot( 231 | [1.7, 2.3], 232 | 2 * [beta0 + beta2], 233 | color="navy", 234 | label=r"$\beta_0+\beta_1, \beta_0+\beta_2 ...$ (group 2, 3 ... means)", 235 | ) 236 | ax.plot([1, 2], [beta0, beta0 + beta2], color="r") 237 | 238 | # Group 4 239 | ax.plot([2.7, 3.3], 2 * [beta0 + beta3], color="navy") 240 | ax.plot([2, 3], [beta0, beta0 + beta3], color="r") 241 | 242 | ax.legend(fontsize="large") 243 | 244 | return fig, ax 245 | 246 | 247 | def two_way_anova_plot(df): 248 | res = smf.ols("y ~ 1 + group * mood", df).fit() 249 | beta_0, beta_b, beta_c, beta_sad, beta_b_sad, beta_c_sad = res.params 250 | 251 | # Logical masks 252 | is_a = df["group"] == "a" 253 | is_b = df["group"] == "b" 254 | is_c = df["group"] == "c" 255 | is_happy = df["mood"] == "happy" 256 | is_sad = df["mood"] == "sad" 257 | 258 | # Plot 259 | fig, ax = plt.subplots(figsize=[10, 8]) 260 | ax.scatter(0 * np.ones(10), df["y"][is_a & is_happy], color="r") 261 | ax.scatter(0 * np.ones(10), df["y"][is_a & is_sad], color="b") 262 | ax.scatter(1 * np.ones(10), df["y"][is_b & is_happy], color="r") 263 | ax.scatter(1 * np.ones(10), df["y"][is_b & is_sad], color="b") 264 | ax.scatter(2 * np.ones(10), df["y"][is_c & is_happy], color="r") 265 | ax.scatter(2 * np.ones(10), df["y"][is_c & is_sad], color="b") 266 | 267 | # Group a 268 | ax.axhline(beta_0, color="r", label="happy") 269 | ax.plot([-0.3, 0.3], 2 * [beta_0 + beta_sad], color="b", label="sad") 270 | 271 | # Group b 272 | ax.plot([0.7, 1.3], 2 * [beta_0 + beta_b], color="r") 273 | ax.plot([0.7, 1.3], 2 * [beta_0 + beta_b + beta_sad + beta_b_sad], color="b") 274 | 275 | # Group c 276 | ax.plot([1.7, 2.3], 2 * [beta_0 + beta_c], color="r") 277 | ax.plot([1.7, 2.3], 2 * [beta_0 + beta_c + beta_sad + beta_c_sad], color="b") 278 | 279 | ax.legend(fontsize="large") 280 | 281 | return fig, ax 282 | 283 | 284 | def ancova_plot(df): 285 | # Logical masks 286 | is_a = df["group"] == "a" 287 | is_b = df["group"] == "b" 288 | is_c = df["group"] == "c" 289 | 290 | 
# ANCOVA equivalent linear model 291 | res = smf.ols("y ~ 1 + group + age", df).fit() 292 | beta_0, beta_b, beta_c, beta_age = res.params 293 | 294 | # Plot 295 | fig, ax = plt.subplots(figsize=[10, 8]) 296 | 297 | ax.scatter(df[is_a]["age"], df[is_a]["y"], label="Group A", color="r") 298 | ax.scatter(df[is_b]["age"], df[is_b]["y"], label="Group B", color="b") 299 | ax.scatter(df[is_c]["age"], df[is_c]["y"], label="Group C", color="g") 300 | 301 | ax.plot(ax.get_xlim(), [beta_age * x + beta_0 for x in ax.get_xlim()], color="r") 302 | ax.plot( 303 | ax.get_xlim(), 304 | [beta_age * x + beta_0 + beta_b for x in ax.get_xlim()], 305 | color="b", 306 | ) 307 | ax.plot( 308 | ax.get_xlim(), 309 | [beta_age * x + beta_0 + beta_c for x in ax.get_xlim()], 310 | color="g", 311 | ) 312 | 313 | ax.set_xlabel("age", fontsize="large") 314 | ax.set_ylabel("y", fontsize="large") 315 | ax.legend(fontsize="large") 316 | 317 | return fig, ax 318 | -------------------------------------------------------------------------------- /tests_as_linear/utils.py: -------------------------------------------------------------------------------- 1 | """ Utility functions. """ 2 | 3 | import re 4 | import json 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | def signed_rank(df): 10 | return np.sign(df) * df.abs().rank() 11 | 12 | 13 | def format_decimals_factory(num_decimals=1): 14 | return lambda x: "{1:.{0}f}".format(num_decimals, x) 15 | 16 | 17 | def tabulate_results(test_values, ols_results, names, coeff="x"): 18 | """ 19 | Tabulates results of statistical tests and equivalent linear regressions to 20 | demonstrate that the two methods are in fact equivalent. 21 | 22 | Parameters 23 | ---------- 24 | test_values : list 25 | List of values from the scipy statistical test to display. 26 | ols_results : statsmodels.RegressionResults or list thereof 27 | Result object(s) of equivalent linear regression to display. 28 | names : list 29 | List of strings to display. 
30 | coeff : str 31 | Name of coefficient whose test statistics should be displayed. Defaults 32 | to "x". 33 | 34 | Returns 35 | ------- 36 | table : pd.DataFrame 37 | """ 38 | # There may be only one OLS result. If so, wrap it up as a single list. 39 | if not isinstance(ols_results, list): 40 | ols_results = [ols_results] 41 | 42 | # Assert shapes 43 | assert len(test_values) == 5 44 | assert len(names) == len(ols_results) + 1 45 | 46 | # Construct and return table 47 | table = pd.DataFrame(index=names) 48 | table["value"] = [test_values[0]] + [res.params[coeff] for res in ols_results] 49 | table["p-values"] = [test_values[1]] + [res.pvalues[coeff] for res in ols_results] 50 | table["t-values"] = [test_values[2]] + [res.tvalues[coeff] for res in ols_results] 51 | table["0.025 CI"] = [test_values[3]] + [ 52 | res.conf_int().loc[coeff, 0] for res in ols_results 53 | ] 54 | table["0.975 CI"] = [test_values[4]] + [ 55 | res.conf_int().loc[coeff, 1] for res in ols_results 56 | ] 57 | 58 | return table 59 | 60 | 61 | def generate_toc(notebook="tests-as-linear.ipynb", max_header_levels=2): 62 | """ 63 | Generates a table of contents in Markdown. 64 | 65 | Assumes that headers begin with `#` symbols (e.g. there is no leading 66 | whitespace). Considers all symbols after the consecutive `#` symbols (there 67 | may be more than one) to be the header. 68 | 69 | Parameters 70 | ---------- 71 | notebook : str 72 | Path to notebook for which to generate a table of contents. 73 | max_header_levels : int 74 | Maximum number of header levels to show in table of contents (i.e. the 75 | depth of headers to display). 76 | 77 | Returns 78 | ------- 79 | toc : str 80 | Table of contents as a Markdown string. 
81 | """ 82 | with open(notebook, "r") as f: 83 | cells = json.load(f)["cells"] 84 | 85 | items = ["# Contents"] 86 | for cell in cells: 87 | if cell["cell_type"] == "markdown": 88 | for line in cell["source"]: 89 | match = re.search(r"^[#]{{1,{0}}} ".format(max_header_levels), line) 90 | if match: 91 | level = len(line) - len(line.lstrip("#")) 92 | link = line.strip(" #\n").replace(" ", "-") 93 | items.append( 94 | 2 * (level - 1) * " " 95 | + "- [" 96 | + line.strip(" #\n") 97 | + "](#" 98 | + link 99 | + ")" 100 | ) 101 | 102 | toc = "\n".join(items) 103 | return toc 104 | --------------------------------------------------------------------------------