├── .github └── workflows │ └── test_and_lint.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── docs │ ├── FAQ.md │ ├── basic_tutorial.md │ ├── function_guide.md │ ├── img │ │ ├── dist_overlay.png │ │ ├── ide_pycharm.png │ │ ├── package_import_viz.gif │ │ ├── vp.png │ │ └── vp_inv.png │ ├── index.md │ ├── install_dep.md │ ├── overview.md │ └── stylesheets │ │ └── al_extra.css └── mkdocs.yml ├── examples ├── bootdpci.ipynb ├── bwamcp.ipynb ├── bwbmcp.ipynb ├── bwimcp.ipynb ├── bwmcp.ipynb ├── bwmcppb.ipynb ├── corb.ipynb ├── hypothesize_notebook_general_examples.ipynb ├── l2drmci.ipynb ├── linconb.ipynb ├── lindepbt.ipynb ├── pb2gen.ipynb ├── pball.ipynb ├── pbcor.ipynb ├── rmmcppb.ipynb ├── spmcpa.ipynb ├── spmcpb.ipynb ├── spmcpi.ipynb ├── tmcppb.ipynb ├── winall.ipynb ├── wincor.ipynb ├── wwmcpbt.ipynb ├── wwmcppb.ipynb ├── ydbt.ipynb └── yuenbt.ipynb ├── hypothesize ├── __init__.py ├── compare_groups_with_single_factor │ ├── __init__.py │ └── _compare_groups_with_single_factor.py ├── compare_groups_with_two_factors │ ├── __init__.py │ └── _compare_groups_with_two_factors.py ├── measuring_associations │ ├── __init__.py │ └── _measuring_associations.py ├── tests │ ├── __init__.py │ ├── build_test_data.py │ ├── test_data │ │ ├── bootdpci.pkl │ │ ├── bwamcp.pkl │ │ ├── bwbmcp.pkl │ │ ├── bwimcp.pkl │ │ ├── bwmcp.pkl │ │ ├── bwmcppb.pkl │ │ ├── corb.pkl │ │ ├── l2drmci.pkl │ │ ├── linconb.pkl │ │ ├── lindepbt.pkl │ │ ├── pb2gen.pkl │ │ ├── pball.pkl │ │ ├── pbcor.pkl │ │ ├── rmmcppb.pkl │ │ ├── spmcpa.pkl │ │ ├── spmcpb.pkl │ │ ├── spmcpi.pkl │ │ ├── tmcppb.pkl │ │ ├── winall.pkl │ │ ├── wincor.pkl │ │ ├── wwmcpbt.pkl │ │ ├── wwmcppb.pkl │ │ ├── ydbt.pkl │ │ └── yuenbt.pkl │ └── test_funcs.py └── utilities.py ├── paper ├── paper.bib └── paper.md ├── requirements.txt └── setup.py /.github/workflows/test_and_lint.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: tests 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python version 3.9.13 20 | uses: actions/setup-python@v1 21 | with: 22 | python-version: 3.9.13 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install flake8 pytest 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | - name: Lint with flake8 29 | run: | 30 | # stop the build if there are Python syntax errors or undefined names 31 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 32 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 33 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
34 |     - name: Test with pytest
35 |       run: |
36 |         pytest
37 | 

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *.cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | 
59 | # DotEnv configuration
60 | .env
61 | 
62 | # Database
63 | *.db
64 | *.rdb
65 | 
66 | # Pycharm
67 | .idea
68 | 
69 | # VS Code
70 | .vscode/
71 | 
72 | # Spyder
73 | .spyproject/
74 | 
75 | # Jupyter NB Checkpoints
76 | .ipynb_checkpoints/
77 | 
78 | # exclude data from source control by default
79 | /data/
80 | 
81 | # Mac OS-specific storage files
82 | .DS_Store
83 | 
84 | # vim
85 | *.swp
86 | *.swo
87 | 
88 | # Mypy cache
89 | .mypy_cache/
90 | 
91 | # documentation build
92 | docs/site/
93 | 
94 | # R history file
95 | *.Rhistory
96 | 

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Feedback and contribution
2 | 
3 | Feedback, bug reports, and contributions are welcome via the
4 | [Hypothesize GitHub Repository](http://github.com/Alcampopiano/hypothesize/).
5 | 
6 | ## How to contribute new functions to Hypothesize
7 | 
8 | A great way to contribute would be to choose a function from the
9 | [WRS](https://dornsife.usc.edu/labs/rwilcox/software/) that does not yet exist in
10 | Hypothesize and convert it to Python. There is a current wish list
11 | [here](https://github.com/Alcampopiano/hypothesize/issues/2),
12 | but certainly any WRS function would be a welcome addition to the library. A list of the currently available
13 | functions in Hypothesize can be found in the documentation's
14 | [function reference](https://alcampopiano.github.io/hypothesize/function_guide/).
15 | 
16 | #### Create example data to be used in R and Python
17 | 
18 | It is helpful to be able to create some example data that can be used in both R and Python.
19 | One way to do this is to use Hypothesize's
20 | [create_example_data](https://alcampopiano.github.io/hypothesize/function_guide/#create_example_data) function.
21 | It will generate a DataFrame of random data (to be used in Python) as
22 | well as save NumPy arrays that can be read into R with the
23 | [RcppCNPy](https://cran.r-project.org/web/packages/RcppCNPy/index.html)
24 | and [Rcpp](https://cran.r-project.org/web/packages/Rcpp/index.html) libraries.
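As a rough sketch of that workflow (the `design_values` call below is taken from the example notebooks; the exact option for saving the NumPy arrays is documented in the function reference):

```python
from hypothesize.utilities import create_example_data

# random data for a 2x3 (J-by-K) design; one DataFrame column per cell
df = create_example_data(design_values=[2, 3])
df.head()
```

The same cell values can then be loaded into R (e.g., via RcppCNPy) so that the original WRS function and your Python port can be run against identical inputs.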
25 | 
26 | #### IDE for R and Python
27 | 
28 | It is convenient to use the same IDE when converting functions from R to Python.
29 | One suggestion is to use PyCharm's
30 | [r-language-for-intellij](https://plugins.jetbrains.com/plugin/6632-r-language-for-intellij)
31 | plugin. This makes it possible to have an interpreter and editor for
32 | both languages in the same IDE. Like so:
33 | 
34 | ![PyCharm with R and Python side by side](docs/docs/img/ide_pycharm.png)
35 | 
36 | Of course there are many ways that one might go about converting WRS functions to Python.
37 | These are merely suggestions.
38 | 
39 | ### Setting up your Git environment
40 | 
41 | 1. Install the latest version of Hypothesize locally using
42 | 
43 | ```
44 | $ pip install git+https://github.com/Alcampopiano/hypothesize/
45 | ```
46 | 
47 | 2. Fork the repository on GitHub and clone the fork to your local
48 | machine. For more details on forking see the [GitHub
49 | Documentation](https://help.github.com/en/articles/fork-a-repo).
50 | 
51 | ```
52 | $ git clone https://github.com/YOUR-USERNAME/hypothesize.git
53 | ```
54 | 
55 | 3. Create a sync to the original upstream repository by creating a so-called
56 | [remote](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/configuring-a-remote-for-a-fork).
57 | 
58 | ```
59 | $ git remote add upstream https://github.com/Alcampopiano/hypothesize.git
60 | $ git checkout master
61 | $ git pull upstream master
62 | ```
63 | 
64 | Now you will have all of the updates in the master branch of your local fork.
65 | Note that git will complain if you've committed changes to your local master
66 | branch that are not on the upstream repository. This is one reason why it's good practice to avoid
67 | working directly on your master branch.
68 | 
69 | ### Committing new code to Hypothesize
70 | 
71 | 1. Create a new local branch and commit changes to your remote branch:
72 | 
73 | ```
74 | $ git checkout -b <branch-name>
75 | ```
76 | 
77 | With this branch checked out, make the desired changes to the package.
78 | When you are happy with your changes, you can commit them to a remote branch by running
79 | 
80 | ```
81 | $ git add <modified-files>
82 | $ git commit -m "Some descriptive message about your change"
83 | $ git push origin <branch-name>
84 | ```
85 | 
86 | 2. Write a unit test for your code (optional)
87 | 
88 | Hypothesize uses `pytest` for unit testing. The strategy currently used for testing
89 | is to pickle results that are assumed to be correct and compare those
90 | against fresh results from the modified code (see the
91 | [tests](https://github.com/Alcampopiano/hypothesize/tree/master/hypothesize/tests) folder for examples).
92 | If you would like to write a test for your new code, you may follow the strategy
93 | described above or come up with another way to test your code. To run the test suite,
94 | first navigate to the "tests" directory, then use the `pytest` command from your terminal.
95 | 
96 | 3. Submit a pull request (PR) to merge your new branch into Hypothesize's master branch
97 | 
98 | For details on creating a PR see the GitHub documentation [Creating a pull
99 | request](https://help.github.com/en/articles/creating-a-pull-request).
100 | 
101 | 

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | 
2 | Copyright (c) 2020, Allan Campopiano
3 | All rights reserved.
4 | 
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 | 
8 | * Redistributions of source code must retain the above copyright notice, this
9 |   list of conditions and the following disclaimer.
10 | 
11 | * Redistributions in binary form must reproduce the above copyright notice, this
12 |   list of conditions and the following disclaimer in the documentation and/or
13 |   other materials provided with the distribution.
14 | 
15 | * Neither the name of hypothesize nor the names of its
16 |   contributors may be used to endorse or promote products derived from this
17 |   software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
23 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
26 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
28 | OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 
30 | 

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include LICENSE
3 | include requirements.txt
4 | recursive-include src *.py *.json *.ipynb *.html
5 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Hypothesize
2 | 
3 | [![status](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926/status.svg)](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
4 | ![tests](https://github.com/Alcampopiano/hypothesize/workflows/tests/badge.svg)
5 | [![PyPI version](https://img.shields.io/pypi/v/hypothesize?style=flat-square)](https://pypi.org/project/hypothesize/)
6 | [![PyPI - Downloads](https://img.shields.io/pypi/dw/hypothesize?style=flat-square)](https://pypistats.org/packages/hypothesize)
7 | [![license](https://img.shields.io/pypi/l/hypothesize?style=flat-square)](https://github.com/Alcampopiano/hypothesize/blob/master/LICENSE)
8 | 
9 | A Python package for hypothesis testing using robust statistics
10 | 
11 | ## Basic Example
12 | 
13 | ### A robust measure of association with winsorized correlation
14 | [Launch this example in Deepnote](https://deepnote.com/launch?name=wincor&url=https://github.com/Alcampopiano/hypothesize/blob/master/examples/wincor.ipynb)
15 | 
16 | 
17 | ```python
18 | from hypothesize.measuring_associations import wincor
19 | from hypothesize.utilities import create_example_data
20 | 
21 | # creating an example DataFrame with columns "cell_1" and "cell_2"
22 | df=create_example_data(2)
23 | 
24 | results=wincor(df.cell_1, df.cell_2)
25 | 
26 | # returning the correlation, number of observations, p-value, and winsorized covariance
27 | print(results)
28 | {'cor': 0.11, 'nval': 50, 'sig': 0.44, 'wcov': 0.01}
29 | ```
30 | 
31 | ## Documentation
32 | :book: Please visit the [Hypothesize documentation
site](https://Alcampopiano.github.io/hypothesize/).
33 | Note that each statistical test in the documentation can be launched
34 | directly in [Deepnote's](https://deepnote.com) hosted notebook environment, complete with sample data
35 | (as shown in the example above 👆).
36 | 
37 | ## Citing Hypothesize
38 | 
39 | [![status](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926/status.svg)](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
40 | 
41 | If you use Hypothesize in academic work, please use the following citation:
42 | 
43 | Campopiano, A., & Wilcox, R. R. (2020). Hypothesize: Robust Statistics for Python.
44 | Journal of Open Source Software, 5(50), 2241, https://doi.org/10.21105/joss.02241
45 | 
46 | BibTex:
47 | 
48 | ```bib
49 | @article{Campopiano2020,
50 |   doi = {10.21105/joss.02241},
51 |   url = {https://doi.org/10.21105/joss.02241},
52 |   year = {2020},
53 |   publisher = {The Open Journal},
54 |   volume = {5},
55 |   number = {50},
56 |   pages = {2241},
57 |   author = {Allan Campopiano and Rand R. Wilcox},
58 |   title = {Hypothesize: Robust Statistics for Python},
59 |   journal = {Journal of Open Source Software}
60 | }
61 | ```
62 | 

--------------------------------------------------------------------------------
/docs/docs/FAQ.md:
--------------------------------------------------------------------------------
1 | # Frequently asked questions
2 | 
3 | No attempt is made to fully explain the following
4 | concepts, but hopefully this gets
5 | you started. The Internet has plenty of resources on these topics
6 | if you would like to learn more.
7 | 
8 | ## What is a trimmed mean?
9 | 
10 | The trimmed mean involves calculating the sample mean after
11 | removing a proportion of values from each
12 | tail of the distribution. In symbols, the trimmed mean is expressed as
13 | follows:
14 | 
15 | $$
16 | \bar{X}_t = \frac{X_{(g+1)} + \dots + X_{(n-g)}}{n-2g}
17 | $$
18 | 
19 | where $X_1, X_2, \dots, X_n$ is a random sample and
20 | $X_{(1)} \le X_{(2)} \le \dots \le X_{(n)}$ are the observations in
21 | ascending order. The proportion to trim is $\gamma\,(0 \lt \gamma \lt .5)$
22 | and $g = \lfloor \gamma n \rfloor$, that is, $\gamma n$ rounded down to the nearest integer.
23 | 
24 | ## What is bootstrapping?
25 | 
26 | In the context of hypothesis testing, bootstrapping generally
27 | involves taking many random samples (with replacement)
28 | from the data at hand in order to estimate a sampling
29 | distribution of interest. This is in contrast to traditional methods,
30 | which assume the shape of the particular sampling distribution under study.
31 | Once we have an empirically derived sampling distribution,
32 | obtaining CIs and p-values is relatively straightforward.
33 | 
34 | ## What is a contrast matrix?
35 | 
36 | First, it is helpful to imagine your
37 | design arranged into a JxK matrix.
38 | 
39 | $$
40 | A=\begin{bmatrix}
41 | a_{1,1} & a_{1,2} & ... & a_{1,K} \\
42 | a_{2,1} & a_{2,2} & ... & a_{2,K} \\
43 | a_{J,1} & a_{J,2} & ... & a_{J,K}
44 | \end{bmatrix}
45 | $$
46 | 
47 | A contrast matrix specifies which cells (or elements) in the above
48 | design are to be compared. The rows in a contrast matrix
49 | correspond to the cells in your design. The columns correspond
50 | to the contrasts that you wish to make.
51 | 
52 | ### Examples of contrast matrices for different designs
53 | 
54 | Matrix notation is used to explain which cells are
55 | being compared, followed by the corresponding
56 | contrast matrix.
57 | 
58 | === "design with 2 groups"
59 | 
60 |     $\Large{a_{1,1} - a_{1,2}}$
61 | 
62 |     | contrast 1 |
63 |     |------------|
64 |     | 1          |
65 |     | -1         |
66 | 
67 | === "design with 3 groups"
68 | 
69 |     1. $\Large{a_{1,1} - a_{1,2}}$
70 |     2. $\Large{a_{1,1} - a_{1,3}}$
71 |     3. $\Large{a_{1,2} - a_{1,3}}$
72 | 
73 |     | contrast 1 | contrast 2 | contrast 3 |
74 |     |------------|------------|------------|
75 |     | 1          | 1          | 0          |
76 |     | -1         | 0          | 1          |
77 |     | 0          | -1         | -1         |
78 | 
79 | === "2x2 design"
80 |     **Factor A**
81 | 
82 |     $\Large{(a_{1,1} + a_{1,2})-(a_{2,1} + a_{2,2})}$
83 | 
84 |     | contrast 1 |
85 |     |------------|
86 |     | 1          |
87 |     | 1          |
88 |     | -1         |
89 |     | -1         |
90 | 
91 |     **Factor B**
92 | 
93 |     $\Large{(a_{1,1} + a_{2,1})-(a_{1,2} + a_{2,2})}$
94 | 
95 |     | contrast 1 |
96 |     |------------|
97 |     | 1          |
98 |     | -1         |
99 |     | 1          |
100 |     | -1         |
101 | 
102 |     **Interaction**
103 | 
104 |     $\Large{(a_{1,1} + a_{2,2})-(a_{1,2} + a_{2,1})}$
105 | 
106 |     That is, the difference of the differences
107 | 
108 |     | contrast 1 |
109 |     |------------|
110 |     | 1          |
111 |     | -1         |
112 |     | -1         |
113 |     | 1          |
114 | 
115 | === "2x3 design"
116 |     **Factor A**
117 | 
118 |     $\Large{(a_{1,1} + a_{1,2} + a_{1,3})-(a_{2,1} + a_{2,2} + a_{2,3})}$
119 | 
120 |     | contrast 1 |
121 |     |------------|
122 |     | 1          |
123 |     | 1          |
124 |     | 1          |
125 |     | -1         |
126 |     | -1         |
127 |     | -1         |
128 | 
129 |     **Factor B**
130 | 
131 |     1. $\Large{(a_{1,1} + a_{2,1})-(a_{1,2} + a_{2,2})}$
132 |     2. $\Large{(a_{1,1} + a_{2,1})-(a_{1,3} + a_{2,3})}$
133 |     3. $\Large{(a_{1,2} + a_{2,2})-(a_{1,3} + a_{2,3})}$
134 | 
135 |     | contrast 1 | contrast 2 | contrast 3 |
136 |     |------------|------------|------------|
137 |     | 1          | 1          | 0          |
138 |     | -1         | 0          | 1          |
139 |     | 0          | -1         | -1         |
140 |     | 1          | 1          | 0          |
141 |     | -1         | 0          | 1          |
142 |     | 0          | -1         | -1         |
143 | 
144 |     **Interactions**
145 | 
146 |     1. $\Large{(a_{1,1} + a_{2,2})-(a_{1,2} + a_{2,1})}$
147 |     2. $\Large{(a_{1,1} + a_{2,3})-(a_{1,3} + a_{2,1})}$
148 |     3. $\Large{(a_{1,2} + a_{2,3})-(a_{1,3} + a_{2,2})}$
149 | 
150 |     | contrast 1 | contrast 2 | contrast 3 |
151 |     |------------|------------|------------|
152 |     | 1          | 1          | 0          |
153 |     | -1         | 0          | 1          |
154 |     | 0          | -1         | -1         |
155 |     | -1         | -1         | 0          |
156 |     | 1          | 0          | -1         |
157 |     | 0          | 1          | 1          |
158 | 
159 | 
160 | !!! success "Not a fan of contrast matrices?"
161 |     Don't worry, Hypothesize can generate all linear
162 |     contrasts automatically (see functions [con1way](https://alcampopiano.github.io/hypothesize/function_guide/#con1way)
163 |     and [con2way](https://alcampopiano.github.io/hypothesize/function_guide/#con2way)). However, it is useful to
164 |     understand this concept so that you know
165 |     which comparisons are being made and
166 |     how to specify your own if necessary.
167 | 
168 | 
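For example, a brief sketch of generating contrasts automatically (the return convention shown for `con2way` is assumed here to follow its WRS counterpart, one matrix per effect; check the function reference for details):

```python
from hypothesize.utilities import con1way, con2way

# all pairwise linear contrasts for a single factor with three groups
con = con1way(3)

# contrast matrices for factor A, factor B, and the interaction in a 2x3 design
conA, conB, conAB = con2way(2, 3)
```

These matrices can then be passed to functions that accept a `con` argument, such as `linconb`.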
--------------------------------------------------------------------------------
/docs/docs/basic_tutorial.md:
--------------------------------------------------------------------------------
1 | # Basic Tutorial
2 | 
3 | The following tutorial demonstrates how to perform a
4 | robust hypothesis test using 20% trimmed means and
5 | the bootstrap-t test. The data correspond to a
6 | 2 (between-subjects) x 3 (within-subjects) factorial design.
7 | 
8 | ### Getting your data into Hypothesize
9 | 
10 | In Hypothesize, input data are always specified as a Pandas DataFrame or Series.
11 | In this example, we have a 2x3 factorial design, so the data take the form of
12 | a six-column DataFrame (i.e., J levels x K levels). Using Pandas, you can read your data into Python and
13 | use one of the appropriate functions from Hypothesize. In this case we will use the function `bwmcp`,
14 | but there are [many others](function_guide.md) to choose from.
15 | 
16 | !!! note "What about my column names?"
17 |     Don't worry, Hypothesize doesn't make use of your column names.
18 |     Feel free to name them however you like!
19 | 
20 | 
21 | ```python
22 | import pandas as pd
23 | 
24 | df=pd.read_csv('my_data.csv')
25 | 
26 | df.head()
27 | ```
28 | 
29 | | cell_1_1   | cell_1_2   | cell_1_3   | cell_2_1   | cell_2_2   | cell_2_3   |
30 | |------------|------------|------------|------------|------------|------------|
31 | | 0.04       | 0.90       | 0.79       | 0.51       | 0.33       | 0.23       |
32 | | 0.76       | 0.29       | 0.84       | 0.03       | 0.5        | 0.73       |
33 | | 0.71       | 0.59       | 0.11       | 0.89       | 0.76       | 0.04       |
34 | | 0.17       | 0.26       | 0.88       | 0.28       | 0.1        | 0.21       |
35 | | 0.95       | 0.22       | 0.83       | 0.59       | 0.65       | 0.20       |
36 | 
37 | ```python
38 | from hypothesize.compare_groups_with_two_factors import bwmcp
39 | 
40 | results=bwmcp(J=2, K=3, x=df)
41 | ```
42 | 
43 | ### Examining your results
44 | 
45 | The results are returned as a Python dictionary containing simple Python objects
46 | or DataFrames (when the results are best given as a matrix). For example, here are the
47 | previously computed results for the interaction returned as a DataFrame.
48 | 49 | ```python 50 | results['factor_AB'] 51 | ``` 52 | 53 | | con_num | psihat | se | test | crit_value | p_value | 54 | |---------- |----------- |--------- |---------- |------------- |---------- | 55 | | 0 | -0.100698 | 0.126135 | -0.798336 | 2.3771 | 0.410684 | 56 | | 1 | -0.037972 | 0.151841 | -0.250078 | 2.3771 | 0.804674 | 57 | | 2 | 0.0627261 | 0.135392 | 0.463291 | 2.3771 | 0.659432 | 58 | 59 | 60 | 62 | 63 | -------------------------------------------------------------------------------- /docs/docs/img/dist_overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/dist_overlay.png -------------------------------------------------------------------------------- /docs/docs/img/ide_pycharm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/ide_pycharm.png -------------------------------------------------------------------------------- /docs/docs/img/package_import_viz.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/package_import_viz.gif -------------------------------------------------------------------------------- /docs/docs/img/vp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/vp.png -------------------------------------------------------------------------------- /docs/docs/img/vp_inv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/vp_inv.png -------------------------------------------------------------------------------- /docs/docs/index.md: -------------------------------------------------------------------------------- 1 | # Hypothesize: robust statistics in Python 2 | 3 | ![Screenshot](img/dist_overlay.png) 4 | 5 | Hypothesize is a robust statistics library for 6 | Python based on Rand R. Wilcox's R package [WRS](https://dornsife.usc.edu/labs/rwilcox/software/). 7 | With Hypothesize you can compare groups and 8 | measure associations using methods that outperform 9 | traditional statistical approaches in terms of power 10 | and accuracy. 11 | 12 | For more information on robust methods please see Wilcox's book 13 | [Introduction to Robust Estimation and Hypothesis Testing](https://play.google.com/store/books/details?id=8f8nBb4__EYC&gl=ca&hl=en-CA&source=productsearch&utm_source=HA_Desktop_US&utm_medium=SEM&utm_campaign=PLA&pcampaignid=MKTAD0930BO1&gclid=CjwKCAiA44LzBRB-EiwA-jJipJzyqx9kwNMq5MMU7fG2RrwBK9F7sirX4pfhS8wO7k9Uz_Sqf2P28BoCYzcQAvD_BwE&gclsrc=aw.ds). 14 | 15 | ## Getting Started 16 | 17 | - [Overview](overview.md) 18 | - [Installation](install_dep.md) 19 | - [Basic Tutorial](basic_tutorial.md#) 20 | 21 | ## User Guide 22 | 23 | - [Function reference](function_guide.md) 24 | - [Frequently asked questions](FAQ.md) 25 | 26 | ## Bug reports and Questions 27 | Hypothesize is BSD-licenced and the source code is available 28 | on [GitHub](https://github.com/Alcampopiano/hypothesize). 
29 | For issues and questions,
30 | please use [GitHub Issues](https://github.com/Alcampopiano/hypothesize/issues).
31 | 
32 | ## Citing Hypothesize
33 | 
34 | [![status](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926/status.svg)](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
35 | 
36 | If you use Hypothesize in academic work, please use the following citation:
37 | 
38 | Campopiano, A., & Wilcox, R. R. (2020). Hypothesize: Robust Statistics for Python.
39 | Journal of Open Source Software, 5(50), 2241, https://doi.org/10.21105/joss.02241
40 | 
41 | BibTex:
42 | 
43 | ```bib
44 | @article{Campopiano2020,
45 |   doi = {10.21105/joss.02241},
46 |   url = {https://doi.org/10.21105/joss.02241},
47 |   year = {2020},
48 |   publisher = {The Open Journal},
49 |   volume = {5},
50 |   number = {50},
51 |   pages = {2241},
52 |   author = {Allan Campopiano and Rand R. Wilcox},
53 |   title = {Hypothesize: Robust Statistics for Python},
54 |   journal = {Journal of Open Source Software}
55 | }
56 | ```
57 | 
58 | ## Contributing to Hypothesize
59 | 
60 | The best way to contribute to Hypothesize is to take any function from the WRS collection
61 | and convert it to Python. For more details, please see
62 | [CONTRIBUTING.md](https://github.com/Alcampopiano/hypothesize/blob/master/CONTRIBUTING.md)
63 | in the GitHub repository.

--------------------------------------------------------------------------------
/docs/docs/install_dep.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | 
3 | Hypothesize can be installed using `pip`:
4 | 
5 | ```
6 | $ pip install hypothesize
7 | ```
8 | 
9 | # Dependencies
10 | 
11 | Hypothesize has the following dependencies,
12 | all of which are installed automatically
13 | with the above installation command:
14 | 
15 | - python 3.6 or newer
16 | - [NumPy](https://numpy.org/)
17 | - [Pandas](https://pandas.pydata.org/)
18 | - [SciPy](https://www.scipy.org/)
19 | - [more-itertools](https://pypi.org/project/more-itertools/)
20 | 

--------------------------------------------------------------------------------
/docs/docs/overview.md:
--------------------------------------------------------------------------------
1 | # Overview
2 | 
3 | The benefits of using robust methods for hypothesis testing
4 | have been known for the last half century.
5 | They have been shown to substantially increase power and accuracy when compared to
6 | traditional approaches.
7 | The issues of robustness and the functions in this library are described in detail in Rand R. Wilcox's book
8 | [Introduction to Robust Estimation and Hypothesis Testing](https://play.google.com/store/books/details?id=8f8nBb4__EYC&gl=ca&hl=en-CA&source=productsearch&utm_source=HA_Desktop_US&utm_medium=SEM&utm_campaign=PLA&pcampaignid=MKTAD0930BO1&gclid=CjwKCAiA44LzBRB-EiwA-jJipJzyqx9kwNMq5MMU7fG2RrwBK9F7sirX4pfhS8wO7k9Uz_Sqf2P28BoCYzcQAvD_BwE&gclsrc=aw.ds).
9 | 
10 | The code and function names in Hypothesize are based on Wilcox's R functions in the [WRS](https://dornsife.usc.edu/labs/rwilcox/software/) package.
11 | Hypothesize simply brings many of these helpful and well-studied robust methods to the Python ecosystem.
12 | In addition, Hypothesize provides a user-friendly API and package structure,
13 | as well as one-click, [ready-to-run examples](function_guide.md) for every top-level
14 | function.
15 | 
16 | ## Hypothesize is easy to use
17 | 
18 | Hypothesize's API is friendly and
19 | consistent, making it easy for you to discover
20 | and use robust functions that are appropriate for
21 | your statistical design.
22 | 
23 | ### Package Structure
24 | 
25 | Hypothesize organizes functions
26 | based on the statistical design. The following visualizations show
27 | how the package is structured and how
28 | this is reflected in practice when importing from the library:
29 | 
30 | ```mermaid
31 | graph TB
32 | linkStyle default interpolate basis
33 | A[Hypothesize]
34 | A --> B(compare groups with single factor)
35 | A --> C(compare groups with two factors)
36 | A --> D(measure associations)
37 | 
38 | B --> F(f1)
39 | B --> G(f2)
40 | B --> H(fn)
41 | 
42 | C --> F1(f1)
43 | C --> G2(f2)
44 | C --> H3(fn)
45 | 
46 | D --> F5(f1)
47 | D --> G6(f2)
48 | D --> H7(fn)
49 | ```
50 | 
51 | ---
52 | ![Screenshot](img/package_import_viz.gif)
53 | 
54 | ---
55 | ## Hypothesize is flexible and powerful
56 | 
57 | A broad range of choices exists in Hypothesize, both in
58 | terms of the supported statistical designs and the options for fine-grained control over how
59 | tests are computed. For example:
60 | 
61 | - Where applicable, many hypothesis tests allow the specification of an estimator. That is,
62 | users may choose when to use the mean, median, trimmed mean, winsorized correlation,
63 | percentage bend correlation, or any other compatible statistical estimator (see the sketch below).
64 | 
65 | - Single- and multi-factor designs are supported, including independent,
66 | dependent, and mixed groups.
67 | 
68 | - Family-wise error can be robustly controlled with sequentially rejective methods (Benjamini & Hochberg, 1995; Hochberg, 1988; Rom, 1990).
69 | 
70 | 
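As a brief sketch of the first point, using calls that appear in the example notebooks:

```python
from hypothesize.utilities import create_example_data, trim_mean
from hypothesize.compare_groups_with_single_factor import l2drmci

# example DataFrame with columns "cell_1" and "cell_2"
df = create_example_data(2)

# compare two groups, here using the 20% trimmed mean as the estimator;
# another compatible estimator could be passed in its place
results = l2drmci(df.cell_1, df.cell_2, trim_mean, .2)
```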
71 | 72 | Visit the [tutorial section](basic_tutorial.md) and the 73 | [function documentation](function_guide.md) for complete examples 74 | using Hypothesize. -------------------------------------------------------------------------------- /docs/docs/stylesheets/al_extra.css: -------------------------------------------------------------------------------- 1 | /* 2 | .button { 3 | display: block; 4 | width: 100%; 5 | font-size: 16px; 6 | background-color: #5867be; 7 | color: #ffffff !important; 8 | padding: 10px; 9 | box-shadow: 10; 10 | border-radius: 2px; 11 | text-align: center; 12 | border: none; 13 | } 14 | */ 15 | 16 | .button { 17 | display: block; 18 | text-align: center; 19 | } 20 | 21 | /* 22 | .button:hover { 23 | background-color: grey; 24 | color: white !important; 25 | 26 | } 27 | */ 28 | 29 | .mermaid svg { 30 | text-align: center !important; 31 | } 32 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Hypothesize 2 | google_analytics: 3 | - UA-165284209-1 4 | - auto 5 | 6 | nav: 7 | - Home: index.md 8 | - Overview: overview.md 9 | - Tutorial: basic_tutorial.md 10 | - Function Reference: function_guide.md 11 | - FAQ: FAQ.md 12 | 13 | repo_name: Github 14 | repo_url: https://github.com/Alcampopiano/hypothesize 15 | edit_uri: "" 16 | theme: 17 | logo: img/vp.png 18 | favicon: img/vp_inv.png 19 | name: material 20 | palette: 21 | primary: black 22 | accent: red 23 | copyright: "Hypothesize is licensed under the BSD 3-Clause license" 24 | 25 | markdown_extensions: 26 | - admonition 27 | - codehilite: 28 | guess_lang: false 29 | - footnotes 30 | - toc: 31 | permalink: true 32 | - pymdownx.tabbed 33 | - pymdownx.inlinehilite 34 | - pymdownx.arithmatex 35 | - pymdownx.superfences: 36 | custom_fences: 37 | - name: mermaid 38 | class: mermaid 39 | format: !!python/name:pymdownx.superfences.fence_div_format 40 | 41 | extra_css: 42 | - stylesheets/al_extra.css 43 | - https://unpkg.com/mermaid@7.1.2/dist/mermaid.css 44 | extra_javascript: 45 | - https://unpkg.com/mermaid@7.1.2/dist/mermaid.min.js 46 | - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-MML-AM_CHTML -------------------------------------------------------------------------------- /examples/bootdpci.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bootdpci.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588616000402}],"collapsed_sections":[],"authorship_tag":"ABX9TyODOlntM0MT9CHgvn8VVl/O"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import bootdpci"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bootdpci(df, trim_mean, 
.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwamcp.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwamcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632200919},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyPVfyF9avy0DbfAuarJl8iy"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import bwamcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwamcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwbmcp.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwbmcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632296831},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyMCKdZY/KiF2LwgsqoxM9z0"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwbmcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwbmcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results[0]['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwimcp.ipynb: -------------------------------------------------------------------------------- 1 | 
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwimcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632574529},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNi+qxTq2XoeXVqKQ9JR4Ob"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":228},"outputId":"edf97991-0dd9-4813-e161-38e0804b024c","executionInfo":{"status":"ok","timestamp":1588632621496,"user_tz":240,"elapsed":4200,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["!pip install hypothesize"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Collecting hypothesize\n"," Downloading https://files.pythonhosted.org/packages/00/64/d9067b4a72585b2003bbd1823cceaada7f0c9a28441921201df42d31332e/hypothesize-0.1.dev23-py3-none-any.whl\n","Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.0.3)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.18.3)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.4.1)\n","Requirement already satisfied: more-itertools in /usr/local/lib/python3.6/dist-packages (from hypothesize) (8.2.0)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->hypothesize) (2018.9)\n","Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas->hypothesize) (2.8.1)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas->hypothesize) (1.12.0)\n","Installing collected packages: hypothesize\n","Successfully installed hypothesize-0.1.dev23\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import bwimcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwimcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwmcp.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwmcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632373867},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNq9mbugbYwk9HTz7/3Brgd"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install 
hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwmcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwmcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwmcppb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwmcppb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632676077},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNJh6K+fRRnm1Mgfr3EEWRU"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwmcppb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/corb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"corb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOuT/tbSiQ385N9p7Y0OE2s"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import corb, wincor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=corb(wincor, df.cell_1, 
df.cell_2, .05, 1000, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/hypothesize_notebook_general_examples.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.4"},"colab":{"name":"hypothesize_notebook_for_colab.ipynb","provenance":[{"file_id":"https://github.com/Alcampopiano/hypothesize/blob/master/examples/hypothesize_notebook_for_colab.ipynb","timestamp":1589199962420}],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"SR6bFvjJtqjq","colab_type":"text"},"source":["## Hypothesize tutorial\n","\n","This notebook provides a few examples of how to use Hypothesize with a few common statistical designs. There are many more functions that could work for these designs but hopefully this helps to get you started.\n","\n"]},{"cell_type":"code","metadata":{"id":"AXTC2Xzu3zM9","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"9TPllfTh3zNE","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"zNFQXwRd3zNJ","colab_type":"text"},"source":["### How to compare two groups"]},{"cell_type":"markdown","metadata":{"id":"pA-fXciM3zNK","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"ZrMIEtaw3zNM","colab_type":"code","outputId":"bb369725-d23c-4d9b-c0f1-2a4f917587f6","executionInfo":{"status":"ok","timestamp":1589200167867,"user_tz":240,"elapsed":4569,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["#df=pd.read_csv(\"/home/allan/two_groups_data.csv\")\n","df=create_example_data(design_values=2)\n","\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2
00.6087980.582123
10.6228260.854637
20.2641650.655077
30.7941850.378080
40.9076870.468066
\n","
"],"text/plain":[" cell_1 cell_2\n","0 0.608798 0.582123\n","1 0.622826 0.854637\n","2 0.264165 0.655077\n","3 0.794185 0.378080\n","4 0.907687 0.468066"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"b7DXVXTP3zNR","colab_type":"text"},"source":["#### Import the desired function and pass in the data for each group\n","- This example uses the bootstrapped-t method with 20% trimmed means\n","- The output is a dictionary containing the results (95% confidence interval, p_value, test statistics, etc...)"]},{"cell_type":"code","metadata":{"id":"2hapgjCg3zNU","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"cb66df69-d846-411c-a603-9b0007a1cad9","executionInfo":{"status":"ok","timestamp":1589200168221,"user_tz":240,"elapsed":4920,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["from hypothesize.compare_groups_with_single_factor import yuenbt\n","\n","results=yuenbt(df.cell_1, df.cell_2)\n","\n","results['ci']"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[-0.09190770159731171, 0.25635146839797]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"rCYUwGzw3zNY","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"iU8nQykk3zNZ","colab_type":"text"},"source":["### How to compare three groups"]},{"cell_type":"markdown","metadata":{"id":"GOw1Y9_v3zNb","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"akjpBynJ3zNd","colab_type":"code","outputId":"6209634a-446c-42fb-d106-2cafa7350431","executionInfo":{"status":"ok","timestamp":1589200168223,"user_tz":240,"elapsed":4916,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/one_way_data.csv\")\n","df=create_example_data(design_values=3)\n","\n","df.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2cell_3
00.2651090.0889140.480468
10.1199880.4827730.079476
20.1095330.5218340.762804
30.1524540.1775960.741767
40.3554030.5209910.380219
\n","
"],"text/plain":[" cell_1 cell_2 cell_3\n","0 0.265109 0.088914 0.480468\n","1 0.119988 0.482773 0.079476\n","2 0.109533 0.521834 0.762804\n","3 0.152454 0.177596 0.741767\n","4 0.355403 0.520991 0.380219"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"jRxALG1a3zNh","colab_type":"text"},"source":["#### Import the desired functions and pass in the inputs\n","- One approach is to use a set of linear contrasts that will test all pairwise comparisons\n","- Then, the bootstrap-t method and the 20% trimmed mean can be used\n","- CIs are adjusted to control for FWE\n","- All pairwise contrasts can be created automatically using the `con1way` function\n","- The results are a dictionary of DataFrames that contain various statistics (p_value, CIs, standard error, test statistics, etc)"]},{"cell_type":"code","metadata":{"id":"NJ5LK8G_3zNi","colab_type":"code","colab":{}},"source":["from hypothesize.compare_groups_with_single_factor import linconb\n","from hypothesize.utilities import con1way\n","\n","results=linconb(df, con=con1way(3))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"et1Acy1S3zNm","colab_type":"code","outputId":"b562fb9f-7d8a-4203-db4f-2e4cf157e96f","executionInfo":{"status":"ok","timestamp":1589200168984,"user_tz":240,"elapsed":5668,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['test']"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
contrast_indextestsep_value
00.00.4177450.0819210.691152
11.0-0.0433810.0852250.959933
22.0-0.5013320.0756360.602671
\n","
"],"text/plain":[" contrast_index test se p_value\n","0 0.0 0.417745 0.081921 0.691152\n","1 1.0 -0.043381 0.085225 0.959933\n","2 2.0 -0.501332 0.075636 0.602671"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"CCMzuKYX3zNq","colab_type":"code","outputId":"2f0d4212-cb97-479a-aeef-aace296a05a6","executionInfo":{"status":"ok","timestamp":1589200168987,"user_tz":240,"elapsed":5664,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['psihat']"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
contrast_indexpsihatci_lowci_up
00.00.034222-0.1681680.236612
11.0-0.003697-0.2142510.206857
22.0-0.037919-0.2247840.148946
\n","
"],"text/plain":[" contrast_index psihat ci_low ci_up\n","0 0.0 0.034222 -0.168168 0.236612\n","1 1.0 -0.003697 -0.214251 0.206857\n","2 2.0 -0.037919 -0.224784 0.148946"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"d-AMqtzP3zNv","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"XO-FNoJw3zNw","colab_type":"text"},"source":["### How to compare groups in a factorial design"]},{"cell_type":"markdown","metadata":{"id":"qJcHGgDv3zNx","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"jE-FN9Lx3zNz","colab_type":"code","outputId":"b328e4c9-97d3-4cf8-b1ce-70bd1c44ea06","executionInfo":{"status":"ok","timestamp":1589200168990,"user_tz":240,"elapsed":5661,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/two_way_data.csv\")\n","df=create_example_data(design_values=[2,3])\n","\n","df.head()"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1_1cell_1_2cell_1_3cell_2_1cell_2_2cell_2_3
00.8275240.4762940.1317200.4109990.3203060.370742
10.6322810.5883680.6626480.2425470.2702920.700103
20.0730640.4720470.0539420.0690970.8515960.962723
30.8433770.0959560.6174340.7652790.4207720.993871
40.1907090.0137270.2553850.5779160.2182770.125772
\n","
"],"text/plain":[" cell_1_1 cell_1_2 cell_1_3 cell_2_1 cell_2_2 cell_2_3\n","0 0.827524 0.476294 0.131720 0.410999 0.320306 0.370742\n","1 0.632281 0.588368 0.662648 0.242547 0.270292 0.700103\n","2 0.073064 0.472047 0.053942 0.069097 0.851596 0.962723\n","3 0.843377 0.095956 0.617434 0.765279 0.420772 0.993871\n","4 0.190709 0.013727 0.255385 0.577916 0.218277 0.125772"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"aaVxg64m3zN2","colab_type":"text"},"source":["#### Import the desired function and pass in the data\n","- This example uses a 2-by-3 design\n","- One approach is to use a set of linear contrasts that will test all main effects and interactions\n","- Then, the bootstrap-t method and the 20% trimmed mean can be used\n","- The results are a dictionary of DataFrames that contain various statistics for each factor and the interactions"]},{"cell_type":"code","metadata":{"id":"X_muz_Lz3zN4","colab_type":"code","colab":{}},"source":["from hypothesize.compare_groups_with_two_factors import bwmcp\n","\n","results=bwmcp(J=2, K=3, x=df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"0Sm5AHgQ3zN8","colab_type":"code","outputId":"d39a4bc9-8313-479d-ba7d-5da63ca6c85b","executionInfo":{"status":"ok","timestamp":1589200173973,"user_tz":240,"elapsed":10635,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":79}},"source":["results['factor_A']"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
con_numpsihatsetestcrit_valuep_value
00.00.1732070.1280721.3524181.9600250.15192
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 0.173207 0.128072 1.352418 1.960025 0.15192"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"Ff6ipBF23zN_","colab_type":"code","outputId":"b7712848-0549-4c3d-9c84-bb6bb1e60b69","executionInfo":{"status":"ok","timestamp":1589200173974,"user_tz":240,"elapsed":10630,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['factor_B']"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
con_numpsihatsetestcrit_valuep_value
00.0-0.0675020.120091-0.5620912.4940320.559265
11.00.0393980.1163280.3386802.4940320.721202
22.00.1069000.0984911.0853732.4940320.307179
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 -0.067502 0.120091 -0.562091 2.494032 0.559265\n","1 1.0 0.039398 0.116328 0.338680 2.494032 0.721202\n","2 2.0 0.106900 0.098491 1.085373 2.494032 0.307179"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"2cKKYZh83zOF","colab_type":"code","outputId":"9be1a209-ab26-4fc9-c31a-d6809d2b3c94","executionInfo":{"status":"ok","timestamp":1589200173975,"user_tz":240,"elapsed":10624,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['factor_AB']"],"execution_count":13,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
con_numpsihatsetestcrit_valuep_value
00.0-0.1832420.120091-1.5258692.39830.118531
11.0-0.1635250.116328-1.4057202.39830.186978
22.00.0197180.0984910.2001962.39830.833055
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 -0.183242 0.120091 -1.525869 2.3983 0.118531\n","1 1.0 -0.163525 0.116328 -1.405720 2.3983 0.186978\n","2 2.0 0.019718 0.098491 0.200196 2.3983 0.833055"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"markdown","metadata":{"id":"uOyKCT9M3zOJ","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"JZxF8Ygi3zOK","colab_type":"text"},"source":["### How to compute a robust correlation"]},{"cell_type":"markdown","metadata":{"id":"a2WTERe43zOL","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"FU8Ey3iI3zON","colab_type":"code","outputId":"eb04b788-6f9d-446c-83fd-1576065361fc","executionInfo":{"status":"ok","timestamp":1589200173975,"user_tz":240,"elapsed":10618,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/two_groups_data.csv\")\n","df=create_example_data(design_values=2)\n","\n","df.head()"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2
00.4022840.049092
10.2082780.550764
20.9584820.986547
30.9577590.277685
40.7028110.749065
\n","
"],"text/plain":[" cell_1 cell_2\n","0 0.402284 0.049092\n","1 0.208278 0.550764\n","2 0.958482 0.986547\n","3 0.957759 0.277685\n","4 0.702811 0.749065"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"qY-7yf8Q3zOQ","colab_type":"text"},"source":["#### Import the desired function and pass in the data for each group\n","- One approach is to winsorize the x and y data\n","- A heteroscedastic method for testing zero correlation is also provided in this package but not shown here \n"," - Please see the function `corb` which uses the percentile bootstrap to compute a 1-alpha CI and p_value for any correlation \n","- The output is a dictionary containing various statistics (the winsorized correlation, winsorized covariance, etc...)"]},{"cell_type":"code","metadata":{"id":"mMeESqd33zOR","colab_type":"code","outputId":"a34a06fa-0113-4201-ce0b-e0d3f5d41930","executionInfo":{"status":"ok","timestamp":1589200173976,"user_tz":240,"elapsed":10612,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["from hypothesize.measuring_associations import wincor\n","\n","results=wincor(df.cell_1, df.cell_2)\n","\n","results['cor']"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.2025744763450888"]},"metadata":{"tags":[]},"execution_count":15}]}]} -------------------------------------------------------------------------------- /examples/l2drmci.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"l2drmci.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPdELHUc+SP48pbwUqZCoT5"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import l2drmci"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=l2drmci(df.cell_1, df.cell_2, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/linconb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"linconb.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPH7skGRd9m7ywyto/ckjRS"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from 
hypothesize.utilities import create_example_data, con1way\n","from hypothesize.compare_groups_with_single_factor import linconb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=linconb(df, con1way(3))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['psihat']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3zbT5WdQvTVv","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/lindepbt.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"lindepbt.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588618128796}],"collapsed_sections":[],"authorship_tag":"ABX9TyO26ovh0/ccrrbqL9dVEnIm"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import lindepbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=lindepbt(df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['psihat']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3zbT5WdQvTVv","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/pb2gen.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pb2gen.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588613922534}],"collapsed_sections":[],"authorship_tag":"ABX9TyM4JaaKmMa7ybUIDVPP24nv"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import pb2gen"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pb2gen(df.cell_1, df.cell_2, 
trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/pball.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pball.ipynb","provenance":[{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyMxgE04vzxNhd/2/0DYU2le"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import pball"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pball(df, beta=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results['pbcorm']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/pbcor.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pbcor.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637753763},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOznWr6vPehd9iyX3yBDNFl"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import pbcor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pbcor(df.cell_1, df.cell_2, beta=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/rmmcppb.ipynb: -------------------------------------------------------------------------------- 1 | 
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"rmmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588616377427}],"collapsed_sections":[],"authorship_tag":"ABX9TyPzvAJueyiG1/st3fdvLHPD"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import rmmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=rmmcppb(df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/spmcpa.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpa.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632756490},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyPpFAYJ9Pgd170X7K5xYNmT"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpa"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpa(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/spmcpb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632952101},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNYt8uKlk3sEvH8XgwuD61D"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import 
create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":202},"outputId":"12ed3517-6b93-41e6-f991-ffd8537060a4","executionInfo":{"status":"ok","timestamp":1588632983358,"user_tz":240,"elapsed":4430,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2cell_3cell_4cell_5cell_6
00.5424870.7818400.6352840.8749770.4798600.589451
10.9841390.4146550.5818260.4307580.5294030.197294
20.1846030.8219670.5697230.2796810.9901540.212335
30.4769370.3515050.1017600.0873720.8264080.847228
40.7301130.3923440.4229780.8359710.0068010.418546
\n","
"],"text/plain":[" cell_1 cell_2 cell_3 cell_4 cell_5 cell_6\n","0 0.542487 0.781840 0.635284 0.874977 0.479860 0.589451\n","1 0.984139 0.414655 0.581826 0.430758 0.529403 0.197294\n","2 0.184603 0.821967 0.569723 0.279681 0.990154 0.212335\n","3 0.476937 0.351505 0.101760 0.087372 0.826408 0.847228\n","4 0.730113 0.392344 0.422978 0.835971 0.006801 0.418546"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/spmcpi.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpi.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633014822},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOIO6PyIZ6fl34R9C9+J1Vy"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpi"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpi(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/tmcppb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"tmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588614092764}],"collapsed_sections":[],"authorship_tag":"ABX9TyPLA0Nj1FqLpYcQhJu8eacQ"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, con1way, trim_mean\n","from hypothesize.compare_groups_with_single_factor import tmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":202},"outputId":"49a67e90-f7e3-4a3b-d397-dd6c3b76a769","executionInfo":{"status":"ok","timestamp":1588614138956,"user_tz":240,"elapsed":4864,"user":{"displayName":"Allan 
Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2cell_3
00.9880890.5315940.898677
10.0400620.9907040.393328
20.5634700.3956950.345625
30.8569800.9594410.168044
40.1588020.3914460.324284
\n","
"],"text/plain":[" cell_1 cell_2 cell_3\n","0 0.988089 0.531594 0.898677\n","1 0.040062 0.990704 0.393328\n","2 0.563470 0.395695 0.345625\n","3 0.856980 0.959441 0.168044\n","4 0.158802 0.391446 0.324284"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=tmcppb(df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/winall.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"winall.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637846077},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyN3me0/wTmMxBS9uYJsVVYz"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import winall"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=winall(df, tr=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":69},"outputId":"9eea20c3-b5a7-427b-e522-d401e998d22d","executionInfo":{"status":"ok","timestamp":1588637940868,"user_tz":240,"elapsed":413,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["results['wcor']"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[ 1. , 0.23237836, 0.05106066],\n"," [ 0.23237836, 1. , -0.12543308],\n"," [ 0.05106066, -0.12543308, 1. 
]])"]},"metadata":{"tags":[]},"execution_count":9}]}]} -------------------------------------------------------------------------------- /examples/wincor.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wincor.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637954798},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNio2+2ctQ6oNKTHdc5jl9w"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import wincor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=wincor(df.cell_1, df.cell_2, tr=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/wwmcpbt.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wwmcpbt.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588631937930},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNp6W4ZcH5oqiCDO+AIxThj"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import wwmcpbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"e7d3c247-7ff5-40d8-a3b6-0a79c722595a","executionInfo":{"status":"ok","timestamp":1588632020005,"user_tz":240,"elapsed":1968,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["results=wwmcpbt(2, 3, df, .2)"],"execution_count":9,"outputs":[{"output_type":"stream","text":["ask wilcox if dif is supposed to be a argument 
here\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/wwmcppb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wwmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOTVb1bjVl+ueXtjfFOxCqb"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import wwmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=wwmcppb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/ydbt.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ydbt.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588623469716}],"collapsed_sections":[],"authorship_tag":"ABX9TyOFK1ec5drLBgCKpHf13ETw"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import ydbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=ydbt(df.cell_1, df.cell_2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/yuenbt.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"yuenbt.ipynb","provenance":[{"file_id":"1dOaLcrRIctGehyXDy_sGNp5OSTl_4vCh","timestamp":1588615519643}],"collapsed_sections":[],"authorship_tag":"ABX9TyM5O2LtjdxCgF5QiGsHde1k"},"kernelspec":{"name":"python3","display_name":"Python 
3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import yuenbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=yuenbt(df.cell_1, df.cell_2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /hypothesize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from hypothesize import compare_groups_with_single_factor, measuring_associations, \ 3 | compare_groups_with_two_factors 4 | 5 | -------------------------------------------------------------------------------- /hypothesize/compare_groups_with_single_factor/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from ._compare_groups_with_single_factor import * 3 | -------------------------------------------------------------------------------- /hypothesize/compare_groups_with_single_factor/_compare_groups_with_single_factor.py: -------------------------------------------------------------------------------- 1 | __all__ = ["yuenbt", "pb2gen", "linconb", "rmmcppb", 2 | "lindepbt", "bootdpci", "ydbt", "tmcppb", "l2drmci"] 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from scipy.stats import trim_mean 7 | from hypothesize.utilities import yuend, trimse, lincon, trimparts, trimpartt, pandas_to_arrays, \ 8 | con1way, con2way, bptdpsi, rmmcp, trimcibt, remove_nans_based_on_design 9 | 10 | def yuenbt(x, y, tr=.2, alpha=.05, nboot=599, seed=False): 11 | 12 | """ 13 | Compute a 1-alpha confidence interval for the difference between 14 | the trimmed means corresponding to two independent groups. 15 | The bootstrap-t method is used. During the bootstrapping, 16 | the absolute value of the test statistic is used (the "two-sided method"). 17 | 18 | 19 | :param x: Pandas Series 20 | Data for group one 21 | 22 | :param y: Pandas Series 23 | Data for group two 24 | 25 | :param tr: float 26 | Proportion to trim (default is .2) 27 | 28 | :param alpha: float 29 | Alpha level (default is .05) 30 | 31 | :param nboot: int 32 | Number of bootstrap samples (default is 599) 33 | 34 | :param seed: bool 35 | Random seed for reprodicible results. Default is `False`. 
36 | 
37 |     :return:
38 |     Dictionary of results
39 | 
40 |     ci: list
41 | 
42 |     Confidence interval
43 | 
44 |     est_dif: float
45 |     Estimated difference between group one and two
46 | 
47 |     est_x: float
48 |     Estimated value (based on `est`) for group one
49 | 
50 |     est_y: float
51 |     Estimated value (based on `est`) for group two
52 | 
53 |     p_value: float
54 | 
55 |     p-value
56 | 
57 |     test_stat: float
58 |     Test statistic
59 | 
60 |     """
61 | 
62 |     x, y = pandas_to_arrays([x, y])
63 | 
64 |     if seed:
65 |         np.random.seed(seed)
66 | 
67 |     ci = []
68 |     x = x[~np.isnan(x)]
69 |     y = y[~np.isnan(y)]
70 | 
71 |     xcen = x - trim_mean(x, tr)
72 |     ycen = y - trim_mean(y, tr)
73 | 
74 |     test_stat = (trim_mean(x, tr) - trim_mean(y, tr)) / \
75 |         np.sqrt(trimse(x, tr=tr) ** 2 + trimse(y, tr=tr) ** 2)
76 | 
77 |     datax = np.random.choice(xcen, size=(nboot, len(x)))
78 |     datay = np.random.choice(ycen, size=(nboot, len(y)))
79 | 
80 |     top = trim_mean(datax, tr, axis=1) - trim_mean(datay, tr, axis=1)
81 | 
82 |     #botx = list(map(lambda row: trimse(row,.2), datax))
83 |     botx = np.array([trimse(row, tr) for row in datax])
84 |     boty = np.array([trimse(row, tr) for row in datay])
85 |     tval = top / np.sqrt(botx ** 2 + boty ** 2)
86 |     tval = abs(tval)
87 |     tval = sorted(tval)
88 |     icrit = int(np.floor((1 - alpha) * nboot + .5))
89 |     #ibot = int(np.floor(alpha * nboot / 2 + .5))
90 |     #itop = int(np.floor((1 - alpha / 2) * nboot + .5))
91 |     se = np.sqrt((trimse(x, tr)) ** 2 + (trimse(y, tr)) ** 2)
92 |     ci.append(trim_mean(x, tr) - trim_mean(y, tr) - tval[icrit] * se)
93 |     ci.append(trim_mean(x, tr) - trim_mean(y, tr) + tval[icrit] * se)
94 |     p_value = sum(np.abs(test_stat) <= np.abs(tval)) / nboot
95 |     est_x = trim_mean(x, tr)
96 |     est_y = trim_mean(y, tr)
97 |     est_dif = est_x - est_y
98 | 
99 |     results = {'ci': ci, 'test_stat': test_stat, 'p_value': p_value,
100 |                'est_x': est_x, 'est_y': est_y, 'est_dif': est_dif}
101 | 
102 |     return results
103 | 
104 | def linconb(x, con, tr=.2, alpha=.05, nboot=599, seed=False):
105 | 
106 |     """
107 |     Compute a 1-alpha confidence interval for a set of d linear contrasts
108 |     involving trimmed means using the bootstrap-t method.
109 |     Independent groups are assumed. CIs are adjusted to control FWE
110 |     (p values are not adjusted).
111 | 
112 | 
113 |     :param x: DataFrame
114 |     Each column represents a group of data
115 | 
116 |     :param con: array
117 |     `con` is a J (number of columns) by d (number of contrasts)
118 |     matrix containing the contrast coefficients of interest.
119 |     All linear contrasts can be created automatically by using the function [con1way](J)
120 |     (the result of which can be used for `con`).
121 | 
122 |     :param tr: float
123 |     Proportion to trim (default is .2)
124 | 
125 |     :param alpha: float
126 |     Alpha level (default is .05)
127 | 
128 |     :param nboot: int
129 |     Number of bootstrap samples (default is 599)
130 | 
131 |     :param seed: bool
132 |     Random seed for reproducible results. Default is `False`.
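    For a runnable example, see examples/linconb.ipynb above, which calls
    `linconb(df, con1way(3))` on three groups of example data.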
133 | 134 | :return: 135 | Dictionary of results 136 | 137 | con: array 138 | Contrast matrix 139 | 140 | crit: float 141 | Critical value 142 | 143 | n: list 144 | Number of observations for each group 145 | 146 | psihat: DataFrame 147 | Difference score and CI for each contrast 148 | 149 | test: DataFrame 150 | Test statistic, standard error, and p-value for each contrast 151 | 152 | 153 | """ 154 | 155 | x=pandas_to_arrays(x) 156 | 157 | J = len(x) 158 | x = np.asarray([j[~np.isnan(j)] for j in x]) 159 | #Jm = J - 1 160 | #d = (J ** 2 - J) / 2 161 | 162 | if con.shape[0] != len(x): 163 | raise Exception("The number of groups does not match the number of contrast coefficients.") 164 | 165 | bvec = np.zeros([nboot, J, 2]) 166 | 167 | if seed: 168 | np.random.seed(seed) 169 | 170 | nsam = [len(xi) for xi in x] 171 | for j in range(J): 172 | 173 | xcen = x[j] - trim_mean(x[j], tr) 174 | data = np.random.choice(xcen, size=(nboot, len(x[j]))) 175 | 176 | for i, row in enumerate(data): 177 | bvec[i,j,:]=trimparts(row, tr) 178 | 179 | m1 = bvec[:,:,0].T 180 | m2 = bvec[:,:, 1].T 181 | boot = np.zeros([con.shape[1], nboot]) 182 | for d in range(con.shape[1]): 183 | top = np.asarray([trimpartt(row, con[:,d]) for row in m1.T]) 184 | consq = con[:, d] ** 2 185 | bot = np.asarray([trimpartt(row,consq) for row in m2.T]) 186 | boot[d,:] = np.abs(top) / np.sqrt(bot) 187 | 188 | testb=np.asarray([max(row) for row in boot.T]) 189 | ic = int(np.floor((1 - alpha) * nboot) -1) # one less than R 190 | testb = np.sort(testb) 191 | psihat = np.zeros([con.shape[1], 4]) 192 | test = np.zeros([con.shape[1], 4]) 193 | 194 | for d in range(con.shape[1]): 195 | test[d, 0] = d 196 | psihat[d, 0] = d 197 | testit = lincon(x, np.array([con[:,d]]).T, tr, alpha) # column slice of contrast matrix 198 | #test[d, 1]=testit['test'][0, 1] 199 | test[d, 1]=testit['test']['test'][0] 200 | #pval = np.mean((abs(testit['test'][0, 1]) < boot[d,:])) 201 | pval = np.mean((abs(testit['test']['test'][0]) < boot[d,:])) 202 | test[d, 3] = pval 203 | #print(testit['test']) 204 | #print(testit['psihat']) 205 | # psihat[d, 2] = testit['psihat'][0, 1] - testb[ic] * testit['test'][0, 3] 206 | # psihat[d, 3] = testit['psihat'][0, 1] + testb[ic] * testit['test'][0, 3] 207 | # psihat[d, 1] = testit['psihat'][0, 1] 208 | psihat[d, 2] = testit['psihat']['psihat'][0] - testb[ic] * testit['test']['se'][0] 209 | psihat[d, 3] = testit['psihat']['psihat'][0] + testb[ic] * testit['test']['se'][0] 210 | psihat[d, 1] = testit['psihat']['psihat'][0] 211 | #test[d, 2] = testit['test'][0, 3] 212 | test[d, 2] = testit['test']['se'][0] 213 | 214 | 215 | 216 | psihat_col_names=['contrast_index', 'psihat', 'ci_low', 'ci_up'] 217 | test_col_names = ['contrast_index', 'test', 'se', 'p_value'] 218 | 219 | psihat = pd.DataFrame(psihat, columns=psihat_col_names) 220 | test=pd.DataFrame(test, columns=test_col_names) 221 | 222 | return {'n': nsam, 'psihat': psihat, 'test': test, 'crit': testb[ic], 'con': con} 223 | 224 | def rmmcppb(x, est, *args, alpha=.05, con=None, 225 | dif=True, nboot=None, BA=False, 226 | hoch=False, SR=False, seed=False): 227 | 228 | """ 229 | Use a percentile bootstrap method to compare dependent groups. 230 | By default, compute a .95 confidence interval for all linear contrasts 231 | specified by con, a J-by-C matrix, where C is the number of 232 | contrasts to be tested, and the columns of `con` are the 233 | contrast coefficients. If con is not specified, 234 | all pairwise comparisons are done. 
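    For instance, with three dependent groups, `con1way(3)` returns a 3-by-3
    matrix whose columns encode the three pairwise comparisons (a column of the
    form (1, -1, 0) contrasts the first two groups), so passing `con=con1way(3)`
    reproduces the default behavior.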
235 | 236 | If `est` is the function `onestep` or `mom` (these are not implemeted yet), 237 | method SR can be used to control the probability of at least one Type I error. 238 | Otherwise, Hochberg's method is used. 239 | 240 | If `dif` is `False` and `BA` is `True`, the bias adjusted 241 | estimate of the generalized p-value is recommended. 242 | Using `BA`=`True` (when `dif`=`False`) 243 | is recommended when comparing groups 244 | with M-estimators and MOM, but it is not necessary when 245 | comparing 20% trimmed means (Wilcox & Keselman, 2002). 246 | 247 | Hochberg's sequentially rejective method can be used and is used 248 | if n>=80. 249 | 250 | Note that arguments up to and including `args` are positional arguments 251 | 252 | :param x: Pandas DataFrame 253 | Each column represents a group of data 254 | 255 | :param est: function 256 | Measure of location (currently only `trim_mean` is supported) 257 | 258 | :param args: list/value 259 | Parameter(s) for measure of location (e.g., .2) 260 | 261 | :param alpha: float 262 | Alpha level (default is .05) 263 | 264 | :param con: array 265 | `con` is a J (number of columns) by d (number of contrasts) 266 | matrix containing the contrast coefficents of interest. 267 | All linear constrasts can be created automatically by using the function [con1way](J) 268 | (the result of which can be used for `con`). The default is `None` and in this 269 | case all linear contrasts are created automatically. 270 | 271 | :param dif: bool 272 | When `True`, use difference scores, otherwise use marginal distributions 273 | 274 | :param nboot: int 275 | Number of bootstrap samples. Default is `None` 276 | in which case `nboot` will be chosen for you 277 | based on the number of contrasts. 278 | 279 | :param BA: bool 280 | When `True`, use the bias adjusted estimate of the 281 | generalized p-value is applied (e.g., when `dif` is `False`) 282 | 283 | :param hoch: bool 284 | When `True`, Hochberg's sequentially rejective method can be used and is used 285 | if n>=80. 286 | 287 | :param SR: bool 288 | When `True`, use the modified "sequentially rejective", especially when 289 | comparing one-step M-estimators or M-estimators. 290 | 291 | :param seed: bool 292 | Random seed for reprodicible results (default is `False`) 293 | 294 | :return: 295 | Dictionary of results 296 | 297 | con: array 298 | Contrast matrix 299 | 300 | num_sig: int 301 | Number of statistically significant results 302 | 303 | output: DataFrame 304 | Difference score, p-value, critical value, and CI for each contrast 305 | """ 306 | 307 | called_directly=False 308 | if type(x) is pd.core.frame.DataFrame: 309 | called_directly=True 310 | x=x.dropna().values 311 | 312 | if hoch: 313 | SR=False 314 | 315 | if SR: 316 | raise Exception("onestep and mom estimators are not yet implemented" 317 | "and only these can be used with SR method. 
Please set SR to False for now.") 318 | 319 | if dif: 320 | print("analysis is being done on difference scores", 321 | "each confidence interval has probability coverage of 1-alpha.") 322 | 323 | temp=rmmcppbd(x,est, *args, alpha=alpha,con=con, 324 | nboot=nboot,hoch=True) 325 | 326 | if called_directly: 327 | 328 | col_names = ['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper'] 329 | 330 | return {'output': pd.DataFrame(temp['output'], columns=col_names), 331 | 'con': temp['con'], "num_sig": temp['num_sig']} 332 | 333 | else: 334 | 335 | return {'output': temp['output'], 336 | 'con': temp['con'], "num_sig": temp['num_sig']} 337 | 338 | else: 339 | print("dif=False so using marginal distributions") 340 | 341 | if not BA: 342 | print("If and when MOM and/or onestep estimators are implemeted, " 343 | "it is suggested to use BA=True and hoch=T") 344 | 345 | J=x.shape[1] 346 | xcen=np.full([x.shape[0], x.shape[1]], np.nan) 347 | for j in range(J): 348 | xcen[:, j] = x[:, j] - est(x[:, j], *args) 349 | 350 | if con is None: 351 | con=con1way(J) 352 | 353 | d=con.shape[1] 354 | 355 | if nboot is None: 356 | if d<4: 357 | nboot=1000 358 | elif d>4: 359 | nboot=5000 360 | 361 | n=x.shape[0] 362 | connum=con.shape[1] 363 | 364 | if seed: 365 | np.random.seed(seed) 366 | 367 | xbars=est(x,*args) 368 | 369 | psidat=np.zeros(connum) 370 | for ic in range(connum): 371 | psidat[ic]=np.sum(con[:,ic] * xbars) 372 | 373 | psihat=np.zeros([connum, nboot]) 374 | psihatcen=np.zeros([connum, nboot]) 375 | bvec=np.full([nboot,J], np.nan) 376 | bveccen = np.full([nboot, J], np.nan) 377 | data=np.random.randint(n,size=(nboot,n)) 378 | for ib in range(nboot): 379 | bvec[ib,:] = est(x[data[ib,:],:], *args) 380 | bveccen[ib, :] = est(xcen[data[ib, :], :], *args) 381 | 382 | test=np.full(connum, np.nan) 383 | bias=np.full(connum, np.nan) 384 | 385 | for ic in range(connum): 386 | psihat[ic,:]=[bptdpsi(row, con[:, ic]) for row in bvec] 387 | psihatcen[ic,:] = [bptdpsi(row, con[:,ic]) for row in bveccen] 388 | bias[ic] = np.sum((psihatcen[ic,:] > 0)) / nboot - .5 389 | ptemp =(np.sum(psihat[ic,:] > 0) + .5 * np.sum(psihat[ic,:] == 0)) / nboot 390 | 391 | if BA: 392 | test[ic] = ptemp - .1 * bias[ic] 393 | 394 | if not BA: 395 | test[ic] = ptemp 396 | 397 | test[ic] = np.min([test[ic], 1 - test[ic]]) 398 | test[ic] = np.max([test[ic], 0]) # bias corrected might be less than zero 399 | 400 | test=2*test 401 | ncon=con.shape[1] 402 | dvec=alpha/np.arange(1,ncon+1) 403 | 404 | if SR: 405 | 406 | if alpha == .05: 407 | 408 | dvec =[.025, 409 | .025, 410 | .0169, 411 | .0127, 412 | .0102, 413 | .00851, 414 | .0073, 415 | .00639, 416 | .00568, 417 | .00511] 418 | 419 | dvecba = [.05, 420 | .025, 421 | .0169, 422 | .0127, 423 | .0102, 424 | .00851, 425 | .0073, 426 | .00639, 427 | .00568, 428 | .00511] 429 | 430 | if ncon > 10: 431 | avec = .05 / np.arange(11,ncon+1) 432 | dvec = np.append(dvec, avec) 433 | 434 | elif alpha == .01: 435 | 436 | dvec =[.005, 437 | .005, 438 | .00334, 439 | .00251, 440 | .00201, 441 | .00167, 442 | .00143, 443 | .00126, 444 | .00112, 445 | .00101] 446 | 447 | dvecba =[.01, 448 | .005, 449 | .00334, 450 | .00251, 451 | .00201, 452 | .00167, 453 | .00143, 454 | .00126, 455 | .00112, 456 | .00101] 457 | 458 | if ncon > 10: 459 | avec = .01 / np.arange(11,ncon+1) 460 | dvec = np.append(dvec, avec) 461 | 462 | 463 | else: 464 | 465 | dvec = alpha / np.arange(1,ncon+1) 466 | dvecba = dvec 467 | dvec[1] = alpha 468 | 469 | if hoch: 470 | dvec=alpha/np.arange(1,ncon+1) 471 | 472 | 
dvecba=dvec
473 |     temp2 = (-test).argsort()
474 |     zvec = dvec[:ncon]
475 | 
476 |     if BA:
477 |         zvec = dvecba[:ncon]
478 | 
479 |     output = np.zeros([connum, 6])
480 |     tmeans = est(x, *args)
481 | 
482 |     output[temp2, 3] = zvec
483 |     for ic in range(ncon):
484 |         output[ic, 1] = np.sum(con[:, ic] * tmeans)
485 |         output[ic, 0] = ic
486 |         output[ic, 2] = test[ic]
487 |         temp = np.sort(psihat[ic, :])
488 |         icl = round(alpha * nboot / 2) #+ 1
489 |         icu = nboot - icl - 1 #nboot - (icl - 1)
490 |         output[ic, 4] = temp[icl]
491 |         output[ic, 5] = temp[icu]
492 | 
493 |     num_sig = output.shape[0]
494 |     ior = (-output[:, 2]).argsort()
495 |     for j in range(output.shape[0]):
496 |         if output[ior[j], 2] <= output[ior[j], 3]:
497 |             break
498 |         else:
499 |             num_sig = num_sig - 1
500 | 
501 |     if called_directly:
502 |         col_names = ['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper']
503 |         results = {"output": pd.DataFrame(output, columns=col_names), "con": con, "num_sig": num_sig}
504 |         print(results)
505 | 
506 |     else:
507 |         results = {"output": output, "con": con, "num_sig": num_sig}
508 | 
509 | 
510 |     return results
511 | 
512 | def rmmcppbd(x, est, *args, alpha=.05, con=None,
513 |              nboot=None, hoch=True, seed=False):
514 | 
515 |     """
516 |     Use a percentile bootstrap method to compare dependent groups
517 |     based on difference scores.
518 |     By default,
519 |     compute a .95 confidence interval for all linear contrasts
520 |     specified by con, a J by C matrix, where C is the number of
521 |     contrasts to be tested, and the columns of con are the
522 |     contrast coefficients.
523 |     If con is not specified, all pairwise comparisons are done.
524 | 
525 |     nboot is the bootstrap sample size. If not specified, a value will
526 |     be chosen depending on the number of contrasts there are.
527 | 
528 |     A sequentially rejective method is used to control alpha.
529 |     If n>=80, Hochberg's method is used.
530 | 
531 |     Note that arguments up to and including `args` are positional arguments
532 | 
533 |     :param x: array; each column represents a dependent group
534 |     :param est: function; measure of location (e.g., `trim_mean`)
535 |     :param args: parameter(s) for the measure of location (e.g., .2)
536 |     :param alpha: float; alpha level (default is .05)
537 |     :param con: array; J by C contrast matrix (all pairwise contrasts if None)
538 |     :param nboot: int; number of bootstrap samples (chosen automatically if None)
539 |     :param hoch: bool; when True, use Hochberg's sequentially rejective method
540 |     :param seed: bool; random seed for reproducible results
541 |     :return: dictionary containing the results array, contrast matrix, and num_sig
542 |     """
543 | 
544 |     x = x[~np.isnan(x).any(axis=1)]
545 |     J = x.shape[1]
546 |     n = x.shape[0]
547 |     if n >= 80:
548 |         hoch = True
549 | 
550 |     #Jm=J-1
551 |     if con is None:
552 |         con = con1way(J)
553 | 
554 |     d = con.shape[1]
555 |     if not nboot:
556 | 
557 |         if d <= 4:
558 |             nboot = 1000
559 | 
560 |         elif d <= 6:
561 |             nboot = 2000
562 | 
563 |         elif d <= 10:
564 |             nboot = 3000
565 | 
566 |         else:
567 |             nboot = 5000
568 | 
569 |     connum = d
570 |     xx = x @ con
571 | 
572 |     if seed:
573 |         np.random.seed(seed)
574 | 
575 |     psihat = np.zeros([connum, nboot])
576 |     data = np.random.randint(n, size=(nboot, n))
577 | 
578 |     # wilcox's implementation in R is a bit more complicated,
579 |     # I have simplified. Hopefully correctly.
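    # Each bootstrap draw below resamples whole rows (cases) with replacement,
    # preserving the dependence among the J groups; `est` is then applied to the
    # resampled difference scores in `xx` (the data projected onto `con`).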
580 | for ib in range(nboot): 581 | psihat[:,ib]=est(xx[data[ib,:], :], *args) 582 | 583 | test = np.full(connum, np.nan) 584 | icl = round(alpha * nboot // 2) #+ 1 585 | icu = nboot - icl - 2 #- 1 586 | cimat=np.full([connum, 2], np.nan) 587 | 588 | for ic in range(connum): 589 | 590 | test[ic] =(sum(psihat[ic, :] > 0) + .5 * sum(psihat[ic, :] == 0)) / nboot 591 | test[ic] = min(test[ic], 1 - test[ic]) 592 | temp = np.sort(psihat[ic, :]) 593 | cimat[ic, 0] = temp[icl] 594 | cimat[ic, 1] = temp[icu] 595 | 596 | test = 2 * test 597 | ncon = con.shape[1] 598 | 599 | if alpha == .05: 600 | dvec =[.025, 601 | .025, 602 | .0169, 603 | .0127, 604 | .0102, 605 | .00851, 606 | .0073, 607 | .00639, 608 | .00568, 609 | .00511] 610 | 611 | if ncon > 10: 612 | avec = .05 / np.arange(11, ncon+1) 613 | dvec = np.append(dvec, avec) 614 | 615 | elif alpha == .01: 616 | dvec =[.005, 617 | .005, 618 | .00334, 619 | .00251, 620 | .00201, 621 | .00167, 622 | .00143, 623 | .00126, 624 | .00112, 625 | .00101] 626 | 627 | if ncon > 10: 628 | avec = .01 / np.arange(11,ncon+1) 629 | dvec = np.append(dvec, avec) 630 | 631 | else: 632 | dvec = alpha / np.arange(1,ncon+1) 633 | dvec[1] = alpha / 2 634 | 635 | if hoch: 636 | dvec = alpha / (2 * np.arange(1,ncon+1)) 637 | 638 | dvec = 2 * dvec 639 | temp2 = (-test).argsort() 640 | ncon = con.shape[1] 641 | zvec = dvec[:ncon] 642 | output=np.zeros([connum, 6]) 643 | 644 | tmeans=est(xx,*args) 645 | output[temp2, 3] = zvec 646 | 647 | for ic in range(ncon): 648 | output[ic, 1] = tmeans[ic] 649 | output[ic, 0] = ic 650 | output[ic, 2] = test[ic] 651 | output[ic, 4:6] = cimat[ic,:] 652 | 653 | num_sig = np.sum(output[:, 2] <= output[:, 3]) 654 | 655 | return {"output": output, "con": con, "num_sig": num_sig} 656 | 657 | def lindepbt(x, tr=.2, con=None, alpha=.05, nboot=599, dif=True, seed=False): 658 | 659 | """ 660 | Multiple comparisons on trimmed means with FWE controlled with Rom's method 661 | Using a bootstrap-t method. 662 | 663 | :param x: Pandas DataFrame 664 | Each column in the data represents a different group 665 | 666 | :param tr: float 667 | Proportion to trim (default is .2) 668 | 669 | :param con: array 670 | `con` is a J (number of groups) by d (number of contrasts) 671 | matrix containing the contrast coefficents of interest. 672 | All linear constrasts can be created automatically by using the function [con1way](J) 673 | (the result of which can be used for `con`). The default is `None` and in this 674 | case all linear contrasts are created automatically. 675 | 676 | :param alpha: float 677 | Alpha level. Default is .05. 
678 | 
679 |     :param nboot: int
680 |     Number of bootstrap samples (default is 599)
681 | 
682 |     :param dif: bool
683 |     When `True`, use difference scores, otherwise use marginal distributions
684 | 
685 |     :param seed: bool
686 |     Random seed for reproducible results (default is `False`)
687 | 
688 |     :return:
689 |     Dictionary of results
690 | 
691 |     num_sig: int
692 |     Number of statistically significant results
693 | 
694 |     con: array
695 |     Contrast matrix
696 | 
697 |     psihat: DataFrame
698 |     Difference score and CI for each contrast
699 | 
700 |     test: DataFrame
701 |     Test statistic, p-value, critical value, and standard error
702 |     for each contrast
703 |     """
704 | 
705 |     called_directly = False
706 |     if type(x) is pd.DataFrame:
707 |         x = pandas_to_arrays(x)
708 |         x = remove_nans_based_on_design(x, design_values=len(x), design_type='dependent_groups')
709 |         x = np.r_[x].T
710 |         called_directly = True
711 | 
712 |     from hypothesize.measuring_associations import wincor
713 | 
714 |     if seed:
715 |         np.random.seed(seed)
716 | 
717 |     if con is None:
718 |         con = con2way(1, x.shape[1])[1]  # all pairwise
719 |         ncon = con.shape[1]
720 | 
721 |     else:
722 |         ncon = con.shape[1]
723 | 
724 |     x = x[~np.isnan(x).any(axis=1)]
725 |     n = x.shape[0]
726 |     J = x.shape[1]
727 |     nval = x.shape[0]
728 |     h1 = nval - 2 * np.floor(tr * nval)
729 |     #df=h1-1
730 |     xbar = trim_mean(x, tr)
731 | 
732 |     if alpha == .05:
733 | 
734 |         dvec = [.05,
735 |                 .025,
736 |                 .0169,
737 |                 .0127,
738 |                 .0102,
739 |                 .00851,
740 |                 .0073,
741 |                 .00639,
742 |                 .00568,
743 |                 .00511]
744 | 
745 |         if ncon > 10:
746 |             avec = .05 / np.arange(11, ncon + 1)
747 |             dvec = np.append(dvec, avec)
748 | 
749 |     elif alpha == .01:
750 | 
751 |         dvec = [.01,
752 |                 .005,
753 |                 .00334,
754 |                 .00251,
755 |                 .00201,
756 |                 .00167,
757 |                 .00143,
758 |                 .00126,
759 |                 .00112,
760 |                 .00101]
761 | 
762 |         if ncon > 10:
763 |             avec = .01 / np.arange(11, ncon + 1)
764 |             dvec = np.append(dvec, avec)
765 | 
766 | 
767 |     else:
768 |         dvec = alpha / np.arange(1, ncon + 1)
769 | 
770 | 
771 |     psihat = np.zeros([ncon, 4])
772 |     test = np.zeros([ncon, 5])
773 |     temp1 = np.array([])
774 | 
775 |     for d in range(ncon):
776 |         psihat[d, 0] = d
777 | 
778 |         if not dif:
779 |             psihat[d, 1] = np.sum(con[:, d] * xbar)
780 |             sejk = 0
781 | 
782 |             for j in range(J):
783 |                 for k in range(J):
784 |                     djk = (nval - 1) * wincor(x[:, j], x[:, k], tr)['wcov'] / (h1 * (h1 - 1))
785 |                     sejk = sejk + con[j, d] * con[k, d] * djk
786 | 
787 |             sejk = np.sqrt(sejk)
788 |             test[d, 0] = d
789 |             test[d, 1] = np.sum(con[:, d] * xbar) / sejk
790 |             test[d, 4] = sejk
791 | 
792 |             data = np.random.randint(n, size=(nboot, n))
793 |             xcen = np.full([x.shape[0], x.shape[1]], np.nan)
794 |             for j in range(J):
795 |                 xcen[:, j] = x[:, j] - trim_mean(x[:, j], tr)
796 | 
797 |             bvec = [lindep_sub(data_row, xcen, con[:, d], tr=tr)
798 |                     for data_row in data]
799 | 
800 |             bsort = np.sort(np.abs(bvec))
801 |             ic = round((1 - alpha) * nboot) - 1  # correct for python with the "- 1"?
802 |             psihat[d, 2] = psihat[d, 1] - bsort[ic] * test[d, 4]
803 |             psihat[d, 3] = psihat[d, 1] + bsort[ic] * test[d, 4]
804 |             p_value = np.mean(np.abs(test[d, 1]) <= np.abs(bvec))
805 |             temp1 = np.append(temp1, p_value)
806 | 
807 |         elif dif:
808 | 
809 |             for j in range(J):
810 |                 if j == 0:
811 |                     dval = con[j, d] * x[:, j]
812 | 
813 |                 elif j > 0:
814 |                     dval = dval + con[j, d] * x[:, j]
815 | 
816 |             temp = trimcibt(dval, tr=tr, alpha=alpha, nboot=nboot, seed=seed)
817 |             temp1 = np.append(temp1, temp['p_value'])
818 |             test[d, 0] = d
819 |             test[d, 1] = temp['test_stat']  ## missing in R?
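            # The remaining columns store the trimmed-mean standard error, the
            # point estimate, and the bootstrap-t CI returned by `trimcibt`.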
820 |             test[d, 4] = trimse(dval, tr=tr)
821 |             psihat[d, 1] = trim_mean(dval, tr)
822 |             psihat[d, 2] = temp['ci'][0]
823 |             psihat[d, 3] = temp['ci'][1]
824 | 
825 |     test[:, 2] = temp1
826 |     temp2 = (-temp1).argsort()
827 |     zvec = dvec[:ncon]
828 |     test[temp2, 3] = zvec
829 | 
830 |     # if flagcon
831 |     num_sig = np.sum(test[:, 2] <= test[:, 3])
832 | 
833 |     if called_directly:
834 | 
835 |         test=pd.DataFrame(test, columns=["con_num", "test", "p_value", "p_crit", "se"])
836 |         psihat=pd.DataFrame(psihat, columns=["con_num", "psihat", "ci_lower", "ci_upper"])
837 | 
838 | 
839 |     return {'test': test, 'psihat': psihat, 'con': con, 'num_sig': num_sig}
840 | 
841 | def lindep_sub(data, x, con = None, tr = .2):
842 | 
843 |     con = con.reshape(len(con), 1) # make 2D col vector
844 |     res = rmmcp(x[data,:], con=con, tr=tr, dif=False)['test'][:, 1]
845 | 
846 |     return res[0]
847 | 
848 | def pb2gen(x, y, est, *args, alpha=.05, nboot=2000, seed=False):
849 | 
850 |     """
851 |     Compute a bootstrap confidence interval for the
852 |     difference between any two parameters corresponding to two
853 |     independent groups.
854 | 
855 |     Note that arguments up to and including `args` are positional arguments
856 | 
857 |     :param x: Pandas Series
858 |         Data for group one
859 | 
860 |     :param y: Pandas Series
861 |         Data for group two
862 | 
863 |     :param est: function
864 |         Measure of location (currently only `trim_mean` is supported)
865 | 
866 |     :param args: list/value
867 |         Parameter(s) for measure of location (e.g., .2)
868 | 
869 |     :param alpha: float
870 |         Alpha level (default is .05)
871 | 
872 |     :param nboot: int
873 |         Number of bootstrap samples (default is 2000)
874 | 
875 |     :param seed: bool
876 |         Random seed for reproducible results (default is `False`)
877 | 
878 |     :return:
879 |         Dictionary of results
880 | 
881 |         ci: list
882 | 
883 |             Confidence interval
884 | 
885 |         est_1: float
886 |             Estimated value (based on `est`) for group one
887 | 
888 |         est_2: float
889 |             Estimated value (based on `est`) for group two
890 | 
891 |         est_dif: float
892 |             Estimated difference between group one and two
893 | 
894 |         n1: int
895 |             Number of observations in group one
896 | 
897 |         n2: int
898 |             Number of observations in group two
899 | 
900 |         p_value: float
901 | 
902 |             p-value
903 | 
904 |         variance: float
905 |             Variance of the bootstrap difference estimates
906 |     """
907 | 
908 |     x, y = pandas_to_arrays([x, y])
909 | 
910 |     x=x[~np.isnan(x)]
911 |     y=y[~np.isnan(y)]
912 | 
913 |     if seed:
914 |         np.random.seed(seed)
915 | 
916 | 
917 |     datax = np.random.choice(x, size=(nboot, len(x)))
918 |     datay = np.random.choice(y, size=(nboot, len(y)))
919 | 
920 |     bvecx=est(datax, *args, axis=1)
921 |     bvecy = est(datay, *args, axis=1)
922 | 
923 |     bvec = np.sort(bvecx - bvecy)
924 |     low = round((alpha / 2) * nboot) #+ 1
925 |     up = nboot - low - 2
926 |     temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
927 |     sig_level = 2 * (min(temp, 1 - temp))
928 |     se = np.var(bvec)
929 | 
930 |     results={'est_1': est(x,*args),
931 |              'est_2': est(y,*args),
932 |              'est_dif': est(x, *args) - est(y, *args),
933 |              'ci': [bvec[low], bvec[up]],
934 |              'p_value': sig_level,
935 |              'variance': se,
936 |              'n1': len(x),
937 |              'n2': len(y)}
938 | 
939 |     return results
940 | 
941 | def bootdpci(x, est, *args, nboot=None, alpha=.05,
942 |              dif=True, BA=False, SR=False):
943 | 
944 |     """
945 |     Using the percentile bootstrap method, compute a .95 confidence interval
946 |     for the difference between a measure of location or scale
947 |     when comparing two dependent groups.
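    Example (a minimal sketch mirroring the call in build_test_data.py;
    `create_example_data` and `trim_mean` come from hypothesize.utilities):

        from hypothesize.utilities import create_example_data, trim_mean

        df = create_example_data(3)  # three dependent groups
        results = bootdpci(df, trim_mean, .2)  # 20% trimmed means
        results['output']  # psihat, p-value, p-crit, and CI per contrast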
948 | 
949 |     The argument `dif` defaults to `True` indicating
950 |     that difference scores will be used, in which case Hochberg’s
951 |     method is used to control FWE. If `dif` is `False`, measures of
952 |     location associated with the marginal distributions are used
953 |     instead.
954 | 
955 |     If `dif` is `False` and `BA` is `True`, the bias adjusted
956 |     estimate of the generalized p-value is recommended.
957 |     Using `BA`=`True` (when `dif`=`False`)
958 |     is recommended when comparing groups
959 |     with M-estimators and MOM, but it is not necessary when
960 |     comparing 20% trimmed means (Wilcox & Keselman, 2002).
961 | 
962 |     The so-called SR method, which is a slight
963 |     modification of Hochberg's (1988) "sequentially rejective"
964 |     method, can be applied to control FWE, especially when
965 |     comparing one-step M-estimators or M-estimators.
966 | 
967 |     Note that arguments up to and including `args` are positional arguments
968 | 
969 |     :param x: Pandas DataFrame
970 |         Each column represents a group of data
971 | 
972 |     :param est: function
973 |         Measure of location (currently only `trim_mean` is supported)
974 | 
975 |     :param args: list/value
976 |         Parameter(s) for measure of location (e.g., .2)
977 | 
978 |     :param alpha: float
979 |         Alpha level. Default is .05.
980 | 
981 |     :param nboot: int
982 |         Number of bootstrap samples. Default is `None`
983 |         in which case `nboot` will be chosen for you
984 |         based on the number of contrasts.
985 | 
986 |     :param dif: bool
987 |         When `True`, use difference scores, otherwise use marginal distributions
988 | 
989 |     :param BA: bool
990 |         When `True`, the bias adjusted estimate of the
991 |         generalized p-value is applied (used when `dif` is `False`)
992 | 
993 |     :param SR: bool
994 |         When `True`, use the modified "sequentially rejective" method, especially when
995 |         comparing one-step M-estimators or M-estimators
996 | 
997 |     :return:
998 |         Dictionary of results
999 | 
1000 |         con: array
1001 |             Contrast matrix
1002 | 
1003 |         num_sig: int
1004 |             Number of statistically significant results
1005 | 
1006 |         output: DataFrame
1007 |             Difference score, p-value, critical value, and CI for each contrast
1008 | 
1009 |     """
1010 | 
1011 |     # replace with actual estimators when implemented
1012 |     if SR and est not in ('onestep', 'mom'):
1013 |         SR=False
1014 |         print("setting SR to False. SR=True should apparently "
1015 |               "only be used with onestep or mom")
1016 | 
1017 |     ## in R
1018 |     # okay=False
1019 |     # if est in (onestep, mom):
1020 |     #     okay=True
1021 |     #
1022 |     # if not okay:
1023 |     #     SR=False
1024 | 
1025 |     results=rmmcppb(x, est, *args, nboot=nboot,alpha=alpha,
1026 |                     SR=SR, dif=dif, BA=BA)
1027 | 
1028 |     col_names = ['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper']
1029 |     results.update({'output': pd.DataFrame(results['output'], columns=col_names)})
1030 | 
1031 |     return results
1032 | 
1033 | def ydbt(x, y, tr=.2, alpha=.05, nboot=599, side=True, seed=False):
1034 | 
1035 |     """
1036 |     Using the bootstrap-t method,
1037 |     compute a .95 confidence interval for the difference between
1038 |     the marginal trimmed means of paired data.
1039 |     By default, 20% trimming is used with 599 bootstrap samples.
1040 | 
1041 | 
1042 |     :param x: Pandas Series
1043 |         Data for group one
1044 | 
1045 |     :param y: Pandas Series
1046 |         Data for group two
1047 | 
1048 |     :param tr: float
1049 |         Proportion to trim (default is .2)
1050 | 
1051 |     :param alpha: float
1052 |         Alpha level. Default is .05.
1053 | 
1054 |     :param nboot: int
1055 |         Number of bootstrap samples (default is 599)
1056 | 
1057 |     :param side: bool
1058 |         When `True`, the function returns a symmetric CI and a p-value, otherwise the function returns an equal-tailed CI (no p-value)
1059 | 
1060 |     :param seed: bool
1061 |         Random seed for reproducible results (default is `False`)
1062 | 
1063 |     :return:
1064 |         Dictionary of results
1065 | 
1066 |         ci: list
1067 |             Confidence interval
1068 | 
1069 |         dif: float
1070 |             Difference between group one and two
1071 | 
1072 |         p_value: float
1073 |             p-value
1074 |     """
1075 | 
1076 |     x = pandas_to_arrays([x, y])
1077 |     x=remove_nans_based_on_design(x, 2, 'dependent_groups')
1078 |     x,y=[x[0], x[1]]
1079 | 
1080 |     if seed:
1081 |         np.random.seed(seed)
1082 | 
1083 |     data = np.random.randint(len(x), size=(nboot, len(x)))
1084 | 
1085 |     xcen = x - trim_mean(x, tr)
1086 |     ycen = y - trim_mean(y, tr)
1087 | 
1088 |     bvec=[tsub(row, xcen, ycen, tr) for row in data]
1089 | 
1090 |     dotest = yuend(x, y, tr=tr)
1091 | 
1092 |     estse = dotest['se']
1093 |     p_value = np.nan
1094 |     dif = trim_mean(x, tr) - trim_mean(y, tr)
1095 |     ci=[]
1096 | 
1097 |     if not side:
1098 |         print('p_value is only returned when side=True')
1099 |         ilow = round((alpha / 2) * nboot) -1
1100 |         ihi = nboot - ilow - 2
1101 |         bsort = np.sort(bvec)
1102 |         ci.append(dif - bsort[ihi] * estse)
1103 |         ci.append(dif - bsort[ilow + 1] * estse)
1104 | 
1105 |     else:
1106 |         bsort = np.sort(np.abs(bvec))
1107 |         ic = round((1 - alpha) * nboot)-1
1108 |         ci.append(dif - bsort[ic] * estse)
1109 |         ci.append(dif + bsort[ic] * estse)
1110 |         p_value = (np.sum(np.abs(dotest['teststat']) <= np.abs(bvec))) / nboot
1111 | 
1112 | 
1113 |     return {'ci': ci, 'dif': dif, 'p_value': p_value}
1114 | 
1115 | def tsub(isub, x, y, tr):
1116 | 
1117 |     """
1118 |     Compute test statistic for trimmed means
1119 |     when comparing dependent groups.
1120 |     By default, 20% trimmed means are used.
1121 |     isub is an array of length n of random integers
1122 |     to control bootstrap sampling.
1123 | 
1124 |     This function is used by ydbt
1125 | 
1126 |     :param isub:
1127 |     :param x:
1128 |     :param y:
1129 |     :param tr:
1130 |     :return:
1131 |     """
1132 | 
1133 |     tsub_res = yuend(x[isub], y[isub], tr = tr)['teststat']
1134 | 
1135 |     return tsub_res
1136 | 
1137 | def tmcppb(x, est, *args, con=None, bhop=False, alpha=.05, nboot=None, seed=False):
1138 | 
1139 |     """
1140 |     Multiple comparisons for J independent groups using trimmed means and
1141 |     the percentile bootstrap method. Rom’s method is used to control the
1142 |     probability of one or more type I errors. For C > 10 hypotheses,
1143 |     or when the goal is to test at some level other than .05 and .01,
1144 |     Hochberg’s method is used. Setting the argument `bhop` to `True` uses the
1145 |     Benjamini–Hochberg method instead.
1146 | 
1147 |     Note that arguments up to and including `args` are positional arguments
1148 | 
1149 |     :param x: Pandas DataFrame
1150 |         Each column represents a group of data
1151 | 
1152 |     :param est: function
1153 |         Measure of location (currently only `trim_mean` is supported)
1154 | 
1155 |     :param args: list/value
1156 |         Parameter(s) for measure of location (e.g., .2)
1157 | 
1158 |     :param con: array
1159 |         `con` is a J (number of columns) by d (number of contrasts)
1160 |         matrix containing the contrast coefficients of interest.
1161 |         All linear contrasts can be created automatically by using the function [con1way](J)
1162 |         (the result of which can be used for `con`).
The default is `None` and in this
1163 |         case all linear contrasts are created automatically.
1164 | 
1165 |     :param bhop: bool
1166 |         If `True`, the Benjamini–Hochberg method is used to control FWE
1167 | 
1168 |     :param alpha: float
1169 |         Alpha level. Default is .05.
1170 | 
1171 |     :param nboot: int
1172 |         Number of bootstrap samples. Default is `None`, in which case `nboot` is chosen for you based on the number of groups.
1173 | 
1174 |     :param seed: bool
1175 |         Random seed for reproducible results. Default is `False`.
1176 | 
1177 |     :return:
1178 |         Dictionary of results
1179 | 
1180 |         con: array
1181 |             Contrast matrix
1182 | 
1183 |         num_sig: int
1184 |             Number of statistically significant results
1185 | 
1186 |         output: DataFrame
1187 |             Difference score, p-value, critical value, and CI for each contrast
1188 |     """
1189 | 
1190 |     x=pandas_to_arrays(x)
1191 |     x=remove_nans_based_on_design(x, len(x), 'independent_groups')
1192 |     J=len(x)
1193 | 
1194 |     mvec = [est(i, *args) for i in x]
1195 | 
1196 |     if con is None:
1197 |         con=con1way(J)
1198 | 
1199 |     ncon=con.shape[1]
1200 | 
1201 |     if not nboot:
1202 |         nboot = 5000
1203 |         if J <= 3:
1204 |             nboot = 2000
1205 |         elif J <= 8:
1206 |             nboot = 4000
1207 | 
1208 |     if not bhop:
1209 | 
1210 |         if alpha == .05:
1211 |             dvec=[.05,
1212 |                   .025,
1213 |                   .0169,
1214 |                   .0127,
1215 |                   .0102,
1216 |                   .00851,
1217 |                   .0073,
1218 |                   .00639,
1219 |                   .00568,
1220 |                   .00511]
1221 | 
1222 |             if ncon > 10:
1223 |                 avec = .05 / np.arange(11,ncon+1)
1224 |                 dvec = np.append(dvec, avec)
1225 | 
1226 |         elif alpha == .01:
1227 |             dvec =[.01,
1228 |                    .005,
1229 |                    .00334,
1230 |                    .00251,
1231 |                    .00201,
1232 |                    .00167,
1233 |                    .00143,
1234 |                    .00126,
1235 |                    .00112,
1236 |                    .00101]
1237 | 
1238 |             if ncon > 10:
1239 |                 avec = .01 / np.arange(11,ncon+1)
1240 |                 dvec = np.append(dvec, avec)
1241 | 
1242 |         else: # alpha is neither .05 nor .01
1243 |             dvec = alpha / np.arange(1,ncon+1)
1244 | 
1245 |     else:
1246 |         dvec = (ncon - np.arange(1,ncon+1) + 1) * alpha / ncon
1247 | 
1248 |     if seed:
1249 |         np.random.seed(seed)
1250 | 
1251 |     bvec=np.full([J,nboot], np.nan)
1252 |     for i, j in enumerate(x):
1253 |         data = np.random.choice(j, size=(nboot, len(j)))
1254 |         bvec[i,:]=[est(row, *args) for row in data]
1255 | 
1256 |     bcon=con.T @ bvec
1257 |     tvec=con.T @ mvec
1258 |     test=np.full(ncon, np.nan)
1259 |     for d in range(ncon):
1260 |         tv = np.sum(bcon[d,:] == 0) / nboot
1261 |         test[d] = np.sum(bcon[d, :] > 0) / nboot + .5 * tv
1262 |         if test[d] > .5:
1263 |             test[d] = 1 - test[d]
1264 | 
1265 |     output=np.full([ncon,6], np.nan)
1266 |     test=2*test
1267 |     temp2=(-test).argsort()
1268 |     zvec = dvec[:ncon]
1269 |     output[temp2, 3] = zvec
1270 |     icl = int(np.round(dvec[-1] * nboot / 2) + 1) - 1
1271 |     icu = nboot - icl - 3
1272 | 
1273 |     for ic in range(ncon):
1274 |         output[ic, 1] = tvec[ic]
1275 |         output[ic, 0] = ic
1276 |         output[ic, 2] = test[ic]
1277 |         temp = np.sort(bcon[ic, :])
1278 |         output[ic, 4] = temp[icl]
1279 |         output[ic, 5] = temp[icu]
1280 | 
1281 | 
1282 |     num_sig = np.sum(output[:, 2] <= output[:, 3])
1283 |     cols=["con_num","psihat", "p_value", "p_crit", "ci_lower", "ci_upper"]
1284 |     output=pd.DataFrame(output, columns=cols)
1285 | 
1286 |     results={'output': output, 'con': con, 'num_sig': num_sig}
1287 | 
1288 |     return results
1289 | 
1290 | def l2drmci(x,y, est, *args, pairwise_drop_na=True, alpha=.05, nboot=2000, seed=False):
1291 | 
1292 |     """
1293 |     Compute a bootstrap confidence interval for a
1294 |     measure of location associated with the distribution of x-y.
1295 |     That is, compare x and y by looking at all possible difference scores
1296 |     in random samples of `x` and `y`.
`x` and `y` are possibly dependent.
1297 | 
1298 |     Note that arguments up to and including `args` are positional arguments
1299 | 
1300 |     :param x: Pandas Series
1301 |         Data for group one
1302 | 
1303 |     :param y: Pandas Series
1304 |         Data for group two
1305 | 
1306 |     :param est: function
1307 |         Measure of location (currently only `trim_mean` is supported)
1308 | 
1309 |     :param args: list/value
1310 |         Parameter(s) for measure of location (e.g., .2)
1311 | 
1312 |     :param pairwise_drop_na: bool
1313 |         If True, treat data as dependent and remove any row with missing data. If False,
1314 |         remove missing data for each group separately (cannot deal with unequal sample sizes)
1315 | 
1316 |     :param alpha: float
1317 |         Alpha level (default is .05)
1318 | 
1319 |     :param nboot: int
1320 |         Number of bootstrap samples (default is 2000)
1321 | 
1322 |     :param seed: bool
1323 |         Random seed for reproducible results (default is `False`)
1324 | 
1325 |     :return:
1326 |         Dictionary of results
1327 | 
1328 |         ci: list
1329 | 
1330 |             Confidence interval
1331 | 
1332 |         p_value: float
1333 | 
1334 |             p-value
1335 |     """
1336 | 
1337 |     x, y = pandas_to_arrays([x, y])
1338 | 
1339 |     if pairwise_drop_na:
1340 |         m1 = np.c_[x, y] # cbind
1341 |         x = m1[~np.isnan(m1).any(axis=1)]
1342 | 
1343 |     else:
1344 |         x = x[~np.isnan(x)]
1345 |         y = y[~np.isnan(y)]
1346 | 
1347 |         if len(x) != len(y):
1348 |             raise Exception("With unequal sample sizes, you might consider wmwpb "
1349 |                             "(currently not implemented)")
1350 | 
1351 |         else:
1352 |             x = np.c_[x, y] # cbind
1353 | 
1354 |     if seed:
1355 |         np.random.seed(seed)
1356 | 
1357 |     data = np.random.choice(x.shape[0], size=(nboot, len(x)))
1358 | 
1359 |     bvec=np.full(nboot, np.nan)
1360 |     for i in range(nboot):
1361 |         bvec[i] = \
1362 |             loc2dif(x[data[i,:], 0], x[data[i,:], 1], est, *args,
1363 |                     drop_na=pairwise_drop_na)
1364 | 
1365 |     bvec=np.sort(bvec)
1366 |     low = int(np.round((alpha / 2) * nboot) + 1) -1
1367 |     up = nboot - low -2
1368 |     temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
1369 |     sig_level = 2 * (np.min([temp, 1 - temp]))
1370 |     ci=[bvec[low], bvec[up]]
1371 | 
1372 |     results=dict(zip(['ci', 'p_value'], [ci, sig_level]))
1373 | 
1374 |     return results
1375 | 
1376 | def loc2dif(x,y, est, *args, drop_na=True):
1377 | 
1378 |     """
1379 |     Compute a measure of location associated with the
1380 |     distribution of x-y, the typical difference between two randomly sampled values.
1381 |     The measure of location is indicated by the argument
1382 |     est.
1383 | 
1384 |     x and y are paired data or independent variables having the same length.
1385 |     If x and y have different lengths, use the function wmwloc (not currently implemented)
1386 | 
1387 |     An advantage of this estimator is its relatively high efficiency, even under
1388 |     normality, versus using sample means.
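    A sketch of the core computation (after missing values are handled),
    assuming `trim_mean` as the estimator:

        import numpy as np
        from scipy.stats import trim_mean

        x = np.array([1., 2., 3.])
        y = np.array([0., 2., 4.])
        diffs = np.subtract.outer(x, y).flatten()  # all pairwise x-y differences
        trim_mean(diffs, .2)  # the "typical" difference between x and y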
1389 | 1390 | :param x: 1391 | :param y: 1392 | :param est: 1393 | :param args: 1394 | :param drop_na: 1395 | :return: 1396 | """ 1397 | 1398 | if drop_na: 1399 | m1 = np.c_[x, y] # cbind 1400 | m1 = m1[~np.isnan(m1).any(axis=1)] 1401 | x, y = [m1[:,0], m1[:,1]] 1402 | 1403 | else: 1404 | x=x[~np.isnan(x)] 1405 | y=y[~np.isnan(y)] 1406 | 1407 | temp=np.subtract.outer(x,y).reshape(len(x)*len(y)) 1408 | val=est(temp, *args) 1409 | 1410 | return val 1411 | 1412 | 1413 | 1414 | 1415 | 1416 | 1417 | 1418 | 1419 | 1420 | 1421 | 1422 | -------------------------------------------------------------------------------- /hypothesize/compare_groups_with_two_factors/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from ._compare_groups_with_two_factors import * 3 | -------------------------------------------------------------------------------- /hypothesize/measuring_associations/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from ._measuring_associations import * -------------------------------------------------------------------------------- /hypothesize/measuring_associations/_measuring_associations.py: -------------------------------------------------------------------------------- 1 | __all__ = ["wincor", "pbcor", "corb", "pball", "winall"] 2 | 3 | import numpy as np 4 | from scipy.stats.mstats import winsorize 5 | from scipy.stats import t, chi2, trim_mean 6 | from hypothesize.utilities import pandas_to_arrays 7 | 8 | def wincor(x, y, tr=.2): 9 | 10 | """ 11 | Compute the winsorized correlation between `x` and `y`. 12 | This function also returns the winsorized covariance. 13 | 14 | 15 | :param x: Pandas Series 16 | Data for group one 17 | 18 | :param y: Pandas Series 19 | Data for group two 20 | 21 | :param tr: float 22 | Proportion to winsorize (default is .2) 23 | 24 | :return: 25 | Dictionary of results 26 | 27 | cor: float 28 | Winsorized correlation 29 | 30 | nval: int 31 | Number of observations 32 | 33 | sig: float 34 | p-value 35 | 36 | wcov: float 37 | Winsorized covariance 38 | """ 39 | 40 | if type(x) is not np.ndarray: 41 | x, y=pandas_to_arrays([x, y]) 42 | 43 | m1 = np.c_[x, y] # cbind 44 | m1 = m1[~np.isnan(m1).any(axis=1)] 45 | nval = m1.shape[0] 46 | x = m1[:, 0] 47 | y = m1[:, 1] 48 | g = np.floor(tr * len(x)) 49 | xvec = winsorize(x, limits=(tr,tr)) 50 | yvec = winsorize(y, limits=(tr,tr)) 51 | wcor = np.corrcoef(xvec, yvec)[0,1] 52 | wcov = np.cov(xvec, yvec)[0,1] 53 | test = wcor * np.sqrt((len(x) - 2) / (1. - wcor ** 2)) 54 | sig = 2 * (1 - t.cdf(abs(test), len(x) - 2 * g - 2)) 55 | 56 | res={'cor': wcor, 'wcov': wcov, 'sig': sig, 'nval': nval} 57 | 58 | return res 59 | 60 | def pbcor(x, y, beta=.2): 61 | 62 | """ 63 | Compute the percentage bend 64 | correlation between `x` and `y` 65 | 66 | 67 | :param x: Pandas Series 68 | Data for group one 69 | 70 | :param y: Pandas Series 71 | Data for group two 72 | 73 | :param beta: float 74 | `0 < beta < .5`. Beta is analogous to trimming in 75 | other functions and related to the measure of 76 | dispersion used in the percentage bend 77 | calculation. 
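    Example (a minimal sketch mirroring the call in build_test_data.py;
    `create_example_data` comes from hypothesize.utilities):

        from hypothesize.utilities import create_example_data

        df = create_example_data(2)
        results = pbcor(df.cell_1, df.cell_2)
        results['cor'], results['p_value']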
78 | 
79 |     :return:
80 |         Dictionary of results
81 | 
82 |         cor: float
83 |             Correlation
84 | 
85 |         nval: int
86 |             Number of observations
87 | 
88 |         p_value: float
89 |             p-value
90 | 
91 |         test: float
92 |             Test statistic
93 | 
94 |     """
95 | 
96 |     if type(x) is not np.ndarray:
97 |         x, y = pandas_to_arrays([x, y])
98 | 
99 |     if len(x) != len(y):
100 |         raise Exception("The arrays do not have equal lengths")
101 | 
102 |     m1 = np.c_[x, y] # cbind
103 |     m1 = m1[~np.isnan(m1).any(axis=1)]
104 |     nval = m1.shape[0]
105 |     x = m1[:, 0]
106 |     y = m1[:, 1]
107 |     temp = np.sort(abs(x - np.median(x)))
108 |     omhatx = temp[int(np.floor((1 - beta) * len(x)))-1]
109 |     temp = np.sort(abs(y - np.median(y)))
110 |     omhaty = temp[int(np.floor((1 - beta) * len(y)))-1]
111 | 
112 |     a = (x - pbos(x, beta)) / omhatx
113 |     b = (y - pbos(y, beta)) / omhaty
114 | 
115 |     a = np.where(a <= -1, -1, a)
116 |     a = np.where(a >= 1, 1, a)
117 |     b = np.where(b <= -1, -1, b)
118 |     b = np.where(b >= 1, 1, b)
119 | 
120 |     pbcor_result = sum(a * b) / np.sqrt(sum(a ** 2) * sum(b ** 2))
121 |     test = pbcor_result * np.sqrt((len(x) - 2) / (1 - pbcor_result ** 2))
122 |     sig = 2 * (1 - t.cdf(abs(test), len(x) - 2))
123 | 
124 |     res = {'cor': pbcor_result, 'test': test, 'p_value': sig, 'nval': nval}
125 |     return res
126 | 
127 | def pbos(x, beta=.2):
128 | 
129 |     """
130 |     Compute the one-step percentage bend measure of location
131 | 
132 |     :param x:
133 |     :param beta:
134 |     :return:
135 |     """
136 | 
137 |     temp = np.sort(abs(x - np.median(x)))
138 |     omhatx = temp[int(np.floor((1 - beta) * len(x)))-1]
139 |     psi = (x - np.median(x)) / omhatx
140 |     i1 = len(psi[psi < -1])
141 |     i2 = len(psi[psi > 1])
142 | 
143 |     sx = np.where(psi < -1, 0, x)
144 |     sx = np.where(psi > 1, 0, sx)
145 | 
146 |     pbos_result = (sum(sx) + omhatx * (i2 - i1)) / (len(x) - i1 - i2)
147 | 
148 |     return pbos_result
149 | 
150 | def corb(corfun, x, y, alpha, nboot, *args, seed=False):
151 | 
152 |     """
153 |     Compute a 1-alpha confidence interval for a
154 |     correlation using the percentile bootstrap method.
155 |     The function `corfun` is any function that returns a
156 |     correlation coefficient. The functions pbcor and
157 |     wincor follow this convention. When using
158 |     Pearson's correlation, and when n<250, use
159 |     lsfitci instead (not yet implemented).
160 | 
161 |     Note that arguments up to and including `args` are positional arguments
162 | 
163 |     :param corfun: function
164 |         corfun is any function that returns a correlation coefficient
165 | 
166 |     :param x: Pandas Series
167 |         Data for group one
168 | 
169 |     :param y: Pandas Series
170 |         Data for group two
171 | 
172 |     :param alpha: float
173 |         Alpha level
174 | 
175 |     :param nboot: int
176 |         Number of bootstrap samples
177 | 
178 |     :param args: list/value
179 |         List of arguments to corfun (e.g., .2)
180 | 
181 |     :param seed: bool
182 |         Random seed for reproducible results. Default is `False`.
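    Example (a minimal sketch using `wincor` as `corfun`;
    `create_example_data` comes from hypothesize.utilities, and the
    trailing `.2` is the winsorizing proportion passed through to `wincor`):

        from hypothesize.utilities import create_example_data

        df = create_example_data(2)
        results = corb(wincor, df.cell_1, df.cell_2, .05, 2000, .2)
        results['ci']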
183 | 
184 |     :return:
185 |         Dictionary of results
186 | 
187 |         ci: list
188 |             Confidence interval
189 | 
190 |         cor: float
191 |             Correlation estimate
192 | 
193 |         p_value: float
194 |             p-value
195 | 
196 |     """
197 | 
198 |     x, y=pandas_to_arrays([x, y])
199 | 
200 | 
201 |     m1 = np.c_[x, y] # cbind
202 |     m1 = m1[~np.isnan(m1).any(axis=1)]
203 |     nval = m1.shape[0]
204 |     x = m1[:, 0]
205 |     y = m1[:, 1]
206 |     est = corfun(x, y, *args)['cor']#[0]
207 | 
208 |     if seed:
209 |         np.random.seed(seed)
210 | 
211 |     data_inds = np.random.choice(len(x), size=(nboot, len(x)))
212 |     bvec = np.array([corbsub(row_inds, x, y, corfun, *args) for row_inds in data_inds])
213 | 
214 |     ihi = int(np.floor((1 - alpha / 2) * nboot + .5))
215 |     ilow = int(np.floor((alpha / 2) * nboot + .5))
216 |     bsort = sorted(bvec)
217 |     corci = [bsort[ilow], bsort[ihi]]
218 |     phat = sum(bvec < 0) / nboot
219 |     sig = 2 * min(phat, 1 - phat)
220 | 
221 |     #return corci, sig, est
222 |     return {'ci': corci, 'p_value': sig, 'cor': est}
223 | 
224 | def corbsub(isub, x, y, corfun, *args):
225 | 
226 |     """
227 |     Compute correlation for x[isub] and y[isub]
228 |     isub is a vector of length n,
229 |     a bootstrap sample from the sequence of integers
230 |     0, 1, 2, 3, ..., n-1
231 | 
232 |     This function is used by other functions when computing
233 |     bootstrap estimates.
234 | 
235 |     corfun is some correlation function
236 |     """
237 | 
238 |     corbsub_results = corfun(x[isub], y[isub], *args)['cor']#[0]
239 | 
240 |     return corbsub_results
241 | 
242 | def pball(x, beta=.2):
243 | 
244 |     """
245 |     Compute the percentage bend correlation matrix
246 |     for all pairs of columns in `x`. This function also
247 |     returns the two-sided significance level for all pairs
248 |     of variables, plus a test of zero correlation
249 |     among all pairs.
250 | 
251 | 
252 |     :param x: Pandas DataFrame
253 |         Each column represents a variable to use in the correlations
254 | 
255 |     :param beta: float
256 |         `0 < beta < .5`. Beta is analogous to trimming in
257 |         other functions and related to the measure of
258 |         dispersion used in the percentage bend
259 |         calculation.
260 | 
261 |     :return:
262 |         Dictionary of results
263 | 
264 |         H: float
265 |             The test statistic $H$. Reject the null hypothesis if $H > \chi^2_{1-\alpha}$,
266 |             the $1-\alpha$ quantile.
267 | 
268 |         H_p_value: float
269 |             p-value corresponding to the test that all correlations are equal to zero
270 | 
271 |         p_value: array
272 |             p-value matrix corresponding to each pairwise correlation
273 | 
274 |         pbcorm: array
275 |             Correlation matrix
276 | 
277 |     """
278 | 
279 |     m=x.values
280 |     ncol=m.shape[1]
281 | 
282 |     pbcorm=np.zeros([ncol, ncol])
283 |     temp=np.ones([ncol, ncol])
284 |     siglevel=np.full([ncol, ncol], np.nan)
285 |     #cmat = np.zeros([ncol, ncol])
286 | 
287 |     for i in range(ncol):
288 |         for j in range(i,ncol):
289 |             if i < j:
290 |                 pbc = pbcor(m[:, i], m[:, j], beta)
291 |                 pbcorm[i, j] = pbc['cor']
292 |                 temp[i, j] = pbcorm[i, j]
293 |                 temp[j, i] = pbcorm[i, j]
294 |                 siglevel[i, j] = pbc['p_value']
295 |                 siglevel[j, i] = siglevel[i, j]
296 | 
297 | 
298 |     tstat = pbcorm * np.sqrt((m.shape[0] - 2) / (1 - pbcorm ** 2))
299 |     cmat = np.sqrt((m.shape[0] - 2.5) * np.log(1 + tstat ** 2 / (m.shape[0] - 2)))
300 |     bv = 48 * (m.shape[0] - 2.5) ** 2
301 |     cmat = \
302 |         cmat + (cmat ** 3 + 3 * cmat) / bv - (4 * cmat ** 7 + 33 * cmat ** 5 + 240 * cmat ** 3 + 855 * cmat) / \
303 |         (10 * bv ** 2 + 8 * bv * cmat ** 4 + 1000 * bv)
304 | 
305 |     H = np.sum(cmat ** 2)
306 |     df = ncol * (ncol - 1) / 2
307 |     h_siglevel = 1 - chi2.cdf(H, df)
308 | 
309 |     results={"pbcorm": temp, "p_value": siglevel,
310 |              "H":H, "H_p_value": h_siglevel}
311 | 
312 |     return results
313 | 
314 | def winall(x, tr=.2):
315 | 
316 |     """
317 |     Compute the Winsorized correlation and covariance matrix
318 |     for all pairs of columns in `x`. This function also
319 |     returns the two-sided significance level for all pairs
320 |     of variables.
321 | 
322 | 
323 | 
324 |     :param x: Pandas DataFrame
325 |         Each column represents a variable to use in the correlations
326 | 
327 |     :param tr: float
328 |         Proportion to winsorize (default is .2)
329 | 
330 |     :return:
331 |         Dictionary of results
332 | 
333 |         center: array
334 |             Trimmed mean for each variable
335 | 
336 |         p_value: array
337 |             p-value array corresponding to the pairwise correlations
338 | 
339 |         wcor: array
340 |             Winsorized correlation matrix
341 | 
342 |         wcov: array
343 |             Winsorized covariance matrix
344 | 
345 | 
346 |     """
347 | 
348 |     m = x.values
349 |     ncol = m.shape[1]
350 | 
351 |     wcor = np.ones([ncol, ncol])
352 |     wcov = np.zeros([ncol, ncol])
353 |     siglevel = np.full([ncol, ncol], np.nan)
354 | 
355 |     for i in range(ncol):
356 |         #ip = i
357 |         for j in range(i,ncol):
358 |             val = wincor(m[:, i], m[:, j], tr)
359 |             wcor[i, j] = val['cor']
360 |             wcor[j, i] = wcor[i, j]
361 | 
362 |             if i == j:
363 |                 wcor[i, j] = 1
364 | 
365 |             wcov[i, j] = val['wcov']
366 |             wcov[j, i] = wcov[i, j]
367 | 
368 |             if i != j:
369 |                 siglevel[i, j] = val['sig']
370 |                 siglevel[j, i] = siglevel[i, j]
371 | 
372 |     m=m[~np.isnan(m).any(axis=1)]
373 |     cent=trim_mean(m, tr)
374 | 
375 |     return {"wcor": wcor, "wcov": wcov, "center": cent, "p_value": siglevel}
--------------------------------------------------------------------------------
/hypothesize/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/__init__.py
--------------------------------------------------------------------------------
/hypothesize/tests/build_test_data.py:
--------------------------------------------------------------------------------
1 | from hypothesize.measuring_associations import *
2 | from
hypothesize.compare_groups_with_single_factor import * 3 | from hypothesize.compare_groups_with_two_factors import * 4 | from hypothesize.utilities import create_example_data, trim_mean, con1way, con2way 5 | import numpy as np 6 | import pickle 7 | 8 | alpha=.05 9 | nboot=100 10 | tr=.2 11 | beta=.2 12 | 13 | def pkl_l2drmci(): 14 | 15 | np.random.seed(42) 16 | df = create_example_data(2) 17 | results = l2drmci(df.cell_1, df.cell_2, trim_mean, tr) 18 | pickle.dump(results, open("hypothesize/tests/test_data/l2drmci.pkl", "wb")) 19 | 20 | def pkl_linconb(): 21 | 22 | np.random.seed(42) 23 | df = create_example_data(3) 24 | results = linconb(df, con1way(3)) 25 | pickle.dump(results, open("hypothesize/tests/test_data/linconb.pkl", "wb")) 26 | 27 | def pkl_pb2gen(): 28 | 29 | np.random.seed(42) 30 | df = create_example_data(2) 31 | results = pb2gen(df.cell_1, df.cell_2, trim_mean, tr) 32 | pickle.dump(results, open("hypothesize/tests/test_data/pb2gen.pkl", "wb")) 33 | 34 | def pkl_tmcppb(): 35 | 36 | np.random.seed(42) 37 | df = create_example_data(3) 38 | results = tmcppb(df, trim_mean, tr) 39 | pickle.dump(results, open("hypothesize/tests/test_data/tmcppb.pkl", "wb")) 40 | 41 | def pkl_yuenbt(): 42 | 43 | np.random.seed(42) 44 | df = create_example_data(2) 45 | results = yuenbt(df.cell_1, df.cell_2) 46 | pickle.dump(results, open("hypothesize/tests/test_data/yuenbt.pkl", "wb")) 47 | 48 | def pkl_bootdpci(): 49 | 50 | np.random.seed(42) 51 | df = create_example_data(3) 52 | results = bootdpci(df, trim_mean, tr) 53 | pickle.dump(results, open("hypothesize/tests/test_data/bootdpci.pkl", "wb")) 54 | 55 | def pkl_rmmcppb(): 56 | 57 | np.random.seed(42) 58 | df = create_example_data(3) 59 | results = rmmcppb(df, trim_mean, tr) 60 | pickle.dump(results, open("hypothesize/tests/test_data/rmmcppb.pkl", "wb")) 61 | 62 | def pkl_lindepbt(): 63 | 64 | np.random.seed(42) 65 | df = create_example_data(3) 66 | results = lindepbt(df) 67 | pickle.dump(results, open("hypothesize/tests/test_data/lindepbt.pkl", "wb")) 68 | 69 | def pkl_ydbt(): 70 | 71 | np.random.seed(42) 72 | df = create_example_data(2) 73 | results = ydbt(df.cell_1, df.cell_2) 74 | pickle.dump(results, open("hypothesize/tests/test_data/ydbt.pkl", "wb")) 75 | 76 | def pkl_wwmcppb(): 77 | 78 | np.random.seed(42) 79 | df = create_example_data(6) 80 | results = wwmcppb(2, 3, df, trim_mean, tr) 81 | pickle.dump(results, open("hypothesize/tests/test_data/wwmcppb.pkl", "wb")) 82 | 83 | def pkl_wwmcpbt(): 84 | 85 | np.random.seed(42) 86 | df = create_example_data(6) 87 | results = wwmcpbt(2, 3, df, tr) 88 | pickle.dump(results, open("hypothesize/tests/test_data/wwmcpbt.pkl", "wb")) 89 | 90 | def pkl_bwamcp(): 91 | 92 | np.random.seed(42) 93 | df = create_example_data(6) 94 | results = bwamcp(2, 3, df) 95 | pickle.dump(results, open("hypothesize/tests/test_data/bwamcp.pkl", "wb")) 96 | 97 | def pkl_bwbmcp(): 98 | 99 | np.random.seed(42) 100 | df = create_example_data(6) 101 | results = bwbmcp(2, 3, df) 102 | pickle.dump(results, open("hypothesize/tests/test_data/bwbmcp.pkl", "wb")) 103 | 104 | def pkl_bwmcp(): 105 | 106 | np.random.seed(42) 107 | df = create_example_data(6) 108 | results = bwmcp(2, 3, df) 109 | pickle.dump(results, open("hypothesize/tests/test_data/bwmcp.pkl", "wb")) 110 | 111 | def pkl_bwimcp(): 112 | 113 | np.random.seed(42) 114 | df = create_example_data(6) 115 | results = bwimcp(2, 3, df) 116 | pickle.dump(results, open("hypothesize/tests/test_data/bwimcp.pkl", "wb")) 117 | 118 | def pkl_bwmcppb(): 119 | 120 | np.random.seed(42) 
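    # six cells form the 2-by-3 design passed to bwmcppb below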
121 | df = create_example_data(6) 122 | results = bwmcppb(2, 3, df, trim_mean, tr) 123 | pickle.dump(results, open("hypothesize/tests/test_data/bwmcppb.pkl", "wb")) 124 | 125 | def pkl_spmcpa(): 126 | 127 | np.random.seed(42) 128 | df = create_example_data(6) 129 | results = spmcpa(2, 3, df, trim_mean, tr) 130 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpa.pkl", "wb")) 131 | 132 | def pkl_spmcpb(): 133 | 134 | np.random.seed(42) 135 | df = create_example_data(6) 136 | results = spmcpb(2, 3, df, trim_mean, tr) 137 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpb.pkl", "wb")) 138 | 139 | def pkl_spmcpi(): 140 | 141 | np.random.seed(42) 142 | df = create_example_data(6) 143 | results = spmcpi(2, 3, df, trim_mean, tr) 144 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpi.pkl", "wb")) 145 | 146 | def pkl_corb(): 147 | 148 | np.random.seed(42) 149 | df = create_example_data(2) 150 | results = corb(wincor, df.cell_1, df.cell_2, alpha, nboot, tr) 151 | pickle.dump(results, open("hypothesize/tests/test_data/corb.pkl", "wb")) 152 | 153 | def pkl_pball(): 154 | 155 | np.random.seed(42) 156 | df = create_example_data(3) 157 | results = pball(df) 158 | pickle.dump(results, open("hypothesize/tests/test_data/pball.pkl", "wb")) 159 | 160 | def pkl_pbcor(): 161 | 162 | np.random.seed(42) 163 | df = create_example_data(2) 164 | results = pbcor(df.cell_1, df.cell_2) 165 | pickle.dump(results, open("hypothesize/tests/test_data/pbcor.pkl", "wb")) 166 | 167 | def pkl_winall(): 168 | 169 | np.random.seed(42) 170 | df = create_example_data(3) 171 | results = winall(df) 172 | pickle.dump(results, open("hypothesize/tests/test_data/winall.pkl", "wb")) 173 | 174 | def pkl_wincor(): 175 | 176 | np.random.seed(42) 177 | df = create_example_data(2) 178 | results = wincor(df.cell_1, df.cell_2) 179 | pickle.dump(results, open("hypothesize/tests/test_data/wincor.pkl", "wb")) 180 | -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bootdpci.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bootdpci.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwamcp.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwamcp.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwbmcp.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwbmcp.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwimcp.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwimcp.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwmcp.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwmcp.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwmcppb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwmcppb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/corb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/corb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/l2drmci.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/l2drmci.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/linconb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/linconb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/lindepbt.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/lindepbt.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/pb2gen.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pb2gen.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/pball.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pball.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/pbcor.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pbcor.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/rmmcppb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/rmmcppb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/spmcpa.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpa.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/spmcpb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/spmcpi.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpi.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/tmcppb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/tmcppb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/winall.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/winall.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/wincor.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wincor.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/wwmcpbt.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wwmcpbt.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/wwmcppb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wwmcppb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/ydbt.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/ydbt.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/yuenbt.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/yuenbt.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_funcs.py: -------------------------------------------------------------------------------- 1 | from hypothesize.measuring_associations import * 2 | from hypothesize.compare_groups_with_single_factor import * 3 | from 
hypothesize.compare_groups_with_two_factors import * 4 | from hypothesize.utilities import create_example_data, trim_mean, con1way 5 | import numpy as np 6 | import pandas as pd 7 | from pandas._testing import assert_frame_equal 8 | import pickle 9 | import os 10 | 11 | alpha=.05 12 | nboot=100 13 | tr=.2 14 | beta=.2 15 | 16 | try: 17 | os.chdir('hypothesize/tests') 18 | except: 19 | pass 20 | 21 | def run_all_pkl_funcs(): 22 | 23 | from hypothesize.tests import build_test_data 24 | 25 | for i in dir(build_test_data): 26 | item = getattr(build_test_data,i) 27 | if callable(item) and i.startswith('pkl'): 28 | item() 29 | 30 | def build_truth_list(expected_results): 31 | 32 | truth_list=[] 33 | 34 | if type(expected_results) is list: 35 | 36 | for item in expected_results: 37 | nested_truth_list=build_truth_list(item) 38 | truth_list.append(nested_truth_list) 39 | 40 | elif type(expected_results) is dict: 41 | 42 | for k in expected_results: 43 | 44 | if type(expected_results[k]) is dict: 45 | nested_truth_list=[True] * len(expected_results[k]) 46 | 47 | truth_list.append(nested_truth_list) 48 | else: 49 | truth_list.append(True) 50 | 51 | return truth_list 52 | 53 | def check_dict_items_equality(expected_results, actual_results): 54 | 55 | actual_truth=[] 56 | 57 | if type(expected_results) is list: 58 | for exp_item, act_item in zip(expected_results, actual_results): 59 | nested_truth = check_dict_items_equality(exp_item, act_item) 60 | actual_truth.append(nested_truth) 61 | 62 | elif type(expected_results) is dict: 63 | 64 | for k in expected_results: 65 | 66 | if type(expected_results[k]) is np.ndarray: 67 | 68 | # truth=True if not np.testing.assert_array_equal(expected_results[k], actual_results[k]) \ 69 | # else False 70 | 71 | truth=True if not np.testing.assert_allclose(expected_results[k], actual_results[k]) \ 72 | else False 73 | 74 | actual_truth.append(truth) 75 | 76 | elif type(expected_results[k]) is pd.DataFrame: 77 | 78 | # truth=True if not assert_frame_equal(expected_results[k], actual_results[k]) \ 79 | # else False 80 | 81 | truth=True if not assert_frame_equal(expected_results[k], actual_results[k], check_less_precise=True) \ 82 | else False 83 | 84 | actual_truth.append(truth) 85 | 86 | elif type(expected_results[k]) is dict: 87 | nested_truth=check_dict_items_equality(expected_results[k], actual_results[k]) 88 | actual_truth.append(nested_truth) 89 | 90 | else: 91 | 92 | if expected_results[k] is None and actual_results[k] is None: \ 93 | truth = True 94 | else: 95 | truth=True if not np.testing.assert_almost_equal(expected_results[k], actual_results[k]) \ 96 | else False 97 | 98 | actual_truth.append(truth) 99 | 100 | return actual_truth 101 | 102 | def test_l2drmci(): 103 | 104 | np.random.seed(42) 105 | df = create_example_data(2) 106 | results = l2drmci(df.cell_1, df.cell_2, trim_mean, tr) 107 | expected = pickle.load(open("test_data/l2drmci.pkl", "rb")) 108 | expected_truth=build_truth_list(expected) 109 | actual_truth = check_dict_items_equality(expected, results) 110 | 111 | #assert results == expected 112 | assert actual_truth == expected_truth 113 | 114 | def test_linconb(): 115 | 116 | np.random.seed(42) 117 | df = create_example_data(3) 118 | results = linconb(df, con1way(3)) 119 | expected = pickle.load(open("test_data/linconb.pkl", "rb")) 120 | expected_truth=build_truth_list(expected) 121 | actual_truth = check_dict_items_equality(expected, results) 122 | 123 | assert actual_truth == expected_truth 124 | 125 | def test_pb2gen(): 126 | 127 | 
np.random.seed(42) 128 | df = create_example_data(2) 129 | results = pb2gen(df.cell_1, df.cell_2, trim_mean, tr) 130 | expected = pickle.load(open("test_data/pb2gen.pkl", "rb")) 131 | expected_truth=build_truth_list(expected) 132 | actual_truth = check_dict_items_equality(expected, results) 133 | 134 | #assert results == expected 135 | assert actual_truth == expected_truth 136 | 137 | def test_tmcppb(): 138 | 139 | np.random.seed(42) 140 | df = create_example_data(3) 141 | results = tmcppb(df, trim_mean, tr) 142 | expected = pickle.load(open("test_data/tmcppb.pkl", "rb")) 143 | expected_truth=build_truth_list(expected) 144 | actual_truth = check_dict_items_equality(expected, results) 145 | 146 | assert actual_truth == expected_truth 147 | 148 | def test_yuenbt(): 149 | 150 | np.random.seed(42) 151 | df = create_example_data(2) 152 | results = yuenbt(df.cell_1, df.cell_2) 153 | expected = pickle.load(open("test_data/yuenbt.pkl", "rb")) 154 | expected_truth=build_truth_list(expected) 155 | actual_truth = check_dict_items_equality(expected, results) 156 | 157 | #assert results == expected 158 | assert actual_truth == expected_truth 159 | 160 | def test_bootdpci(): 161 | 162 | np.random.seed(42) 163 | df = create_example_data(3) 164 | results = bootdpci(df, trim_mean, tr) 165 | expected = pickle.load(open("test_data/bootdpci.pkl", "rb")) 166 | expected_truth=build_truth_list(expected) 167 | actual_truth = check_dict_items_equality(expected, results) 168 | 169 | assert actual_truth == expected_truth 170 | 171 | def test_rmmcppb(): 172 | 173 | np.random.seed(42) 174 | df = create_example_data(3) 175 | results = rmmcppb(df, trim_mean, tr) 176 | expected = pickle.load(open("test_data/rmmcppb.pkl", "rb")) 177 | expected_truth=build_truth_list(expected) 178 | actual_truth = check_dict_items_equality(expected, results) 179 | 180 | assert actual_truth == expected_truth 181 | 182 | def test_lindepbt(): 183 | 184 | np.random.seed(42) 185 | df = create_example_data(3) 186 | results = lindepbt(df) 187 | expected = pickle.load(open("test_data/lindepbt.pkl", "rb")) 188 | expected_truth=build_truth_list(expected) 189 | actual_truth = check_dict_items_equality(expected, results) 190 | 191 | assert actual_truth == expected_truth 192 | 193 | def test_ydbt(): 194 | 195 | np.random.seed(42) 196 | df = create_example_data(2) 197 | results = ydbt(df.cell_1, df.cell_2) 198 | expected = pickle.load(open("test_data/ydbt.pkl", "rb")) 199 | expected_truth=build_truth_list(expected) 200 | actual_truth = check_dict_items_equality(expected, results) 201 | 202 | #assert results == expected 203 | assert actual_truth == expected_truth 204 | 205 | def test_wwmcppb(): 206 | 207 | np.random.seed(42) 208 | df = create_example_data(6) 209 | results = wwmcppb(2, 3, df, trim_mean, tr) 210 | expected = pickle.load(open("test_data/wwmcppb.pkl", "rb")) 211 | expected_truth=build_truth_list(expected) 212 | actual_truth = check_dict_items_equality(expected, results) 213 | 214 | assert actual_truth == expected_truth 215 | 216 | def test_wwmcpbt(): 217 | 218 | np.random.seed(42) 219 | df = create_example_data(6) 220 | results = wwmcpbt(2, 3, df, tr) 221 | expected = pickle.load(open("test_data/wwmcpbt.pkl", "rb")) 222 | expected_truth=build_truth_list(expected) 223 | actual_truth = check_dict_items_equality(expected, results) 224 | 225 | assert actual_truth == expected_truth 226 | 227 | def test_bwamcp(): 228 | 229 | np.random.seed(42) 230 | df = create_example_data(6) 231 | results = bwamcp(2, 3, df) 232 | expected = 
pickle.load(open("test_data/bwamcp.pkl", "rb")) 233 | expected_truth=build_truth_list(expected) 234 | actual_truth = check_dict_items_equality(expected, results) 235 | 236 | assert actual_truth == expected_truth 237 | 238 | def test_bwbmcp(): 239 | 240 | np.random.seed(42) 241 | df = create_example_data(6) 242 | results = bwbmcp(2, 3, df) 243 | expected = pickle.load(open("test_data/bwbmcp.pkl", "rb")) 244 | 245 | print(results) 246 | print(expected) 247 | expected_truth=build_truth_list(expected) 248 | actual_truth = check_dict_items_equality(expected, results) 249 | 250 | assert actual_truth == expected_truth 251 | 252 | def test_bwmcp(): 253 | 254 | np.random.seed(42) 255 | df = create_example_data(6) 256 | results = bwmcp(2, 3, df) 257 | expected = pickle.load(open("test_data/bwmcp.pkl", "rb")) 258 | expected_truth=build_truth_list(expected) 259 | actual_truth = check_dict_items_equality(expected, results) 260 | 261 | assert actual_truth == expected_truth 262 | 263 | def test_bwimcp(): 264 | 265 | np.random.seed(42) 266 | df = create_example_data(6) 267 | results = bwimcp(2, 3, df) 268 | expected = pickle.load(open("test_data/bwimcp.pkl", "rb")) 269 | expected_truth=build_truth_list(expected) 270 | actual_truth = check_dict_items_equality(expected, results) 271 | 272 | assert actual_truth == expected_truth 273 | 274 | def test_bwmcppb(): 275 | 276 | np.random.seed(42) 277 | df = create_example_data(6) 278 | results = bwmcppb(2, 3, df, trim_mean, tr) 279 | expected = pickle.load(open("test_data/bwmcppb.pkl", "rb")) 280 | expected_truth=build_truth_list(expected) 281 | actual_truth = check_dict_items_equality(expected, results) 282 | 283 | assert actual_truth == expected_truth 284 | 285 | def test_spmcpa(): 286 | 287 | np.random.seed(42) 288 | df = create_example_data(6) 289 | results = spmcpa(2, 3, df, trim_mean, tr) 290 | expected = pickle.load(open("test_data/spmcpa.pkl", "rb")) 291 | expected_truth=build_truth_list(expected) 292 | actual_truth = check_dict_items_equality(expected, results) 293 | 294 | assert actual_truth == expected_truth 295 | 296 | def test_spmcpb(): 297 | 298 | np.random.seed(42) 299 | df = create_example_data(6) 300 | results = spmcpb(2, 3, df, trim_mean, tr) 301 | expected = pickle.load(open("test_data/spmcpb.pkl", "rb")) 302 | expected_truth=build_truth_list(expected) 303 | actual_truth = check_dict_items_equality(expected, results) 304 | 305 | assert actual_truth == expected_truth 306 | 307 | def test_spmcpi(): 308 | 309 | np.random.seed(42) 310 | df = create_example_data(6) 311 | results = spmcpi(2, 3, df, trim_mean, tr) 312 | expected = pickle.load(open("test_data/spmcpi.pkl", "rb")) 313 | expected_truth=build_truth_list(expected) 314 | actual_truth = check_dict_items_equality(expected, results) 315 | 316 | assert actual_truth == expected_truth 317 | 318 | def test_corb(): 319 | 320 | np.random.seed(42) 321 | df = create_example_data(2) 322 | results = corb(wincor, df.cell_1, df.cell_2, alpha, nboot, tr) 323 | expected = pickle.load(open("test_data/corb.pkl", "rb")) 324 | expected_truth = build_truth_list(expected) 325 | actual_truth = check_dict_items_equality(expected, results) 326 | 327 | #assert results == expected 328 | assert actual_truth == expected_truth 329 | 330 | def test_pball(): 331 | 332 | np.random.seed(42) 333 | df = create_example_data(3) 334 | results = pball(df) 335 | expected = pickle.load(open("test_data/pball.pkl", "rb")) 336 | expected_truth=build_truth_list(expected) 337 | actual_truth = check_dict_items_equality(expected, results) 
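    # the assertion below passes only if every leaf of the nested result
    # dictionaries compared equal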
338 | 339 | assert actual_truth == expected_truth 340 | 341 | def test_pbcor(): 342 | 343 | np.random.seed(42) 344 | df = create_example_data(2) 345 | results = pbcor(df.cell_1, df.cell_2) 346 | expected = pickle.load(open("test_data/pbcor.pkl", "rb")) 347 | expected_truth=build_truth_list(expected) 348 | actual_truth = check_dict_items_equality(expected, results) 349 | 350 | #assert results == expected 351 | assert actual_truth == expected_truth 352 | 353 | def test_winall(): 354 | 355 | np.random.seed(42) 356 | df = create_example_data(3) 357 | results = winall(df) 358 | expected = pickle.load(open("test_data/winall.pkl", "rb")) 359 | expected_truth=build_truth_list(expected) 360 | actual_truth = check_dict_items_equality(expected, results) 361 | 362 | assert actual_truth == expected_truth 363 | 364 | def test_wincor(): 365 | 366 | np.random.seed(42) 367 | df = create_example_data(2) 368 | results = wincor(df.cell_1, df.cell_2) 369 | expected = pickle.load(open("test_data/wincor.pkl", "rb")) 370 | print(results) 371 | print(expected) 372 | expected_truth = build_truth_list(expected) 373 | actual_truth = check_dict_items_equality(expected, results) 374 | 375 | #assert results == expected 376 | assert actual_truth == expected_truth 377 | 378 | 379 | 380 | -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @article{20000755025, 2 | author="Tukey, J. W.", 3 | title="A survey of sampling from contaminated distributions", 4 | journal="Contributions to Probability and Statistics", 5 | ISSN="", 6 | publisher="Stanford University Press", 7 | year="1960", 8 | month="", 9 | volume="", 10 | number="", 11 | pages="448-485", 12 | URL="https://ci.nii.ac.jp/naid/20000755025/en/", 13 | DOI="", 14 | } 15 | 16 | @article{bradley1993introduction, 17 | title={An introduction to the bootstrap}, 18 | author={Efron, Bradley and Tibshirani, Robert J}, 19 | journal={Monographs on Statistics and Applied Probability}, 20 | volume={57}, 21 | year={1993} 22 | } 23 | 24 | @article{wilcox1998many, 25 | title={How many discoveries have been lost by ignoring modern statistical methods?}, 26 | author={Wilcox, Rand R}, 27 | journal={American Psychologist}, 28 | volume={53}, 29 | number={3}, 30 | pages={300}, 31 | year={1998}, 32 | publisher={American Psychological Association}, 33 | DOI={10.1037/0003-066X.53.3.300} 34 | } 35 | 36 | @book{wilcox2013introduction, 37 | title={Introduction to robust estimation and hypothesis testing}, 38 | author={Wilcox, Rand R}, 39 | year={2013}, 40 | publisher={Academic press}, 41 | DOI={10.1016/c2010-0-67044-1} 42 | } 43 | 44 | @inproceedings{seabold2010statsmodels, 45 | title={statsmodels: Econometric and statistical modeling with python}, 46 | author={Seabold, Skipper and Perktold, Josef}, 47 | booktitle={9th Python in Science Conference}, 48 | year={2010}, 49 | DOI={10.25080/majora-92bf1922-011} 50 | } 51 | 52 | @article{ho2019moving, 53 | title={Moving beyond P values: Data analysis with estimation graphics}, 54 | author={Ho, Joses and Tumkaya, Tayfun and Aryal, Sameer and Choi, Hyungwon and Claridge-Chang, Adam}, 55 | journal={Nature Methods}, 56 | volume={16}, 57 | number={7}, 58 | pages={565--566}, 59 | year={2019}, 60 | publisher={Nature Publishing Group}, 61 | DOI={10.1038/s41592-019-0470-3} 62 | } 63 | 64 | @InProceedings{mckinney-proc-scipy-2010, 65 | author = {{W}es {M}c{K}inney }, 66 | title = {{D}ata {S}tructures for {S}tatistical {C}omputing in 
{P}ython},
67 |   booktitle =    {{P}roceedings of the 9th {P}ython in {S}cience {C}onference},
68 |   pages =        {56--61},
69 |   year =         {2010},
70 |   editor =       {{S}t\'efan van der {W}alt and {J}arrod {M}illman},
71 |   doi =          {10.25080/Majora-92bf1922-00a}
72 | }
73 | 
74 | @article{Vallat2018,
75 |   doi = {10.21105/joss.01026},
76 |   url = {https://doi.org/10.21105/joss.01026},
77 |   year = {2018},
78 |   publisher = {The Open Journal},
79 |   volume = {3},
80 |   number = {31},
81 |   pages = {1026},
82 |   author = {Raphael Vallat},
83 |   title = {Pingouin: Statistics in {P}ython},
84 |   journal = {Journal of Open Source Software}
85 | }
86 | 
87 | @article{rom1990sequentially,
88 |   title={A sequentially rejective test procedure based on a modified {B}onferroni inequality},
89 |   author={Rom, Dror M},
90 |   journal={Biometrika},
91 |   volume={77},
92 |   number={3},
93 |   pages={663--665},
94 |   year={1990},
95 |   publisher={Oxford University Press},
96 |   DOI={10.1093/biomet/77.3.663}
97 | }
98 | 
99 | @article{hochberg1988sharper,
100 |   title={A sharper {B}onferroni procedure for multiple tests of significance},
101 |   author={Hochberg, Yosef},
102 |   journal={Biometrika},
103 |   volume={75},
104 |   number={4},
105 |   pages={800--802},
106 |   year={1988},
107 |   publisher={Oxford University Press},
108 |   DOI = {10.1093/biomet/75.4.800}
109 | }
110 | 
111 | @article{benjamini1995controlling,
112 |   title={Controlling the false discovery rate: A practical and powerful approach to multiple testing},
113 |   author={Benjamini, Yoav and Hochberg, Yosef},
114 |   journal={Journal of the Royal Statistical Society. Series B (Methodological)},
115 |   volume={57},
116 |   pages={289--300},
117 |   year={1995},
118 |   DOI = {10.1111/j.2517-6161.1995.tb02031.x}
119 | }
--------------------------------------------------------------------------------
/paper/paper.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: 'Hypothesize: Robust Statistics for Python'
3 | tags:
4 |   - Python
5 |   - R
6 |   - statistics
7 |   - statistical analysis
8 |   - bootstrapping
9 |   - trimmed mean
10 |   - data analysis
11 |   - data science
12 |   - social science
13 |   - hypothesis testing
14 | authors:
15 |   - name: Allan Campopiano
16 |     orcid: 0000-0002-3280-4447
17 |     affiliation: 1
18 |   - name: Rand R. Wilcox
19 |     orcid: 0000-0002-2524-2976
20 |     affiliation: 2
21 | 
22 | affiliations:
23 |   - name: Halton Catholic District School Board
24 |     index: 1
25 |   - name: University of Southern California
26 |     index: 2
27 | date: 08 May 2020
28 | bibliography: paper.bib
29 | ---
30 | 
31 | # Summary
32 | 
33 | Hypothesize is a robust null hypothesis significance testing (NHST) library for Python. In general,
34 | robust hypothesis testing uses techniques which minimize the effects of violating standard statistical
35 | assumptions. In particular, robust methods based on the trimmed mean [@20000755025]
36 | and/or bootstrapping [@bradley1993introduction] routinely outperform traditional statistical
37 | approaches in terms of power and accuracy. This is especially true when dealing with
38 | distributions that produce outliers [@wilcox1998many; @wilcox2013introduction].
39 | 
40 | Hypothesize is based on Rand R. Wilcox's collection of [R functions](https://dornsife.usc.edu/labs/rwilcox/software/)
41 | which contains hundreds of robust methods developed since the 1960s.
81 | In terms of learning to use the software, Hypothesize keeps the barrier to entry low for researchers. For example:
82 | 
83 | - To easily incorporate Hypothesize with standard data processing tools
84 | [see @mckinney-proc-scipy-2010], all top-level
85 | functions take a Pandas DataFrame/Series as input and return a Python dictionary
86 | (a minimal example follows this list).
87 | 
88 | - The API maps cleanly onto features of the user's statistical design.
89 | This makes it easier to discover the appropriate set of functions for a
90 | given use case.
91 | 
92 | - All top-level functions can be run directly in the browser alongside the documentation via
93 | [Google Colab Notebooks](https://colab.research.google.com/notebooks/intro.ipynb)
94 | (no local installation required).
95 | 
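96 | For instance, a winsorized correlation can be computed directly from two
97 | DataFrame columns, with the results returned as a dictionary. The following is
98 | a sketch assuming `wincor` is exposed from `hypothesize.measuring_associations`,
99 | mirroring the calls in the package's test suite:
100 | 
101 | ```python
102 | from hypothesize.utilities import create_example_data
103 | from hypothesize.measuring_associations import wincor
104 | 
105 | df = create_example_data(2)  # random DataFrame with columns cell_1 and cell_2
106 | 
107 | # returns a Python dictionary of results (e.g., the correlation and p-value)
108 | results = wincor(df.cell_1, df.cell_2)
109 | ```
110 | 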
111 | # Acknowledgements
112 | 
113 | The authors would like to thank
114 | James Desjardins,
115 | Stefon van Noordt,
116 | Lisa Collimore,
117 | Martina G. Vilas,
118 | Andrew Bennett,
119 | Charlotte Soneson,
120 | Whedon,
121 | the Journal of Open Source Software,
122 | and the Halton Catholic District School Board
123 | for their support of this project.
124 | 
125 | # References
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.7.3
2 | pandas==1.4.2
3 | numpy==1.22.0
4 | more-itertools==8.12.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 | import io
3 | import os
4 | 
5 | def get_install_requirements(path):
6 |     # parse requirements.txt, skipping blank lines and comments
7 |     content = read(path)
8 |     return [req for req in content.split("\n") if req != "" and not req.startswith("#")]
9 | 
10 | def read(path, encoding="utf-8"):
11 |     # read a file relative to this setup.py, regardless of the current working directory
12 |     path = os.path.join(os.path.dirname(__file__), path)
13 |     with io.open(path, encoding=encoding) as fp:
14 |         return fp.read()
15 | 
16 | setup(
17 |     name='hypothesize',
18 |     version='1.2.2',
19 |     description='A Python package for comparing groups and measuring associations using robust statistics.',
20 |     author='Allan Campopiano',
21 |     author_email="campopianoa@hcdsb.org",
22 |     license='BSD 3-clause',
23 |     long_description=read('README.md'),
24 |     long_description_content_type='text/markdown',
25 |     url="https://github.com/Alcampopiano/hypothesize",
26 |     packages=find_packages(),
27 |     include_package_data=True,
28 |     install_requires=get_install_requirements("requirements.txt"),
29 |     # the pinned dependencies (pandas 1.4.x, numpy 1.22.x) require Python 3.8+,
30 |     # so the old ">=3.6" constraint was misleading
31 |     python_requires=">=3.8",
32 |     tests_require=['pytest'],
33 |     classifiers=[
34 |         "Development Status :: 5 - Production/Stable",
35 |         "Environment :: Console",
36 |         "Intended Audience :: Science/Research",
37 |         "License :: OSI Approved :: BSD License",
38 |         "Natural Language :: English",
39 |         "Programming Language :: Python :: 3.8",
40 |         "Programming Language :: Python :: 3.9",
41 |         "Programming Language :: Python :: 3.10",
42 |     ],
43 | )
--------------------------------------------------------------------------------