├── .github └── workflows │ └── test_and_lint.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── docs │ ├── FAQ.md │ ├── basic_tutorial.md │ ├── function_guide.md │ ├── img │ │ ├── dist_overlay.png │ │ ├── ide_pycharm.png │ │ ├── package_import_viz.gif │ │ ├── vp.png │ │ └── vp_inv.png │ ├── index.md │ ├── install_dep.md │ ├── overview.md │ └── stylesheets │ │ └── al_extra.css └── mkdocs.yml ├── examples ├── bootdpci.ipynb ├── bwamcp.ipynb ├── bwbmcp.ipynb ├── bwimcp.ipynb ├── bwmcp.ipynb ├── bwmcppb.ipynb ├── corb.ipynb ├── hypothesize_notebook_general_examples.ipynb ├── l2drmci.ipynb ├── linconb.ipynb ├── lindepbt.ipynb ├── pb2gen.ipynb ├── pball.ipynb ├── pbcor.ipynb ├── rmmcppb.ipynb ├── spmcpa.ipynb ├── spmcpb.ipynb ├── spmcpi.ipynb ├── tmcppb.ipynb ├── winall.ipynb ├── wincor.ipynb ├── wwmcpbt.ipynb ├── wwmcppb.ipynb ├── ydbt.ipynb └── yuenbt.ipynb ├── hypothesize ├── __init__.py ├── compare_groups_with_single_factor │ ├── __init__.py │ └── _compare_groups_with_single_factor.py ├── compare_groups_with_two_factors │ ├── __init__.py │ └── _compare_groups_with_two_factors.py ├── measuring_associations │ ├── __init__.py │ └── _measuring_associations.py ├── tests │ ├── __init__.py │ ├── build_test_data.py │ ├── test_data │ │ ├── bootdpci.pkl │ │ ├── bwamcp.pkl │ │ ├── bwbmcp.pkl │ │ ├── bwimcp.pkl │ │ ├── bwmcp.pkl │ │ ├── bwmcppb.pkl │ │ ├── corb.pkl │ │ ├── l2drmci.pkl │ │ ├── linconb.pkl │ │ ├── lindepbt.pkl │ │ ├── pb2gen.pkl │ │ ├── pball.pkl │ │ ├── pbcor.pkl │ │ ├── rmmcppb.pkl │ │ ├── spmcpa.pkl │ │ ├── spmcpb.pkl │ │ ├── spmcpi.pkl │ │ ├── tmcppb.pkl │ │ ├── winall.pkl │ │ ├── wincor.pkl │ │ ├── wwmcpbt.pkl │ │ ├── wwmcppb.pkl │ │ ├── ydbt.pkl │ │ └── yuenbt.pkl │ └── test_funcs.py └── utilities.py ├── paper ├── paper.bib └── paper.md ├── requirements.txt └── setup.py /.github/workflows/test_and_lint.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: tests 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python version 3.9.13 20 | uses: actions/setup-python@v1 21 | with: 22 | python-version: 3.9.13 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install flake8 pytest 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | - name: Lint with flake8 29 | run: | 30 | # stop the build if there are Python syntax errors or undefined names 31 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 32 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 33 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
34 |     - name: Test with pytest
35 |       run: |
36 |         pytest
37 | 

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *.cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | 
59 | # DotEnv configuration
60 | .env
61 | 
62 | # Database
63 | *.db
64 | *.rdb
65 | 
66 | # Pycharm
67 | .idea
68 | 
69 | # VS Code
70 | .vscode/
71 | 
72 | # Spyder
73 | .spyproject/
74 | 
75 | # Jupyter NB Checkpoints
76 | .ipynb_checkpoints/
77 | 
78 | # exclude data from source control by default
79 | /data/
80 | 
81 | # Mac OS-specific storage files
82 | .DS_Store
83 | 
84 | # vim
85 | *.swp
86 | *.swo
87 | 
88 | # Mypy cache
89 | .mypy_cache/
90 | 
91 | # documentation build
92 | docs/site/
93 | 
94 | # R history file
95 | *.Rhistory
96 | 

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Feedback and contribution
2 | 
3 | Feedback, bug reports, and contributions are welcome via the
4 | [Hypothesize GitHub Repository](http://github.com/Alcampopiano/hypothesize/).
5 | 
6 | ## How to contribute new functions to Hypothesize
7 | 
8 | A great way to contribute would be to choose a function from the
9 | [WRS](https://dornsife.usc.edu/labs/rwilcox/software/) that does not yet exist in
10 | Hypothesize and convert it to Python. There is a current wish list
11 | [here](https://github.com/Alcampopiano/hypothesize/issues/2),
12 | but certainly any WRS function would be a welcome addition to the library. A list of the currently available
13 | functions in Hypothesize can be found in the documentation's
14 | [function reference](https://alcampopiano.github.io/hypothesize/function_guide/).
15 | 
16 | #### Create example data to be used in R and Python
17 | 
18 | It is helpful to be able to create some example data that can be used in both R and Python.
19 | One way to do this is to use Hypothesize's
20 | [create_example_data](https://alcampopiano.github.io/hypothesize/function_guide/#create_example_data) function.
21 | It will generate a DataFrame of random data (to be used in Python) as
22 | well as save NumPy arrays that can be read into R with the
23 | [RcppCNPy](https://cran.r-project.org/web/packages/RcppCNPy/index.html)
24 | and [Rcpp](https://cran.r-project.org/web/packages/Rcpp/index.html) libraries.
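As a rough sketch of that workflow (the `design_values` call below is taken from the example notebooks; the exact option for saving the NumPy arrays is documented in the function reference):

```python
from hypothesize.utilities import create_example_data

# random data for a 2x3 (J-by-K) design; one DataFrame column per cell
df = create_example_data(design_values=[2, 3])
df.head()
```

The same cell values can then be loaded into R (e.g., via RcppCNPy) so that the original WRS function and your Python port can be run against identical inputs.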
25 | 
26 | #### IDE for R and Python
27 | 
28 | It is convenient to use the same IDE when converting functions from R to Python.
29 | One suggestion is to use PyCharm's
30 | [r-language-for-intellij](https://plugins.jetbrains.com/plugin/6632-r-language-for-intellij)
31 | plugin. This makes it possible to have an interpreter and editor for
32 | both languages in the same IDE. Like so:
33 | 
34 | ![PyCharm with R and Python side by side](docs/docs/img/ide_pycharm.png)
35 | 
36 | Of course there are many ways that one might go about converting WRS functions to Python.
37 | These are merely suggestions.
38 | 
39 | ### Setting up your Git environment
40 | 
41 | 1. Install the latest version of Hypothesize locally using
42 | 
43 | ```
44 | $ pip install git+https://github.com/Alcampopiano/hypothesize/
45 | ```
46 | 
47 | 2. Fork the repository on GitHub and clone the fork to your local
48 | machine. For more details on forking see the [GitHub
49 | Documentation](https://help.github.com/en/articles/fork-a-repo).
50 | 
51 | ```
52 | $ git clone https://github.com/YOUR-USERNAME/hypothesize.git
53 | ```
54 | 
55 | 3. Create a sync to the original upstream repository by creating a so-called
56 | [remote](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/configuring-a-remote-for-a-fork).
57 | 
58 | ```
59 | $ git remote add upstream https://github.com/Alcampopiano/hypothesize.git
60 | $ git checkout master
61 | $ git pull upstream master
62 | ```
63 | 
64 | Now you will have all of the updates in the master branch of your local fork.
65 | Note that git will complain if you've committed changes to your local master
66 | branch that are not on the upstream repository. This is one reason why it's good practice to avoid
67 | working directly on your master branch.
68 | 
69 | ### Committing new code to Hypothesize
70 | 
71 | 1. Create a new local branch and commit changes to your remote branch:
72 | 
73 | ```
74 | $ git checkout -b <branch-name>
75 | ```
76 | 
77 | With this branch checked out, make the desired changes to the package.
78 | When you are happy with your changes, you can commit them to a remote branch by running
79 | 
80 | ```
81 | $ git add <modified-files>
82 | $ git commit -m "Some descriptive message about your change"
83 | $ git push origin <branch-name>
84 | ```
85 | 
86 | 2. Write a unit test for your code (optional)
87 | 
88 | Hypothesize uses `pytest` for unit testing. The strategy currently used for testing
89 | is to pickle results that are assumed to be correct and compare those
90 | against fresh results from the modified code (see the
91 | [tests](https://github.com/Alcampopiano/hypothesize/tree/master/hypothesize/tests) folder for examples).
92 | If you would like to write a test for your new code, you may follow the strategy
93 | described above or come up with another way to test your code. To run the test suite,
94 | first navigate to the "tests" directory, then use the `pytest` command from your terminal.
95 | 
96 | 3. Submit a pull request (PR) to merge your new branch into Hypothesize's master branch
97 | 
98 | For details on creating a PR see the GitHub documentation [Creating a pull
99 | request](https://help.github.com/en/articles/creating-a-pull-request).
100 | 
101 | 

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | 
2 | Copyright (c) 2020, Allan Campopiano
3 | All rights reserved.
4 | 
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 | 
8 | * Redistributions of source code must retain the above copyright notice, this
9 |   list of conditions and the following disclaimer.
10 | 
11 | * Redistributions in binary form must reproduce the above copyright notice, this
12 |   list of conditions and the following disclaimer in the documentation and/or
13 |   other materials provided with the distribution.
14 | 
15 | * Neither the name of hypothesize nor the names of its
16 |   contributors may be used to endorse or promote products derived from this
17 |   software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
23 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
26 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
28 | OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 
30 | 

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include LICENSE
3 | include requirements.txt
4 | recursive-include src *.py *.json *.ipynb *.html
5 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Hypothesize
2 | 
3 | [![status](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926/status.svg)](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
4 | ![tests](https://github.com/Alcampopiano/hypothesize/workflows/tests/badge.svg)
5 | [![PyPI version](https://img.shields.io/pypi/v/hypothesize?style=flat-square)](https://pypi.org/project/hypothesize/)
6 | [![PyPI - Downloads](https://img.shields.io/pypi/dw/hypothesize?style=flat-square)](https://pypistats.org/packages/hypothesize)
7 | [![license](https://img.shields.io/pypi/l/hypothesize?style=flat-square)](https://github.com/Alcampopiano/hypothesize/blob/master/LICENSE)
8 | 
9 | A Python package for hypothesis testing using robust statistics
10 | 
11 | ## Basic Example
12 | 
13 | ### A robust measure of association with winsorized correlation
14 | [Launch this example in Deepnote](https://deepnote.com/launch?name=wincor&url=https://github.com/Alcampopiano/hypothesize/blob/master/examples/wincor.ipynb)
15 | 
16 | 
17 | ```python
18 | from hypothesize.measuring_associations import wincor
19 | from hypothesize.utilities import create_example_data
20 | 
21 | # creating an example DataFrame with columns "cell_1" and "cell_2"
22 | df=create_example_data(2)
23 | 
24 | results=wincor(df.cell_1, df.cell_2)
25 | 
26 | # returning the correlation, number of observations, p-value, and winsorized covariance
27 | print(results)
28 | {'cor': 0.11, 'nval': 50, 'sig': 0.44, 'wcov': 0.01}
29 | ```
30 | 
31 | ## Documentation
32 | :book: Please visit the [Hypothesize documentation
site](https://Alcampopiano.github.io/hypothesize/).
33 | Note that each statistical test in the documentation can be launched
34 | directly in [Deepnote's](https://deepnote.com) hosted notebook environment, complete with sample data
35 | (as shown in the example above 👆).
36 | 
37 | ## Citing Hypothesize
38 | 
39 | [![status](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926/status.svg)](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
40 | 
41 | If you use Hypothesize in academic work, please use the following citation:
42 | 
43 | Campopiano, A., & Wilcox, R. R. (2020). Hypothesize: Robust Statistics for Python.
44 | Journal of Open Source Software, 5(50), 2241, https://doi.org/10.21105/joss.02241
45 | 
46 | BibTex:
47 | 
48 | ```bib
49 | @article{Campopiano2020,
50 |   doi = {10.21105/joss.02241},
51 |   url = {https://doi.org/10.21105/joss.02241},
52 |   year = {2020},
53 |   publisher = {The Open Journal},
54 |   volume = {5},
55 |   number = {50},
56 |   pages = {2241},
57 |   author = {Allan Campopiano and Rand R. Wilcox},
58 |   title = {Hypothesize: Robust Statistics for Python},
59 |   journal = {Journal of Open Source Software}
60 | }
61 | ```
62 | 

--------------------------------------------------------------------------------
/docs/docs/FAQ.md:
--------------------------------------------------------------------------------
1 | # Frequently asked questions
2 | 
3 | No attempt is made to fully explain the following
4 | concepts, but hopefully this gets
5 | you started. The Internet has plenty of resources on these topics
6 | if you would like to learn more.
7 | 
8 | ## What is a trimmed mean?
9 | 
10 | The trimmed mean involves calculating the sample mean after
11 | removing a proportion of values from each
12 | tail of the distribution. In symbols, the trimmed mean is expressed as
13 | follows:
14 | 
15 | $$
16 | \bar{X}_t = \frac{X_{(g+1)} + \dots + X_{(n-g)}}{n-2g}
17 | $$
18 | 
19 | where $X_1, X_2, \dots, X_n$ is a random sample and
20 | $X_{(1)} \le X_{(2)} \le \dots \le X_{(n)}$ are the observations in
21 | ascending order. The proportion to trim is $\gamma\,(0 \lt \gamma \lt .5)$
22 | and $g = \lfloor \gamma n \rfloor$, that is, $\gamma n$ rounded down to the nearest integer.
23 | 
24 | ## What is bootstrapping?
25 | 
26 | In the context of hypothesis testing, bootstrapping generally
27 | involves taking many random samples (with replacement)
28 | from the data at hand in order to estimate a sampling
29 | distribution of interest. This is in contrast to traditional methods,
30 | which assume the shape of the particular sampling distribution under study.
31 | Once we have an empirically derived sampling distribution,
32 | obtaining CIs and p-values is relatively straightforward.
33 | 
34 | ## What is a contrast matrix?
35 | 
36 | First, it is helpful to imagine your
37 | design arranged into a JxK matrix.
38 | 
39 | $$
40 | A=\begin{bmatrix}
41 | a_{1,1} & a_{1,2} & ... & a_{1,K} \\
42 | a_{2,1} & a_{2,2} & ... & a_{2,K} \\
43 | a_{J,1} & a_{J,2} & ... & a_{J,K}
44 | \end{bmatrix}
45 | $$
46 | 
47 | A contrast matrix specifies which cells (or elements) in the above
48 | design are to be compared. The rows in a contrast matrix
49 | correspond to the cells in your design. The columns correspond
50 | to the contrasts that you wish to make.
51 | 
52 | ### Examples of contrast matrices for different designs
53 | 
54 | Matrix notation is used to explain which cells are
55 | being compared, followed by the corresponding
56 | contrast matrix.
57 | 
58 | === "design with 2 groups"
59 | 
60 |     $\Large{a_{1,1} - a_{1,2}}$
61 | 
62 |     | contrast 1 |
63 |     |------------|
64 |     | 1          |
65 |     | -1         |
66 | 
67 | === "design with 3 groups"
68 | 
69 |     1. $\Large{a_{1,1} - a_{1,2}}$
70 |     2. $\Large{a_{1,1} - a_{1,3}}$
71 |     3. $\Large{a_{1,2} - a_{1,3}}$
72 | 
73 |     | contrast 1 | contrast 2 | contrast 3 |
74 |     |------------|------------|------------|
75 |     | 1          | 1          | 0          |
76 |     | -1         | 0          | 1          |
77 |     | 0          | -1         | -1         |
78 | 
79 | === "2x2 design"
80 |     **Factor A**
81 | 
82 |     $\Large{(a_{1,1} + a_{1,2})-(a_{2,1} + a_{2,2})}$
83 | 
84 |     | contrast 1 |
85 |     |------------|
86 |     | 1          |
87 |     | 1          |
88 |     | -1         |
89 |     | -1         |
90 | 
91 |     **Factor B**
92 | 
93 |     $\Large{(a_{1,1} + a_{2,1})-(a_{1,2} + a_{2,2})}$
94 | 
95 |     | contrast 1 |
96 |     |------------|
97 |     | 1          |
98 |     | -1         |
99 |     | 1          |
100 |     | -1         |
101 | 
102 |     **Interaction**
103 | 
104 |     $\Large{(a_{1,1} + a_{2,2})-(a_{1,2} + a_{2,1})}$
105 | 
106 |     That is, the difference of the differences
107 | 
108 |     | contrast 1 |
109 |     |------------|
110 |     | 1          |
111 |     | -1         |
112 |     | -1         |
113 |     | 1          |
114 | 
115 | === "2x3 design"
116 |     **Factor A**
117 | 
118 |     $\Large{(a_{1,1} + a_{1,2} + a_{1,3})-(a_{2,1} + a_{2,2} + a_{2,3})}$
119 | 
120 |     | contrast 1 |
121 |     |------------|
122 |     | 1          |
123 |     | 1          |
124 |     | 1          |
125 |     | -1         |
126 |     | -1         |
127 |     | -1         |
128 | 
129 |     **Factor B**
130 | 
131 |     1. $\Large{(a_{1,1} + a_{2,1})-(a_{1,2} + a_{2,2})}$
132 |     2. $\Large{(a_{1,1} + a_{2,1})-(a_{1,3} + a_{2,3})}$
133 |     3. $\Large{(a_{1,2} + a_{2,2})-(a_{1,3} + a_{2,3})}$
134 | 
135 |     | contrast 1 | contrast 2 | contrast 3 |
136 |     |------------|------------|------------|
137 |     | 1          | 1          | 0          |
138 |     | -1         | 0          | 1          |
139 |     | 0          | -1         | -1         |
140 |     | 1          | 1          | 0          |
141 |     | -1         | 0          | 1          |
142 |     | 0          | -1         | -1         |
143 | 
144 |     **Interactions**
145 | 
146 |     1. $\Large{(a_{1,1} + a_{2,2})-(a_{1,2} + a_{2,1})}$
147 |     2. $\Large{(a_{1,1} + a_{2,3})-(a_{1,3} + a_{2,1})}$
148 |     3. $\Large{(a_{1,2} + a_{2,3})-(a_{1,3} + a_{2,2})}$
149 | 
150 |     | contrast 1 | contrast 2 | contrast 3 |
151 |     |------------|------------|------------|
152 |     | 1          | 1          | 0          |
153 |     | -1         | 0          | 1          |
154 |     | 0          | -1         | -1         |
155 |     | -1         | -1         | 0          |
156 |     | 1          | 0          | -1         |
157 |     | 0          | 1          | 1          |
158 | 
159 | 
160 | !!! success "Not a fan of contrast matrices?"
161 |     Don't worry, Hypothesize can generate all linear
162 |     contrasts automatically (see functions [con1way](https://alcampopiano.github.io/hypothesize/function_guide/#con1way)
163 |     and [con2way](https://alcampopiano.github.io/hypothesize/function_guide/#con2way)). However, it is useful to
164 |     understand this concept so that you know
165 |     which comparisons are being made and
166 |     how to specify your own if necessary.
167 | 
168 | 
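For example, a brief sketch of generating contrasts automatically (the return convention shown for `con2way` is assumed here to follow its WRS counterpart, one matrix per effect; check the function reference for details):

```python
from hypothesize.utilities import con1way, con2way

# all pairwise linear contrasts for a single factor with three groups
con = con1way(3)

# contrast matrices for factor A, factor B, and the interaction in a 2x3 design
conA, conB, conAB = con2way(2, 3)
```

These matrices can then be passed to functions that accept a `con` argument, such as `linconb`.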
--------------------------------------------------------------------------------
/docs/docs/basic_tutorial.md:
--------------------------------------------------------------------------------
1 | # Basic Tutorial
2 | 
3 | The following tutorial demonstrates how to perform a
4 | robust hypothesis test using 20% trimmed means and
5 | the bootstrap-t test. The data correspond to a
6 | 2 (between-subjects) x 3 (within-subjects) factorial design.
7 | 
8 | ### Getting your data into Hypothesize
9 | 
10 | In Hypothesize, input data are always specified as a Pandas DataFrame or Series.
11 | In this example, we have a 2x3 factorial design, so the data take the form of
12 | a six-column DataFrame (i.e., J levels x K levels). Using Pandas, you can read your data into Python and
13 | use one of the appropriate functions from Hypothesize. In this case we will use the function `bwmcp`,
14 | but there are [many others](function_guide.md) to choose from.
15 | 
16 | !!! note "What about my column names?"
17 |     Don't worry, Hypothesize doesn't make use of your column names.
18 |     Feel free to name them however you like!
19 | 
20 | 
21 | ```python
22 | import pandas as pd
23 | 
24 | df=pd.read_csv('my_data.csv')
25 | 
26 | df.head()
27 | ```
28 | 
29 | | cell_1_1   | cell_1_2   | cell_1_3   | cell_2_1   | cell_2_2   | cell_2_3   |
30 | |------------|------------|------------|------------|------------|------------|
31 | | 0.04       | 0.90       | 0.79       | 0.51       | 0.33       | 0.23       |
32 | | 0.76       | 0.29       | 0.84       | 0.03       | 0.5        | 0.73       |
33 | | 0.71       | 0.59       | 0.11       | 0.89       | 0.76       | 0.04       |
34 | | 0.17       | 0.26       | 0.88       | 0.28       | 0.1        | 0.21       |
35 | | 0.95       | 0.22       | 0.83       | 0.59       | 0.65       | 0.20       |
36 | 
37 | ```python
38 | from hypothesize.compare_groups_with_two_factors import bwmcp
39 | 
40 | results=bwmcp(J=2, K=3, x=df)
41 | ```
42 | 
43 | ### Examining your results
44 | 
45 | The results are returned as a Python dictionary containing simple Python objects
46 | or DataFrames (when the results are best given as a matrix). For example, here are the
47 | previously computed results for the interaction returned as a DataFrame.
48 | 49 | ```python 50 | results['factor_AB'] 51 | ``` 52 | 53 | | con_num | psihat | se | test | crit_value | p_value | 54 | |---------- |----------- |--------- |---------- |------------- |---------- | 55 | | 0 | -0.100698 | 0.126135 | -0.798336 | 2.3771 | 0.410684 | 56 | | 1 | -0.037972 | 0.151841 | -0.250078 | 2.3771 | 0.804674 | 57 | | 2 | 0.0627261 | 0.135392 | 0.463291 | 2.3771 | 0.659432 | 58 | 59 | 60 | 62 | 63 | -------------------------------------------------------------------------------- /docs/docs/img/dist_overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/dist_overlay.png -------------------------------------------------------------------------------- /docs/docs/img/ide_pycharm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/ide_pycharm.png -------------------------------------------------------------------------------- /docs/docs/img/package_import_viz.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/package_import_viz.gif -------------------------------------------------------------------------------- /docs/docs/img/vp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/vp.png -------------------------------------------------------------------------------- /docs/docs/img/vp_inv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/vp_inv.png -------------------------------------------------------------------------------- /docs/docs/index.md: -------------------------------------------------------------------------------- 1 | # Hypothesize: robust statistics in Python 2 | 3 | ![Screenshot](img/dist_overlay.png) 4 | 5 | Hypothesize is a robust statistics library for 6 | Python based on Rand R. Wilcox's R package [WRS](https://dornsife.usc.edu/labs/rwilcox/software/). 7 | With Hypothesize you can compare groups and 8 | measure associations using methods that outperform 9 | traditional statistical approaches in terms of power 10 | and accuracy. 11 | 12 | For more information on robust methods please see Wilcox's book 13 | [Introduction to Robust Estimation and Hypothesis Testing](https://play.google.com/store/books/details?id=8f8nBb4__EYC&gl=ca&hl=en-CA&source=productsearch&utm_source=HA_Desktop_US&utm_medium=SEM&utm_campaign=PLA&pcampaignid=MKTAD0930BO1&gclid=CjwKCAiA44LzBRB-EiwA-jJipJzyqx9kwNMq5MMU7fG2RrwBK9F7sirX4pfhS8wO7k9Uz_Sqf2P28BoCYzcQAvD_BwE&gclsrc=aw.ds). 14 | 15 | ## Getting Started 16 | 17 | - [Overview](overview.md) 18 | - [Installation](install_dep.md) 19 | - [Basic Tutorial](basic_tutorial.md#) 20 | 21 | ## User Guide 22 | 23 | - [Function reference](function_guide.md) 24 | - [Frequently asked questions](FAQ.md) 25 | 26 | ## Bug reports and Questions 27 | Hypothesize is BSD-licenced and the source code is available 28 | on [GitHub](https://github.com/Alcampopiano/hypothesize). 
29 | For issues and questions,
30 | please use [GitHub Issues](https://github.com/Alcampopiano/hypothesize/issues).
31 | 
32 | ## Citing Hypothesize
33 | 
34 | [![status](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926/status.svg)](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
35 | 
36 | If you use Hypothesize in academic work, please use the following citation:
37 | 
38 | Campopiano, A., & Wilcox, R. R. (2020). Hypothesize: Robust Statistics for Python.
39 | Journal of Open Source Software, 5(50), 2241, https://doi.org/10.21105/joss.02241
40 | 
41 | BibTex:
42 | 
43 | ```bib
44 | @article{Campopiano2020,
45 |   doi = {10.21105/joss.02241},
46 |   url = {https://doi.org/10.21105/joss.02241},
47 |   year = {2020},
48 |   publisher = {The Open Journal},
49 |   volume = {5},
50 |   number = {50},
51 |   pages = {2241},
52 |   author = {Allan Campopiano and Rand R. Wilcox},
53 |   title = {Hypothesize: Robust Statistics for Python},
54 |   journal = {Journal of Open Source Software}
55 | }
56 | ```
57 | 
58 | ## Contributing to Hypothesize
59 | 
60 | The best way to contribute to Hypothesize is to take any function from the WRS collection
61 | and convert it to Python. For more details, please see
62 | [CONTRIBUTING.md](https://github.com/Alcampopiano/hypothesize/blob/master/CONTRIBUTING.md)
63 | in the GitHub repository.

--------------------------------------------------------------------------------
/docs/docs/install_dep.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | 
3 | Hypothesize can be installed using `pip`:
4 | 
5 | ```
6 | $ pip install hypothesize
7 | ```
8 | 
9 | # Dependencies
10 | 
11 | Hypothesize has the following dependencies,
12 | all of which are installed automatically
13 | with the above installation command:
14 | 
15 | - python 3.6 or newer
16 | - [NumPy](https://numpy.org/)
17 | - [Pandas](https://pandas.pydata.org/)
18 | - [SciPy](https://www.scipy.org/)
19 | - [more-itertools](https://pypi.org/project/more-itertools/)
20 | 

--------------------------------------------------------------------------------
/docs/docs/overview.md:
--------------------------------------------------------------------------------
1 | # Overview
2 | 
3 | The benefits of using robust methods for hypothesis testing
4 | have been known for the last half century.
5 | They have been shown to substantially increase power and accuracy when compared to
6 | traditional approaches.
7 | The issues of robustness and the functions in this library are described in detail in Rand R. Wilcox's book
8 | [Introduction to Robust Estimation and Hypothesis Testing](https://play.google.com/store/books/details?id=8f8nBb4__EYC&gl=ca&hl=en-CA&source=productsearch&utm_source=HA_Desktop_US&utm_medium=SEM&utm_campaign=PLA&pcampaignid=MKTAD0930BO1&gclid=CjwKCAiA44LzBRB-EiwA-jJipJzyqx9kwNMq5MMU7fG2RrwBK9F7sirX4pfhS8wO7k9Uz_Sqf2P28BoCYzcQAvD_BwE&gclsrc=aw.ds).
9 | 
10 | The code and function names in Hypothesize are based on Wilcox's R functions in the [WRS](https://dornsife.usc.edu/labs/rwilcox/software/) package.
11 | Hypothesize simply brings many of these helpful and well-studied robust methods to the Python ecosystem.
12 | In addition, Hypothesize provides a user-friendly API and package structure,
13 | as well as one-click, [ready-to-run examples](function_guide.md) for every top-level
14 | function.
15 | 
16 | ## Hypothesize is easy to use
17 | 
18 | Hypothesize's API is friendly and
19 | consistent, making it easy for you to discover
20 | and use robust functions that are appropriate for
21 | your statistical design.
22 | 
23 | ### Package Structure
24 | 
25 | Hypothesize organizes functions
26 | based on the statistical design. The following visualizations show
27 | how the package is structured and how
28 | this is reflected in practice when importing from the library:
29 | 
30 | ```mermaid
31 | graph TB
32 | linkStyle default interpolate basis
33 | A[Hypothesize]
34 | A --> B(compare groups with single factor)
35 | A --> C(compare groups with two factors)
36 | A --> D(measure associations)
37 | 
38 | B --> F(f1)
39 | B --> G(f2)
40 | B --> H(fn)
41 | 
42 | C --> F1(f1)
43 | C --> G2(f2)
44 | C --> H3(fn)
45 | 
46 | D --> F5(f1)
47 | D --> G6(f2)
48 | D --> H7(fn)
49 | ```
50 | 
51 | ---
52 | ![Screenshot](img/package_import_viz.gif)
53 | 
54 | ---
55 | ## Hypothesize is flexible and powerful
56 | 
57 | A broad range of choices exists in Hypothesize, both in
58 | terms of the supported statistical designs and the options for fine-grained control over how
59 | tests are computed. For example:
60 | 
61 | - Where applicable, many hypothesis tests allow the specification of an estimator. That is,
62 | users may choose when to use the mean, median, trimmed mean, winsorized correlation,
63 | percentage bend correlation, or any other compatible statistical estimator (see the sketch below).
64 | 
65 | - Single- and multi-factor designs are supported, including independent,
66 | dependent, and mixed groups.
67 | 
68 | - Family-wise error can be robustly controlled with sequentially rejective methods (Benjamini & Hochberg, 1995; Hochberg, 1988; Rom, 1990).
69 | 
70 | 
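As a brief sketch of the first point, using calls that appear in the example notebooks:

```python
from hypothesize.utilities import create_example_data, trim_mean
from hypothesize.compare_groups_with_single_factor import l2drmci

# example DataFrame with columns "cell_1" and "cell_2"
df = create_example_data(2)

# compare two groups, here using the 20% trimmed mean as the estimator;
# another compatible estimator could be passed in its place
results = l2drmci(df.cell_1, df.cell_2, trim_mean, .2)
```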
71 | 72 | Visit the [tutorial section](basic_tutorial.md) and the 73 | [function documentation](function_guide.md) for complete examples 74 | using Hypothesize. -------------------------------------------------------------------------------- /docs/docs/stylesheets/al_extra.css: -------------------------------------------------------------------------------- 1 | /* 2 | .button { 3 | display: block; 4 | width: 100%; 5 | font-size: 16px; 6 | background-color: #5867be; 7 | color: #ffffff !important; 8 | padding: 10px; 9 | box-shadow: 10; 10 | border-radius: 2px; 11 | text-align: center; 12 | border: none; 13 | } 14 | */ 15 | 16 | .button { 17 | display: block; 18 | text-align: center; 19 | } 20 | 21 | /* 22 | .button:hover { 23 | background-color: grey; 24 | color: white !important; 25 | 26 | } 27 | */ 28 | 29 | .mermaid svg { 30 | text-align: center !important; 31 | } 32 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Hypothesize 2 | google_analytics: 3 | - UA-165284209-1 4 | - auto 5 | 6 | nav: 7 | - Home: index.md 8 | - Overview: overview.md 9 | - Tutorial: basic_tutorial.md 10 | - Function Reference: function_guide.md 11 | - FAQ: FAQ.md 12 | 13 | repo_name: Github 14 | repo_url: https://github.com/Alcampopiano/hypothesize 15 | edit_uri: "" 16 | theme: 17 | logo: img/vp.png 18 | favicon: img/vp_inv.png 19 | name: material 20 | palette: 21 | primary: black 22 | accent: red 23 | copyright: "Hypothesize is licensed under the BSD 3-Clause license" 24 | 25 | markdown_extensions: 26 | - admonition 27 | - codehilite: 28 | guess_lang: false 29 | - footnotes 30 | - toc: 31 | permalink: true 32 | - pymdownx.tabbed 33 | - pymdownx.inlinehilite 34 | - pymdownx.arithmatex 35 | - pymdownx.superfences: 36 | custom_fences: 37 | - name: mermaid 38 | class: mermaid 39 | format: !!python/name:pymdownx.superfences.fence_div_format 40 | 41 | extra_css: 42 | - stylesheets/al_extra.css 43 | - https://unpkg.com/mermaid@7.1.2/dist/mermaid.css 44 | extra_javascript: 45 | - https://unpkg.com/mermaid@7.1.2/dist/mermaid.min.js 46 | - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-MML-AM_CHTML -------------------------------------------------------------------------------- /examples/bootdpci.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bootdpci.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588616000402}],"collapsed_sections":[],"authorship_tag":"ABX9TyODOlntM0MT9CHgvn8VVl/O"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import bootdpci"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bootdpci(df, trim_mean, 
.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwamcp.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwamcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632200919},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyPVfyF9avy0DbfAuarJl8iy"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import bwamcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwamcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwbmcp.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwbmcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632296831},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyMCKdZY/KiF2LwgsqoxM9z0"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwbmcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwbmcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results[0]['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwimcp.ipynb: -------------------------------------------------------------------------------- 1 | 
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwimcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632574529},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNi+qxTq2XoeXVqKQ9JR4Ob"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":228},"outputId":"edf97991-0dd9-4813-e161-38e0804b024c","executionInfo":{"status":"ok","timestamp":1588632621496,"user_tz":240,"elapsed":4200,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["!pip install hypothesize"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Collecting hypothesize\n"," Downloading https://files.pythonhosted.org/packages/00/64/d9067b4a72585b2003bbd1823cceaada7f0c9a28441921201df42d31332e/hypothesize-0.1.dev23-py3-none-any.whl\n","Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.0.3)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.18.3)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.4.1)\n","Requirement already satisfied: more-itertools in /usr/local/lib/python3.6/dist-packages (from hypothesize) (8.2.0)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->hypothesize) (2018.9)\n","Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas->hypothesize) (2.8.1)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas->hypothesize) (1.12.0)\n","Installing collected packages: hypothesize\n","Successfully installed hypothesize-0.1.dev23\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import bwimcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwimcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwmcp.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwmcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632373867},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNq9mbugbYwk9HTz7/3Brgd"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install 
hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwmcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwmcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/bwmcppb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwmcppb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632676077},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNJh6K+fRRnm1Mgfr3EEWRU"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwmcppb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/corb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"corb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOuT/tbSiQ385N9p7Y0OE2s"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import corb, wincor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=corb(wincor, df.cell_1, 
df.cell_2, .05, 1000, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/hypothesize_notebook_general_examples.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.4"},"colab":{"name":"hypothesize_notebook_for_colab.ipynb","provenance":[{"file_id":"https://github.com/Alcampopiano/hypothesize/blob/master/examples/hypothesize_notebook_for_colab.ipynb","timestamp":1589199962420}],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"SR6bFvjJtqjq","colab_type":"text"},"source":["## Hypothesize tutorial\n","\n","This notebook provides a few examples of how to use Hypothesize with a few common statistical designs. There are many more functions that could work for these designs but hopefully this helps to get you started.\n","\n"]},{"cell_type":"code","metadata":{"id":"AXTC2Xzu3zM9","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"9TPllfTh3zNE","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"zNFQXwRd3zNJ","colab_type":"text"},"source":["### How to compare two groups"]},{"cell_type":"markdown","metadata":{"id":"pA-fXciM3zNK","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"ZrMIEtaw3zNM","colab_type":"code","outputId":"bb369725-d23c-4d9b-c0f1-2a4f917587f6","executionInfo":{"status":"ok","timestamp":1589200167867,"user_tz":240,"elapsed":4569,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["#df=pd.read_csv(\"/home/allan/two_groups_data.csv\")\n","df=create_example_data(design_values=2)\n","\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2
00.6087980.582123
10.6228260.854637
20.2641650.655077
30.7941850.378080
40.9076870.468066
\n","
"],"text/plain":[" cell_1 cell_2\n","0 0.608798 0.582123\n","1 0.622826 0.854637\n","2 0.264165 0.655077\n","3 0.794185 0.378080\n","4 0.907687 0.468066"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"b7DXVXTP3zNR","colab_type":"text"},"source":["#### Import the desired function and pass in the data for each group\n","- This example uses the bootstrapped-t method with 20% trimmed means\n","- The output is a dictionary containing the results (95% confidence interval, p_value, test statistics, etc...)"]},{"cell_type":"code","metadata":{"id":"2hapgjCg3zNU","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"cb66df69-d846-411c-a603-9b0007a1cad9","executionInfo":{"status":"ok","timestamp":1589200168221,"user_tz":240,"elapsed":4920,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["from hypothesize.compare_groups_with_single_factor import yuenbt\n","\n","results=yuenbt(df.cell_1, df.cell_2)\n","\n","results['ci']"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[-0.09190770159731171, 0.25635146839797]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"rCYUwGzw3zNY","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"iU8nQykk3zNZ","colab_type":"text"},"source":["### How to compare three groups"]},{"cell_type":"markdown","metadata":{"id":"GOw1Y9_v3zNb","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"akjpBynJ3zNd","colab_type":"code","outputId":"6209634a-446c-42fb-d106-2cafa7350431","executionInfo":{"status":"ok","timestamp":1589200168223,"user_tz":240,"elapsed":4916,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/one_way_data.csv\")\n","df=create_example_data(design_values=3)\n","\n","df.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2cell_3
00.2651090.0889140.480468
10.1199880.4827730.079476
20.1095330.5218340.762804
30.1524540.1775960.741767
40.3554030.5209910.380219
\n","
"],"text/plain":[" cell_1 cell_2 cell_3\n","0 0.265109 0.088914 0.480468\n","1 0.119988 0.482773 0.079476\n","2 0.109533 0.521834 0.762804\n","3 0.152454 0.177596 0.741767\n","4 0.355403 0.520991 0.380219"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"jRxALG1a3zNh","colab_type":"text"},"source":["#### Import the desired functions and pass in the inputs\n","- One approach is to use a set of linear contrasts that will test all pairwise comparisons\n","- Then, the bootstrap-t method and the 20% trimmed mean can be used\n","- CIs are adjusted to control for FWE\n","- All pairwise contrasts can be created automatically using the `con1way` function\n","- The results are a dictionary of DataFrames that contain various statistics (p_value, CIs, standard error, test statistics, etc)"]},{"cell_type":"code","metadata":{"id":"NJ5LK8G_3zNi","colab_type":"code","colab":{}},"source":["from hypothesize.compare_groups_with_single_factor import linconb\n","from hypothesize.utilities import con1way\n","\n","results=linconb(df, con=con1way(3))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"et1Acy1S3zNm","colab_type":"code","outputId":"b562fb9f-7d8a-4203-db4f-2e4cf157e96f","executionInfo":{"status":"ok","timestamp":1589200168984,"user_tz":240,"elapsed":5668,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['test']"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
contrast_indextestsep_value
00.00.4177450.0819210.691152
11.0-0.0433810.0852250.959933
22.0-0.5013320.0756360.602671
\n","
"],"text/plain":[" contrast_index test se p_value\n","0 0.0 0.417745 0.081921 0.691152\n","1 1.0 -0.043381 0.085225 0.959933\n","2 2.0 -0.501332 0.075636 0.602671"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"CCMzuKYX3zNq","colab_type":"code","outputId":"2f0d4212-cb97-479a-aeef-aace296a05a6","executionInfo":{"status":"ok","timestamp":1589200168987,"user_tz":240,"elapsed":5664,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['psihat']"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
contrast_indexpsihatci_lowci_up
00.00.034222-0.1681680.236612
11.0-0.003697-0.2142510.206857
22.0-0.037919-0.2247840.148946
\n","
"],"text/plain":[" contrast_index psihat ci_low ci_up\n","0 0.0 0.034222 -0.168168 0.236612\n","1 1.0 -0.003697 -0.214251 0.206857\n","2 2.0 -0.037919 -0.224784 0.148946"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"d-AMqtzP3zNv","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"XO-FNoJw3zNw","colab_type":"text"},"source":["### How to compare groups in a factorial design"]},{"cell_type":"markdown","metadata":{"id":"qJcHGgDv3zNx","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"jE-FN9Lx3zNz","colab_type":"code","outputId":"b328e4c9-97d3-4cf8-b1ce-70bd1c44ea06","executionInfo":{"status":"ok","timestamp":1589200168990,"user_tz":240,"elapsed":5661,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/two_way_data.csv\")\n","df=create_example_data(design_values=[2,3])\n","\n","df.head()"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1_1cell_1_2cell_1_3cell_2_1cell_2_2cell_2_3
00.8275240.4762940.1317200.4109990.3203060.370742
10.6322810.5883680.6626480.2425470.2702920.700103
20.0730640.4720470.0539420.0690970.8515960.962723
30.8433770.0959560.6174340.7652790.4207720.993871
40.1907090.0137270.2553850.5779160.2182770.125772
\n","
"],"text/plain":[" cell_1_1 cell_1_2 cell_1_3 cell_2_1 cell_2_2 cell_2_3\n","0 0.827524 0.476294 0.131720 0.410999 0.320306 0.370742\n","1 0.632281 0.588368 0.662648 0.242547 0.270292 0.700103\n","2 0.073064 0.472047 0.053942 0.069097 0.851596 0.962723\n","3 0.843377 0.095956 0.617434 0.765279 0.420772 0.993871\n","4 0.190709 0.013727 0.255385 0.577916 0.218277 0.125772"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"aaVxg64m3zN2","colab_type":"text"},"source":["#### Import the desired function and pass in the data\n","- This example uses a 2-by-3 design\n","- One approach is to use a set of linear contrasts that will test all main effects and interactions\n","- Then, the bootstrap-t method and the 20% trimmed mean can be used\n","- The results are a dictionary of DataFrames that contain various statistics for each factor and the interactions"]},{"cell_type":"code","metadata":{"id":"X_muz_Lz3zN4","colab_type":"code","colab":{}},"source":["from hypothesize.compare_groups_with_two_factors import bwmcp\n","\n","results=bwmcp(J=2, K=3, x=df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"0Sm5AHgQ3zN8","colab_type":"code","outputId":"d39a4bc9-8313-479d-ba7d-5da63ca6c85b","executionInfo":{"status":"ok","timestamp":1589200173973,"user_tz":240,"elapsed":10635,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":79}},"source":["results['factor_A']"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
con_numpsihatsetestcrit_valuep_value
00.00.1732070.1280721.3524181.9600250.15192
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 0.173207 0.128072 1.352418 1.960025 0.15192"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"Ff6ipBF23zN_","colab_type":"code","outputId":"b7712848-0549-4c3d-9c84-bb6bb1e60b69","executionInfo":{"status":"ok","timestamp":1589200173974,"user_tz":240,"elapsed":10630,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['factor_B']"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
con_numpsihatsetestcrit_valuep_value
00.0-0.0675020.120091-0.5620912.4940320.559265
11.00.0393980.1163280.3386802.4940320.721202
22.00.1069000.0984911.0853732.4940320.307179
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 -0.067502 0.120091 -0.562091 2.494032 0.559265\n","1 1.0 0.039398 0.116328 0.338680 2.494032 0.721202\n","2 2.0 0.106900 0.098491 1.085373 2.494032 0.307179"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"2cKKYZh83zOF","colab_type":"code","outputId":"9be1a209-ab26-4fc9-c31a-d6809d2b3c94","executionInfo":{"status":"ok","timestamp":1589200173975,"user_tz":240,"elapsed":10624,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['factor_AB']"],"execution_count":13,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
con_numpsihatsetestcrit_valuep_value
00.0-0.1832420.120091-1.5258692.39830.118531
11.0-0.1635250.116328-1.4057202.39830.186978
22.00.0197180.0984910.2001962.39830.833055
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 -0.183242 0.120091 -1.525869 2.3983 0.118531\n","1 1.0 -0.163525 0.116328 -1.405720 2.3983 0.186978\n","2 2.0 0.019718 0.098491 0.200196 2.3983 0.833055"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"markdown","metadata":{"id":"uOyKCT9M3zOJ","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"JZxF8Ygi3zOK","colab_type":"text"},"source":["### How to compute a robust correlation"]},{"cell_type":"markdown","metadata":{"id":"a2WTERe43zOL","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"FU8Ey3iI3zON","colab_type":"code","outputId":"eb04b788-6f9d-446c-83fd-1576065361fc","executionInfo":{"status":"ok","timestamp":1589200173975,"user_tz":240,"elapsed":10618,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/two_groups_data.csv\")\n","df=create_example_data(design_values=2)\n","\n","df.head()"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2
00.4022840.049092
10.2082780.550764
20.9584820.986547
30.9577590.277685
40.7028110.749065
\n","
"],"text/plain":[" cell_1 cell_2\n","0 0.402284 0.049092\n","1 0.208278 0.550764\n","2 0.958482 0.986547\n","3 0.957759 0.277685\n","4 0.702811 0.749065"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"qY-7yf8Q3zOQ","colab_type":"text"},"source":["#### Import the desired function and pass in the data for each group\n","- One approach is to winsorize the x and y data\n","- A heteroscedastic method for testing zero correlation is also provided in this package but not shown here \n"," - Please see the function `corb` which uses the percentile bootstrap to compute a 1-alpha CI and p_value for any correlation \n","- The output is a dictionary containing various statistics (the winsorized correlation, winsorized covariance, etc...)"]},{"cell_type":"code","metadata":{"id":"mMeESqd33zOR","colab_type":"code","outputId":"a34a06fa-0113-4201-ce0b-e0d3f5d41930","executionInfo":{"status":"ok","timestamp":1589200173976,"user_tz":240,"elapsed":10612,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["from hypothesize.measuring_associations import wincor\n","\n","results=wincor(df.cell_1, df.cell_2)\n","\n","results['cor']"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.2025744763450888"]},"metadata":{"tags":[]},"execution_count":15}]}]} -------------------------------------------------------------------------------- /examples/l2drmci.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"l2drmci.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPdELHUc+SP48pbwUqZCoT5"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import l2drmci"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=l2drmci(df.cell_1, df.cell_2, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/linconb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"linconb.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPH7skGRd9m7ywyto/ckjRS"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from 
hypothesize.utilities import create_example_data, con1way\n","from hypothesize.compare_groups_with_single_factor import linconb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=linconb(df, con1way(3))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['psihat']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3zbT5WdQvTVv","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/lindepbt.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"lindepbt.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588618128796}],"collapsed_sections":[],"authorship_tag":"ABX9TyO26ovh0/ccrrbqL9dVEnIm"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import lindepbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=lindepbt(df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['psihat']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3zbT5WdQvTVv","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/pb2gen.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pb2gen.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588613922534}],"collapsed_sections":[],"authorship_tag":"ABX9TyM4JaaKmMa7ybUIDVPP24nv"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import pb2gen"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pb2gen(df.cell_1, df.cell_2, 
trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/pball.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pball.ipynb","provenance":[{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyMxgE04vzxNhd/2/0DYU2le"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import pball"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pball(df, beta=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results['pbcorm']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/pbcor.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pbcor.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637753763},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOznWr6vPehd9iyX3yBDNFl"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import pbcor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pbcor(df.cell_1, df.cell_2, beta=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/rmmcppb.ipynb: -------------------------------------------------------------------------------- 1 | 
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"rmmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588616377427}],"collapsed_sections":[],"authorship_tag":"ABX9TyPzvAJueyiG1/st3fdvLHPD"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import rmmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=rmmcppb(df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/spmcpa.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpa.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632756490},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyPpFAYJ9Pgd170X7K5xYNmT"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpa"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpa(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/spmcpb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632952101},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNYt8uKlk3sEvH8XgwuD61D"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import 
create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":202},"outputId":"12ed3517-6b93-41e6-f991-ffd8537060a4","executionInfo":{"status":"ok","timestamp":1588632983358,"user_tz":240,"elapsed":4430,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2cell_3cell_4cell_5cell_6
00.5424870.7818400.6352840.8749770.4798600.589451
10.9841390.4146550.5818260.4307580.5294030.197294
20.1846030.8219670.5697230.2796810.9901540.212335
30.4769370.3515050.1017600.0873720.8264080.847228
40.7301130.3923440.4229780.8359710.0068010.418546
\n","
"],"text/plain":[" cell_1 cell_2 cell_3 cell_4 cell_5 cell_6\n","0 0.542487 0.781840 0.635284 0.874977 0.479860 0.589451\n","1 0.984139 0.414655 0.581826 0.430758 0.529403 0.197294\n","2 0.184603 0.821967 0.569723 0.279681 0.990154 0.212335\n","3 0.476937 0.351505 0.101760 0.087372 0.826408 0.847228\n","4 0.730113 0.392344 0.422978 0.835971 0.006801 0.418546"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/spmcpi.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpi.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633014822},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOIO6PyIZ6fl34R9C9+J1Vy"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpi"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpi(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/tmcppb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"tmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588614092764}],"collapsed_sections":[],"authorship_tag":"ABX9TyPLA0Nj1FqLpYcQhJu8eacQ"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, con1way, trim_mean\n","from hypothesize.compare_groups_with_single_factor import tmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":202},"outputId":"49a67e90-f7e3-4a3b-d397-dd6c3b76a769","executionInfo":{"status":"ok","timestamp":1588614138956,"user_tz":240,"elapsed":4864,"user":{"displayName":"Allan 
Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
cell_1cell_2cell_3
00.9880890.5315940.898677
10.0400620.9907040.393328
20.5634700.3956950.345625
30.8569800.9594410.168044
40.1588020.3914460.324284
\n","
"],"text/plain":[" cell_1 cell_2 cell_3\n","0 0.988089 0.531594 0.898677\n","1 0.040062 0.990704 0.393328\n","2 0.563470 0.395695 0.345625\n","3 0.856980 0.959441 0.168044\n","4 0.158802 0.391446 0.324284"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=tmcppb(df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/winall.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"winall.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637846077},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyN3me0/wTmMxBS9uYJsVVYz"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import winall"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=winall(df, tr=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":69},"outputId":"9eea20c3-b5a7-427b-e522-d401e998d22d","executionInfo":{"status":"ok","timestamp":1588637940868,"user_tz":240,"elapsed":413,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["results['wcor']"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[ 1. , 0.23237836, 0.05106066],\n"," [ 0.23237836, 1. , -0.12543308],\n"," [ 0.05106066, -0.12543308, 1. 
]])"]},"metadata":{"tags":[]},"execution_count":9}]}]} -------------------------------------------------------------------------------- /examples/wincor.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wincor.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637954798},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNio2+2ctQ6oNKTHdc5jl9w"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import wincor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=wincor(df.cell_1, df.cell_2, tr=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/wwmcpbt.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wwmcpbt.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588631937930},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNp6W4ZcH5oqiCDO+AIxThj"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import wwmcpbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"e7d3c247-7ff5-40d8-a3b6-0a79c722595a","executionInfo":{"status":"ok","timestamp":1588632020005,"user_tz":240,"elapsed":1968,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["results=wwmcpbt(2, 3, df, .2)"],"execution_count":9,"outputs":[{"output_type":"stream","text":["ask wilcox if dif is supposed to be a argument 
here\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']['test']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/wwmcppb.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wwmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOTVb1bjVl+ueXtjfFOxCqb"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import wwmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=wwmcppb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']['output']"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/ydbt.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ydbt.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588623469716}],"collapsed_sections":[],"authorship_tag":"ABX9TyOFK1ec5drLBgCKpHf13ETw"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import ydbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=ydbt(df.cell_1, df.cell_2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /examples/yuenbt.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"yuenbt.ipynb","provenance":[{"file_id":"1dOaLcrRIctGehyXDy_sGNp5OSTl_4vCh","timestamp":1588615519643}],"collapsed_sections":[],"authorship_tag":"ABX9TyM5O2LtjdxCgF5QiGsHde1k"},"kernelspec":{"name":"python3","display_name":"Python 
3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import yuenbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=yuenbt(df.cell_1, df.cell_2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]} -------------------------------------------------------------------------------- /hypothesize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from hypothesize import compare_groups_with_single_factor, measuring_associations, \ 3 | compare_groups_with_two_factors 4 | 5 | -------------------------------------------------------------------------------- /hypothesize/compare_groups_with_single_factor/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from ._compare_groups_with_single_factor import * 3 | -------------------------------------------------------------------------------- /hypothesize/compare_groups_with_single_factor/_compare_groups_with_single_factor.py: -------------------------------------------------------------------------------- 1 | __all__ = ["yuenbt", "pb2gen", "linconb", "rmmcppb", 2 | "lindepbt", "bootdpci", "ydbt", "tmcppb", "l2drmci"] 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from scipy.stats import trim_mean 7 | from hypothesize.utilities import yuend, trimse, lincon, trimparts, trimpartt, pandas_to_arrays, \ 8 | con1way, con2way, bptdpsi, rmmcp, trimcibt, remove_nans_based_on_design 9 | 10 | def yuenbt(x, y, tr=.2, alpha=.05, nboot=599, seed=False): 11 | 12 | """ 13 | Compute a 1-alpha confidence interval for the difference between 14 | the trimmed means corresponding to two independent groups. 15 | The bootstrap-t method is used. During the bootstrapping, 16 | the absolute value of the test statistic is used (the "two-sided method"). 17 | 18 | 19 | :param x: Pandas Series 20 | Data for group one 21 | 22 | :param y: Pandas Series 23 | Data for group two 24 | 25 | :param tr: float 26 | Proportion to trim (default is .2) 27 | 28 | :param alpha: float 29 | Alpha level (default is .05) 30 | 31 | :param nboot: int 32 | Number of bootstrap samples (default is 599) 33 | 34 | :param seed: bool 35 | Random seed for reprodicible results. Default is `False`. 
36 | 
37 |     :return:
38 |     Dictionary of results
39 | 
40 |     ci: list
41 | 
42 |     Confidence interval
43 | 
44 |     est_dif: float
45 |     Estimated difference between group one and two
46 | 
47 |     est_x: float
48 |     Estimated value (based on `est`) for group one
49 | 
50 |     est_y: float
51 |     Estimated value (based on `est`) for group two
52 | 
53 |     p_value: float
54 | 
55 |     p-value
56 | 
57 |     test_stat: float
58 |     Test statistic
59 | 
60 |     """
61 | 
62 |     x, y = pandas_to_arrays([x, y])
63 | 
64 |     if seed:
65 |         np.random.seed(seed)
66 | 
67 |     ci = []
68 |     x = x[~np.isnan(x)]
69 |     y = y[~np.isnan(y)]
70 | 
71 |     xcen = x - trim_mean(x, tr)
72 |     ycen = y - trim_mean(y, tr)
73 | 
74 |     test_stat = (trim_mean(x, tr) - trim_mean(y, tr)) / \
75 |         np.sqrt(trimse(x, tr=tr) ** 2 + trimse(y, tr=tr) ** 2)
76 | 
77 |     datax = np.random.choice(xcen, size=(nboot, len(x)))
78 |     datay = np.random.choice(ycen, size=(nboot, len(y)))
79 | 
80 |     top = trim_mean(datax, tr, axis=1) - trim_mean(datay, tr, axis=1)
81 | 
82 |     #botx = list(map(lambda row: trimse(row,.2), datax))
83 |     botx = np.array([trimse(row, tr) for row in datax])
84 |     boty = np.array([trimse(row, tr) for row in datay])
85 |     tval = top / np.sqrt(botx ** 2 + boty ** 2)
86 |     tval = abs(tval)
87 |     tval = sorted(tval)
88 |     icrit = int(np.floor((1 - alpha) * nboot + .5))
89 |     #ibot = int(np.floor(alpha * nboot / 2 + .5))
90 |     #itop = int(np.floor((1 - alpha / 2) * nboot + .5))
91 |     se = np.sqrt((trimse(x, tr)) ** 2 + (trimse(y, tr)) ** 2)
92 |     ci.append(trim_mean(x, tr) - trim_mean(y, tr) - tval[icrit] * se)
93 |     ci.append(trim_mean(x, tr) - trim_mean(y, tr) + tval[icrit] * se)
94 |     p_value = sum(np.abs(test_stat) <= np.abs(tval)) / nboot
95 |     est_x = trim_mean(x, tr)
96 |     est_y = trim_mean(y, tr)
97 |     est_dif = est_x - est_y
98 | 
99 |     results = {'ci': ci, 'test_stat': test_stat, 'p_value': p_value,
100 |                'est_x': est_x, 'est_y': est_y, 'est_dif': est_dif}
101 | 
102 |     return results
103 | 
104 | def linconb(x, con, tr=.2, alpha=.05, nboot=599, seed=False):
105 | 
106 |     """
107 |     Compute a 1-alpha confidence interval for a set of d linear contrasts
108 |     involving trimmed means using the bootstrap-t method.
109 |     Independent groups are assumed. CIs are adjusted to control FWE
110 |     (p values are not adjusted).
111 | 
112 | 
113 |     :param x: DataFrame
114 |     Each column represents a group of data
115 | 
116 |     :param con: array
117 |     `con` is a J (number of columns) by d (number of contrasts)
118 |     matrix containing the contrast coefficients of interest.
119 |     All linear contrasts can be created automatically by using the function [con1way](J)
120 |     (the result of which can be used for `con`).
121 | 
122 |     :param tr: float
123 |     Proportion to trim (default is .2)
124 | 
125 |     :param alpha: float
126 |     Alpha level (default is .05)
127 | 
128 |     :param nboot: int
129 |     Number of bootstrap samples (default is 599)
130 | 
131 |     :param seed: bool
132 |     Random seed for reproducible results. Default is `False`.
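    For a runnable example, see examples/linconb.ipynb above, which calls
    `linconb(df, con1way(3))` on three groups of example data.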
133 | 134 | :return: 135 | Dictionary of results 136 | 137 | con: array 138 | Contrast matrix 139 | 140 | crit: float 141 | Critical value 142 | 143 | n: list 144 | Number of observations for each group 145 | 146 | psihat: DataFrame 147 | Difference score and CI for each contrast 148 | 149 | test: DataFrame 150 | Test statistic, standard error, and p-value for each contrast 151 | 152 | 153 | """ 154 | 155 | x=pandas_to_arrays(x) 156 | 157 | J = len(x) 158 | x = np.asarray([j[~np.isnan(j)] for j in x]) 159 | #Jm = J - 1 160 | #d = (J ** 2 - J) / 2 161 | 162 | if con.shape[0] != len(x): 163 | raise Exception("The number of groups does not match the number of contrast coefficients.") 164 | 165 | bvec = np.zeros([nboot, J, 2]) 166 | 167 | if seed: 168 | np.random.seed(seed) 169 | 170 | nsam = [len(xi) for xi in x] 171 | for j in range(J): 172 | 173 | xcen = x[j] - trim_mean(x[j], tr) 174 | data = np.random.choice(xcen, size=(nboot, len(x[j]))) 175 | 176 | for i, row in enumerate(data): 177 | bvec[i,j,:]=trimparts(row, tr) 178 | 179 | m1 = bvec[:,:,0].T 180 | m2 = bvec[:,:, 1].T 181 | boot = np.zeros([con.shape[1], nboot]) 182 | for d in range(con.shape[1]): 183 | top = np.asarray([trimpartt(row, con[:,d]) for row in m1.T]) 184 | consq = con[:, d] ** 2 185 | bot = np.asarray([trimpartt(row,consq) for row in m2.T]) 186 | boot[d,:] = np.abs(top) / np.sqrt(bot) 187 | 188 | testb=np.asarray([max(row) for row in boot.T]) 189 | ic = int(np.floor((1 - alpha) * nboot) -1) # one less than R 190 | testb = np.sort(testb) 191 | psihat = np.zeros([con.shape[1], 4]) 192 | test = np.zeros([con.shape[1], 4]) 193 | 194 | for d in range(con.shape[1]): 195 | test[d, 0] = d 196 | psihat[d, 0] = d 197 | testit = lincon(x, np.array([con[:,d]]).T, tr, alpha) # column slice of contrast matrix 198 | #test[d, 1]=testit['test'][0, 1] 199 | test[d, 1]=testit['test']['test'][0] 200 | #pval = np.mean((abs(testit['test'][0, 1]) < boot[d,:])) 201 | pval = np.mean((abs(testit['test']['test'][0]) < boot[d,:])) 202 | test[d, 3] = pval 203 | #print(testit['test']) 204 | #print(testit['psihat']) 205 | # psihat[d, 2] = testit['psihat'][0, 1] - testb[ic] * testit['test'][0, 3] 206 | # psihat[d, 3] = testit['psihat'][0, 1] + testb[ic] * testit['test'][0, 3] 207 | # psihat[d, 1] = testit['psihat'][0, 1] 208 | psihat[d, 2] = testit['psihat']['psihat'][0] - testb[ic] * testit['test']['se'][0] 209 | psihat[d, 3] = testit['psihat']['psihat'][0] + testb[ic] * testit['test']['se'][0] 210 | psihat[d, 1] = testit['psihat']['psihat'][0] 211 | #test[d, 2] = testit['test'][0, 3] 212 | test[d, 2] = testit['test']['se'][0] 213 | 214 | 215 | 216 | psihat_col_names=['contrast_index', 'psihat', 'ci_low', 'ci_up'] 217 | test_col_names = ['contrast_index', 'test', 'se', 'p_value'] 218 | 219 | psihat = pd.DataFrame(psihat, columns=psihat_col_names) 220 | test=pd.DataFrame(test, columns=test_col_names) 221 | 222 | return {'n': nsam, 'psihat': psihat, 'test': test, 'crit': testb[ic], 'con': con} 223 | 224 | def rmmcppb(x, est, *args, alpha=.05, con=None, 225 | dif=True, nboot=None, BA=False, 226 | hoch=False, SR=False, seed=False): 227 | 228 | """ 229 | Use a percentile bootstrap method to compare dependent groups. 230 | By default, compute a .95 confidence interval for all linear contrasts 231 | specified by con, a J-by-C matrix, where C is the number of 232 | contrasts to be tested, and the columns of `con` are the 233 | contrast coefficients. If con is not specified, 234 | all pairwise comparisons are done. 
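    For instance, with three dependent groups, `con1way(3)` returns a 3-by-3
    matrix whose columns encode the three pairwise comparisons (a column of the
    form (1, -1, 0) contrasts the first two groups), so passing `con=con1way(3)`
    reproduces the default behavior.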
235 | 236 | If `est` is the function `onestep` or `mom` (these are not implemeted yet), 237 | method SR can be used to control the probability of at least one Type I error. 238 | Otherwise, Hochberg's method is used. 239 | 240 | If `dif` is `False` and `BA` is `True`, the bias adjusted 241 | estimate of the generalized p-value is recommended. 242 | Using `BA`=`True` (when `dif`=`False`) 243 | is recommended when comparing groups 244 | with M-estimators and MOM, but it is not necessary when 245 | comparing 20% trimmed means (Wilcox & Keselman, 2002). 246 | 247 | Hochberg's sequentially rejective method can be used and is used 248 | if n>=80. 249 | 250 | Note that arguments up to and including `args` are positional arguments 251 | 252 | :param x: Pandas DataFrame 253 | Each column represents a group of data 254 | 255 | :param est: function 256 | Measure of location (currently only `trim_mean` is supported) 257 | 258 | :param args: list/value 259 | Parameter(s) for measure of location (e.g., .2) 260 | 261 | :param alpha: float 262 | Alpha level (default is .05) 263 | 264 | :param con: array 265 | `con` is a J (number of columns) by d (number of contrasts) 266 | matrix containing the contrast coefficents of interest. 267 | All linear constrasts can be created automatically by using the function [con1way](J) 268 | (the result of which can be used for `con`). The default is `None` and in this 269 | case all linear contrasts are created automatically. 270 | 271 | :param dif: bool 272 | When `True`, use difference scores, otherwise use marginal distributions 273 | 274 | :param nboot: int 275 | Number of bootstrap samples. Default is `None` 276 | in which case `nboot` will be chosen for you 277 | based on the number of contrasts. 278 | 279 | :param BA: bool 280 | When `True`, use the bias adjusted estimate of the 281 | generalized p-value is applied (e.g., when `dif` is `False`) 282 | 283 | :param hoch: bool 284 | When `True`, Hochberg's sequentially rejective method can be used and is used 285 | if n>=80. 286 | 287 | :param SR: bool 288 | When `True`, use the modified "sequentially rejective", especially when 289 | comparing one-step M-estimators or M-estimators. 290 | 291 | :param seed: bool 292 | Random seed for reprodicible results (default is `False`) 293 | 294 | :return: 295 | Dictionary of results 296 | 297 | con: array 298 | Contrast matrix 299 | 300 | num_sig: int 301 | Number of statistically significant results 302 | 303 | output: DataFrame 304 | Difference score, p-value, critical value, and CI for each contrast 305 | """ 306 | 307 | called_directly=False 308 | if type(x) is pd.core.frame.DataFrame: 309 | called_directly=True 310 | x=x.dropna().values 311 | 312 | if hoch: 313 | SR=False 314 | 315 | if SR: 316 | raise Exception("onestep and mom estimators are not yet implemented" 317 | "and only these can be used with SR method. 
Please set SR to False for now.") 318 | 319 | if dif: 320 | print("analysis is being done on difference scores", 321 | "each confidence interval has probability coverage of 1-alpha.") 322 | 323 | temp=rmmcppbd(x,est, *args, alpha=alpha,con=con, 324 | nboot=nboot,hoch=True) 325 | 326 | if called_directly: 327 | 328 | col_names = ['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper'] 329 | 330 | return {'output': pd.DataFrame(temp['output'], columns=col_names), 331 | 'con': temp['con'], "num_sig": temp['num_sig']} 332 | 333 | else: 334 | 335 | return {'output': temp['output'], 336 | 'con': temp['con'], "num_sig": temp['num_sig']} 337 | 338 | else: 339 | print("dif=False so using marginal distributions") 340 | 341 | if not BA: 342 | print("If and when MOM and/or onestep estimators are implemeted, " 343 | "it is suggested to use BA=True and hoch=T") 344 | 345 | J=x.shape[1] 346 | xcen=np.full([x.shape[0], x.shape[1]], np.nan) 347 | for j in range(J): 348 | xcen[:, j] = x[:, j] - est(x[:, j], *args) 349 | 350 | if con is None: 351 | con=con1way(J) 352 | 353 | d=con.shape[1] 354 | 355 | if nboot is None: 356 | if d<4: 357 | nboot=1000 358 | elif d>4: 359 | nboot=5000 360 | 361 | n=x.shape[0] 362 | connum=con.shape[1] 363 | 364 | if seed: 365 | np.random.seed(seed) 366 | 367 | xbars=est(x,*args) 368 | 369 | psidat=np.zeros(connum) 370 | for ic in range(connum): 371 | psidat[ic]=np.sum(con[:,ic] * xbars) 372 | 373 | psihat=np.zeros([connum, nboot]) 374 | psihatcen=np.zeros([connum, nboot]) 375 | bvec=np.full([nboot,J], np.nan) 376 | bveccen = np.full([nboot, J], np.nan) 377 | data=np.random.randint(n,size=(nboot,n)) 378 | for ib in range(nboot): 379 | bvec[ib,:] = est(x[data[ib,:],:], *args) 380 | bveccen[ib, :] = est(xcen[data[ib, :], :], *args) 381 | 382 | test=np.full(connum, np.nan) 383 | bias=np.full(connum, np.nan) 384 | 385 | for ic in range(connum): 386 | psihat[ic,:]=[bptdpsi(row, con[:, ic]) for row in bvec] 387 | psihatcen[ic,:] = [bptdpsi(row, con[:,ic]) for row in bveccen] 388 | bias[ic] = np.sum((psihatcen[ic,:] > 0)) / nboot - .5 389 | ptemp =(np.sum(psihat[ic,:] > 0) + .5 * np.sum(psihat[ic,:] == 0)) / nboot 390 | 391 | if BA: 392 | test[ic] = ptemp - .1 * bias[ic] 393 | 394 | if not BA: 395 | test[ic] = ptemp 396 | 397 | test[ic] = np.min([test[ic], 1 - test[ic]]) 398 | test[ic] = np.max([test[ic], 0]) # bias corrected might be less than zero 399 | 400 | test=2*test 401 | ncon=con.shape[1] 402 | dvec=alpha/np.arange(1,ncon+1) 403 | 404 | if SR: 405 | 406 | if alpha == .05: 407 | 408 | dvec =[.025, 409 | .025, 410 | .0169, 411 | .0127, 412 | .0102, 413 | .00851, 414 | .0073, 415 | .00639, 416 | .00568, 417 | .00511] 418 | 419 | dvecba = [.05, 420 | .025, 421 | .0169, 422 | .0127, 423 | .0102, 424 | .00851, 425 | .0073, 426 | .00639, 427 | .00568, 428 | .00511] 429 | 430 | if ncon > 10: 431 | avec = .05 / np.arange(11,ncon+1) 432 | dvec = np.append(dvec, avec) 433 | 434 | elif alpha == .01: 435 | 436 | dvec =[.005, 437 | .005, 438 | .00334, 439 | .00251, 440 | .00201, 441 | .00167, 442 | .00143, 443 | .00126, 444 | .00112, 445 | .00101] 446 | 447 | dvecba =[.01, 448 | .005, 449 | .00334, 450 | .00251, 451 | .00201, 452 | .00167, 453 | .00143, 454 | .00126, 455 | .00112, 456 | .00101] 457 | 458 | if ncon > 10: 459 | avec = .01 / np.arange(11,ncon+1) 460 | dvec = np.append(dvec, avec) 461 | 462 | 463 | else: 464 | 465 | dvec = alpha / np.arange(1,ncon+1) 466 | dvecba = dvec 467 | dvec[1] = alpha 468 | 469 | if hoch: 470 | dvec=alpha/np.arange(1,ncon+1) 471 | 472 | 
dvecba=dvec
473 |     temp2 = (-test).argsort()
474 |     zvec = dvec[:ncon]
475 | 
476 |     if BA:
477 |         zvec = dvecba[:ncon]
478 | 
479 |     output = np.zeros([connum, 6])
480 |     tmeans = est(x, *args)
481 | 
482 |     output[temp2, 3] = zvec
483 |     for ic in range(ncon):
484 |         output[ic, 1] = np.sum(con[:, ic] * tmeans)
485 |         output[ic, 0] = ic
486 |         output[ic, 2] = test[ic]
487 |         temp = np.sort(psihat[ic, :])
488 |         icl = round(alpha * nboot / 2) #+ 1
489 |         icu = nboot - icl - 1 #nboot - (icl - 1)
490 |         output[ic, 4] = temp[icl]
491 |         output[ic, 5] = temp[icu]
492 | 
493 |     num_sig = output.shape[0]
494 |     ior = (-output[:, 2]).argsort()
495 |     for j in range(output.shape[0]):
496 |         if output[ior[j], 2] <= output[ior[j], 3]:
497 |             break
498 |         else:
499 |             num_sig = num_sig - 1
500 | 
501 |     if called_directly:
502 |         col_names = ['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper']
503 |         results = {"output": pd.DataFrame(output, columns=col_names), "con": con, "num_sig": num_sig}
504 |         print(results)
505 | 
506 |     else:
507 |         results = {"output": output, "con": con, "num_sig": num_sig}
508 | 
509 | 
510 |     return results
511 | 
512 | def rmmcppbd(x, est, *args, alpha=.05, con=None,
513 |              nboot=None, hoch=True, seed=False):
514 | 
515 |     """
516 |     Use a percentile bootstrap method to compare dependent groups
517 |     based on difference scores.
518 |     By default,
519 |     compute a .95 confidence interval for all linear contrasts
520 |     specified by con, a J by C matrix, where C is the number of
521 |     contrasts to be tested, and the columns of con are the
522 |     contrast coefficients.
523 |     If con is not specified, all pairwise comparisons are done.
524 | 
525 |     nboot is the bootstrap sample size. If not specified, a value will
526 |     be chosen depending on the number of contrasts there are.
527 | 
528 |     A sequentially rejective method is used to control alpha.
529 |     If n>=80, Hochberg's method is used.
530 | 
531 |     Note that arguments up to and including `args` are positional arguments
532 | 
533 |     :param x: array; each column represents a dependent group
534 |     :param est: function; measure of location (e.g., `trim_mean`)
535 |     :param args: parameter(s) for the measure of location (e.g., .2)
536 |     :param alpha: float; alpha level (default is .05)
537 |     :param con: array; J by C contrast matrix (all pairwise contrasts if None)
538 |     :param nboot: int; number of bootstrap samples (chosen automatically if None)
539 |     :param hoch: bool; when True, use Hochberg's sequentially rejective method
540 |     :param seed: bool; random seed for reproducible results
541 |     :return: dictionary containing the results array, contrast matrix, and num_sig
542 |     """
543 | 
544 |     x = x[~np.isnan(x).any(axis=1)]
545 |     J = x.shape[1]
546 |     n = x.shape[0]
547 |     if n >= 80:
548 |         hoch = True
549 | 
550 |     #Jm=J-1
551 |     if con is None:
552 |         con = con1way(J)
553 | 
554 |     d = con.shape[1]
555 |     if not nboot:
556 | 
557 |         if d <= 4:
558 |             nboot = 1000
559 | 
560 |         elif d <= 6:
561 |             nboot = 2000
562 | 
563 |         elif d <= 10:
564 |             nboot = 3000
565 | 
566 |         else:
567 |             nboot = 5000
568 | 
569 |     connum = d
570 |     xx = x @ con
571 | 
572 |     if seed:
573 |         np.random.seed(seed)
574 | 
575 |     psihat = np.zeros([connum, nboot])
576 |     data = np.random.randint(n, size=(nboot, n))
577 | 
578 |     # wilcox's implementation in R is a bit more complicated,
579 |     # I have simplified. Hopefully correctly.
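    # Each bootstrap draw below resamples whole rows (cases) with replacement,
    # preserving the dependence among the J groups; `est` is then applied to the
    # resampled difference scores in `xx` (the data projected onto `con`).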
580 | for ib in range(nboot): 581 | psihat[:,ib]=est(xx[data[ib,:], :], *args) 582 | 583 | test = np.full(connum, np.nan) 584 | icl = round(alpha * nboot // 2) #+ 1 585 | icu = nboot - icl - 2 #- 1 586 | cimat=np.full([connum, 2], np.nan) 587 | 588 | for ic in range(connum): 589 | 590 | test[ic] =(sum(psihat[ic, :] > 0) + .5 * sum(psihat[ic, :] == 0)) / nboot 591 | test[ic] = min(test[ic], 1 - test[ic]) 592 | temp = np.sort(psihat[ic, :]) 593 | cimat[ic, 0] = temp[icl] 594 | cimat[ic, 1] = temp[icu] 595 | 596 | test = 2 * test 597 | ncon = con.shape[1] 598 | 599 | if alpha == .05: 600 | dvec =[.025, 601 | .025, 602 | .0169, 603 | .0127, 604 | .0102, 605 | .00851, 606 | .0073, 607 | .00639, 608 | .00568, 609 | .00511] 610 | 611 | if ncon > 10: 612 | avec = .05 / np.arange(11, ncon+1) 613 | dvec = np.append(dvec, avec) 614 | 615 | elif alpha == .01: 616 | dvec =[.005, 617 | .005, 618 | .00334, 619 | .00251, 620 | .00201, 621 | .00167, 622 | .00143, 623 | .00126, 624 | .00112, 625 | .00101] 626 | 627 | if ncon > 10: 628 | avec = .01 / np.arange(11,ncon+1) 629 | dvec = np.append(dvec, avec) 630 | 631 | else: 632 | dvec = alpha / np.arange(1,ncon+1) 633 | dvec[1] = alpha / 2 634 | 635 | if hoch: 636 | dvec = alpha / (2 * np.arange(1,ncon+1)) 637 | 638 | dvec = 2 * dvec 639 | temp2 = (-test).argsort() 640 | ncon = con.shape[1] 641 | zvec = dvec[:ncon] 642 | output=np.zeros([connum, 6]) 643 | 644 | tmeans=est(xx,*args) 645 | output[temp2, 3] = zvec 646 | 647 | for ic in range(ncon): 648 | output[ic, 1] = tmeans[ic] 649 | output[ic, 0] = ic 650 | output[ic, 2] = test[ic] 651 | output[ic, 4:6] = cimat[ic,:] 652 | 653 | num_sig = np.sum(output[:, 2] <= output[:, 3]) 654 | 655 | return {"output": output, "con": con, "num_sig": num_sig} 656 | 657 | def lindepbt(x, tr=.2, con=None, alpha=.05, nboot=599, dif=True, seed=False): 658 | 659 | """ 660 | Multiple comparisons on trimmed means with FWE controlled with Rom's method 661 | Using a bootstrap-t method. 662 | 663 | :param x: Pandas DataFrame 664 | Each column in the data represents a different group 665 | 666 | :param tr: float 667 | Proportion to trim (default is .2) 668 | 669 | :param con: array 670 | `con` is a J (number of groups) by d (number of contrasts) 671 | matrix containing the contrast coefficents of interest. 672 | All linear constrasts can be created automatically by using the function [con1way](J) 673 | (the result of which can be used for `con`). The default is `None` and in this 674 | case all linear contrasts are created automatically. 675 | 676 | :param alpha: float 677 | Alpha level. Default is .05. 
678 | 
679 |     :param nboot: int
680 |     Number of bootstrap samples (default is 599)
681 | 
682 |     :param dif: bool
683 |     When `True`, use difference scores, otherwise use marginal distributions
684 | 
685 |     :param seed: bool
686 |     Random seed for reproducible results (default is `False`)
687 | 
688 |     :return:
689 |     Dictionary of results
690 | 
691 |     num_sig: int
692 |     Number of statistically significant results
693 | 
694 |     con: array
695 |     Contrast matrix
696 | 
697 |     psihat: DataFrame
698 |     Difference score and CI for each contrast
699 | 
700 |     test: DataFrame
701 |     Test statistic, p-value, critical value, and standard error
702 |     for each contrast
703 |     """
704 | 
705 |     called_directly = False
706 |     if type(x) is pd.DataFrame:
707 |         x = pandas_to_arrays(x)
708 |         x = remove_nans_based_on_design(x, design_values=len(x), design_type='dependent_groups')
709 |         x = np.r_[x].T
710 |         called_directly = True
711 | 
712 |     from hypothesize.measuring_associations import wincor
713 | 
714 |     if seed:
715 |         np.random.seed(seed)
716 | 
717 |     if con is None:
718 |         con = con2way(1, x.shape[1])[1]  # all pairwise
719 |         ncon = con.shape[1]
720 | 
721 |     else:
722 |         ncon = con.shape[1]
723 | 
724 |     x = x[~np.isnan(x).any(axis=1)]
725 |     n = x.shape[0]
726 |     J = x.shape[1]
727 |     nval = x.shape[0]
728 |     h1 = nval - 2 * np.floor(tr * nval)
729 |     #df=h1-1
730 |     xbar = trim_mean(x, tr)
731 | 
732 |     if alpha == .05:
733 | 
734 |         dvec = [.05,
735 |                 .025,
736 |                 .0169,
737 |                 .0127,
738 |                 .0102,
739 |                 .00851,
740 |                 .0073,
741 |                 .00639,
742 |                 .00568,
743 |                 .00511]
744 | 
745 |         if ncon > 10:
746 |             avec = .05 / np.arange(11, ncon + 1)
747 |             dvec = np.append(dvec, avec)
748 | 
749 |     elif alpha == .01:
750 | 
751 |         dvec = [.01,
752 |                 .005,
753 |                 .00334,
754 |                 .00251,
755 |                 .00201,
756 |                 .00167,
757 |                 .00143,
758 |                 .00126,
759 |                 .00112,
760 |                 .00101]
761 | 
762 |         if ncon > 10:
763 |             avec = .01 / np.arange(11, ncon + 1)
764 |             dvec = np.append(dvec, avec)
765 | 
766 | 
767 |     else:
768 |         dvec = alpha / np.arange(1, ncon + 1)
769 | 
770 | 
771 |     psihat = np.zeros([ncon, 4])
772 |     test = np.zeros([ncon, 5])
773 |     temp1 = np.array([])
774 | 
775 |     for d in range(ncon):
776 |         psihat[d, 0] = d
777 | 
778 |         if not dif:
779 |             psihat[d, 1] = np.sum(con[:, d] * xbar)
780 |             sejk = 0
781 | 
782 |             for j in range(J):
783 |                 for k in range(J):
784 |                     djk = (nval - 1) * wincor(x[:, j], x[:, k], tr)['wcov'] / (h1 * (h1 - 1))
785 |                     sejk = sejk + con[j, d] * con[k, d] * djk
786 | 
787 |             sejk = np.sqrt(sejk)
788 |             test[d, 0] = d
789 |             test[d, 1] = np.sum(con[:, d] * xbar) / sejk
790 |             test[d, 4] = sejk
791 | 
792 |             data = np.random.randint(n, size=(nboot, n))
793 |             xcen = np.full([x.shape[0], x.shape[1]], np.nan)
794 |             for j in range(J):
795 |                 xcen[:, j] = x[:, j] - trim_mean(x[:, j], tr)
796 | 
797 |             bvec = [lindep_sub(data_row, xcen, con[:, d], tr=tr)
798 |                     for data_row in data]
799 | 
800 |             bsort = np.sort(np.abs(bvec))
801 |             ic = round((1 - alpha) * nboot) - 1  # correct for python with the "- 1"?
802 |             psihat[d, 2] = psihat[d, 1] - bsort[ic] * test[d, 4]
803 |             psihat[d, 3] = psihat[d, 1] + bsort[ic] * test[d, 4]
804 |             p_value = np.mean(np.abs(test[d, 1]) <= np.abs(bvec))
805 |             temp1 = np.append(temp1, p_value)
806 | 
807 |         elif dif:
808 | 
809 |             for j in range(J):
810 |                 if j == 0:
811 |                     dval = con[j, d] * x[:, j]
812 | 
813 |                 elif j > 0:
814 |                     dval = dval + con[j, d] * x[:, j]
815 | 
816 |             temp = trimcibt(dval, tr=tr, alpha=alpha, nboot=nboot, seed=seed)
817 |             temp1 = np.append(temp1, temp['p_value'])
818 |             test[d, 0] = d
819 |             test[d, 1] = temp['test_stat']  ## missing in R?
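            # The remaining columns store the trimmed-mean standard error, the
            # point estimate, and the bootstrap-t CI returned by `trimcibt`.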
820 |             test[d, 4] = trimse(dval, tr=tr)
821 |             psihat[d, 1] = trim_mean(dval, tr)
822 |             psihat[d, 2] = temp['ci'][0]
823 |             psihat[d, 3] = temp['ci'][1]
824 | 
825 |     test[:, 2] = temp1
826 |     temp2 = (-temp1).argsort()
827 |     zvec = dvec[:ncon]
828 |     test[temp2, 3] = zvec
829 | 
830 |     # if flagcon
831 |     num_sig = np.sum(test[:, 2] <= test[:, 3])
832 | 
833 |     if called_directly:
834 | 
835 |         test=pd.DataFrame(test, columns=["con_num", "test", "p_value", "p_crit", "se"])
836 |         psihat=pd.DataFrame(psihat, columns=["con_num", "psihat", "ci_lower", "ci_upper"])
837 | 
838 | 
839 |     return {'test': test, 'psihat': psihat, 'con': con, 'num_sig': num_sig}
840 | 
841 | def lindep_sub(data, x, con = None, tr = .2):
842 | 
843 |     con = con.reshape(len(con), 1) # make 2D col vector
844 |     res = rmmcp(x[data,:], con=con, tr=tr, dif=False)['test'][:, 1]
845 | 
846 |     return res[0]
847 | 
848 | def pb2gen(x, y, est, *args, alpha=.05, nboot=2000, seed=False):
849 | 
850 |     """
851 |     Compute a bootstrap confidence interval for the
852 |     difference between any two parameters corresponding to two
853 |     independent groups.
854 | 
855 |     Note that arguments up to and including `args` are positional arguments
856 | 
857 |     :param x: Pandas Series
858 |         Data for group one
859 | 
860 |     :param y: Pandas Series
861 |         Data for group two
862 | 
863 |     :param est: function
864 |         Measure of location (currently only `trim_mean` is supported)
865 | 
866 |     :param args: list/value
867 |         Parameter(s) for measure of location (e.g., .2)
868 | 
869 |     :param alpha: float
870 |         Alpha level (default is .05)
871 | 
872 |     :param nboot: int
873 |         Number of bootstrap samples (default is 2000)
874 | 
875 |     :param seed: bool
876 |         Random seed for reproducible results (default is `False`)
877 | 
878 |     :return:
879 |         Dictionary of results
880 | 
881 |         ci: list
882 | 
883 |             Confidence interval
884 | 
885 |         est_1: float
886 |             Estimated value (based on `est`) for group one
887 | 
888 |         est_2: float
889 |             Estimated value (based on `est`) for group two
890 | 
891 |         est_dif: float
892 |             Estimated difference between group one and two
893 | 
894 |         n1: int
895 |             Number of observations in group one
896 | 
897 |         n2: int
898 |             Number of observations in group two
899 | 
900 |         p_value: float
901 | 
902 |             p-value
903 | 
904 |         variance: float
905 |             Variance of the bootstrap difference estimates
906 |     """
907 | 
908 |     x, y = pandas_to_arrays([x, y])
909 | 
910 |     x=x[~np.isnan(x)]
911 |     y=y[~np.isnan(y)]
912 | 
913 |     if seed:
914 |         np.random.seed(seed)
915 | 
916 | 
917 |     datax = np.random.choice(x, size=(nboot, len(x)))
918 |     datay = np.random.choice(y, size=(nboot, len(y)))
919 | 
920 |     bvecx=est(datax, *args, axis=1)
921 |     bvecy = est(datay, *args, axis=1)
922 | 
923 |     bvec = np.sort(bvecx - bvecy)
924 |     low = round((alpha / 2) * nboot) #+ 1
925 |     up = nboot - low - 2
926 |     temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
927 |     sig_level = 2 * (min(temp, 1 - temp))
928 |     se = np.var(bvec)
929 | 
930 |     results={'est_1': est(x,*args),
931 |              'est_2': est(y,*args),
932 |              'est_dif': est(x, *args) - est(y, *args),
933 |              'ci': [bvec[low], bvec[up]],
934 |              'p_value': sig_level,
935 |              'variance': se,
936 |              'n1': len(x),
937 |              'n2': len(y)}
938 | 
939 |     return results
940 | 
941 | def bootdpci(x, est, *args, nboot=None, alpha=.05,
942 |              dif=True, BA=False, SR=False):
943 | 
944 |     """
945 |     Using the percentile bootstrap method, compute a .95 confidence interval
946 |     for the difference between a measure of location or scale
947 |     when comparing two dependent groups.
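    Example (a minimal sketch mirroring the call in build_test_data.py;
    `create_example_data` and `trim_mean` come from hypothesize.utilities):

        from hypothesize.utilities import create_example_data, trim_mean

        df = create_example_data(3)  # three dependent groups
        results = bootdpci(df, trim_mean, .2)  # 20% trimmed means
        results['output']  # psihat, p-value, p-crit, and CI per contrast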
948 | 
949 |     The argument `dif` defaults to `True` indicating
950 |     that difference scores will be used, in which case Hochberg’s
951 |     method is used to control FWE. If `dif` is `False`, measures of
952 |     location associated with the marginal distributions are used
953 |     instead.
954 | 
955 |     If `dif` is `False` and `BA` is `True`, the bias adjusted
956 |     estimate of the generalized p-value is recommended.
957 |     Using `BA`=`True` (when `dif`=`False`)
958 |     is recommended when comparing groups
959 |     with M-estimators and MOM, but it is not necessary when
960 |     comparing 20% trimmed means (Wilcox & Keselman, 2002).
961 | 
962 |     The so-called SR method, which is a slight
963 |     modification of Hochberg's (1988) "sequentially rejective"
964 |     method, can be applied to control FWE, especially when
965 |     comparing one-step M-estimators or M-estimators.
966 | 
967 |     Note that arguments up to and including `args` are positional arguments
968 | 
969 |     :param x: Pandas DataFrame
970 |         Each column represents a group of data
971 | 
972 |     :param est: function
973 |         Measure of location (currently only `trim_mean` is supported)
974 | 
975 |     :param args: list/value
976 |         Parameter(s) for measure of location (e.g., .2)
977 | 
978 |     :param alpha: float
979 |         Alpha level. Default is .05.
980 | 
981 |     :param nboot: int
982 |         Number of bootstrap samples. Default is `None`
983 |         in which case `nboot` will be chosen for you
984 |         based on the number of contrasts.
985 | 
986 |     :param dif: bool
987 |         When `True`, use difference scores, otherwise use marginal distributions
988 | 
989 |     :param BA: bool
990 |         When `True`, the bias adjusted estimate of the
991 |         generalized p-value is applied (used when `dif` is `False`)
992 | 
993 |     :param SR: bool
994 |         When `True`, use the modified "sequentially rejective" method, especially when
995 |         comparing one-step M-estimators or M-estimators
996 | 
997 |     :return:
998 |         Dictionary of results
999 | 
1000 |         con: array
1001 |             Contrast matrix
1002 | 
1003 |         num_sig: int
1004 |             Number of statistically significant results
1005 | 
1006 |         output: DataFrame
1007 |             Difference score, p-value, critical value, and CI for each contrast
1008 | 
1009 |     """
1010 | 
1011 |     # replace with actual estimators when implemented
1012 |     if SR and est not in ('onestep', 'mom'):
1013 |         SR=False
1014 |         print("setting SR to False. SR=True should apparently "
1015 |               "only be used with onestep or mom")
1016 | 
1017 |     ## in R
1018 |     # okay=False
1019 |     # if est in (onestep, mom):
1020 |     #     okay=True
1021 |     #
1022 |     # if not okay:
1023 |     #     SR=False
1024 | 
1025 |     results=rmmcppb(x, est, *args, nboot=nboot,alpha=alpha,
1026 |                     SR=SR, dif=dif, BA=BA)
1027 | 
1028 |     col_names = ['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper']
1029 |     results.update({'output': pd.DataFrame(results['output'], columns=col_names)})
1030 | 
1031 |     return results
1032 | 
1033 | def ydbt(x, y, tr=.2, alpha=.05, nboot=599, side=True, seed=False):
1034 | 
1035 |     """
1036 |     Using the bootstrap-t method,
1037 |     compute a .95 confidence interval for the difference between
1038 |     the marginal trimmed means of paired data.
1039 |     By default, 20% trimming is used with 599 bootstrap samples.
1040 | 
1041 | 
1042 |     :param x: Pandas Series
1043 |         Data for group one
1044 | 
1045 |     :param y: Pandas Series
1046 |         Data for group two
1047 | 
1048 |     :param tr: float
1049 |         Proportion to trim (default is .2)
1050 | 
1051 |     :param alpha: float
1052 |         Alpha level. Default is .05.
1053 | 
1054 |     :param nboot: int
1055 |         Number of bootstrap samples (default is 599)
1056 | 
1057 |     :param side: bool
1058 |         When `True`, the function returns a symmetric CI and a p-value, otherwise the function returns an equal-tailed CI (no p-value)
1059 | 
1060 |     :param seed: bool
1061 |         Random seed for reproducible results (default is `False`)
1062 | 
1063 |     :return:
1064 |         Dictionary of results
1065 | 
1066 |         ci: list
1067 |             Confidence interval
1068 | 
1069 |         dif: float
1070 |             Difference between group one and two
1071 | 
1072 |         p_value: float
1073 |             p-value
1074 |     """
1075 | 
1076 |     x = pandas_to_arrays([x, y])
1077 |     x=remove_nans_based_on_design(x, 2, 'dependent_groups')
1078 |     x,y=[x[0], x[1]]
1079 | 
1080 |     if seed:
1081 |         np.random.seed(seed)
1082 | 
1083 |     data = np.random.randint(len(x), size=(nboot, len(x)))
1084 | 
1085 |     xcen = x - trim_mean(x, tr)
1086 |     ycen = y - trim_mean(y, tr)
1087 | 
1088 |     bvec=[tsub(row, xcen, ycen, tr) for row in data]
1089 | 
1090 |     dotest = yuend(x, y, tr=tr)
1091 | 
1092 |     estse = dotest['se']
1093 |     p_value = np.nan
1094 |     dif = trim_mean(x, tr) - trim_mean(y, tr)
1095 |     ci=[]
1096 | 
1097 |     if not side:
1098 |         print('p_value is only returned when side=True')
1099 |         ilow = round((alpha / 2) * nboot) -1
1100 |         ihi = nboot - ilow - 2
1101 |         bsort = np.sort(bvec)
1102 |         ci.append(dif - bsort[ihi] * estse)
1103 |         ci.append(dif - bsort[ilow + 1] * estse)
1104 | 
1105 |     else:
1106 |         bsort = np.sort(np.abs(bvec))
1107 |         ic = round((1 - alpha) * nboot)-1
1108 |         ci.append(dif - bsort[ic] * estse)
1109 |         ci.append(dif + bsort[ic] * estse)
1110 |         p_value = (np.sum(np.abs(dotest['teststat']) <= np.abs(bvec))) / nboot
1111 | 
1112 | 
1113 |     return {'ci': ci, 'dif': dif, 'p_value': p_value}
1114 | 
1115 | def tsub(isub, x, y, tr):
1116 | 
1117 |     """
1118 |     Compute test statistic for trimmed means
1119 |     when comparing dependent groups.
1120 |     By default, 20% trimmed means are used.
1121 |     isub is an array of length n of random integers
1122 |     to control bootstrap sampling.
1123 | 
1124 |     This function is used by ydbt
1125 | 
1126 |     :param isub:
1127 |     :param x:
1128 |     :param y:
1129 |     :param tr:
1130 |     :return:
1131 |     """
1132 | 
1133 |     tsub_res = yuend(x[isub], y[isub], tr = tr)['teststat']
1134 | 
1135 |     return tsub_res
1136 | 
1137 | def tmcppb(x, est, *args, con=None, bhop=False, alpha=.05, nboot=None, seed=False):
1138 | 
1139 |     """
1140 |     Multiple comparisons for J independent groups using trimmed means and
1141 |     the percentile bootstrap method. Rom’s method is used to control the
1142 |     probability of one or more type I errors. For C > 10 hypotheses,
1143 |     or when the goal is to test at some level other than .05 and .01,
1144 |     Hochberg’s method is used. Setting the argument `bhop` to `True` uses the
1145 |     Benjamini–Hochberg method instead.
1146 | 
1147 |     Note that arguments up to and including `args` are positional arguments
1148 | 
1149 |     :param x: Pandas DataFrame
1150 |         Each column represents a group of data
1151 | 
1152 |     :param est: function
1153 |         Measure of location (currently only `trim_mean` is supported)
1154 | 
1155 |     :param args: list/value
1156 |         Parameter(s) for measure of location (e.g., .2)
1157 | 
1158 |     :param con: array
1159 |         `con` is a J (number of columns) by d (number of contrasts)
1160 |         matrix containing the contrast coefficients of interest.
1161 |         All linear contrasts can be created automatically by using the function [con1way](J)
1162 |         (the result of which can be used for `con`).
The default is `None` and in this
1163 |         case all linear contrasts are created automatically.
1164 | 
1165 |     :param bhop: bool
1166 |         If `True`, the Benjamini–Hochberg method is used to control FWE
1167 | 
1168 |     :param alpha: float
1169 |         Alpha level. Default is .05.
1170 | 
1171 |     :param nboot: int
1172 |         Number of bootstrap samples. Default is `None`, in which case `nboot` is chosen for you based on the number of groups.
1173 | 
1174 |     :param seed: bool
1175 |         Random seed for reproducible results. Default is `False`.
1176 | 
1177 |     :return:
1178 |         Dictionary of results
1179 | 
1180 |         con: array
1181 |             Contrast matrix
1182 | 
1183 |         num_sig: int
1184 |             Number of statistically significant results
1185 | 
1186 |         output: DataFrame
1187 |             Difference score, p-value, critical value, and CI for each contrast
1188 |     """
1189 | 
1190 |     x=pandas_to_arrays(x)
1191 |     x=remove_nans_based_on_design(x, len(x), 'independent_groups')
1192 |     J=len(x)
1193 | 
1194 |     mvec = [est(i, *args) for i in x]
1195 | 
1196 |     if con is None:
1197 |         con=con1way(J)
1198 | 
1199 |     ncon=con.shape[1]
1200 | 
1201 |     if not nboot:
1202 |         nboot = 5000
1203 |         if J <= 3:
1204 |             nboot = 2000
1205 |         elif J <= 8:
1206 |             nboot = 4000
1207 | 
1208 |     if not bhop:
1209 | 
1210 |         if alpha == .05:
1211 |             dvec=[.05,
1212 |                   .025,
1213 |                   .0169,
1214 |                   .0127,
1215 |                   .0102,
1216 |                   .00851,
1217 |                   .0073,
1218 |                   .00639,
1219 |                   .00568,
1220 |                   .00511]
1221 | 
1222 |             if ncon > 10:
1223 |                 avec = .05 / np.arange(11,ncon+1)
1224 |                 dvec = np.append(dvec, avec)
1225 | 
1226 |         elif alpha == .01:
1227 |             dvec =[.01,
1228 |                    .005,
1229 |                    .00334,
1230 |                    .00251,
1231 |                    .00201,
1232 |                    .00167,
1233 |                    .00143,
1234 |                    .00126,
1235 |                    .00112,
1236 |                    .00101]
1237 | 
1238 |             if ncon > 10:
1239 |                 avec = .01 / np.arange(11,ncon+1)
1240 |                 dvec = np.append(dvec, avec)
1241 | 
1242 |         else: # alpha is neither .05 nor .01
1243 |             dvec = alpha / np.arange(1,ncon+1)
1244 | 
1245 |     else:
1246 |         dvec = (ncon - np.arange(1,ncon+1) + 1) * alpha / ncon
1247 | 
1248 |     if seed:
1249 |         np.random.seed(seed)
1250 | 
1251 |     bvec=np.full([J,nboot], np.nan)
1252 |     for i, j in enumerate(x):
1253 |         data = np.random.choice(j, size=(nboot, len(j)))
1254 |         bvec[i,:]=[est(row, *args) for row in data]
1255 | 
1256 |     bcon=con.T @ bvec
1257 |     tvec=con.T @ mvec
1258 |     test=np.full(ncon, np.nan)
1259 |     for d in range(ncon):
1260 |         tv = np.sum(bcon[d,:] == 0) / nboot
1261 |         test[d] = np.sum(bcon[d, :] > 0) / nboot + .5 * tv
1262 |         if test[d] > .5:
1263 |             test[d] = 1 - test[d]
1264 | 
1265 |     output=np.full([ncon,6], np.nan)
1266 |     test=2*test
1267 |     temp2=(-test).argsort()
1268 |     zvec = dvec[:ncon]
1269 |     output[temp2, 3] = zvec
1270 |     icl = int(np.round(dvec[-1] * nboot / 2) + 1) - 1
1271 |     icu = nboot - icl - 3
1272 | 
1273 |     for ic in range(ncon):
1274 |         output[ic, 1] = tvec[ic]
1275 |         output[ic, 0] = ic
1276 |         output[ic, 2] = test[ic]
1277 |         temp = np.sort(bcon[ic, :])
1278 |         output[ic, 4] = temp[icl]
1279 |         output[ic, 5] = temp[icu]
1280 | 
1281 | 
1282 |     num_sig = np.sum(output[:, 2] <= output[:, 3])
1283 |     cols=["con_num","psihat", "p_value", "p_crit", "ci_lower", "ci_upper"]
1284 |     output=pd.DataFrame(output, columns=cols)
1285 | 
1286 |     results={'output': output, 'con': con, 'num_sig': num_sig}
1287 | 
1288 |     return results
1289 | 
1290 | def l2drmci(x,y, est, *args, pairwise_drop_na=True, alpha=.05, nboot=2000, seed=False):
1291 | 
1292 |     """
1293 |     Compute a bootstrap confidence interval for a
1294 |     measure of location associated with the distribution of x-y.
1295 |     That is, compare x and y by looking at all possible difference scores
1296 |     in random samples of `x` and `y`.
`x` and `y` are possibly dependent.
1297 | 
1298 |     Note that arguments up to and including `args` are positional arguments
1299 | 
1300 |     :param x: Pandas Series
1301 |         Data for group one
1302 | 
1303 |     :param y: Pandas Series
1304 |         Data for group two
1305 | 
1306 |     :param est: function
1307 |         Measure of location (currently only `trim_mean` is supported)
1308 | 
1309 |     :param args: list/value
1310 |         Parameter(s) for measure of location (e.g., .2)
1311 | 
1312 |     :param pairwise_drop_na: bool
1313 |         If True, treat data as dependent and remove any row with missing data. If False,
1314 |         remove missing data for each group separately (cannot deal with unequal sample sizes)
1315 | 
1316 |     :param alpha: float
1317 |         Alpha level (default is .05)
1318 | 
1319 |     :param nboot: int
1320 |         Number of bootstrap samples (default is 2000)
1321 | 
1322 |     :param seed: bool
1323 |         Random seed for reproducible results (default is `False`)
1324 | 
1325 |     :return:
1326 |         Dictionary of results
1327 | 
1328 |         ci: list
1329 | 
1330 |             Confidence interval
1331 | 
1332 |         p_value: float
1333 | 
1334 |             p-value
1335 |     """
1336 | 
1337 |     x, y = pandas_to_arrays([x, y])
1338 | 
1339 |     if pairwise_drop_na:
1340 |         m1 = np.c_[x, y] # cbind
1341 |         x = m1[~np.isnan(m1).any(axis=1)]
1342 | 
1343 |     else:
1344 |         x = x[~np.isnan(x)]
1345 |         y = y[~np.isnan(y)]
1346 | 
1347 |         if len(x) != len(y):
1348 |             raise Exception("With unequal sample sizes, you might consider wmwpb "
1349 |                             "(currently not implemented)")
1350 | 
1351 |         else:
1352 |             x = np.c_[x, y] # cbind
1353 | 
1354 |     if seed:
1355 |         np.random.seed(seed)
1356 | 
1357 |     data = np.random.choice(x.shape[0], size=(nboot, len(x)))
1358 | 
1359 |     bvec=np.full(nboot, np.nan)
1360 |     for i in range(nboot):
1361 |         bvec[i] = \
1362 |             loc2dif(x[data[i,:], 0], x[data[i,:], 1], est, *args,
1363 |                     drop_na=pairwise_drop_na)
1364 | 
1365 |     bvec=np.sort(bvec)
1366 |     low = int(np.round((alpha / 2) * nboot) + 1) -1
1367 |     up = nboot - low -2
1368 |     temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
1369 |     sig_level = 2 * (np.min([temp, 1 - temp]))
1370 |     ci=[bvec[low], bvec[up]]
1371 | 
1372 |     results=dict(zip(['ci', 'p_value'], [ci, sig_level]))
1373 | 
1374 |     return results
1375 | 
1376 | def loc2dif(x,y, est, *args, drop_na=True):
1377 | 
1378 |     """
1379 |     Compute a measure of location associated with the
1380 |     distribution of x-y, the typical difference between two randomly sampled values.
1381 |     The measure of location is indicated by the argument
1382 |     est.
1383 | 
1384 |     x and y are paired data or independent variables having the same length.
1385 |     If x and y have different lengths, use the function wmwloc (not currently implemented)
1386 | 
1387 |     An advantage of this estimator is its relatively high efficiency, even under
1388 |     normality, versus using sample means.
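    A sketch of the core computation (after missing values are handled),
    assuming `trim_mean` as the estimator:

        import numpy as np
        from scipy.stats import trim_mean

        x = np.array([1., 2., 3.])
        y = np.array([0., 2., 4.])
        diffs = np.subtract.outer(x, y).flatten()  # all pairwise x-y differences
        trim_mean(diffs, .2)  # the "typical" difference between x and y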
1389 | 1390 | :param x: 1391 | :param y: 1392 | :param est: 1393 | :param args: 1394 | :param drop_na: 1395 | :return: 1396 | """ 1397 | 1398 | if drop_na: 1399 | m1 = np.c_[x, y] # cbind 1400 | m1 = m1[~np.isnan(m1).any(axis=1)] 1401 | x, y = [m1[:,0], m1[:,1]] 1402 | 1403 | else: 1404 | x=x[~np.isnan(x)] 1405 | y=y[~np.isnan(y)] 1406 | 1407 | temp=np.subtract.outer(x,y).reshape(len(x)*len(y)) 1408 | val=est(temp, *args) 1409 | 1410 | return val 1411 | 1412 | 1413 | 1414 | 1415 | 1416 | 1417 | 1418 | 1419 | 1420 | 1421 | 1422 | -------------------------------------------------------------------------------- /hypothesize/compare_groups_with_two_factors/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from ._compare_groups_with_two_factors import * 3 | -------------------------------------------------------------------------------- /hypothesize/measuring_associations/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from ._measuring_associations import * -------------------------------------------------------------------------------- /hypothesize/measuring_associations/_measuring_associations.py: -------------------------------------------------------------------------------- 1 | __all__ = ["wincor", "pbcor", "corb", "pball", "winall"] 2 | 3 | import numpy as np 4 | from scipy.stats.mstats import winsorize 5 | from scipy.stats import t, chi2, trim_mean 6 | from hypothesize.utilities import pandas_to_arrays 7 | 8 | def wincor(x, y, tr=.2): 9 | 10 | """ 11 | Compute the winsorized correlation between `x` and `y`. 12 | This function also returns the winsorized covariance. 13 | 14 | 15 | :param x: Pandas Series 16 | Data for group one 17 | 18 | :param y: Pandas Series 19 | Data for group two 20 | 21 | :param tr: float 22 | Proportion to winsorize (default is .2) 23 | 24 | :return: 25 | Dictionary of results 26 | 27 | cor: float 28 | Winsorized correlation 29 | 30 | nval: int 31 | Number of observations 32 | 33 | sig: float 34 | p-value 35 | 36 | wcov: float 37 | Winsorized covariance 38 | """ 39 | 40 | if type(x) is not np.ndarray: 41 | x, y=pandas_to_arrays([x, y]) 42 | 43 | m1 = np.c_[x, y] # cbind 44 | m1 = m1[~np.isnan(m1).any(axis=1)] 45 | nval = m1.shape[0] 46 | x = m1[:, 0] 47 | y = m1[:, 1] 48 | g = np.floor(tr * len(x)) 49 | xvec = winsorize(x, limits=(tr,tr)) 50 | yvec = winsorize(y, limits=(tr,tr)) 51 | wcor = np.corrcoef(xvec, yvec)[0,1] 52 | wcov = np.cov(xvec, yvec)[0,1] 53 | test = wcor * np.sqrt((len(x) - 2) / (1. - wcor ** 2)) 54 | sig = 2 * (1 - t.cdf(abs(test), len(x) - 2 * g - 2)) 55 | 56 | res={'cor': wcor, 'wcov': wcov, 'sig': sig, 'nval': nval} 57 | 58 | return res 59 | 60 | def pbcor(x, y, beta=.2): 61 | 62 | """ 63 | Compute the percentage bend 64 | correlation between `x` and `y` 65 | 66 | 67 | :param x: Pandas Series 68 | Data for group one 69 | 70 | :param y: Pandas Series 71 | Data for group two 72 | 73 | :param beta: float 74 | `0 < beta < .5`. Beta is analogous to trimming in 75 | other functions and related to the measure of 76 | dispersion used in the percentage bend 77 | calculation. 
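    Example (a minimal sketch mirroring the call in build_test_data.py;
    `create_example_data` comes from hypothesize.utilities):

        from hypothesize.utilities import create_example_data

        df = create_example_data(2)
        results = pbcor(df.cell_1, df.cell_2)
        results['cor'], results['p_value']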
78 | 
79 |     :return:
80 |         Dictionary of results
81 | 
82 |         cor: float
83 |             Correlation
84 | 
85 |         nval: int
86 |             Number of observations
87 | 
88 |         p_value: float
89 |             p-value
90 | 
91 |         test: float
92 |             Test statistic
93 | 
94 |     """
95 | 
96 |     if type(x) is not np.ndarray:
97 |         x, y = pandas_to_arrays([x, y])
98 | 
99 |     if len(x) != len(y):
100 |         raise Exception("The arrays do not have equal lengths")
101 | 
102 |     m1 = np.c_[x, y] # cbind
103 |     m1 = m1[~np.isnan(m1).any(axis=1)]
104 |     nval = m1.shape[0]
105 |     x = m1[:, 0]
106 |     y = m1[:, 1]
107 |     temp = np.sort(abs(x - np.median(x)))
108 |     omhatx = temp[int(np.floor((1 - beta) * len(x)))-1]
109 |     temp = np.sort(abs(y - np.median(y)))
110 |     omhaty = temp[int(np.floor((1 - beta) * len(y)))-1]
111 | 
112 |     a = (x - pbos(x, beta)) / omhatx
113 |     b = (y - pbos(y, beta)) / omhaty
114 | 
115 |     a = np.where(a <= -1, -1, a)
116 |     a = np.where(a >= 1, 1, a)
117 |     b = np.where(b <= -1, -1, b)
118 |     b = np.where(b >= 1, 1, b)
119 | 
120 |     pbcor_result = sum(a * b) / np.sqrt(sum(a ** 2) * sum(b ** 2))
121 |     test = pbcor_result * np.sqrt((len(x) - 2) / (1 - pbcor_result ** 2))
122 |     sig = 2 * (1 - t.cdf(abs(test), len(x) - 2))
123 | 
124 |     res = {'cor': pbcor_result, 'test': test, 'p_value': sig, 'nval': nval}
125 |     return res
126 | 
127 | def pbos(x, beta=.2):
128 | 
129 |     """
130 |     Compute the one-step percentage bend measure of location
131 | 
132 |     :param x:
133 |     :param beta:
134 |     :return:
135 |     """
136 | 
137 |     temp = np.sort(abs(x - np.median(x)))
138 |     omhatx = temp[int(np.floor((1 - beta) * len(x)))-1]
139 |     psi = (x - np.median(x)) / omhatx
140 |     i1 = len(psi[psi < -1])
141 |     i2 = len(psi[psi > 1])
142 | 
143 |     sx = np.where(psi < -1, 0, x)
144 |     sx = np.where(psi > 1, 0, sx)
145 | 
146 |     pbos_result = (sum(sx) + omhatx * (i2 - i1)) / (len(x) - i1 - i2)
147 | 
148 |     return pbos_result
149 | 
150 | def corb(corfun, x, y, alpha, nboot, *args, seed=False):
151 | 
152 |     """
153 |     Compute a 1-alpha confidence interval for a
154 |     correlation using the percentile bootstrap method.
155 |     The function `corfun` is any function that returns a
156 |     correlation coefficient. The functions pbcor and
157 |     wincor follow this convention. When using
158 |     Pearson's correlation, and when n<250, use
159 |     lsfitci instead (not yet implemented).
160 | 
161 |     Note that arguments up to and including `args` are positional arguments
162 | 
163 |     :param corfun: function
164 |         corfun is any function that returns a correlation coefficient
165 | 
166 |     :param x: Pandas Series
167 |         Data for group one
168 | 
169 |     :param y: Pandas Series
170 |         Data for group two
171 | 
172 |     :param alpha: float
173 |         Alpha level
174 | 
175 |     :param nboot: int
176 |         Number of bootstrap samples
177 | 
178 |     :param args: list/value
179 |         List of arguments to corfun (e.g., .2)
180 | 
181 |     :param seed: bool
182 |         Random seed for reproducible results. Default is `False`.
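    Example (a minimal sketch using `wincor` as `corfun`;
    `create_example_data` comes from hypothesize.utilities, and the
    trailing `.2` is the winsorizing proportion passed through to `wincor`):

        from hypothesize.utilities import create_example_data

        df = create_example_data(2)
        results = corb(wincor, df.cell_1, df.cell_2, .05, 2000, .2)
        results['ci']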
183 | 
184 |     :return:
185 |         Dictionary of results
186 | 
187 |         ci: list
188 |             Confidence interval
189 | 
190 |         cor: float
191 |             Correlation estimate
192 | 
193 |         p_value: float
194 |             p-value
195 | 
196 |     """
197 | 
198 |     x, y=pandas_to_arrays([x, y])
199 | 
200 | 
201 |     m1 = np.c_[x, y] # cbind
202 |     m1 = m1[~np.isnan(m1).any(axis=1)]
203 |     nval = m1.shape[0]
204 |     x = m1[:, 0]
205 |     y = m1[:, 1]
206 |     est = corfun(x, y, *args)['cor']#[0]
207 | 
208 |     if seed:
209 |         np.random.seed(seed)
210 | 
211 |     data_inds = np.random.choice(len(x), size=(nboot, len(x)))
212 |     bvec = np.array([corbsub(row_inds, x, y, corfun, *args) for row_inds in data_inds])
213 | 
214 |     ihi = int(np.floor((1 - alpha / 2) * nboot + .5))
215 |     ilow = int(np.floor((alpha / 2) * nboot + .5))
216 |     bsort = sorted(bvec)
217 |     corci = [bsort[ilow], bsort[ihi]]
218 |     phat = sum(bvec < 0) / nboot
219 |     sig = 2 * min(phat, 1 - phat)
220 | 
221 |     #return corci, sig, est
222 |     return {'ci': corci, 'p_value': sig, 'cor': est}
223 | 
224 | def corbsub(isub, x, y, corfun, *args):
225 | 
226 |     """
227 |     Compute correlation for x[isub] and y[isub]
228 |     isub is a vector of length n,
229 |     a bootstrap sample from the sequence of integers
230 |     0, 1, 2, 3, ..., n-1
231 | 
232 |     This function is used by other functions when computing
233 |     bootstrap estimates.
234 | 
235 |     corfun is some correlation function
236 |     """
237 | 
238 |     corbsub_results = corfun(x[isub], y[isub], *args)['cor']#[0]
239 | 
240 |     return corbsub_results
241 | 
242 | def pball(x, beta=.2):
243 | 
244 |     """
245 |     Compute the percentage bend correlation matrix
246 |     for all pairs of columns in `x`. This function also
247 |     returns the two-sided significance level for all pairs
248 |     of variables, plus a test of zero correlation
249 |     among all pairs.
250 | 
251 | 
252 |     :param x: Pandas DataFrame
253 |         Each column represents a variable to use in the correlations
254 | 
255 |     :param beta: float
256 |         `0 < beta < .5`. Beta is analogous to trimming in
257 |         other functions and related to the measure of
258 |         dispersion used in the percentage bend
259 |         calculation.
260 | 
261 |     :return:
262 |         Dictionary of results
263 | 
264 |         H: float
265 |             The test statistic $H$. Reject the null hypothesis if $H > \chi^2_{1-\alpha}$,
266 |             the $1-\alpha$ quantile.
267 | 
268 |         H_p_value: float
269 |             p-value corresponding to the test that all correlations are equal to zero
270 | 
271 |         p_value: array
272 |             p-value matrix corresponding to each pairwise correlation
273 | 
274 |         pbcorm: array
275 |             Correlation matrix
276 | 
277 |     """
278 | 
279 |     m=x.values
280 |     ncol=m.shape[1]
281 | 
282 |     pbcorm=np.zeros([ncol, ncol])
283 |     temp=np.ones([ncol, ncol])
284 |     siglevel=np.full([ncol, ncol], np.nan)
285 |     #cmat = np.zeros([ncol, ncol])
286 | 
287 |     for i in range(ncol):
288 |         for j in range(i,ncol):
289 |             if i < j:
290 |                 pbc = pbcor(m[:, i], m[:, j], beta)
291 |                 pbcorm[i, j] = pbc['cor']
292 |                 temp[i, j] = pbcorm[i, j]
293 |                 temp[j, i] = pbcorm[i, j]
294 |                 siglevel[i, j] = pbc['p_value']
295 |                 siglevel[j, i] = siglevel[i, j]
296 | 
297 | 
298 |     tstat = pbcorm * np.sqrt((m.shape[0] - 2) / (1 - pbcorm ** 2))
299 |     cmat = np.sqrt((m.shape[0] - 2.5) * np.log(1 + tstat ** 2 / (m.shape[0] - 2)))
300 |     bv = 48 * (m.shape[0] - 2.5) ** 2
301 |     cmat = \
302 |         cmat + (cmat ** 3 + 3 * cmat) / bv - (4 * cmat ** 7 + 33 * cmat ** 5 + 240 * cmat ** 3 + 855 * cmat) / \
303 |         (10 * bv ** 2 + 8 * bv * cmat ** 4 + 1000 * bv)
304 | 
305 |     H = np.sum(cmat ** 2)
306 |     df = ncol * (ncol - 1) / 2
307 |     h_siglevel = 1 - chi2.cdf(H, df)
308 | 
309 |     results={"pbcorm": temp, "p_value": siglevel,
310 |              "H":H, "H_p_value": h_siglevel}
311 | 
312 |     return results
313 | 
314 | def winall(x, tr=.2):
315 | 
316 |     """
317 |     Compute the Winsorized correlation and covariance matrix
318 |     for all pairs of columns in `x`. This function also
319 |     returns the two-sided significance level for all pairs
320 |     of variables.
321 | 
322 | 
323 | 
324 |     :param x: Pandas DataFrame
325 |         Each column represents a variable to use in the correlations
326 | 
327 |     :param tr: float
328 |         Proportion to winsorize (default is .2)
329 | 
330 |     :return:
331 |         Dictionary of results
332 | 
333 |         center: array
334 |             Trimmed mean for each variable
335 | 
336 |         p_value: array
337 |             p-value array corresponding to the pairwise correlations
338 | 
339 |         wcor: array
340 |             Winsorized correlation matrix
341 | 
342 |         wcov: array
343 |             Winsorized covariance matrix
344 | 
345 | 
346 |     """
347 | 
348 |     m = x.values
349 |     ncol = m.shape[1]
350 | 
351 |     wcor = np.ones([ncol, ncol])
352 |     wcov = np.zeros([ncol, ncol])
353 |     siglevel = np.full([ncol, ncol], np.nan)
354 | 
355 |     for i in range(ncol):
356 |         #ip = i
357 |         for j in range(i,ncol):
358 |             val = wincor(m[:, i], m[:, j], tr)
359 |             wcor[i, j] = val['cor']
360 |             wcor[j, i] = wcor[i, j]
361 | 
362 |             if i == j:
363 |                 wcor[i, j] = 1
364 | 
365 |             wcov[i, j] = val['wcov']
366 |             wcov[j, i] = wcov[i, j]
367 | 
368 |             if i != j:
369 |                 siglevel[i, j] = val['sig']
370 |                 siglevel[j, i] = siglevel[i, j]
371 | 
372 |     m=m[~np.isnan(m).any(axis=1)]
373 |     cent=trim_mean(m, tr)
374 | 
375 |     return {"wcor": wcor, "wcov": wcov, "center": cent, "p_value": siglevel}
--------------------------------------------------------------------------------
/hypothesize/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/__init__.py
--------------------------------------------------------------------------------
/hypothesize/tests/build_test_data.py:
--------------------------------------------------------------------------------
1 | from hypothesize.measuring_associations import *
2 | from
hypothesize.compare_groups_with_single_factor import * 3 | from hypothesize.compare_groups_with_two_factors import * 4 | from hypothesize.utilities import create_example_data, trim_mean, con1way, con2way 5 | import numpy as np 6 | import pickle 7 | 8 | alpha=.05 9 | nboot=100 10 | tr=.2 11 | beta=.2 12 | 13 | def pkl_l2drmci(): 14 | 15 | np.random.seed(42) 16 | df = create_example_data(2) 17 | results = l2drmci(df.cell_1, df.cell_2, trim_mean, tr) 18 | pickle.dump(results, open("hypothesize/tests/test_data/l2drmci.pkl", "wb")) 19 | 20 | def pkl_linconb(): 21 | 22 | np.random.seed(42) 23 | df = create_example_data(3) 24 | results = linconb(df, con1way(3)) 25 | pickle.dump(results, open("hypothesize/tests/test_data/linconb.pkl", "wb")) 26 | 27 | def pkl_pb2gen(): 28 | 29 | np.random.seed(42) 30 | df = create_example_data(2) 31 | results = pb2gen(df.cell_1, df.cell_2, trim_mean, tr) 32 | pickle.dump(results, open("hypothesize/tests/test_data/pb2gen.pkl", "wb")) 33 | 34 | def pkl_tmcppb(): 35 | 36 | np.random.seed(42) 37 | df = create_example_data(3) 38 | results = tmcppb(df, trim_mean, tr) 39 | pickle.dump(results, open("hypothesize/tests/test_data/tmcppb.pkl", "wb")) 40 | 41 | def pkl_yuenbt(): 42 | 43 | np.random.seed(42) 44 | df = create_example_data(2) 45 | results = yuenbt(df.cell_1, df.cell_2) 46 | pickle.dump(results, open("hypothesize/tests/test_data/yuenbt.pkl", "wb")) 47 | 48 | def pkl_bootdpci(): 49 | 50 | np.random.seed(42) 51 | df = create_example_data(3) 52 | results = bootdpci(df, trim_mean, tr) 53 | pickle.dump(results, open("hypothesize/tests/test_data/bootdpci.pkl", "wb")) 54 | 55 | def pkl_rmmcppb(): 56 | 57 | np.random.seed(42) 58 | df = create_example_data(3) 59 | results = rmmcppb(df, trim_mean, tr) 60 | pickle.dump(results, open("hypothesize/tests/test_data/rmmcppb.pkl", "wb")) 61 | 62 | def pkl_lindepbt(): 63 | 64 | np.random.seed(42) 65 | df = create_example_data(3) 66 | results = lindepbt(df) 67 | pickle.dump(results, open("hypothesize/tests/test_data/lindepbt.pkl", "wb")) 68 | 69 | def pkl_ydbt(): 70 | 71 | np.random.seed(42) 72 | df = create_example_data(2) 73 | results = ydbt(df.cell_1, df.cell_2) 74 | pickle.dump(results, open("hypothesize/tests/test_data/ydbt.pkl", "wb")) 75 | 76 | def pkl_wwmcppb(): 77 | 78 | np.random.seed(42) 79 | df = create_example_data(6) 80 | results = wwmcppb(2, 3, df, trim_mean, tr) 81 | pickle.dump(results, open("hypothesize/tests/test_data/wwmcppb.pkl", "wb")) 82 | 83 | def pkl_wwmcpbt(): 84 | 85 | np.random.seed(42) 86 | df = create_example_data(6) 87 | results = wwmcpbt(2, 3, df, tr) 88 | pickle.dump(results, open("hypothesize/tests/test_data/wwmcpbt.pkl", "wb")) 89 | 90 | def pkl_bwamcp(): 91 | 92 | np.random.seed(42) 93 | df = create_example_data(6) 94 | results = bwamcp(2, 3, df) 95 | pickle.dump(results, open("hypothesize/tests/test_data/bwamcp.pkl", "wb")) 96 | 97 | def pkl_bwbmcp(): 98 | 99 | np.random.seed(42) 100 | df = create_example_data(6) 101 | results = bwbmcp(2, 3, df) 102 | pickle.dump(results, open("hypothesize/tests/test_data/bwbmcp.pkl", "wb")) 103 | 104 | def pkl_bwmcp(): 105 | 106 | np.random.seed(42) 107 | df = create_example_data(6) 108 | results = bwmcp(2, 3, df) 109 | pickle.dump(results, open("hypothesize/tests/test_data/bwmcp.pkl", "wb")) 110 | 111 | def pkl_bwimcp(): 112 | 113 | np.random.seed(42) 114 | df = create_example_data(6) 115 | results = bwimcp(2, 3, df) 116 | pickle.dump(results, open("hypothesize/tests/test_data/bwimcp.pkl", "wb")) 117 | 118 | def pkl_bwmcppb(): 119 | 120 | np.random.seed(42) 
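    # six cells form the 2-by-3 design passed to bwmcppb below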
121 | df = create_example_data(6) 122 | results = bwmcppb(2, 3, df, trim_mean, tr) 123 | pickle.dump(results, open("hypothesize/tests/test_data/bwmcppb.pkl", "wb")) 124 | 125 | def pkl_spmcpa(): 126 | 127 | np.random.seed(42) 128 | df = create_example_data(6) 129 | results = spmcpa(2, 3, df, trim_mean, tr) 130 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpa.pkl", "wb")) 131 | 132 | def pkl_spmcpb(): 133 | 134 | np.random.seed(42) 135 | df = create_example_data(6) 136 | results = spmcpb(2, 3, df, trim_mean, tr) 137 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpb.pkl", "wb")) 138 | 139 | def pkl_spmcpi(): 140 | 141 | np.random.seed(42) 142 | df = create_example_data(6) 143 | results = spmcpi(2, 3, df, trim_mean, tr) 144 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpi.pkl", "wb")) 145 | 146 | def pkl_corb(): 147 | 148 | np.random.seed(42) 149 | df = create_example_data(2) 150 | results = corb(wincor, df.cell_1, df.cell_2, alpha, nboot, tr) 151 | pickle.dump(results, open("hypothesize/tests/test_data/corb.pkl", "wb")) 152 | 153 | def pkl_pball(): 154 | 155 | np.random.seed(42) 156 | df = create_example_data(3) 157 | results = pball(df) 158 | pickle.dump(results, open("hypothesize/tests/test_data/pball.pkl", "wb")) 159 | 160 | def pkl_pbcor(): 161 | 162 | np.random.seed(42) 163 | df = create_example_data(2) 164 | results = pbcor(df.cell_1, df.cell_2) 165 | pickle.dump(results, open("hypothesize/tests/test_data/pbcor.pkl", "wb")) 166 | 167 | def pkl_winall(): 168 | 169 | np.random.seed(42) 170 | df = create_example_data(3) 171 | results = winall(df) 172 | pickle.dump(results, open("hypothesize/tests/test_data/winall.pkl", "wb")) 173 | 174 | def pkl_wincor(): 175 | 176 | np.random.seed(42) 177 | df = create_example_data(2) 178 | results = wincor(df.cell_1, df.cell_2) 179 | pickle.dump(results, open("hypothesize/tests/test_data/wincor.pkl", "wb")) 180 | -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bootdpci.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bootdpci.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwamcp.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwamcp.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwbmcp.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwbmcp.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwimcp.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwimcp.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwmcp.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwmcp.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/bwmcppb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwmcppb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/corb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/corb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/l2drmci.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/l2drmci.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/linconb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/linconb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/lindepbt.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/lindepbt.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/pb2gen.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pb2gen.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/pball.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pball.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/pbcor.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pbcor.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/rmmcppb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/rmmcppb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/spmcpa.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpa.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/spmcpb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/spmcpi.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpi.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/tmcppb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/tmcppb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/winall.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/winall.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/wincor.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wincor.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/wwmcpbt.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wwmcpbt.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/wwmcppb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wwmcppb.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/ydbt.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/ydbt.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_data/yuenbt.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/yuenbt.pkl -------------------------------------------------------------------------------- /hypothesize/tests/test_funcs.py: -------------------------------------------------------------------------------- 1 | from hypothesize.measuring_associations import * 2 | from hypothesize.compare_groups_with_single_factor import * 3 | from 
hypothesize.compare_groups_with_two_factors import * 4 | from hypothesize.utilities import create_example_data, trim_mean, con1way 5 | import numpy as np 6 | import pandas as pd 7 | from pandas._testing import assert_frame_equal 8 | import pickle 9 | import os 10 | 11 | alpha=.05 12 | nboot=100 13 | tr=.2 14 | beta=.2 15 | 16 | try: 17 | os.chdir('hypothesize/tests') 18 | except: 19 | pass 20 | 21 | def run_all_pkl_funcs(): 22 | 23 | from hypothesize.tests import build_test_data 24 | 25 | for i in dir(build_test_data): 26 | item = getattr(build_test_data,i) 27 | if callable(item) and i.startswith('pkl'): 28 | item() 29 | 30 | def build_truth_list(expected_results): 31 | 32 | truth_list=[] 33 | 34 | if type(expected_results) is list: 35 | 36 | for item in expected_results: 37 | nested_truth_list=build_truth_list(item) 38 | truth_list.append(nested_truth_list) 39 | 40 | elif type(expected_results) is dict: 41 | 42 | for k in expected_results: 43 | 44 | if type(expected_results[k]) is dict: 45 | nested_truth_list=[True] * len(expected_results[k]) 46 | 47 | truth_list.append(nested_truth_list) 48 | else: 49 | truth_list.append(True) 50 | 51 | return truth_list 52 | 53 | def check_dict_items_equality(expected_results, actual_results): 54 | 55 | actual_truth=[] 56 | 57 | if type(expected_results) is list: 58 | for exp_item, act_item in zip(expected_results, actual_results): 59 | nested_truth = check_dict_items_equality(exp_item, act_item) 60 | actual_truth.append(nested_truth) 61 | 62 | elif type(expected_results) is dict: 63 | 64 | for k in expected_results: 65 | 66 | if type(expected_results[k]) is np.ndarray: 67 | 68 | # truth=True if not np.testing.assert_array_equal(expected_results[k], actual_results[k]) \ 69 | # else False 70 | 71 | truth=True if not np.testing.assert_allclose(expected_results[k], actual_results[k]) \ 72 | else False 73 | 74 | actual_truth.append(truth) 75 | 76 | elif type(expected_results[k]) is pd.DataFrame: 77 | 78 | # truth=True if not assert_frame_equal(expected_results[k], actual_results[k]) \ 79 | # else False 80 | 81 | truth=True if not assert_frame_equal(expected_results[k], actual_results[k], check_less_precise=True) \ 82 | else False 83 | 84 | actual_truth.append(truth) 85 | 86 | elif type(expected_results[k]) is dict: 87 | nested_truth=check_dict_items_equality(expected_results[k], actual_results[k]) 88 | actual_truth.append(nested_truth) 89 | 90 | else: 91 | 92 | if expected_results[k] is None and actual_results[k] is None: \ 93 | truth = True 94 | else: 95 | truth=True if not np.testing.assert_almost_equal(expected_results[k], actual_results[k]) \ 96 | else False 97 | 98 | actual_truth.append(truth) 99 | 100 | return actual_truth 101 | 102 | def test_l2drmci(): 103 | 104 | np.random.seed(42) 105 | df = create_example_data(2) 106 | results = l2drmci(df.cell_1, df.cell_2, trim_mean, tr) 107 | expected = pickle.load(open("test_data/l2drmci.pkl", "rb")) 108 | expected_truth=build_truth_list(expected) 109 | actual_truth = check_dict_items_equality(expected, results) 110 | 111 | #assert results == expected 112 | assert actual_truth == expected_truth 113 | 114 | def test_linconb(): 115 | 116 | np.random.seed(42) 117 | df = create_example_data(3) 118 | results = linconb(df, con1way(3)) 119 | expected = pickle.load(open("test_data/linconb.pkl", "rb")) 120 | expected_truth=build_truth_list(expected) 121 | actual_truth = check_dict_items_equality(expected, results) 122 | 123 | assert actual_truth == expected_truth 124 | 125 | def test_pb2gen(): 126 | 127 | 
np.random.seed(42) 128 | df = create_example_data(2) 129 | results = pb2gen(df.cell_1, df.cell_2, trim_mean, tr) 130 | expected = pickle.load(open("test_data/pb2gen.pkl", "rb")) 131 | expected_truth=build_truth_list(expected) 132 | actual_truth = check_dict_items_equality(expected, results) 133 | 134 | #assert results == expected 135 | assert actual_truth == expected_truth 136 | 137 | def test_tmcppb(): 138 | 139 | np.random.seed(42) 140 | df = create_example_data(3) 141 | results = tmcppb(df, trim_mean, tr) 142 | expected = pickle.load(open("test_data/tmcppb.pkl", "rb")) 143 | expected_truth=build_truth_list(expected) 144 | actual_truth = check_dict_items_equality(expected, results) 145 | 146 | assert actual_truth == expected_truth 147 | 148 | def test_yuenbt(): 149 | 150 | np.random.seed(42) 151 | df = create_example_data(2) 152 | results = yuenbt(df.cell_1, df.cell_2) 153 | expected = pickle.load(open("test_data/yuenbt.pkl", "rb")) 154 | expected_truth=build_truth_list(expected) 155 | actual_truth = check_dict_items_equality(expected, results) 156 | 157 | #assert results == expected 158 | assert actual_truth == expected_truth 159 | 160 | def test_bootdpci(): 161 | 162 | np.random.seed(42) 163 | df = create_example_data(3) 164 | results = bootdpci(df, trim_mean, tr) 165 | expected = pickle.load(open("test_data/bootdpci.pkl", "rb")) 166 | expected_truth=build_truth_list(expected) 167 | actual_truth = check_dict_items_equality(expected, results) 168 | 169 | assert actual_truth == expected_truth 170 | 171 | def test_rmmcppb(): 172 | 173 | np.random.seed(42) 174 | df = create_example_data(3) 175 | results = rmmcppb(df, trim_mean, tr) 176 | expected = pickle.load(open("test_data/rmmcppb.pkl", "rb")) 177 | expected_truth=build_truth_list(expected) 178 | actual_truth = check_dict_items_equality(expected, results) 179 | 180 | assert actual_truth == expected_truth 181 | 182 | def test_lindepbt(): 183 | 184 | np.random.seed(42) 185 | df = create_example_data(3) 186 | results = lindepbt(df) 187 | expected = pickle.load(open("test_data/lindepbt.pkl", "rb")) 188 | expected_truth=build_truth_list(expected) 189 | actual_truth = check_dict_items_equality(expected, results) 190 | 191 | assert actual_truth == expected_truth 192 | 193 | def test_ydbt(): 194 | 195 | np.random.seed(42) 196 | df = create_example_data(2) 197 | results = ydbt(df.cell_1, df.cell_2) 198 | expected = pickle.load(open("test_data/ydbt.pkl", "rb")) 199 | expected_truth=build_truth_list(expected) 200 | actual_truth = check_dict_items_equality(expected, results) 201 | 202 | #assert results == expected 203 | assert actual_truth == expected_truth 204 | 205 | def test_wwmcppb(): 206 | 207 | np.random.seed(42) 208 | df = create_example_data(6) 209 | results = wwmcppb(2, 3, df, trim_mean, tr) 210 | expected = pickle.load(open("test_data/wwmcppb.pkl", "rb")) 211 | expected_truth=build_truth_list(expected) 212 | actual_truth = check_dict_items_equality(expected, results) 213 | 214 | assert actual_truth == expected_truth 215 | 216 | def test_wwmcpbt(): 217 | 218 | np.random.seed(42) 219 | df = create_example_data(6) 220 | results = wwmcpbt(2, 3, df, tr) 221 | expected = pickle.load(open("test_data/wwmcpbt.pkl", "rb")) 222 | expected_truth=build_truth_list(expected) 223 | actual_truth = check_dict_items_equality(expected, results) 224 | 225 | assert actual_truth == expected_truth 226 | 227 | def test_bwamcp(): 228 | 229 | np.random.seed(42) 230 | df = create_example_data(6) 231 | results = bwamcp(2, 3, df) 232 | expected = 
pickle.load(open("test_data/bwamcp.pkl", "rb")) 233 | expected_truth=build_truth_list(expected) 234 | actual_truth = check_dict_items_equality(expected, results) 235 | 236 | assert actual_truth == expected_truth 237 | 238 | def test_bwbmcp(): 239 | 240 | np.random.seed(42) 241 | df = create_example_data(6) 242 | results = bwbmcp(2, 3, df) 243 | expected = pickle.load(open("test_data/bwbmcp.pkl", "rb")) 244 | 245 | print(results) 246 | print(expected) 247 | expected_truth=build_truth_list(expected) 248 | actual_truth = check_dict_items_equality(expected, results) 249 | 250 | assert actual_truth == expected_truth 251 | 252 | def test_bwmcp(): 253 | 254 | np.random.seed(42) 255 | df = create_example_data(6) 256 | results = bwmcp(2, 3, df) 257 | expected = pickle.load(open("test_data/bwmcp.pkl", "rb")) 258 | expected_truth=build_truth_list(expected) 259 | actual_truth = check_dict_items_equality(expected, results) 260 | 261 | assert actual_truth == expected_truth 262 | 263 | def test_bwimcp(): 264 | 265 | np.random.seed(42) 266 | df = create_example_data(6) 267 | results = bwimcp(2, 3, df) 268 | expected = pickle.load(open("test_data/bwimcp.pkl", "rb")) 269 | expected_truth=build_truth_list(expected) 270 | actual_truth = check_dict_items_equality(expected, results) 271 | 272 | assert actual_truth == expected_truth 273 | 274 | def test_bwmcppb(): 275 | 276 | np.random.seed(42) 277 | df = create_example_data(6) 278 | results = bwmcppb(2, 3, df, trim_mean, tr) 279 | expected = pickle.load(open("test_data/bwmcppb.pkl", "rb")) 280 | expected_truth=build_truth_list(expected) 281 | actual_truth = check_dict_items_equality(expected, results) 282 | 283 | assert actual_truth == expected_truth 284 | 285 | def test_spmcpa(): 286 | 287 | np.random.seed(42) 288 | df = create_example_data(6) 289 | results = spmcpa(2, 3, df, trim_mean, tr) 290 | expected = pickle.load(open("test_data/spmcpa.pkl", "rb")) 291 | expected_truth=build_truth_list(expected) 292 | actual_truth = check_dict_items_equality(expected, results) 293 | 294 | assert actual_truth == expected_truth 295 | 296 | def test_spmcpb(): 297 | 298 | np.random.seed(42) 299 | df = create_example_data(6) 300 | results = spmcpb(2, 3, df, trim_mean, tr) 301 | expected = pickle.load(open("test_data/spmcpb.pkl", "rb")) 302 | expected_truth=build_truth_list(expected) 303 | actual_truth = check_dict_items_equality(expected, results) 304 | 305 | assert actual_truth == expected_truth 306 | 307 | def test_spmcpi(): 308 | 309 | np.random.seed(42) 310 | df = create_example_data(6) 311 | results = spmcpi(2, 3, df, trim_mean, tr) 312 | expected = pickle.load(open("test_data/spmcpi.pkl", "rb")) 313 | expected_truth=build_truth_list(expected) 314 | actual_truth = check_dict_items_equality(expected, results) 315 | 316 | assert actual_truth == expected_truth 317 | 318 | def test_corb(): 319 | 320 | np.random.seed(42) 321 | df = create_example_data(2) 322 | results = corb(wincor, df.cell_1, df.cell_2, alpha, nboot, tr) 323 | expected = pickle.load(open("test_data/corb.pkl", "rb")) 324 | expected_truth = build_truth_list(expected) 325 | actual_truth = check_dict_items_equality(expected, results) 326 | 327 | #assert results == expected 328 | assert actual_truth == expected_truth 329 | 330 | def test_pball(): 331 | 332 | np.random.seed(42) 333 | df = create_example_data(3) 334 | results = pball(df) 335 | expected = pickle.load(open("test_data/pball.pkl", "rb")) 336 | expected_truth=build_truth_list(expected) 337 | actual_truth = check_dict_items_equality(expected, results) 
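    # the assertion below passes only if every leaf of the nested result
    # dictionaries compared equal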
338 | 339 | assert actual_truth == expected_truth 340 | 341 | def test_pbcor(): 342 | 343 | np.random.seed(42) 344 | df = create_example_data(2) 345 | results = pbcor(df.cell_1, df.cell_2) 346 | expected = pickle.load(open("test_data/pbcor.pkl", "rb")) 347 | expected_truth=build_truth_list(expected) 348 | actual_truth = check_dict_items_equality(expected, results) 349 | 350 | #assert results == expected 351 | assert actual_truth == expected_truth 352 | 353 | def test_winall(): 354 | 355 | np.random.seed(42) 356 | df = create_example_data(3) 357 | results = winall(df) 358 | expected = pickle.load(open("test_data/winall.pkl", "rb")) 359 | expected_truth=build_truth_list(expected) 360 | actual_truth = check_dict_items_equality(expected, results) 361 | 362 | assert actual_truth == expected_truth 363 | 364 | def test_wincor(): 365 | 366 | np.random.seed(42) 367 | df = create_example_data(2) 368 | results = wincor(df.cell_1, df.cell_2) 369 | expected = pickle.load(open("test_data/wincor.pkl", "rb")) 370 | print(results) 371 | print(expected) 372 | expected_truth = build_truth_list(expected) 373 | actual_truth = check_dict_items_equality(expected, results) 374 | 375 | #assert results == expected 376 | assert actual_truth == expected_truth 377 | 378 | 379 | 380 | -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @article{20000755025, 2 | author="Tukey, J. W.", 3 | title="A survey of sampling from contaminated distributions", 4 | journal="Contributions to Probability and Statistics", 5 | ISSN="", 6 | publisher="Stanford University Press", 7 | year="1960", 8 | month="", 9 | volume="", 10 | number="", 11 | pages="448-485", 12 | URL="https://ci.nii.ac.jp/naid/20000755025/en/", 13 | DOI="", 14 | } 15 | 16 | @article{bradley1993introduction, 17 | title={An introduction to the bootstrap}, 18 | author={Efron, Bradley and Tibshirani, Robert J}, 19 | journal={Monographs on Statistics and Applied Probability}, 20 | volume={57}, 21 | year={1993} 22 | } 23 | 24 | @article{wilcox1998many, 25 | title={How many discoveries have been lost by ignoring modern statistical methods?}, 26 | author={Wilcox, Rand R}, 27 | journal={American Psychologist}, 28 | volume={53}, 29 | number={3}, 30 | pages={300}, 31 | year={1998}, 32 | publisher={American Psychological Association}, 33 | DOI={10.1037/0003-066X.53.3.300} 34 | } 35 | 36 | @book{wilcox2013introduction, 37 | title={Introduction to robust estimation and hypothesis testing}, 38 | author={Wilcox, Rand R}, 39 | year={2013}, 40 | publisher={Academic press}, 41 | DOI={10.1016/c2010-0-67044-1} 42 | } 43 | 44 | @inproceedings{seabold2010statsmodels, 45 | title={statsmodels: Econometric and statistical modeling with python}, 46 | author={Seabold, Skipper and Perktold, Josef}, 47 | booktitle={9th Python in Science Conference}, 48 | year={2010}, 49 | DOI={10.25080/majora-92bf1922-011} 50 | } 51 | 52 | @article{ho2019moving, 53 | title={Moving beyond P values: Data analysis with estimation graphics}, 54 | author={Ho, Joses and Tumkaya, Tayfun and Aryal, Sameer and Choi, Hyungwon and Claridge-Chang, Adam}, 55 | journal={Nature Methods}, 56 | volume={16}, 57 | number={7}, 58 | pages={565--566}, 59 | year={2019}, 60 | publisher={Nature Publishing Group}, 61 | DOI={10.1038/s41592-019-0470-3} 62 | } 63 | 64 | @InProceedings{mckinney-proc-scipy-2010, 65 | author = {{W}es {M}c{K}inney }, 66 | title = {{D}ata {S}tructures for {S}tatistical {C}omputing in 
{P}ython},
67 |   booktitle =    {{P}roceedings of the 9th {P}ython in {S}cience {C}onference},
68 |   pages =        {56--61},
69 |   year =         {2010},
70 |   editor =       {{S}t\'efan van der {W}alt and {J}arrod {M}illman},
71 |   doi =          {10.25080/Majora-92bf1922-00a}
72 | }
73 | 
74 | @article{Vallat2018,
75 |   doi = {10.21105/joss.01026},
76 |   url = {https://doi.org/10.21105/joss.01026},
77 |   year = {2018},
78 |   publisher = {The Open Journal},
79 |   volume = {3},
80 |   number = {31},
81 |   pages = {1026},
82 |   author = {Raphael Vallat},
83 |   title = {Pingouin: Statistics in {P}ython},
84 |   journal = {Journal of Open Source Software}
85 | }
86 | 
87 | @article{rom1990sequentially,
88 |   title={A sequentially rejective test procedure based on a modified {B}onferroni inequality},
89 |   author={Rom, Dror M},
90 |   journal={Biometrika},
91 |   volume={77},
92 |   number={3},
93 |   pages={663--665},
94 |   year={1990},
95 |   publisher={Oxford University Press},
96 |   DOI={10.1093/biomet/77.3.663}
97 | }
98 | 
99 | @article{hochberg1988sharper,
100 |   title={A sharper {B}onferroni procedure for multiple tests of significance},
101 |   author={Hochberg, Yosef},
102 |   journal={Biometrika},
103 |   volume={75},
104 |   number={4},
105 |   pages={800--802},
106 |   year={1988},
107 |   publisher={Oxford University Press},
108 |   DOI = {10.1093/biomet/75.4.800}
109 | }
110 | 
111 | @article{benjamini1995controlling,
112 |   title={Controlling the false discovery rate: A practical and powerful approach to multiple testing},
113 |   author={Benjamini, Yoav and Hochberg, Yosef},
114 |   journal={Journal of the Royal Statistical Society. Series B (Methodological)},
115 |   volume={57},
116 |   pages={289--300},
117 |   year={1995},
118 |   DOI = {10.1111/j.2517-6161.1995.tb02031.x}
119 | }
--------------------------------------------------------------------------------
/paper/paper.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: 'Hypothesize: Robust Statistics for Python'
3 | tags:
4 |   - Python
5 |   - R
6 |   - statistics
7 |   - statistical analysis
8 |   - bootstrapping
9 |   - trimmed mean
10 |   - data analysis
11 |   - data science
12 |   - social science
13 |   - hypothesis testing
14 | authors:
15 |   - name: Allan Campopiano
16 |     orcid: 0000-0002-3280-4447
17 |     affiliation: 1
18 |   - name: Rand R. Wilcox
19 |     orcid: 0000-0002-2524-2976
20 |     affiliation: 2
21 | 
22 | affiliations:
23 |   - name: Halton Catholic District School Board
24 |     index: 1
25 |   - name: University of Southern California
26 |     index: 2
27 | date: 08 May 2020
28 | bibliography: paper.bib
29 | ---
30 | 
31 | # Summary
32 | 
33 | Hypothesize is a robust null hypothesis significance testing (NHST) library for Python. In general,
34 | robust hypothesis testing uses techniques which minimize the effects of violating standard statistical
35 | assumptions. In particular, robust methods based on the trimmed mean [@20000755025]
36 | and/or bootstrapping [@bradley1993introduction] routinely outperform traditional statistical
37 | approaches in terms of power and accuracy. This is especially true when dealing with
38 | distributions that produce outliers [@wilcox1998many; @wilcox2013introduction].
39 | 
40 | Hypothesize is based on Rand R. Wilcox's collection of [R functions](https://dornsife.usc.edu/labs/rwilcox/software/)
41 | which contains hundreds of robust methods developed since the 1960s.
81 | In terms of learning to use the software, Hypothesize keeps the barrier to entry low for researchers. For example:
82 | 
83 | - To easily incorporate Hypothesize with standard data processing tools
84 | [see @mckinney-proc-scipy-2010], all top-level
85 | functions take a Pandas DataFrame/Series as input and return a Python dictionary
86 | (a minimal example follows this list).
87 | 
88 | - The API maps cleanly onto features of the user's statistical design.
89 | This makes it easier to discover the appropriate set of functions for a
90 | given use case.
91 | 
92 | - All top-level functions can be run directly in the browser alongside the documentation via
93 | [Google Colab Notebooks](https://colab.research.google.com/notebooks/intro.ipynb)
94 | (no local installation required).
95 | 
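96 | For instance, a winsorized correlation can be computed directly from two
97 | DataFrame columns, with the results returned as a dictionary. The following is
98 | a sketch assuming `wincor` is exposed from `hypothesize.measuring_associations`,
99 | mirroring the calls in the package's test suite:
100 | 
101 | ```python
102 | from hypothesize.utilities import create_example_data
103 | from hypothesize.measuring_associations import wincor
104 | 
105 | df = create_example_data(2)  # random DataFrame with columns cell_1 and cell_2
106 | 
107 | # returns a Python dictionary of results (e.g., the correlation and p-value)
108 | results = wincor(df.cell_1, df.cell_2)
109 | ```
110 | 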
111 | # Acknowledgements
112 | 
113 | The authors would like to thank
114 | James Desjardins,
115 | Stefon van Noordt,
116 | Lisa Collimore,
117 | Martina G. Vilas,
118 | Andrew Bennett,
119 | Charlotte Soneson,
120 | Whedon,
121 | the Journal of Open Source Software,
122 | and the Halton Catholic District School Board
123 | for their support of this project.
124 | 
125 | # References
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.7.3
2 | pandas==1.4.2
3 | numpy==1.22.0
4 | more-itertools==8.12.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 | import io
3 | import os
4 | 
5 | def get_install_requirements(path):
6 |     # parse requirements.txt, skipping blank lines and comments
7 |     content = read(path)
8 |     return [req for req in content.split("\n") if req != "" and not req.startswith("#")]
9 | 
10 | def read(path, encoding="utf-8"):
11 |     # read a file relative to this setup.py, regardless of the current working directory
12 |     path = os.path.join(os.path.dirname(__file__), path)
13 |     with io.open(path, encoding=encoding) as fp:
14 |         return fp.read()
15 | 
16 | setup(
17 |     name='hypothesize',
18 |     version='1.2.2',
19 |     description='A Python package for comparing groups and measuring associations using robust statistics.',
20 |     author='Allan Campopiano',
21 |     author_email="campopianoa@hcdsb.org",
22 |     license='BSD 3-clause',
23 |     long_description=read('README.md'),
24 |     long_description_content_type='text/markdown',
25 |     url="https://github.com/Alcampopiano/hypothesize",
26 |     packages=find_packages(),
27 |     include_package_data=True,
28 |     install_requires=get_install_requirements("requirements.txt"),
29 |     # the pinned dependencies (pandas 1.4.x, numpy 1.22.x) require Python 3.8+,
30 |     # so the old ">=3.6" constraint was misleading
31 |     python_requires=">=3.8",
32 |     tests_require=['pytest'],
33 |     classifiers=[
34 |         "Development Status :: 5 - Production/Stable",
35 |         "Environment :: Console",
36 |         "Intended Audience :: Science/Research",
37 |         "License :: OSI Approved :: BSD License",
38 |         "Natural Language :: English",
39 |         "Programming Language :: Python :: 3.8",
40 |         "Programming Language :: Python :: 3.9",
41 |         "Programming Language :: Python :: 3.10",
42 |     ],
43 | )
--------------------------------------------------------------------------------