├── .github
│   └── workflows
│       └── test_and_lint.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
│   ├── docs
│   │   ├── FAQ.md
│   │   ├── basic_tutorial.md
│   │   ├── function_guide.md
│   │   ├── img
│   │   │   ├── dist_overlay.png
│   │   │   ├── ide_pycharm.png
│   │   │   ├── package_import_viz.gif
│   │   │   ├── vp.png
│   │   │   └── vp_inv.png
│   │   ├── index.md
│   │   ├── install_dep.md
│   │   ├── overview.md
│   │   └── stylesheets
│   │       └── al_extra.css
│   └── mkdocs.yml
├── examples
│   ├── bootdpci.ipynb
│   ├── bwamcp.ipynb
│   ├── bwbmcp.ipynb
│   ├── bwimcp.ipynb
│   ├── bwmcp.ipynb
│   ├── bwmcppb.ipynb
│   ├── corb.ipynb
│   ├── hypothesize_notebook_general_examples.ipynb
│   ├── l2drmci.ipynb
│   ├── linconb.ipynb
│   ├── lindepbt.ipynb
│   ├── pb2gen.ipynb
│   ├── pball.ipynb
│   ├── pbcor.ipynb
│   ├── rmmcppb.ipynb
│   ├── spmcpa.ipynb
│   ├── spmcpb.ipynb
│   ├── spmcpi.ipynb
│   ├── tmcppb.ipynb
│   ├── winall.ipynb
│   ├── wincor.ipynb
│   ├── wwmcpbt.ipynb
│   ├── wwmcppb.ipynb
│   ├── ydbt.ipynb
│   └── yuenbt.ipynb
├── hypothesize
│   ├── __init__.py
│   ├── compare_groups_with_single_factor
│   │   ├── __init__.py
│   │   └── _compare_groups_with_single_factor.py
│   ├── compare_groups_with_two_factors
│   │   ├── __init__.py
│   │   └── _compare_groups_with_two_factors.py
│   ├── measuring_associations
│   │   ├── __init__.py
│   │   └── _measuring_associations.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── build_test_data.py
│   │   ├── test_data
│   │   │   ├── bootdpci.pkl
│   │   │   ├── bwamcp.pkl
│   │   │   ├── bwbmcp.pkl
│   │   │   ├── bwimcp.pkl
│   │   │   ├── bwmcp.pkl
│   │   │   ├── bwmcppb.pkl
│   │   │   ├── corb.pkl
│   │   │   ├── l2drmci.pkl
│   │   │   ├── linconb.pkl
│   │   │   ├── lindepbt.pkl
│   │   │   ├── pb2gen.pkl
│   │   │   ├── pball.pkl
│   │   │   ├── pbcor.pkl
│   │   │   ├── rmmcppb.pkl
│   │   │   ├── spmcpa.pkl
│   │   │   ├── spmcpb.pkl
│   │   │   ├── spmcpi.pkl
│   │   │   ├── tmcppb.pkl
│   │   │   ├── winall.pkl
│   │   │   ├── wincor.pkl
│   │   │   ├── wwmcpbt.pkl
│   │   │   ├── wwmcppb.pkl
│   │   │   ├── ydbt.pkl
│   │   │   └── yuenbt.pkl
│   │   └── test_funcs.py
│   └── utilities.py
├── paper
│   ├── paper.bib
│   └── paper.md
├── requirements.txt
└── setup.py
/.github/workflows/test_and_lint.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: tests
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v2
19 | - name: Set up Python version 3.9.13
20 | uses: actions/setup-python@v1
21 | with:
22 | python-version: 3.9.13
23 | - name: Install dependencies
24 | run: |
25 | python -m pip install --upgrade pip
26 | pip install flake8 pytest
27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28 | - name: Lint with flake8
29 | run: |
30 | # stop the build if there are Python syntax errors or undefined names
31 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
32 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
33 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
34 | - name: Test with pytest
35 | run: |
36 | pytest
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *.cover
45 |
46 | # Translations
47 | *.mo
48 | *.pot
49 |
50 | # Django stuff:
51 | *.log
52 |
53 | # Sphinx documentation
54 | docs/_build/
55 |
56 | # PyBuilder
57 | target/
58 |
59 | # DotEnv configuration
60 | .env
61 |
62 | # Database
63 | *.db
64 | *.rdb
65 |
66 | # Pycharm
67 | .idea
68 |
69 | # VS Code
70 | .vscode/
71 |
72 | # Spyder
73 | .spyproject/
74 |
75 | # Jupyter NB Checkpoints
76 | .ipynb_checkpoints/
77 |
78 | # exclude data from source control by default
79 | /data/
80 |
81 | # Mac OS-specific storage files
82 | .DS_Store
83 |
84 | # vim
85 | *.swp
86 | *.swo
87 |
88 | # Mypy cache
89 | .mypy_cache/
90 |
91 | # documentation build
92 | docs/site/
93 |
94 | # R history file
95 | *.Rhistory
96 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Feedback and contribution
2 |
3 | Feedback, bug reports, and contributions are welcome via the
4 | [Hypothesize GitHub Repository](http://github.com/Alcampopiano/hypothesize/).
5 |
6 | ## How to contribute new functions to Hypothesize
7 |
8 | A great way to contribute would be to choose a function from the
9 | [WRS](https://dornsife.usc.edu/labs/rwilcox/software/) that does not yet exist in
10 | Hypothesize and convert it to Python. There is a current wish list
11 | [here](https://github.com/Alcampopiano/hypothesize/issues/2)
12 | but certainly any WRS function would be a welcome addition to the library. A list of the currently available
13 | functions in Hypothesize can be found in the documentation's
14 | [function reference](https://alcampopiano.github.io/hypothesize/function_guide/).
15 |
16 | #### Create example data to be used in R and Python
17 |
18 | It is helpful to be able to create some example data that can be used in both R and Python.
19 | One way to do this is to use Hypothesize's
20 | [create_example_data](https://alcampopiano.github.io/hypothesize/function_guide/#create_example_data) function.
21 | It will generate a DataFrame of random data (to be used in Python) as
22 | well as save NumPy arrays that can be read into R with the
23 | [RcppCNPy](https://cran.r-project.org/web/packages/RcppCNPy/index.html)
24 | and [Rcpp](https://cran.r-project.org/web/packages/Rcpp/index.html) libraries.
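For illustration, here is one way this round-trip can look. This is a sketch: only the DataFrame output of `create_example_data` is shown in this repository's examples, so the arrays are saved explicitly with NumPy here.

```python
import numpy as np
from hypothesize.utilities import create_example_data

# a two-column DataFrame of random example data
df = create_example_data(design_values=2)

# save each column as a .npy file; in R these can then be read with
# RcppCNPy::npyLoad("cell_1.npy")
for col in df.columns:
    np.save(f"{col}.npy", df[col].values)
```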
25 |
26 | #### IDE for R and Python
27 |
28 | It is convenient to use the same IDE when converting functions from R to Python.
29 | One suggestion is to use PyCharm's
30 | [r-language-for-intellij](https://plugins.jetbrains.com/plugin/6632-r-language-for-intellij)
31 | Plugin. This makes it possible to have an interpreter and editor for
32 | both languages in the same IDE. Like so:
33 |
34 | 
35 | 
36 | Of course there are many ways that one might go about converting WRS functions to Python.
37 | These are merely suggestions.
38 |
39 | ### Setting up your Git environment
40 |
41 | 1. Install the latest version of Hypothesize locally using
42 |
43 | ```
44 | $ pip install git+https://github.com/Alcampopiano/hypothesize/
45 | ```
46 |
47 | 2. Fork the repository on GitHub and clone the fork to your local
48 | machine. For more details on forking see the [GitHub
49 | Documentation](https://help.github.com/en/articles/fork-a-repo).
50 |
51 | ```
52 | $ git clone https://github.com/YOUR-USERNAME/hypothesize.git
53 | ```
54 |
55 | 3. Create a sync to the original upstream repository by creating a so-called
56 | [remote](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/configuring-a-remote-for-a-fork).
57 |
58 | ```
59 | $ git remote add upstream https://github.com/Alcampopiano/hypothesize.git
60 | $ git checkout master
61 | $ git pull upstream master
62 | ```
63 |
64 | Now you will have all of the updates in the master branch of your local fork.
65 | Note that git will complain if you've committed changes to your local master
66 | branch that are not on the upstream repository. This is one reason why it's good practice to avoid
67 | working directly on your master branch.
68 |
69 | ### Committing new code to Hypothesize
70 |
71 | 1. Create a new local branch and commit changes to your remote branch:
72 |
73 | ```
74 | $ git checkout -b <new-branch-name>
75 | ```
76 |
77 | With this branch checked out, make the desired changes to the package.
78 | When you are happy with your changes, you can commit them to a remote branch by running
79 |
80 | ```
81 | $ git add <files-you-changed>
82 | $ git commit -m "Some descriptive message about your change"
83 | $ git push origin <new-branch-name>
84 | ```
85 |
86 | 2. Write a unit test for your code (optional)
87 |
88 | Hypothesize uses `pytest` for unit testing. The strategy currently used for testing
89 | is to pickle results that are assumed to be correct and compare those
90 | against fresh results from the modified code (see the
91 | [tests](https://github.com/Alcampopiano/hypothesize/tree/master/hypothesize/tests) folder for examples).
92 | If you would like to write a test for your new code, you may follow the strategy
93 | described above or come up with another way to test your code. To run the test suite,
94 | first navigate to the "tests" directory then use the `pytest` command from your terminal.
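A hypothetical test following this strategy might look like the sketch below; the pickle layout and comparison logic are illustrative and are not the exact contents of `test_funcs.py`. The result keys match those returned by `wincor` in the README example.

```python
import pickle
import numpy as np
from hypothesize.measuring_associations import wincor

def test_wincor():
    # results pickled from a run that is assumed to be correct
    with open('test_data/wincor.pkl', 'rb') as f:
        expected = pickle.load(f)

    # fresh results from the modified code; the inputs must be the same
    # data used to build the pickle (stored alongside it, in this sketch)
    results = wincor(expected['x'], expected['y'])

    for key in ('cor', 'nval', 'sig', 'wcov'):
        assert np.isclose(results[key], expected[key])
```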
95 |
96 | 3. Submit a pull request (PR) to merge your new branch to Hypothesize's master branch
97 |
98 | For details on creating a PR see GitHub documentation [Creating a pull
99 | request](https://help.github.com/en/articles/creating-a-pull-request).
100 |
101 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Copyright (c) 2020, Allan Campopiano
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above copyright notice, this
12 | list of conditions and the following disclaimer in the documentation and/or
13 | other materials provided with the distribution.
14 |
15 | * Neither the name of hypothesize nor the names of its
16 | contributors may be used to endorse or promote products derived from this
17 | software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
23 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
26 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
28 | OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include LICENSE
3 | include requirements.txt
4 | recursive-include hypothesize *.py *.json *.ipynb *.html
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Hypothesize
2 |
3 | [](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
4 | 
5 | [](https://pypi.org/project/hypothesize/)
6 | [](https://pypistats.org/packages/hypothesize)
7 | [](https://github.com/Alcampopiano/hypothesize/blob/master/LICENSE)
8 |
9 | A Python package for hypothesis testing using robust statistics
10 |
11 | ## Basic Example
12 |
13 | ### A robust measure of association with winsorized correlation
14 | [Launch this example in Deepnote](https://deepnote.com/launch?name=wincor&url=https://github.com/Alcampopiano/hypothesize/blob/master/examples/wincor.ipynb)
15 |
16 |
17 | ```python
18 | from hypothesize.measuring_associations import wincor
19 | from hypothesize.utilities import create_example_data
20 |
21 | # creating an example DataFrame with columns "cell_1" and "cell_2"
22 | df=create_example_data(2)
23 |
24 | results=wincor(df.cell_1, df.cell_2)
25 |
26 | # returning the correlation, number of observations, p-value, and winsorized covariance
27 | print(results)
28 | {'cor': 0.11, 'nval': 50, 'sig': 0.44, 'wcov': 0.01}
29 | ```
30 |
31 | ## Documentation
32 | :book: Please visit the [Hypothesize documentation site](https://Alcampopiano.github.io/hypothesize/).
33 | Note that each statistical test in the documentation can be launched
34 | directly in [Deepnote's](https://deepnote.com) hosted notebook environment, complete with sample data
35 | (as shown in the example above 👆).
36 |
37 | ## Citing Hypothesize
38 |
39 | [](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
40 |
41 | If you use Hypothesize in academic work, please use the following citation:
42 |
43 | Campopiano, A., & Wilcox, R. R. (2020). Hypothesize: Robust Statistics for Python.
44 | Journal of Open Source Software, 5(50), 2241, https://doi.org/10.21105/joss.02241
45 |
46 | BibTeX:
47 |
48 | ```bib
49 | @article{Campopiano2020,
50 | doi = {10.21105/joss.02241},
51 | url = {https://doi.org/10.21105/joss.02241},
52 | year = {2020},
53 | publisher = {The Open Journal},
54 | volume = {5},
55 | number = {50},
56 | pages = {2241},
57 | author = {Allan Campopiano and Rand R. Wilcox},
58 | title = {Hypothesize: Robust Statistics for Python},
59 | journal = {Journal of Open Source Software}
60 | }
61 | ```
62 |
--------------------------------------------------------------------------------
/docs/docs/FAQ.md:
--------------------------------------------------------------------------------
1 | # Frequently asked questions
2 |
3 | No attempt is made to fully explain the following
4 | concepts, but hopefully this gets
5 | you started. The Internet has plenty of resources on these topics
6 | if you would like to learn more.
7 |
8 | ## What is a trimmed mean?
9 |
10 | The trimmed mean involves calculating the sample mean after
11 | removing a proportion of values from each
12 | tail of the distribution. In symbols the trimmed mean is expressed as
13 | follows:
14 |
15 | $$
16 | \bar{X}_t = \frac{X_{(g+1)} + \cdots + X_{(n-g)}}{n-2g}
17 | $$
18 |
19 | where $X_1, X_2, ..., X_n$ is a random sample and
20 | $X_{(1)} \le X_{(2)} \le ... \le X_{(n)}$ are the observations in
21 | ascending order. The proportion to trim is $\gamma\,(0 \lt \gamma \lt .5)$
22 | and $g = \lfloor \gamma n \rfloor$, that is, $\gamma n$ rounded down to the nearest integer.
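For example, with $n = 10$ observations and $\gamma = .2$, $g = 2$ values are removed from each tail. A quick illustration using SciPy's `trim_mean` (Hypothesize's `hypothesize.utilities` module also exposes a `trim_mean`, as used throughout the example notebooks):

```python
import numpy as np
from scipy.stats import trim_mean

x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 100])  # one extreme value

print(np.mean(x))         # 14.5 -- the ordinary mean is pulled up by the outlier
print(trim_mean(x, 0.2))  # 5.5  -- the 20% trimmed mean ignores the extremes
```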
23 |
24 | ## What is bootstrapping?
25 |
26 | In the context of hypothesis testing and generally speaking,
27 | bootstrapping involves taking many random samples (with replacement)
28 | from the data at hand in order to estimate a sampling
29 | distribution of interest. This is in contrast to traditional methods
30 | which assume the shape of the particular sampling distribution under study.
31 | Once we have an empirically derived sampling distribution,
32 | obtaining CIs and p values is relatively straightforward.
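As a rough sketch (plain NumPy and SciPy, not Hypothesize's internal implementation), a percentile bootstrap confidence interval for a 20% trimmed mean could be computed like this:

```python
import numpy as np
from scipy.stats import trim_mean

rng = np.random.default_rng(42)
x = rng.normal(size=50)  # the data at hand

# take many resamples (with replacement), computing the statistic each time
boot_stats = [trim_mean(rng.choice(x, size=x.size, replace=True), 0.2)
              for _ in range(2000)]

# the middle 95% of the bootstrap distribution forms a percentile CI
ci_low, ci_high = np.percentile(boot_stats, [2.5, 97.5])
print(ci_low, ci_high)
```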
33 |
34 | ## What is a contrast matrix?
35 |
36 | First, it is helpful to imagine your
37 | design arranged into a JxK matrix.
38 |
39 | $$
40 | A=\begin{bmatrix}
41 | a_{1,1} & a_{1,2} & ... & a_{1,K} \\
42 | a_{2,1} & a_{2,2} & ... & a_{2,K} \\ \vdots & \vdots & & \vdots \\
43 | a_{J,1} & a_{J,2} & ... & a_{J,K}
44 | \end{bmatrix}
45 | $$
46 |
47 | A contrast matrix specifies which cells (or elements) in the above
48 | design are to be compared. The rows in a contrast matrix
49 | correspond to the cells in your design. The columns correspond
50 | to the contrasts that you wish to make.
51 |
52 | ### Examples of contrast matrices for different designs
53 |
54 | Matrix notation is used to explain which cells are
55 | being compared, followed by the corresponding
56 | contrast matrix.
57 |
58 | === "design with 2 groups"
59 |
60 | $\Large{a_{1,1} - a_{1,2}}$
61 |
62 | | contrast 1 |
63 | |------------|
64 | | 1 |
65 | | -1 |
66 |
67 | === "design with 3 groups"
68 |
69 | 1. $\Large{a_{1,1} - a_{1,2}}$
70 | 2. $\Large{a_{1,1} - a_{1,3}}$
71 | 3. $\Large{a_{1,2} - a_{1,3}}$
72 |
73 | | contrast 1 | contrast 2 | contrast 3 |
74 | |------------|------------|------------|
75 | | 1 | 1 | 0 |
76 | | -1 | 0 | 1 |
77 | | 0 | -1 | -1 |
78 |
79 | === "2x2 design"
80 | **Factor A**
81 |
82 | $\Large{(a_{1,1} + a_{1,2})-(a_{2,1} + a_{2,2})}$
83 |
84 | | contrast 1 |
85 | |------------|
86 | | 1 |
87 | | 1 |
88 | | -1 |
89 | | -1 |
90 |
91 | **Factor B**
92 |
93 | $\Large{(a_{1,1} + a_{2,1})-(a_{1,2} + a_{2,2})}$
94 |
95 | | contrast 1 |
96 | |------------|
97 | | 1 |
98 | | -1 |
99 | | 1 |
100 | | -1 |
101 |
102 | **Interaction**
103 |
104 | $\Large{(a_{1,1} + a_{2,2})-(a_{1,2} + a_{2,1})}$
105 |
106 | That is, the difference of the differences
107 |
108 | | contrast 1 |
109 | |------------|
110 | | 1 |
111 | | -1 |
112 | | -1 |
113 | | 1 |
114 |
115 | === "2x3 design"
116 | **Factor A**
117 |
118 | $\Large{(a_{1,1} + a_{1,2} + a_{1,3})-(a_{2,1} + a_{2,2} + a_{2,3})}$
119 |
120 | | contrast 1 |
121 | |------------|
122 | | 1 |
123 | | 1 |
124 | | 1 |
125 | | -1 |
126 | | -1 |
127 | | -1 |
128 |
129 | **Factor B**
130 |
131 | 1. $\Large{(a_{1,1} + a_{2,1})-(a_{1,2} + a_{2,2})}$
132 | 2. $\Large{(a_{1,1} + a_{2,1})-(a_{1,3} + a_{2,3})}$
133 | 3. $\Large{(a_{1,2} + a_{2,2})-(a_{1,3} + a_{2,3})}$
134 |
135 | | contrast 1 | contrast 2 | contrast 3 |
136 | |------------|------------|------------|
137 | | 1 | 1 | 0 |
138 | | -1 | 0 | 1 |
139 | | 0 | -1 | -1 |
140 | | 1 | 1 | 0 |
141 | | -1 | 0 | 1 |
142 | | 0 | -1 | -1 |
143 |
144 | **Interactions**
145 |
146 | 1. $\Large{(a_{1,1} + a_{2,2})-(a_{1,2} + a_{2,1})}$
147 | 2. $\Large{(a_{1,1} + a_{2,3})-(a_{1,3} + a_{2,1})}$
148 | 3. $\Large{(a_{1,2} + a_{2,3})-(a_{1,3} + a_{2,2})}$
149 |
150 | | contrast 1 | contrast 2 | contrast 3 |
151 | |------------|------------|------------|
152 | | 1 | 1 | 0 |
153 | | -1 | 0 | 1 |
154 | | 0 | -1 | -1 |
155 | | -1 | -1 | 0 |
156 | | 1 | 0 | -1 |
157 | | 0 | 1 | 1 |
158 |
159 |
160 | !!! success "Not a fan of contrast matrices?"
161 | Don't worry, Hypothesize can generate all linear
162 | contrasts automatically (see functions [con1Way](https://alcampopiano.github.io/hypothesize/function_guide/#con1way)
163 | and [con2way](https://alcampopiano.github.io/hypothesize/function_guide/#con2way)). However, it is useful to
164 | understand this concept so that you know
165 | which comparisons are being made and
166 | how to specify your own if necessary.
167 |
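For instance, rather than typing the three-group matrix above by hand, it can be generated with `con1way` (a short sketch; the exact array layout is documented in the [function reference](https://alcampopiano.github.io/hypothesize/function_guide/#con1way)):

```python
from hypothesize.utilities import con1way

# all pairwise contrasts for a single factor with 3 groups;
# rows correspond to groups and columns to contrasts
con = con1way(3)
print(con)
```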
168 |
--------------------------------------------------------------------------------
/docs/docs/basic_tutorial.md:
--------------------------------------------------------------------------------
1 | # Basic Tutorial
2 |
3 | The following tutorial demonstrates how to perform a
4 | robust hypothesis test using 20% trimmed means and
5 | the bootstrap-t test. The data correspond to a
6 | 2 (between-subjects) x 3 (within-subjects) factorial design.
7 |
8 | ### Getting your data into Hypothesize
9 |
10 | In Hypothesize, input data are always specified as a Pandas DataFrame or Series.
11 | In this example, we have a 2x3 factorial design so the data would take the form of
12 | a six-column DataFrame (i.e., J levels x K levels). Using Pandas you can read your data into Python and
13 | use one of the appropriate functions from Hypothesize. In this case we will use the function `bwmcp`
14 | but there are [many others](function_guide.md) to choose from.
15 |
16 | !!! note "What about my column names?"
17 | Don't worry, Hypothesize doesn't make use of your column names.
18 | Feel free to name them however you like!
19 |
20 |
21 | ```python
22 | import pandas as pd
23 |
24 | df=pd.read_csv('my_data.csv')
25 |
26 | df.head()
27 | ```
28 |
29 | | cell_1_1 | cell_1_2 | cell_1_3 | cell_2_1 | cell_2_2 | cell_2_3 |
30 | |------------|------------|------------|------------|------------|------------|
31 | | 0.04 | 0.90 | 0.79 | 0.51 | 0.33 | 0.23 |
32 | | 0.76 | 0.29 | 0.84 | 0.03 | 0.5 | 0.73 |
33 | | 0.71 | 0.59 | 0.11 | 0.89 | 0.76 | 0.04 |
34 | | 0.17 | 0.26 | 0.88 | 0.28 | 0.1 | 0.21 |
35 | | 0.95 | 0.22 | 0.83 | 0.59 | 0.65 | 0.20 |
36 |
37 | ```python
38 | from hypothesize.compare_groups_with_two_factors import bwmcp
39 |
40 | results=bwmcp(J=2, K=3, x=df)
41 | ```
42 |
43 | ### Examining your results
44 |
45 | The results are returned as a Python Dictionary containing simple Python objects
46 | or DataFrames (when the results are best given as a matrix). For example, here are the
47 | previously computed results for the interaction returned as a DataFrame.
48 |
49 | ```python
50 | results['factor_AB']
51 | ```
52 |
53 | | con_num | psihat | se | test | crit_value | p_value |
54 | |---------- |----------- |--------- |---------- |------------- |---------- |
55 | | 0 | -0.100698 | 0.126135 | -0.798336 | 2.3771 | 0.410684 |
56 | | 1 | -0.037972 | 0.151841 | -0.250078 | 2.3771 | 0.804674 |
57 | | 2 | 0.0627261 | 0.135392 | 0.463291 | 2.3771 | 0.659432 |
58 |
59 |
60 |
62 |
63 |
--------------------------------------------------------------------------------
/docs/docs/img/dist_overlay.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/dist_overlay.png
--------------------------------------------------------------------------------
/docs/docs/img/ide_pycharm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/ide_pycharm.png
--------------------------------------------------------------------------------
/docs/docs/img/package_import_viz.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/package_import_viz.gif
--------------------------------------------------------------------------------
/docs/docs/img/vp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/vp.png
--------------------------------------------------------------------------------
/docs/docs/img/vp_inv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/docs/docs/img/vp_inv.png
--------------------------------------------------------------------------------
/docs/docs/index.md:
--------------------------------------------------------------------------------
1 | # Hypothesize: robust statistics in Python
2 |
3 | 
4 |
5 | Hypothesize is a robust statistics library for
6 | Python based on Rand R. Wilcox's R package [WRS](https://dornsife.usc.edu/labs/rwilcox/software/).
7 | With Hypothesize you can compare groups and
8 | measure associations using methods that outperform
9 | traditional statistical approaches in terms of power
10 | and accuracy.
11 |
12 | For more information on robust methods please see Wilcox's book
13 | [Introduction to Robust Estimation and Hypothesis Testing](https://play.google.com/store/books/details?id=8f8nBb4__EYC&gl=ca&hl=en-CA&source=productsearch&utm_source=HA_Desktop_US&utm_medium=SEM&utm_campaign=PLA&pcampaignid=MKTAD0930BO1&gclid=CjwKCAiA44LzBRB-EiwA-jJipJzyqx9kwNMq5MMU7fG2RrwBK9F7sirX4pfhS8wO7k9Uz_Sqf2P28BoCYzcQAvD_BwE&gclsrc=aw.ds).
14 |
15 | ## Getting Started
16 |
17 | - [Overview](overview.md)
18 | - [Installation](install_dep.md)
19 | - [Basic Tutorial](basic_tutorial.md)
20 |
21 | ## User Guide
22 |
23 | - [Function reference](function_guide.md)
24 | - [Frequently asked questions](FAQ.md)
25 |
26 | ## Bug reports and Questions
27 | Hypothesize is BSD-licensed and the source code is available
28 | on [GitHub](https://github.com/Alcampopiano/hypothesize).
29 | For issues and questions,
30 | please use [GitHub Issues](https://github.com/Alcampopiano/hypothesize/issues).
31 |
32 | ## Citing Hypothesize
33 |
34 | [](https://joss.theoj.org/papers/caf4095b3cdcc3adbb0252c995d59926)
35 |
36 | If you use Hypothesize in academic work, please use the following citation:
37 |
38 | Campopiano, A., & Wilcox, R. R. (2020). Hypothesize: Robust Statistics for Python.
39 | Journal of Open Source Software, 5(50), 2241, https://doi.org/10.21105/joss.02241
40 |
41 | BibTeX:
42 |
43 | ```bib
44 | @article{Campopiano2020,
45 | doi = {10.21105/joss.02241},
46 | url = {https://doi.org/10.21105/joss.02241},
47 | year = {2020},
48 | publisher = {The Open Journal},
49 | volume = {5},
50 | number = {50},
51 | pages = {2241},
52 | author = {Allan Campopiano and Rand R. Wilcox},
53 | title = {Hypothesize: Robust Statistics for Python},
54 | journal = {Journal of Open Source Software}
55 | }
56 | ```
57 |
58 | ## Contributing to Hypothesize
59 |
60 | The best way to contribute to Hypothesize is to take any function from the WRS collection
61 | and convert it to Python. For more details, please see
62 | [CONTRIBUTING.md](https://github.com/Alcampopiano/hypothesize/blob/master/CONTRIBUTING.md)
63 | in the GitHub repository.
--------------------------------------------------------------------------------
/docs/docs/install_dep.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | Hypothesize can be installed using `pip`:
4 |
5 | ```
6 | $ pip install hypothesize
7 | ```
8 |
9 | # Dependencies
10 |
11 | Hypothesize has the following dependencies,
12 | all of which are installed automatically
13 | with the above installation command:
14 |
15 | - python 3.6 or newer
16 | - [NumPy](https://numpy.org/)
17 | - [Pandas](https://pandas.pydata.org/)
18 | - [SciPy](https://www.scipy.org/)
19 | - [more-itertools](https://pypi.org/project/more-itertools/)
20 |
--------------------------------------------------------------------------------
/docs/docs/overview.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | The benefits of using robust methods for hypothesis testing
4 | have been known for the last half century.
5 | They have been shown to substantially increase power and accuracy when compared to
6 | traditional approaches.
7 | The issues of robustness and the functions in this library are described in detail in Rand R. Wilcox's book
8 | [Introduction to Robust Estimation and Hypothesis Testing](https://play.google.com/store/books/details?id=8f8nBb4__EYC&gl=ca&hl=en-CA&source=productsearch&utm_source=HA_Desktop_US&utm_medium=SEM&utm_campaign=PLA&pcampaignid=MKTAD0930BO1&gclid=CjwKCAiA44LzBRB-EiwA-jJipJzyqx9kwNMq5MMU7fG2RrwBK9F7sirX4pfhS8wO7k9Uz_Sqf2P28BoCYzcQAvD_BwE&gclsrc=aw.ds).
9 |
10 | The code and function names in Hypothesize are based on Wilcox's R functions in the [WRS](https://dornsife.usc.edu/labs/rwilcox/software/) package.
11 | Hypothesize simply brings many of these helpful and well-studied robust methods to the Python ecosystem.
12 | In addition, Hypothesize provides a user-friendly API and package structure
13 | as well as one-click, [ready-to-run examples](function_guide.md) for every top-level
14 | function.
15 |
16 | ## Hypothesize is easy to use
17 |
18 | Hypothesize's API is friendly and
19 | consistent, making it easy for you to discover
20 | and use robust functions that are appropriate for
21 | your statistical design.
22 |
23 | ### Package Structure
24 |
25 | Hypothesize organizes functions
26 | based on the statistical design. The following visualizations show
27 | how the package is structured and how
28 | this is reflected in practice when importing from the library:
29 |
30 | ```mermaid
31 | graph TB
32 | linkStyle default interpolate basis
33 | A[Hypothesize]
34 | A --> B(compare groups with single factor)
35 | A --> C(compare groups with two factors)
36 | A --> D(measure associations)
37 |
38 | B --> F(f1)
39 | B --> G(f2)
40 | B --> H(fn)
41 |
42 | C --> F1(f1)
43 | C --> G2(f2)
44 | C --> H3(fn)
45 |
46 | D --> F5(f1)
47 | D --> G6(f2)
48 | D --> H7(fn)
49 | ```
50 |
51 | ---
52 | 
53 |
54 | ---
55 | ## Hypothesize is flexible and powerful
56 |
57 | A broad range of choices exist in Hypothesize both in
58 | terms of the supported statistical designs as well as options for fine-grained control over how
59 | tests are computed. For example:
60 |
61 | - Where applicable, many hypothesis tests allow the specification of an estimator. That is,
62 | users may choose when to use the mean, median, trimmed mean, winsorized correlation,
63 | percentage bend correlation, or any other compatible statistical estimator (see the sketch after this list).
64 |
65 | - Single- and multi-factor designs are supported, including independent,
66 | dependent, and mixed groups.
67 |
68 | - Family-wise error can be robustly controlled with sequentially rejective methods (Benjamini & Hochberg, 1995; Hochberg, 1988; Rom, 1990).
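
As an example of the first point, here is the estimator-passing pattern from this repository's `bootdpci` notebook: the 20% trimmed mean is supplied as the estimator along with its trimming proportion.

```python
from hypothesize.utilities import create_example_data, trim_mean
from hypothesize.compare_groups_with_single_factor import bootdpci

# a three-column DataFrame of random example data
df = create_example_data(3)

# pass the estimator and its argument into the hypothesis test
results = bootdpci(df, trim_mean, .2)
results['output']
```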
69 |
70 |
71 |
72 | Visit the [tutorial section](basic_tutorial.md) and the
73 | [function documentation](function_guide.md) for complete examples
74 | using Hypothesize.
--------------------------------------------------------------------------------
/docs/docs/stylesheets/al_extra.css:
--------------------------------------------------------------------------------
1 | /*
2 | .button {
3 | display: block;
4 | width: 100%;
5 | font-size: 16px;
6 | background-color: #5867be;
7 | color: #ffffff !important;
8 | padding: 10px;
9 | box-shadow: 10;
10 | border-radius: 2px;
11 | text-align: center;
12 | border: none;
13 | }
14 | */
15 |
16 | .button {
17 | display: block;
18 | text-align: center;
19 | }
20 |
21 | /*
22 | .button:hover {
23 | background-color: grey;
24 | color: white !important;
25 |
26 | }
27 | */
28 |
29 | .mermaid svg {
30 | text-align: center !important;
31 | }
32 |
--------------------------------------------------------------------------------
/docs/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: Hypothesize
2 | google_analytics:
3 | - UA-165284209-1
4 | - auto
5 |
6 | nav:
7 | - Home: index.md
8 | - Overview: overview.md
9 | - Tutorial: basic_tutorial.md
10 | - Function Reference: function_guide.md
11 | - FAQ: FAQ.md
12 |
13 | repo_name: Github
14 | repo_url: https://github.com/Alcampopiano/hypothesize
15 | edit_uri: ""
16 | theme:
17 | logo: img/vp.png
18 | favicon: img/vp_inv.png
19 | name: material
20 | palette:
21 | primary: black
22 | accent: red
23 | copyright: "Hypothesize is licensed under the BSD 3-Clause license"
24 |
25 | markdown_extensions:
26 | - admonition
27 | - codehilite:
28 | guess_lang: false
29 | - footnotes
30 | - toc:
31 | permalink: true
32 | - pymdownx.tabbed
33 | - pymdownx.inlinehilite
34 | - pymdownx.arithmatex
35 | - pymdownx.superfences:
36 | custom_fences:
37 | - name: mermaid
38 | class: mermaid
39 | format: !!python/name:pymdownx.superfences.fence_div_format
40 |
41 | extra_css:
42 | - stylesheets/al_extra.css
43 | - https://unpkg.com/mermaid@7.1.2/dist/mermaid.css
44 | extra_javascript:
45 | - https://unpkg.com/mermaid@7.1.2/dist/mermaid.min.js
46 | - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-MML-AM_CHTML
--------------------------------------------------------------------------------
/examples/bootdpci.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bootdpci.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588616000402}],"collapsed_sections":[],"authorship_tag":"ABX9TyODOlntM0MT9CHgvn8VVl/O"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import bootdpci"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bootdpci(df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/bwamcp.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwamcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632200919},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyPVfyF9avy0DbfAuarJl8iy"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import bwamcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwamcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/bwbmcp.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwbmcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632296831},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyMCKdZY/KiF2LwgsqoxM9z0"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwbmcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwbmcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results[0]['test']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/bwimcp.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwimcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632574529},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNi+qxTq2XoeXVqKQ9JR4Ob"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":228},"outputId":"edf97991-0dd9-4813-e161-38e0804b024c","executionInfo":{"status":"ok","timestamp":1588632621496,"user_tz":240,"elapsed":4200,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["!pip install hypothesize"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Collecting hypothesize\n"," Downloading https://files.pythonhosted.org/packages/00/64/d9067b4a72585b2003bbd1823cceaada7f0c9a28441921201df42d31332e/hypothesize-0.1.dev23-py3-none-any.whl\n","Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.0.3)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.18.3)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from hypothesize) (1.4.1)\n","Requirement already satisfied: more-itertools in /usr/local/lib/python3.6/dist-packages (from hypothesize) (8.2.0)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->hypothesize) (2018.9)\n","Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas->hypothesize) (2.8.1)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.6.1->pandas->hypothesize) (1.12.0)\n","Installing collected packages: hypothesize\n","Successfully installed hypothesize-0.1.dev23\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import bwimcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwimcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/bwmcp.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwmcp.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632373867},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNq9mbugbYwk9HTz7/3Brgd"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwmcp"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwmcp(2, 3, df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/bwmcppb.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"bwmcppb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632676077},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNJh6K+fRRnm1Mgfr3EEWRU"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import bwmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=bwmcppb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/corb.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"corb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOuT/tbSiQ385N9p7Y0OE2s"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import corb, wincor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=corb(wincor, df.cell_1, df.cell_2, .05, 1000, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/hypothesize_notebook_general_examples.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.4"},"colab":{"name":"hypothesize_notebook_for_colab.ipynb","provenance":[{"file_id":"https://github.com/Alcampopiano/hypothesize/blob/master/examples/hypothesize_notebook_for_colab.ipynb","timestamp":1589199962420}],"collapsed_sections":[]}},"cells":[{"cell_type":"markdown","metadata":{"id":"SR6bFvjJtqjq","colab_type":"text"},"source":["## Hypothesize tutorial\n","\n","This notebook provides a few examples of how to use Hypothesize with a few common statistical designs. There are many more functions that could work for these designs but hopefully this helps to get you started.\n","\n"]},{"cell_type":"code","metadata":{"id":"AXTC2Xzu3zM9","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"9TPllfTh3zNE","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"zNFQXwRd3zNJ","colab_type":"text"},"source":["### How to compare two groups"]},{"cell_type":"markdown","metadata":{"id":"pA-fXciM3zNK","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"ZrMIEtaw3zNM","colab_type":"code","outputId":"bb369725-d23c-4d9b-c0f1-2a4f917587f6","executionInfo":{"status":"ok","timestamp":1589200167867,"user_tz":240,"elapsed":4569,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["#df=pd.read_csv(\"/home/allan/two_groups_data.csv\")\n","df=create_example_data(design_values=2)\n","\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," cell_1 | \n"," cell_2 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.608798 | \n"," 0.582123 | \n","
\n"," \n"," 1 | \n"," 0.622826 | \n"," 0.854637 | \n","
\n"," \n"," 2 | \n"," 0.264165 | \n"," 0.655077 | \n","
\n"," \n"," 3 | \n"," 0.794185 | \n"," 0.378080 | \n","
\n"," \n"," 4 | \n"," 0.907687 | \n"," 0.468066 | \n","
\n"," \n","
\n","
"],"text/plain":[" cell_1 cell_2\n","0 0.608798 0.582123\n","1 0.622826 0.854637\n","2 0.264165 0.655077\n","3 0.794185 0.378080\n","4 0.907687 0.468066"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"b7DXVXTP3zNR","colab_type":"text"},"source":["#### Import the desired function and pass in the data for each group\n","- This example uses the bootstrapped-t method with 20% trimmed means\n","- The output is a dictionary containing the results (95% confidence interval, p_value, test statistics, etc...)"]},{"cell_type":"code","metadata":{"id":"2hapgjCg3zNU","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"cb66df69-d846-411c-a603-9b0007a1cad9","executionInfo":{"status":"ok","timestamp":1589200168221,"user_tz":240,"elapsed":4920,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["from hypothesize.compare_groups_with_single_factor import yuenbt\n","\n","results=yuenbt(df.cell_1, df.cell_2)\n","\n","results['ci']"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[-0.09190770159731171, 0.25635146839797]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"rCYUwGzw3zNY","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"iU8nQykk3zNZ","colab_type":"text"},"source":["### How to compare three groups"]},{"cell_type":"markdown","metadata":{"id":"GOw1Y9_v3zNb","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"akjpBynJ3zNd","colab_type":"code","outputId":"6209634a-446c-42fb-d106-2cafa7350431","executionInfo":{"status":"ok","timestamp":1589200168223,"user_tz":240,"elapsed":4916,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/one_way_data.csv\")\n","df=create_example_data(design_values=3)\n","\n","df.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," cell_1 | \n"," cell_2 | \n"," cell_3 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.265109 | \n"," 0.088914 | \n"," 0.480468 | \n","
\n"," \n"," 1 | \n"," 0.119988 | \n"," 0.482773 | \n"," 0.079476 | \n","
\n"," \n"," 2 | \n"," 0.109533 | \n"," 0.521834 | \n"," 0.762804 | \n","
\n"," \n"," 3 | \n"," 0.152454 | \n"," 0.177596 | \n"," 0.741767 | \n","
\n"," \n"," 4 | \n"," 0.355403 | \n"," 0.520991 | \n"," 0.380219 | \n","
\n"," \n","
\n","
"],"text/plain":[" cell_1 cell_2 cell_3\n","0 0.265109 0.088914 0.480468\n","1 0.119988 0.482773 0.079476\n","2 0.109533 0.521834 0.762804\n","3 0.152454 0.177596 0.741767\n","4 0.355403 0.520991 0.380219"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"jRxALG1a3zNh","colab_type":"text"},"source":["#### Import the desired functions and pass in the inputs\n","- One approach is to use a set of linear contrasts that will test all pairwise comparisons\n","- Then, the bootstrap-t method and the 20% trimmed mean can be used\n","- CIs are adjusted to control for FWE\n","- All pairwise contrasts can be created automatically using the `con1way` function\n","- The results are a dictionary of DataFrames that contain various statistics (p_value, CIs, standard error, test statistics, etc)"]},{"cell_type":"code","metadata":{"id":"NJ5LK8G_3zNi","colab_type":"code","colab":{}},"source":["from hypothesize.compare_groups_with_single_factor import linconb\n","from hypothesize.utilities import con1way\n","\n","results=linconb(df, con=con1way(3))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"et1Acy1S3zNm","colab_type":"code","outputId":"b562fb9f-7d8a-4203-db4f-2e4cf157e96f","executionInfo":{"status":"ok","timestamp":1589200168984,"user_tz":240,"elapsed":5668,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['test']"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," contrast_index | \n"," test | \n"," se | \n"," p_value | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.0 | \n"," 0.417745 | \n"," 0.081921 | \n"," 0.691152 | \n","
\n"," \n"," 1 | \n"," 1.0 | \n"," -0.043381 | \n"," 0.085225 | \n"," 0.959933 | \n","
\n"," \n"," 2 | \n"," 2.0 | \n"," -0.501332 | \n"," 0.075636 | \n"," 0.602671 | \n","
\n"," \n","
\n","
"],"text/plain":[" contrast_index test se p_value\n","0 0.0 0.417745 0.081921 0.691152\n","1 1.0 -0.043381 0.085225 0.959933\n","2 2.0 -0.501332 0.075636 0.602671"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"CCMzuKYX3zNq","colab_type":"code","outputId":"2f0d4212-cb97-479a-aeef-aace296a05a6","executionInfo":{"status":"ok","timestamp":1589200168987,"user_tz":240,"elapsed":5664,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['psihat']"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," contrast_index | \n"," psihat | \n"," ci_low | \n"," ci_up | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.0 | \n"," 0.034222 | \n"," -0.168168 | \n"," 0.236612 | \n","
\n"," \n"," 1 | \n"," 1.0 | \n"," -0.003697 | \n"," -0.214251 | \n"," 0.206857 | \n","
\n"," \n"," 2 | \n"," 2.0 | \n"," -0.037919 | \n"," -0.224784 | \n"," 0.148946 | \n","
\n"," \n","
\n","
"],"text/plain":[" contrast_index psihat ci_low ci_up\n","0 0.0 0.034222 -0.168168 0.236612\n","1 1.0 -0.003697 -0.214251 0.206857\n","2 2.0 -0.037919 -0.224784 0.148946"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"d-AMqtzP3zNv","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"XO-FNoJw3zNw","colab_type":"text"},"source":["### How to compare groups in a factorial design"]},{"cell_type":"markdown","metadata":{"id":"qJcHGgDv3zNx","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"jE-FN9Lx3zNz","colab_type":"code","outputId":"b328e4c9-97d3-4cf8-b1ce-70bd1c44ea06","executionInfo":{"status":"ok","timestamp":1589200168990,"user_tz":240,"elapsed":5661,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/two_way_data.csv\")\n","df=create_example_data(design_values=[2,3])\n","\n","df.head()"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," cell_1_1 | \n"," cell_1_2 | \n"," cell_1_3 | \n"," cell_2_1 | \n"," cell_2_2 | \n"," cell_2_3 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.827524 | \n"," 0.476294 | \n"," 0.131720 | \n"," 0.410999 | \n"," 0.320306 | \n"," 0.370742 | \n","
\n"," \n"," 1 | \n"," 0.632281 | \n"," 0.588368 | \n"," 0.662648 | \n"," 0.242547 | \n"," 0.270292 | \n"," 0.700103 | \n","
\n"," \n"," 2 | \n"," 0.073064 | \n"," 0.472047 | \n"," 0.053942 | \n"," 0.069097 | \n"," 0.851596 | \n"," 0.962723 | \n","
\n"," \n"," 3 | \n"," 0.843377 | \n"," 0.095956 | \n"," 0.617434 | \n"," 0.765279 | \n"," 0.420772 | \n"," 0.993871 | \n","
\n"," \n"," 4 | \n"," 0.190709 | \n"," 0.013727 | \n"," 0.255385 | \n"," 0.577916 | \n"," 0.218277 | \n"," 0.125772 | \n","
\n"," \n","
\n","
"],"text/plain":[" cell_1_1 cell_1_2 cell_1_3 cell_2_1 cell_2_2 cell_2_3\n","0 0.827524 0.476294 0.131720 0.410999 0.320306 0.370742\n","1 0.632281 0.588368 0.662648 0.242547 0.270292 0.700103\n","2 0.073064 0.472047 0.053942 0.069097 0.851596 0.962723\n","3 0.843377 0.095956 0.617434 0.765279 0.420772 0.993871\n","4 0.190709 0.013727 0.255385 0.577916 0.218277 0.125772"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"aaVxg64m3zN2","colab_type":"text"},"source":["#### Import the desired function and pass in the data\n","- This example uses a 2-by-3 design\n","- One approach is to use a set of linear contrasts that will test all main effects and interactions\n","- Then, the bootstrap-t method and the 20% trimmed mean can be used\n","- The results are a dictionary of DataFrames that contain various statistics for each factor and the interactions"]},{"cell_type":"code","metadata":{"id":"X_muz_Lz3zN4","colab_type":"code","colab":{}},"source":["from hypothesize.compare_groups_with_two_factors import bwmcp\n","\n","results=bwmcp(J=2, K=3, x=df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"0Sm5AHgQ3zN8","colab_type":"code","outputId":"d39a4bc9-8313-479d-ba7d-5da63ca6c85b","executionInfo":{"status":"ok","timestamp":1589200173973,"user_tz":240,"elapsed":10635,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":79}},"source":["results['factor_A']"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," con_num | \n"," psihat | \n"," se | \n"," test | \n"," crit_value | \n"," p_value | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.0 | \n"," 0.173207 | \n"," 0.128072 | \n"," 1.352418 | \n"," 1.960025 | \n"," 0.15192 | \n","
\n"," \n","
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 0.173207 0.128072 1.352418 1.960025 0.15192"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"Ff6ipBF23zN_","colab_type":"code","outputId":"b7712848-0549-4c3d-9c84-bb6bb1e60b69","executionInfo":{"status":"ok","timestamp":1589200173974,"user_tz":240,"elapsed":10630,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['factor_B']"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," con_num | \n"," psihat | \n"," se | \n"," test | \n"," crit_value | \n"," p_value | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.0 | \n"," -0.067502 | \n"," 0.120091 | \n"," -0.562091 | \n"," 2.494032 | \n"," 0.559265 | \n","
\n"," \n"," 1 | \n"," 1.0 | \n"," 0.039398 | \n"," 0.116328 | \n"," 0.338680 | \n"," 2.494032 | \n"," 0.721202 | \n","
\n"," \n"," 2 | \n"," 2.0 | \n"," 0.106900 | \n"," 0.098491 | \n"," 1.085373 | \n"," 2.494032 | \n"," 0.307179 | \n","
\n"," \n","
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 -0.067502 0.120091 -0.562091 2.494032 0.559265\n","1 1.0 0.039398 0.116328 0.338680 2.494032 0.721202\n","2 2.0 0.106900 0.098491 1.085373 2.494032 0.307179"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"2cKKYZh83zOF","colab_type":"code","outputId":"9be1a209-ab26-4fc9-c31a-d6809d2b3c94","executionInfo":{"status":"ok","timestamp":1589200173975,"user_tz":240,"elapsed":10624,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":141}},"source":["results['factor_AB']"],"execution_count":13,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," con_num | \n"," psihat | \n"," se | \n"," test | \n"," crit_value | \n"," p_value | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.0 | \n"," -0.183242 | \n"," 0.120091 | \n"," -1.525869 | \n"," 2.3983 | \n"," 0.118531 | \n","
\n"," \n"," 1 | \n"," 1.0 | \n"," -0.163525 | \n"," 0.116328 | \n"," -1.405720 | \n"," 2.3983 | \n"," 0.186978 | \n","
\n"," \n"," 2 | \n"," 2.0 | \n"," 0.019718 | \n"," 0.098491 | \n"," 0.200196 | \n"," 2.3983 | \n"," 0.833055 | \n","
\n"," \n","
\n","
"],"text/plain":[" con_num psihat se test crit_value p_value\n","0 0.0 -0.183242 0.120091 -1.525869 2.3983 0.118531\n","1 1.0 -0.163525 0.116328 -1.405720 2.3983 0.186978\n","2 2.0 0.019718 0.098491 0.200196 2.3983 0.833055"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"markdown","metadata":{"id":"uOyKCT9M3zOJ","colab_type":"text"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"JZxF8Ygi3zOK","colab_type":"text"},"source":["### How to compute a robust correlation"]},{"cell_type":"markdown","metadata":{"id":"a2WTERe43zOL","colab_type":"text"},"source":["#### Load data from a CSV or create some random data"]},{"cell_type":"code","metadata":{"id":"FU8Ey3iI3zON","colab_type":"code","outputId":"eb04b788-6f9d-446c-83fd-1576065361fc","executionInfo":{"status":"ok","timestamp":1589200173975,"user_tz":240,"elapsed":10618,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":202}},"source":["import pandas as pd\n","\n","#df=pd.read_csv(\"/home/allan/two_groups_data.csv\")\n","df=create_example_data(design_values=2)\n","\n","df.head()"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," cell_1 | \n"," cell_2 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.402284 | \n"," 0.049092 | \n","
\n"," \n"," 1 | \n"," 0.208278 | \n"," 0.550764 | \n","
\n"," \n"," 2 | \n"," 0.958482 | \n"," 0.986547 | \n","
\n"," \n"," 3 | \n"," 0.957759 | \n"," 0.277685 | \n","
\n"," \n"," 4 | \n"," 0.702811 | \n"," 0.749065 | \n","
\n"," \n","
\n","
"],"text/plain":[" cell_1 cell_2\n","0 0.402284 0.049092\n","1 0.208278 0.550764\n","2 0.958482 0.986547\n","3 0.957759 0.277685\n","4 0.702811 0.749065"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"qY-7yf8Q3zOQ","colab_type":"text"},"source":["#### Import the desired function and pass in the data for each group\n","- One approach is to winsorize the x and y data\n","- A heteroscedastic method for testing zero correlation is also provided in this package but not shown here \n"," - Please see the function `corb` which uses the percentile bootstrap to compute a 1-alpha CI and p_value for any correlation \n","- The output is a dictionary containing various statistics (the winsorized correlation, winsorized covariance, etc...)"]},{"cell_type":"code","metadata":{"id":"mMeESqd33zOR","colab_type":"code","outputId":"a34a06fa-0113-4201-ce0b-e0d3f5d41930","executionInfo":{"status":"ok","timestamp":1589200173976,"user_tz":240,"elapsed":10612,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["from hypothesize.measuring_associations import wincor\n","\n","results=wincor(df.cell_1, df.cell_2)\n","\n","results['cor']"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.2025744763450888"]},"metadata":{"tags":[]},"execution_count":15}]}]}
--------------------------------------------------------------------------------
/examples/l2drmci.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"l2drmci.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPdELHUc+SP48pbwUqZCoT5"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import l2drmci"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=l2drmci(df.cell_1, df.cell_2, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/linconb.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"linconb.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyPH7skGRd9m7ywyto/ckjRS"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, con1way\n","from hypothesize.compare_groups_with_single_factor import linconb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=linconb(df, con1way(3))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['psihat']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3zbT5WdQvTVv","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/lindepbt.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"lindepbt.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588618128796}],"collapsed_sections":[],"authorship_tag":"ABX9TyO26ovh0/ccrrbqL9dVEnIm"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import lindepbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=lindepbt(df)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['psihat']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3zbT5WdQvTVv","colab_type":"code","colab":{}},"source":["results['test']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/pb2gen.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pb2gen.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588613922534}],"collapsed_sections":[],"authorship_tag":"ABX9TyM4JaaKmMa7ybUIDVPP24nv"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import pb2gen"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pb2gen(df.cell_1, df.cell_2, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/pball.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pball.ipynb","provenance":[{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyMxgE04vzxNhd/2/0DYU2le"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import pball"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pball(df, beta=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results['pbcorm']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/pbcor.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"pbcor.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637753763},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOznWr6vPehd9iyX3yBDNFl"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import pbcor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=pbcor(df.cell_1, df.cell_2, beta=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/rmmcppb.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"rmmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588616377427}],"collapsed_sections":[],"authorship_tag":"ABX9TyPzvAJueyiG1/st3fdvLHPD"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_single_factor import rmmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=rmmcppb(df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/spmcpa.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpa.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632756490},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyPpFAYJ9Pgd170X7K5xYNmT"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpa"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpa(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/spmcpb.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpb.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588632952101},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNYt8uKlk3sEvH8XgwuD61D"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":202},"outputId":"12ed3517-6b93-41e6-f991-ffd8537060a4","executionInfo":{"status":"ok","timestamp":1588632983358,"user_tz":240,"elapsed":4430,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," cell_1 | \n"," cell_2 | \n"," cell_3 | \n"," cell_4 | \n"," cell_5 | \n"," cell_6 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.542487 | \n"," 0.781840 | \n"," 0.635284 | \n"," 0.874977 | \n"," 0.479860 | \n"," 0.589451 | \n","
\n"," \n"," 1 | \n"," 0.984139 | \n"," 0.414655 | \n"," 0.581826 | \n"," 0.430758 | \n"," 0.529403 | \n"," 0.197294 | \n","
\n"," \n"," 2 | \n"," 0.184603 | \n"," 0.821967 | \n"," 0.569723 | \n"," 0.279681 | \n"," 0.990154 | \n"," 0.212335 | \n","
\n"," \n"," 3 | \n"," 0.476937 | \n"," 0.351505 | \n"," 0.101760 | \n"," 0.087372 | \n"," 0.826408 | \n"," 0.847228 | \n","
\n"," \n"," 4 | \n"," 0.730113 | \n"," 0.392344 | \n"," 0.422978 | \n"," 0.835971 | \n"," 0.006801 | \n"," 0.418546 | \n","
\n"," \n","
\n","
"],"text/plain":[" cell_1 cell_2 cell_3 cell_4 cell_5 cell_6\n","0 0.542487 0.781840 0.635284 0.874977 0.479860 0.589451\n","1 0.984139 0.414655 0.581826 0.430758 0.529403 0.197294\n","2 0.184603 0.821967 0.569723 0.279681 0.990154 0.212335\n","3 0.476937 0.351505 0.101760 0.087372 0.826408 0.847228\n","4 0.730113 0.392344 0.422978 0.835971 0.006801 0.418546"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/spmcpi.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"spmcpi.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633014822},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOIO6PyIZ6fl34R9C9+J1Vy"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import spmcpi"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=spmcpi(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/tmcppb.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"tmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588614092764}],"collapsed_sections":[],"authorship_tag":"ABX9TyPLA0Nj1FqLpYcQhJu8eacQ"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, con1way, trim_mean\n","from hypothesize.compare_groups_with_single_factor import tmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":202},"outputId":"49a67e90-f7e3-4a3b-d397-dd6c3b76a769","executionInfo":{"status":"ok","timestamp":1588614138956,"user_tz":240,"elapsed":4864,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","\n","
\n"," \n"," \n"," | \n"," cell_1 | \n"," cell_2 | \n"," cell_3 | \n","
\n"," \n"," \n"," \n"," 0 | \n"," 0.988089 | \n"," 0.531594 | \n"," 0.898677 | \n","
\n"," \n"," 1 | \n"," 0.040062 | \n"," 0.990704 | \n"," 0.393328 | \n","
\n"," \n"," 2 | \n"," 0.563470 | \n"," 0.395695 | \n"," 0.345625 | \n","
\n"," \n"," 3 | \n"," 0.856980 | \n"," 0.959441 | \n"," 0.168044 | \n","
\n"," \n"," 4 | \n"," 0.158802 | \n"," 0.391446 | \n"," 0.324284 | \n","
\n"," \n","
\n","
"],"text/plain":[" cell_1 cell_2 cell_3\n","0 0.988089 0.531594 0.898677\n","1 0.040062 0.990704 0.393328\n","2 0.563470 0.395695 0.345625\n","3 0.856980 0.959441 0.168044\n","4 0.158802 0.391446 0.324284"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=tmcppb(df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['output']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/winall.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"winall.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637846077},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyN3me0/wTmMxBS9uYJsVVYz"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import winall"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(3)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=winall(df, tr=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":69},"outputId":"9eea20c3-b5a7-427b-e522-d401e998d22d","executionInfo":{"status":"ok","timestamp":1588637940868,"user_tz":240,"elapsed":413,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["results['wcor']"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[ 1. , 0.23237836, 0.05106066],\n"," [ 0.23237836, 1. , -0.12543308],\n"," [ 0.05106066, -0.12543308, 1. ]])"]},"metadata":{"tags":[]},"execution_count":9}]}]}
--------------------------------------------------------------------------------
/examples/wincor.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wincor.ipynb","provenance":[{"file_id":"1q9AQFB99VEoYCD_uskwR9EUJR2OpGaJW","timestamp":1588637954798},{"file_id":"1pqn_y9Q_EQ6Z74HfSwaSud-Z6xccXLNt","timestamp":1588635862807},{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588633061518},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNio2+2ctQ6oNKTHdc5jl9w"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.measuring_associations import wincor"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=wincor(df.cell_1, df.cell_2, tr=.2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"pxpuR_9DDEYX","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/wwmcpbt.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wwmcpbt.ipynb","provenance":[{"file_id":"15Ivi4luJOwacOekBdbZ1LLTa7ts-qg_1","timestamp":1588631937930},{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyNp6W4ZcH5oqiCDO+AIxThj"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_two_factors import wwmcpbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":35},"outputId":"e7d3c247-7ff5-40d8-a3b6-0a79c722595a","executionInfo":{"status":"ok","timestamp":1588632020005,"user_tz":240,"elapsed":1968,"user":{"displayName":"Allan Campopiano","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjlYfMDyh8NOFcvZGREnofHZqDUdwEY7UmTbKZ_VQ=s64","userId":"17937508290212649605"}}},"source":["results=wwmcpbt(2, 3, df, .2)"],"execution_count":9,"outputs":[{"output_type":"stream","text":["ask wilcox if dif is supposed to be a argument here\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']['test']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/wwmcppb.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"wwmcppb.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588624101480}],"collapsed_sections":[],"authorship_tag":"ABX9TyOTVb1bjVl+ueXtjfFOxCqb"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data, trim_mean\n","from hypothesize.compare_groups_with_two_factors import wwmcppb"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(6)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=wwmcppb(2, 3, df, trim_mean, .2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results['factor_B']['output']"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/ydbt.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ydbt.ipynb","provenance":[{"file_id":"1E8c_xBF8l36H4Zrd-npTCoU_ZXXIhh0i","timestamp":1588623469716}],"collapsed_sections":[],"authorship_tag":"ABX9TyOFK1ec5drLBgCKpHf13ETw"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import ydbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=ydbt(df.cell_1, df.cell_2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/examples/yuenbt.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"yuenbt.ipynb","provenance":[{"file_id":"1dOaLcrRIctGehyXDy_sGNp5OSTl_4vCh","timestamp":1588615519643}],"collapsed_sections":[],"authorship_tag":"ABX9TyM5O2LtjdxCgF5QiGsHde1k"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"QZhIwy1isa1F","colab_type":"code","colab":{}},"source":["!pip install hypothesize"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c_ulEnBms7RI","colab_type":"code","colab":{}},"source":["from hypothesize.utilities import create_example_data\n","from hypothesize.compare_groups_with_single_factor import yuenbt"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3HSmG9exs_2C","colab_type":"code","colab":{}},"source":["df=create_example_data(2)\n","df.head()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"bm4pbHTRtfra","colab_type":"code","colab":{}},"source":["results=yuenbt(df.cell_1, df.cell_2)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-HCK1vVat-jd","colab_type":"code","colab":{}},"source":["results"],"execution_count":0,"outputs":[]}]}
--------------------------------------------------------------------------------
/hypothesize/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from hypothesize import compare_groups_with_single_factor, measuring_associations, \
3 | compare_groups_with_two_factors
4 |
5 |
--------------------------------------------------------------------------------
/hypothesize/compare_groups_with_single_factor/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from ._compare_groups_with_single_factor import *
3 |
--------------------------------------------------------------------------------
/hypothesize/compare_groups_with_single_factor/_compare_groups_with_single_factor.py:
--------------------------------------------------------------------------------
1 | __all__ = ["yuenbt", "pb2gen", "linconb", "rmmcppb",
2 | "lindepbt", "bootdpci", "ydbt", "tmcppb", "l2drmci"]
3 |
4 | import numpy as np
5 | import pandas as pd
6 | from scipy.stats import trim_mean
7 | from hypothesize.utilities import yuend, trimse, lincon, trimparts, trimpartt, pandas_to_arrays, \
8 | con1way, con2way, bptdpsi, rmmcp, trimcibt, remove_nans_based_on_design
9 |
10 | def yuenbt(x, y, tr=.2, alpha=.05, nboot=599, seed=False):
11 |
12 | """
13 | Compute a 1-alpha confidence interval for the difference between
14 | the trimmed means corresponding to two independent groups.
15 | The bootstrap-t method is used. During the bootstrapping,
16 | the absolute value of the test statistic is used (the "two-sided method").
17 |
18 |
19 | :param x: Pandas Series
20 | Data for group one
21 |
22 | :param y: Pandas Series
23 | Data for group two
24 |
25 | :param tr: float
26 | Proportion to trim (default is .2)
27 |
28 | :param alpha: float
29 | Alpha level (default is .05)
30 |
31 | :param nboot: int
32 | Number of bootstrap samples (default is 599)
33 |
34 | :param seed: bool
35 |         Random seed for reproducible results. Default is `False`.
36 |
37 | :return:
38 | Dictionary of results
39 |
40 |         ci: list
41 |             Confidence interval
42 | 
43 |         est_dif: float
44 |             Estimated difference between the group trimmed means
45 | 
46 |         est_x: float
47 |             Estimated trimmed mean for group one
48 | 
49 |         est_y: float
50 |             Estimated trimmed mean for group two
51 | 
52 |         p_value: float
53 |             p-value
54 | 
55 |         test_stat: float
56 |             Test statistic
57 | 
58 | 
59 |
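    |     Example (mirroring examples/yuenbt.ipynb; results vary because the
    |     example data are random):
    | 
    |     >>> from hypothesize.utilities import create_example_data
    |     >>> from hypothesize.compare_groups_with_single_factor import yuenbt
    |     >>> df = create_example_data(2)
    |     >>> results = yuenbt(df.cell_1, df.cell_2, tr=.2, nboot=599)
    |     >>> results['ci']  # doctest: +SKIP
    | 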
60 | """
61 |
62 | x, y=pandas_to_arrays([x, y])
63 |
64 | if seed:
65 | np.random.seed(seed)
66 |
67 | ci=[]
68 | x=x[~np.isnan(x)]
69 | y=y[~np.isnan(y)]
70 |
71 | xcen = x - trim_mean(x, tr)
72 | ycen = y - trim_mean(y, tr)
73 |
74 | test_stat = (trim_mean(x, tr) - trim_mean(y, tr)) / \
75 | np.sqrt(trimse(x, tr = tr) ** 2 + trimse(y, tr = tr) ** 2)
76 |
77 | datax = np.random.choice(xcen, size=(nboot, len(x)))
78 | datay = np.random.choice(ycen, size=(nboot, len(y)))
79 |
80 |     top = trim_mean(datax, tr, axis=1) - trim_mean(datay, tr, axis=1)  # use tr, not a hard-coded .2
81 |
82 |     # bootstrap standard errors; pass tr explicitly rather than relying on the default
83 |     botx = np.array([trimse(row, tr=tr) for row in datax])
84 |     boty = np.array([trimse(row, tr=tr) for row in datay])
85 | tval = top / np.sqrt(botx ** 2 + boty ** 2)
86 | tval = abs(tval)
87 | tval = sorted(tval)
88 | icrit = int(np.floor((1 - alpha) * nboot + .5))
89 | #ibot = int(np.floor(alpha * nboot / 2 + .5))
90 | #itop = int(np.floor((1 - alpha / 2) * nboot + .5))
91 | se = np.sqrt((trimse(x, tr)) ** 2 + (trimse(y, tr)) ** 2)
92 | ci.append(trim_mean(x, tr) - trim_mean(y, tr) - tval[icrit] * se)
93 | ci.append(trim_mean(x, tr) - trim_mean(y, tr) + tval[icrit] * se)
94 | p_value = sum(np.abs(test_stat) <= np.abs(tval)) / nboot
95 | est_x = trim_mean(x,tr)
96 | est_y = trim_mean(y, tr)
97 | est_dif = est_x - est_y
98 |
99 | results = {'ci': ci, 'test_stat': test_stat, 'p_value': p_value,
100 | 'est_x': est_x, 'est_y': est_y, 'est_dif': est_dif}
101 |
102 | return results
103 |
104 | def linconb(x, con, tr=.2, alpha=.05, nboot=599, seed=False):
105 |
106 | """
107 | Compute a 1-alpha confidence interval for a set of d linear contrasts
108 | involving trimmed means using the bootstrap-t bootstrap method.
109 | Independent groups are assumed. CIs are adjusted to control FWE
110 | (p values are not adjusted).
111 |
112 |
113 | :param x: DataFrame
114 | Each column represents a group of data
115 |
116 | :param con: array
117 | `con` is a J (number of columns) by d (number of contrasts)
118 |         matrix containing the contrast coefficients of interest.
119 |         All linear contrasts can be created automatically by using the function [con1way](J)
120 | (the result of which can be used for `con`).
121 |
122 | :param tr: float
123 | Proportion to trim (default is .2)
124 |
125 | :param alpha: float
126 | Alpha level (default is .05)
127 |
128 | :param nboot: int
129 |         Number of bootstrap samples (default is 599)
130 |
131 | :param seed: bool
132 |         Random seed for reproducible results. Default is `False`.
133 |
134 | :return:
135 | Dictionary of results
136 |
137 | con: array
138 | Contrast matrix
139 |
140 | crit: float
141 | Critical value
142 |
143 | n: list
144 | Number of observations for each group
145 |
146 | psihat: DataFrame
147 | Difference score and CI for each contrast
148 |
149 | test: DataFrame
150 | Test statistic, standard error, and p-value for each contrast
151 |
152 |
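    |     Example (mirroring examples/linconb.ipynb):
    | 
    |     >>> from hypothesize.utilities import create_example_data, con1way
    |     >>> from hypothesize.compare_groups_with_single_factor import linconb
    |     >>> df = create_example_data(3)
    |     >>> results = linconb(df, con1way(3))
    |     >>> results['psihat']  # doctest: +SKIP
    | 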
153 | """
154 |
155 | x=pandas_to_arrays(x)
156 |
157 | J = len(x)
158 | x = np.asarray([j[~np.isnan(j)] for j in x])
159 | #Jm = J - 1
160 | #d = (J ** 2 - J) / 2
161 |
162 | if con.shape[0] != len(x):
163 | raise Exception("The number of groups does not match the number of contrast coefficients.")
164 |
165 | bvec = np.zeros([nboot, J, 2])
166 |
167 | if seed:
168 | np.random.seed(seed)
169 |
170 | nsam = [len(xi) for xi in x]
171 | for j in range(J):
172 |
173 | xcen = x[j] - trim_mean(x[j], tr)
174 | data = np.random.choice(xcen, size=(nboot, len(x[j])))
175 |
176 | for i, row in enumerate(data):
177 | bvec[i,j,:]=trimparts(row, tr)
178 |
179 | m1 = bvec[:,:,0].T
180 | m2 = bvec[:,:, 1].T
181 | boot = np.zeros([con.shape[1], nboot])
182 | for d in range(con.shape[1]):
183 | top = np.asarray([trimpartt(row, con[:,d]) for row in m1.T])
184 | consq = con[:, d] ** 2
185 | bot = np.asarray([trimpartt(row,consq) for row in m2.T])
186 | boot[d,:] = np.abs(top) / np.sqrt(bot)
187 |
188 | testb=np.asarray([max(row) for row in boot.T])
189 | ic = int(np.floor((1 - alpha) * nboot) -1) # one less than R
190 | testb = np.sort(testb)
191 | psihat = np.zeros([con.shape[1], 4])
192 | test = np.zeros([con.shape[1], 4])
193 |
194 | for d in range(con.shape[1]):
195 | test[d, 0] = d
196 | psihat[d, 0] = d
197 | testit = lincon(x, np.array([con[:,d]]).T, tr, alpha) # column slice of contrast matrix
198 | #test[d, 1]=testit['test'][0, 1]
199 | test[d, 1]=testit['test']['test'][0]
200 | #pval = np.mean((abs(testit['test'][0, 1]) < boot[d,:]))
201 | pval = np.mean((abs(testit['test']['test'][0]) < boot[d,:]))
202 | test[d, 3] = pval
203 | #print(testit['test'])
204 | #print(testit['psihat'])
205 | # psihat[d, 2] = testit['psihat'][0, 1] - testb[ic] * testit['test'][0, 3]
206 | # psihat[d, 3] = testit['psihat'][0, 1] + testb[ic] * testit['test'][0, 3]
207 | # psihat[d, 1] = testit['psihat'][0, 1]
208 | psihat[d, 2] = testit['psihat']['psihat'][0] - testb[ic] * testit['test']['se'][0]
209 | psihat[d, 3] = testit['psihat']['psihat'][0] + testb[ic] * testit['test']['se'][0]
210 | psihat[d, 1] = testit['psihat']['psihat'][0]
211 | #test[d, 2] = testit['test'][0, 3]
212 | test[d, 2] = testit['test']['se'][0]
213 |
214 |
215 |
216 | psihat_col_names=['contrast_index', 'psihat', 'ci_low', 'ci_up']
217 | test_col_names = ['contrast_index', 'test', 'se', 'p_value']
218 |
219 | psihat = pd.DataFrame(psihat, columns=psihat_col_names)
220 | test=pd.DataFrame(test, columns=test_col_names)
221 |
222 | return {'n': nsam, 'psihat': psihat, 'test': test, 'crit': testb[ic], 'con': con}
223 |
224 | def rmmcppb(x, est, *args, alpha=.05, con=None,
225 | dif=True, nboot=None, BA=False,
226 | hoch=False, SR=False, seed=False):
227 |
228 | """
229 | Use a percentile bootstrap method to compare dependent groups.
230 | By default, compute a .95 confidence interval for all linear contrasts
231 | specified by con, a J-by-C matrix, where C is the number of
232 | contrasts to be tested, and the columns of `con` are the
233 | contrast coefficients. If con is not specified,
234 | all pairwise comparisons are done.
235 |
236 |     If `est` is the function `onestep` or `mom` (these are not implemented yet),
237 | method SR can be used to control the probability of at least one Type I error.
238 | Otherwise, Hochberg's method is used.
239 |
240 | If `dif` is `False` and `BA` is `True`, the bias adjusted
241 | estimate of the generalized p-value is recommended.
242 | Using `BA`=`True` (when `dif`=`False`)
243 | is recommended when comparing groups
244 | with M-estimators and MOM, but it is not necessary when
245 | comparing 20% trimmed means (Wilcox & Keselman, 2002).
246 |
247 |     Hochberg's sequentially rejective method is used
248 |     when n>=80.
249 |
250 | Note that arguments up to and including `args` are positional arguments
251 |
252 | :param x: Pandas DataFrame
253 | Each column represents a group of data
254 |
255 | :param est: function
256 | Measure of location (currently only `trim_mean` is supported)
257 |
258 | :param args: list/value
259 | Parameter(s) for measure of location (e.g., .2)
260 |
261 | :param alpha: float
262 | Alpha level (default is .05)
263 |
264 | :param con: array
265 | `con` is a J (number of columns) by d (number of contrasts)
266 |         matrix containing the contrast coefficients of interest.
267 |         All linear contrasts can be created automatically by using the function [con1way](J)
268 | (the result of which can be used for `con`). The default is `None` and in this
269 | case all linear contrasts are created automatically.
270 |
271 | :param dif: bool
272 | When `True`, use difference scores, otherwise use marginal distributions
273 |
274 | :param nboot: int
275 | Number of bootstrap samples. Default is `None`
276 | in which case `nboot` will be chosen for you
277 | based on the number of contrasts.
278 |
279 | :param BA: bool
280 |         When `True`, the bias adjusted estimate of the
281 |         generalized p-value is applied (relevant when `dif` is `False`)
282 |
283 | :param hoch: bool
284 |         When `True`, use Hochberg's sequentially rejective method
285 |         (it is applied automatically when n>=80).
286 |
287 | :param SR: bool
288 |         When `True`, use the modified "sequentially rejective" method, especially when
289 | comparing one-step M-estimators or M-estimators.
290 |
291 | :param seed: bool
292 |         Random seed for reproducible results (default is `False`)
293 |
294 | :return:
295 | Dictionary of results
296 |
297 | con: array
298 | Contrast matrix
299 |
300 | num_sig: int
301 | Number of statistically significant results
302 |
303 | output: DataFrame
304 | Difference score, p-value, critical value, and CI for each contrast
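    | 
    |     Example (mirroring examples/rmmcppb.ipynb):
    | 
    |     >>> from hypothesize.utilities import create_example_data, trim_mean
    |     >>> from hypothesize.compare_groups_with_single_factor import rmmcppb
    |     >>> df = create_example_data(3)
    |     >>> results = rmmcppb(df, trim_mean, .2)
    |     >>> results['output']  # doctest: +SKIP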
305 | """
306 |
307 | called_directly=False
308 | if type(x) is pd.core.frame.DataFrame:
309 | called_directly=True
310 | x=x.dropna().values
311 |
312 | if hoch:
313 | SR=False
314 |
315 | if SR:
316 | raise Exception("onestep and mom estimators are not yet implemented"
317 | "and only these can be used with SR method. Please set SR to False for now.")
318 |
319 | if dif:
320 | print("analysis is being done on difference scores",
321 | "each confidence interval has probability coverage of 1-alpha.")
322 |
323 | temp=rmmcppbd(x,est, *args, alpha=alpha,con=con,
324 | nboot=nboot,hoch=True)
325 |
326 | if called_directly:
327 |
328 | col_names = ['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper']
329 |
330 | return {'output': pd.DataFrame(temp['output'], columns=col_names),
331 | 'con': temp['con'], "num_sig": temp['num_sig']}
332 |
333 | else:
334 |
335 | return {'output': temp['output'],
336 | 'con': temp['con'], "num_sig": temp['num_sig']}
337 |
338 | else:
339 | print("dif=False so using marginal distributions")
340 |
341 | if not BA:
342 | print("If and when MOM and/or onestep estimators are implemeted, "
343 | "it is suggested to use BA=True and hoch=T")
344 |
345 | J=x.shape[1]
346 | xcen=np.full([x.shape[0], x.shape[1]], np.nan)
347 | for j in range(J):
348 | xcen[:, j] = x[:, j] - est(x[:, j], *args)
349 |
350 | if con is None:
351 | con=con1way(J)
352 |
353 | d=con.shape[1]
354 |
355 | if nboot is None:
356 | if d<4:
357 | nboot=1000
358 | elif d>4:
359 | nboot=5000
360 |
361 | n=x.shape[0]
362 | connum=con.shape[1]
363 |
364 | if seed:
365 | np.random.seed(seed)
366 |
367 | xbars=est(x,*args)
368 |
369 | psidat=np.zeros(connum)
370 | for ic in range(connum):
371 | psidat[ic]=np.sum(con[:,ic] * xbars)
372 |
373 | psihat=np.zeros([connum, nboot])
374 | psihatcen=np.zeros([connum, nboot])
375 | bvec=np.full([nboot,J], np.nan)
376 | bveccen = np.full([nboot, J], np.nan)
377 | data=np.random.randint(n,size=(nboot,n))
378 | for ib in range(nboot):
379 | bvec[ib,:] = est(x[data[ib,:],:], *args)
380 | bveccen[ib, :] = est(xcen[data[ib, :], :], *args)
381 |
382 | test=np.full(connum, np.nan)
383 | bias=np.full(connum, np.nan)
384 |
385 | for ic in range(connum):
386 | psihat[ic,:]=[bptdpsi(row, con[:, ic]) for row in bvec]
387 | psihatcen[ic,:] = [bptdpsi(row, con[:,ic]) for row in bveccen]
388 | bias[ic] = np.sum((psihatcen[ic,:] > 0)) / nboot - .5
389 | ptemp =(np.sum(psihat[ic,:] > 0) + .5 * np.sum(psihat[ic,:] == 0)) / nboot
390 |
391 | if BA:
392 | test[ic] = ptemp - .1 * bias[ic]
393 |
394 | if not BA:
395 | test[ic] = ptemp
396 |
397 | test[ic] = np.min([test[ic], 1 - test[ic]])
398 | test[ic] = np.max([test[ic], 0]) # bias corrected might be less than zero
399 |
400 | test=2*test
401 | ncon=con.shape[1]
402 | dvec=alpha/np.arange(1,ncon+1)
403 |
404 | if SR:
405 |
406 | if alpha == .05:
407 |
408 | dvec =[.025,
409 | .025,
410 | .0169,
411 | .0127,
412 | .0102,
413 | .00851,
414 | .0073,
415 | .00639,
416 | .00568,
417 | .00511]
418 |
419 | dvecba = [.05,
420 | .025,
421 | .0169,
422 | .0127,
423 | .0102,
424 | .00851,
425 | .0073,
426 | .00639,
427 | .00568,
428 | .00511]
429 |
430 | if ncon > 10:
431 | avec = .05 / np.arange(11,ncon+1)
432 | dvec = np.append(dvec, avec)
433 |
434 | elif alpha == .01:
435 |
436 | dvec =[.005,
437 | .005,
438 | .00334,
439 | .00251,
440 | .00201,
441 | .00167,
442 | .00143,
443 | .00126,
444 | .00112,
445 | .00101]
446 |
447 | dvecba =[.01,
448 | .005,
449 | .00334,
450 | .00251,
451 | .00201,
452 | .00167,
453 | .00143,
454 | .00126,
455 | .00112,
456 | .00101]
457 |
458 | if ncon > 10:
459 | avec = .01 / np.arange(11,ncon+1)
460 | dvec = np.append(dvec, avec)
461 |
462 |
463 | else:
464 |
465 | dvec = alpha / np.arange(1,ncon+1)
466 | dvecba = dvec
467 | dvec[1] = alpha
468 |
469 | if hoch:
470 | dvec=alpha/np.arange(1,ncon+1)
471 |
472 | dvecba=dvec
473 | temp2 = (-test).argsort()
474 | zvec = dvec[:ncon]
475 |
476 | if BA:
477 | zvec = dvecba[:ncon]
478 |
479 | output=np.zeros([connum, 6])
480 | tmeans=est(x, *args)
481 |
482 | output[temp2, 3] = zvec
483 | for ic in range(ncon):
484 | output[ic, 1] = np.sum(con[:, ic] * tmeans)
485 | output[ic, 0] = ic
486 | output[ic, 2] = test[ic]
487 | temp = np.sort(psihat[ic, :])
488 | icl = round(alpha * nboot / 2) #+ 1
489 | icu = nboot - icl - 1 #nboot - (icl - 1)
490 | output[ic, 4] = temp[icl]
491 | output[ic, 5] = temp[icu]
492 |
493 | num_sig = output.shape[0]
494 | ior = (-output[:, 2]).argsort()
495 | for j in range(output.shape[0]):
496 | if output[ior[j], 2] <= output[ior[j], 3]:
497 | break
498 | else:
499 | num_sig = num_sig - 1
500 |
501 | if called_directly:
502 | col_names=['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper']
503 | results={"output": pd.DataFrame(output, columns=col_names), "con": con, "num_sig": num_sig}
504 | print(results)
505 |
506 | else:
507 | results={"output": output, "con": con, "num_sig": num_sig}
508 |
509 |
510 | return results
511 |
512 | def rmmcppbd(x, est, *args, alpha=.05, con=None,
513 | nboot=None, hoch=True, seed=False):
514 |
515 | """
516 | Use a percentile bootstrap method to compare dependent groups
517 | based on difference scores.
518 | By default,
519 | compute a .95 confidence interval for all linear contrasts
520 | specified by con, a J by C matrix, where C is the number of
521 | contrasts to be tested, and the columns of con are the
522 | contrast coefficients.
523 | If con is not specified, all pairwise comparisons are done.
524 |
525 | nboot is the bootstrap sample size. If not specified, a value will
526 | be chosen depending on the number of contrasts there are.
527 |
528 | A sequentially rejective method is used to control alpha.
529 | If n>=80, hochberg's method is used.
530 |
531 | Note that arguments up to and including `args` are positional arguments
532 |
533 |     :param x: array of data (each column is a group; rows with NaNs are dropped)
534 |     :param est: measure of location (e.g., `trim_mean`)
535 |     :param args: parameter(s) for the measure of location (e.g., .2)
536 |     :param alpha: alpha level
537 |     :param con: contrast matrix (all pairwise contrasts when `None`)
538 |     :param nboot: number of bootstrap samples (chosen automatically when `None`)
539 |     :param hoch: use Hochberg's sequentially rejective method
540 |     :param seed: random seed for reproducible results
541 |     :return: dictionary with `output`, `con`, and `num_sig`
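    | 
    |     A minimal sketch (this is an internal helper, normally reached through
    |     `rmmcppb(..., dif=True)` rather than called directly):
    | 
    |     >>> import numpy as np
    |     >>> from scipy.stats import trim_mean
    |     >>> x = np.random.rand(30, 3)
    |     >>> results = rmmcppbd(x, trim_mean, .2)
    |     >>> results['num_sig']  # doctest: +SKIP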
542 | """
543 |
544 | x = x[~np.isnan(x).any(axis=1)]
545 | J=x.shape[1]
546 | n=x.shape[0]
547 | if n>=80:
548 | hoch=True
549 |
550 | #Jm=J-1
551 | if con is None:
552 | con=con1way(J)
553 |
554 | d = con.shape[1]
555 | if not nboot:
556 |
557 | if d <= 10:
558 | nboot = 3000
559 |
560 | elif d <= 6:
561 | nboot = 2000
562 |
563 | elif d <= 4:
564 | nboot = 1000
565 |
566 | else:
567 | nboot=5000
568 |
569 | connum=d
570 | xx=x@con
571 |
572 | if seed:
573 | np.random.seed(seed)
574 |
575 | psihat=np.zeros([connum, nboot])
576 | data=np.random.randint(n, size=(nboot,n))
577 |
578 |     # Wilcox's implementation in R is a bit more complicated;
579 |     # this is a simplified (hopefully equivalent) version.
580 | for ib in range(nboot):
581 | psihat[:,ib]=est(xx[data[ib,:], :], *args)
582 |
583 | test = np.full(connum, np.nan)
584 | icl = round(alpha * nboot // 2) #+ 1
585 | icu = nboot - icl - 2 #- 1
586 | cimat=np.full([connum, 2], np.nan)
587 |
588 | for ic in range(connum):
589 |
590 | test[ic] =(sum(psihat[ic, :] > 0) + .5 * sum(psihat[ic, :] == 0)) / nboot
591 | test[ic] = min(test[ic], 1 - test[ic])
592 | temp = np.sort(psihat[ic, :])
593 | cimat[ic, 0] = temp[icl]
594 | cimat[ic, 1] = temp[icu]
595 |
596 | test = 2 * test
597 | ncon = con.shape[1]
598 |
599 | if alpha == .05:
600 | dvec =[.025,
601 | .025,
602 | .0169,
603 | .0127,
604 | .0102,
605 | .00851,
606 | .0073,
607 | .00639,
608 | .00568,
609 | .00511]
610 |
611 | if ncon > 10:
612 | avec = .05 / np.arange(11, ncon+1)
613 | dvec = np.append(dvec, avec)
614 |
615 | elif alpha == .01:
616 | dvec =[.005,
617 | .005,
618 | .00334,
619 | .00251,
620 | .00201,
621 | .00167,
622 | .00143,
623 | .00126,
624 | .00112,
625 | .00101]
626 |
627 | if ncon > 10:
628 | avec = .01 / np.arange(11,ncon+1)
629 | dvec = np.append(dvec, avec)
630 |
631 | else:
632 | dvec = alpha / np.arange(1,ncon+1)
633 | dvec[1] = alpha / 2
634 |
635 | if hoch:
636 | dvec = alpha / (2 * np.arange(1,ncon+1))
637 |
638 | dvec = 2 * dvec
639 | temp2 = (-test).argsort()
640 | ncon = con.shape[1]
641 | zvec = dvec[:ncon]
642 | output=np.zeros([connum, 6])
643 |
644 | tmeans=est(xx,*args)
645 | output[temp2, 3] = zvec
646 |
647 | for ic in range(ncon):
648 | output[ic, 1] = tmeans[ic]
649 | output[ic, 0] = ic
650 | output[ic, 2] = test[ic]
651 | output[ic, 4:6] = cimat[ic,:]
652 |
653 | num_sig = np.sum(output[:, 2] <= output[:, 3])
654 |
655 | return {"output": output, "con": con, "num_sig": num_sig}
656 |
657 | def lindepbt(x, tr=.2, con=None, alpha=.05, nboot=599, dif=True, seed=False):
658 |
659 | """
660 |     Multiple comparisons on trimmed means with FWE controlled with Rom's method,
661 |     using a bootstrap-t method.
662 |
663 | :param x: Pandas DataFrame
664 | Each column in the data represents a different group
665 |
666 | :param tr: float
667 | Proportion to trim (default is .2)
668 |
669 | :param con: array
670 | `con` is a J (number of groups) by d (number of contrasts)
671 |         matrix containing the contrast coefficients of interest.
672 |         All linear contrasts can be created automatically by using the function [con1way](J)
673 | (the result of which can be used for `con`). The default is `None` and in this
674 | case all linear contrasts are created automatically.
675 |
676 | :param alpha: float
677 | Alpha level. Default is .05.
678 |
679 | :param nboot: int
680 |         Number of bootstrap samples (default is 599)
681 |
682 | :param dif: bool
683 | When `True`, use difference scores, otherwise use marginal distributions
684 |
685 | :param seed: bool
686 |         Random seed for reproducible results (default is `False`)
687 |
688 | :return:
689 | Dictionary of results
690 |
691 | con: array
692 | Contrast matrix
693 |
694 |         num_sig: int
695 |             Number of statistically significant results
696 |
697 | psihat: DataFrame
698 | Difference score and CI for each contrast
699 |
700 | test: DataFrame
701 | Test statistic, p-value, critical value, and standard error
702 | for each contrast
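    | 
    |     Example (mirroring examples/lindepbt.ipynb):
    | 
    |     >>> from hypothesize.utilities import create_example_data
    |     >>> from hypothesize.compare_groups_with_single_factor import lindepbt
    |     >>> df = create_example_data(3)
    |     >>> results = lindepbt(df)
    |     >>> results['test']  # doctest: +SKIP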
703 | """
704 |
705 | called_directly=False
706 | if type(x) is pd.DataFrame:
707 | x = pandas_to_arrays(x)
708 | x = remove_nans_based_on_design(x, design_values=len(x), design_type='dependent_groups')
709 | x = np.r_[x].T
710 | called_directly=True
711 |
712 | from hypothesize.measuring_associations import wincor
713 |
714 | if seed:
715 | np.random.seed(seed)
716 |
717 | if con is None:
718 | con=con2way(1,x.shape[1])[1] # all pairwise
719 | ncon = con.shape[1]
720 |
721 | else:
722 | ncon = con.shape[1]
723 |
724 | x = x[~np.isnan(x).any(axis=1)]
725 | n=x.shape[0]
726 | J=x.shape[1]
727 | nval=x.shape[0]
728 | h1 = nval - 2 * np.floor(tr * nval)
729 | #df=h1-1
730 | xbar=trim_mean(x, tr)
731 |
732 | if alpha == .05:
733 |
734 | dvec = [.05,
735 | .025,
736 | .0169,
737 | .0127,
738 | .0102,
739 | .00851,
740 | .0073,
741 | .00639,
742 | .00568,
743 | .00511]
744 |
745 | if ncon > 10:
746 | avec = .05 / np.arange(11, ncon + 1)
747 | dvec = np.append(dvec, avec)
748 |
749 | elif alpha == .01:
750 |
751 | dvec = [.01,
752 | .005,
753 | .00334,
754 | .00251,
755 | .00201,
756 | .00167,
757 | .00143,
758 | .00126,
759 | .00112,
760 | .00101]
761 |
762 | if ncon > 10:
763 | avec = .01 / np.arange(11, ncon + 1)
764 | dvec = np.append(dvec, avec)
765 |
766 |
767 | else:
768 | dvec = alpha / np.arange(1, ncon + 1)
769 |
770 |
771 | psihat=np.zeros([ncon,4])
772 | test = np.zeros([ncon, 5])
773 | temp1=np.array([])
774 |
775 | for d in range(ncon):
776 | psihat[d, 0] = d
777 |
778 | if not dif:
779 | psihat[d, 1] = np.sum(con[:, d] * xbar)
780 | sejk = 0
781 |
782 | for j in range(J):
783 | for k in range(J):
784 | djk = (nval - 1) * wincor(x[:, j], x[:, k], tr)['wcov'] / (h1 * (h1 - 1))
785 | sejk = sejk + con[j, d] * con[k, d] * djk
786 |
787 | sejk = np.sqrt(sejk)
788 | test[d, 0] = d
789 | test[d, 1] = np.sum(con[:, d] * xbar) / sejk
790 | test[d, 4] = sejk
791 |
792 | data=np.random.randint(n, size=(nboot, n))
793 | xcen = np.full([x.shape[0], x.shape[1]], np.nan)
794 | for j in range(J):
795 | xcen[:, j] = x[:, j] - trim_mean(x[:, j], tr)
796 |
797 | bvec=[lindep_sub(data_row, xcen, con[:,d], tr=tr)
798 | for data_row in data]
799 |
800 | bsort = np.sort(np.abs(bvec))
801 | ic = round((1 - alpha) * nboot) - 1 # subtract 1 for Python's zero-based indexing
802 | psihat[d, 2] = psihat[d, 1] - bsort[ic] * test[d, 4]
803 | psihat[d, 3] = psihat[d, 1] + bsort[ic] * test[d, 4]
804 | p_value = np.mean(np.abs(test[d, 1]) <= np.abs(bvec))
805 | temp1 = np.append(temp1, p_value)
806 |
807 | elif dif:
808 |
809 | for j in range(J):
810 | if j==0:
811 | dval=con[j,d] * x[:,j]
812 |
813 | elif j>0:
814 | dval=dval+con[j,d] * x[:,j]
815 |
816 | temp = trimcibt(dval,tr=tr,alpha=alpha,nboot=nboot,seed=seed)
817 | temp1 = np.append(temp1, temp['p_value'])
818 | test[d, 0] = d
819 | test[d, 1]=temp['test_stat'] # store the test statistic (the R original apparently omits this)
820 | test[d, 4] = trimse(dval, tr=tr)
821 | psihat[d, 1] = trim_mean(dval, tr)
822 | psihat[d, 2] = temp['ci'][0]
823 | psihat[d, 3] = temp['ci'][1]
824 |
825 | test[:, 2] = temp1
826 | temp2 = (-temp1).argsort()
827 | zvec = dvec[:ncon]
828 | test[temp2, 3] = zvec
829 |
830 | # if flagcon
831 | num_sig = np.sum(test[:, 2] <= test[:, 3])
832 |
833 | if called_directly:
834 |
835 | test=pd.DataFrame(test, columns=["con_num", "test", "p_value", "p_crit", "se"])
836 | psihat=pd.DataFrame(psihat, columns=["con_num", "psihat", "ci_lower", "ci_upper"])
837 |
838 |
839 | return {'test': test, 'psihat': psihat, 'con': con, 'num_sig': num_sig}
840 |
841 | def lindep_sub(data, x, con = None, tr = .2):
842 |
843 | con = con.reshape(len(con), 1) # make 2D col vector
844 | res = rmmcp(x[data,:], con=con, tr=tr, dif=False)['test'][:, 1]
845 |
846 | return res[0]
847 |
848 | def pb2gen(x, y, est, *args, alpha=.05, nboot=2000, seed=False):
849 |
850 | """
851 | Compute a bootstrap confidence interval for
852 | the difference between any two parameters corresponding to two
853 | independent groups.
854 |
855 | Note that arguments up to and including `args` are positional arguments
856 |
857 | :param x: Pandas Series
858 | Data for group one
859 |
860 | :param y: Pandas Series
861 | Data for group two
862 |
863 | :param est: function
864 | Measure of location (currently only `trim_mean` is supported)
865 |
866 | :param args: list/value
867 | Parameter(s) for measure of location (e.g., .2)
868 |
869 | :param alpha: float
870 | Alpha level (default is .05)
871 |
872 | :param nboot: int
873 | Number of bootstrap samples (default is 2000)
874 |
875 | :param seed: bool
876 | Random seed for reproducible results (default is `False`)
877 |
878 | :return:
879 | Dictionary of results
880 |
881 | ci: list
882 |
883 | Confidence interval
884 |
885 | est_1: float
886 | Estimated value (based on `est`) for group one
887 |
888 | est_2: float
889 | Estimated value (based on `est`) for group two
890 |
891 | est_dif: float
892 | Estimated difference between group one and two
893 |
894 | n1: int
895 | Number of observations in group one
896 |
897 | n2: int
898 | Number of observations in group two
899 |
900 | p_value: float
901 |
902 | p-value
903 |
904 | variance: float
905 | Variance of the bootstrap distribution of the estimated difference
906 | """
907 |
908 | x, y = pandas_to_arrays([x, y])
909 |
910 | x=x[~np.isnan(x)]
911 | y=y[~np.isnan(y)]
912 |
913 | if seed:
914 | np.random.seed(seed)
915 |
916 |
917 | datax = np.random.choice(x, size=(nboot, len(x)))
918 | datay = np.random.choice(y, size=(nboot, len(y)))
919 |
920 | bvecx=est(datax, *args, axis=1)
921 | bvecy = est(datay, *args, axis=1)
922 |
923 | bvec = np.sort(bvecx - bvecy)
924 | low = round((alpha / 2) * nboot) # R adds 1 here; Python's zero-based indexing cancels it
925 | up = nboot - low - 2
926 | temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
927 | sig_level = 2 * (min(temp, 1 - temp))
928 | se = np.var(bvec)
929 |
930 | results={'est_1': est(x,*args),
931 | 'est_2': est(y,*args),
932 | 'est_dif': est(x, *args) - est(y, *args),
933 | 'ci': [bvec[low], bvec[up]],
934 | 'p_value': sig_level,
935 | 'variance': se,
936 | 'n1': len(x),
937 | 'n2': len(y)}
938 |
939 | return results
940 |
941 | def bootdpci(x, est, *args, nboot=None, alpha=.05,
942 | dif=True, BA=False, SR=False):
943 |
944 | """
945 | Using the percentile bootstrap method, compute a .95 confidence interval
946 | for the difference between a measure of location or scale
947 | when comparing two dependent groups.
948 |
949 | The argument `dif` defaults to `True` indicating
950 | that difference scores will be used, in which case Hochberg’s
951 | method is used to control FWE. If `dif` is `False`, measures of
952 | location associated with the marginal distributions are used
953 | instead.
954 |
955 | If `dif` is `False` and `BA` is `True`, the bias-adjusted
956 | estimate of the generalized p-value is used.
957 | Using `BA=True` (when `dif=False`)
958 | is recommended when comparing groups
959 | with M-estimators and MOM, but it is not necessary when
960 | comparing 20% trimmed means (Wilcox & Keselman, 2002).
961 |
962 | The so-called SR method, a slight
963 | modification of Hochberg's (1988) "sequentially rejective"
964 | method, can be applied to control FWE, especially when
965 | comparing one-step M-estimators or M-estimators.
966 |
967 | Note that arguments up to and including `args` are positional arguments
968 |
969 | :param x: Pandas DataFrame
970 | Each column represents a group of data
971 |
972 | :param est: function
973 | Measure of location (currently only `trim_mean` is supported)
974 |
975 | :param args: list/value
976 | Parameter(s) for measure of location (e.g., .2)
977 |
978 | :param alpha: float
979 | Alpha level. Default is .05.
980 |
981 | :param nboot: int
982 | Number of bootstrap samples. Default is `None`
983 | in which case `nboot` will be chosen for you
984 | based on the number of contrasts.
985 |
986 | :param dif: bool
987 | When `True`, use difference scores, otherwise use marginal distributions
988 |
989 | :param BA: bool
990 | When `True`, the bias-adjusted estimate of the
991 | generalized p-value is applied (relevant only when `dif` is `False`)
992 |
993 | :param SR: bool
994 | When `True`, use the modified "sequentially rejective" method, especially when
995 | comparing one-step M-estimators or M-estimators
996 |
997 | :return:
998 | Dictionary of results
999 |
1000 | con: array
1001 | Contrast matrix
1002 |
1003 | num_sig: int
1004 | Number of statistically significant results
1005 |
1006 | output: DataFrame
1007 | Difference score, p-value, critical value, and CI for each contrast
1008 |
1009 | """
1010 |
1011 | # replace with actual estimators when implemented
1012 | if SR and est not in ('onestep', 'mom'):
1013 | SR=False
1014 | print("setting SR to False. SR=True should "
1015 | "only be used with the onestep or mom estimators")
1016 |
1017 | ## in R
1018 | # okay=False
1019 | # if est in (onestep, mom):
1020 | # okay=True
1021 | #
1022 | # if not okay:
1023 | # SR=False
1024 |
1025 | results=rmmcppb(x, est, *args, nboot=nboot,alpha=alpha,
1026 | SR=SR, dif=dif, BA=BA)
1027 |
1028 | col_names = ['con_num', 'psihat', 'p_value', 'p_crit', 'ci_lower', 'ci_upper']
1029 | results.update({'output': pd.DataFrame(results['output'], columns=col_names)})
1030 |
1031 | return results
1032 |
1033 | def ydbt(x, y, tr=.2, alpha=.05, nboot=599, side=True, seed=False):
1034 |
1035 | """
1036 | Using the bootstrap-t method,
1037 | compute a .95 confidence interval for the difference between
1038 | the marginal trimmed means of paired data.
1039 | By default, 20% trimming is used with 599 bootstrap samples.
1040 |
1041 |
1042 | :param x: Pandas Series
1043 | Data for group one
1044 |
1045 | :param y: Pandas Series
1046 | Data for group two
1047 |
1048 | :param tr: float
1049 | Proportion to trim (default is .2)
1050 |
1051 | :param alpha: float
1052 | Alpha level. Default is .05.
1053 |
1054 | :param nboot: int
1055 | Number of bootstrap samples (default is 599)
1056 |
1057 | :param side: bool
1058 | When `True`, the function returns a symmetric CI and a p-value; otherwise it returns an equal-tailed CI (no p-value)
1059 |
1060 | :param seed: bool
1061 | Random seed for reproducible results (default is `False`)
1062 |
1063 | :return:
1064 | Dictionary of results
1065 |
1066 | ci: list
1067 | Confidence interval
1068 |
1069 | dif: float
1070 | Difference between group one and two
1071 |
1072 | p_value: float
1073 | p-value
1074 | """
1075 |
1076 | x = pandas_to_arrays([x, y])
1077 | x=remove_nans_based_on_design(x, 2, 'dependent_groups')
1078 | x,y=[x[0], x[1]]
1079 |
1080 | if seed:
1081 | np.random.seed(seed)
1082 |
1083 | data = np.random.randint(len(x), size=(nboot, len(x)))
1084 |
1085 | xcen = x - trim_mean(x, tr)
1086 | ycen = y - trim_mean(y, tr)
1087 |
1088 | bvec=[tsub(row, xcen, ycen, tr) for row in data]
1089 |
1090 | dotest = yuend(x, y, tr=tr)
1091 |
1092 | estse = dotest['se']
1093 | p_value = np.nan
1094 | dif = trim_mean(x, tr) - trim_mean(y, tr)
1095 | ci=[]
1096 |
1097 | if not side:
1098 | print('p_value is only returned when side=True')
1099 | ilow = round((alpha / 2) * nboot) -1
1100 | ihi = nboot - ilow - 2
1101 | bsort = np.sort(bvec)
1102 | ci.append(dif - bsort[ihi] * estse)
1103 | ci.append(dif - bsort[ilow + 1] * estse)
1104 |
1105 | else:
1106 | bsort = np.sort(np.abs(bvec))
1107 | ic = round((1 - alpha) * nboot)-1
1108 | ci.append(dif - bsort[ic] * estse)
1109 | ci.append(dif + bsort[ic] * estse)
1110 | p_value = (np.sum(np.abs(dotest['teststat']) <= np.abs(bvec))) / nboot
1111 |
1112 |
1113 | return {'ci': ci, 'dif': dif, 'p_value': p_value}
1114 |
1115 | def tsub(isub, x, y, tr):
1116 |
1117 | """
1118 | Compute test statistic for trimmed means
1119 | when comparing dependent groups.
1120 | By default, 20% trimmed means are used.
1121 | isub is an array of length n of random integers
1122 | to control bootstrap sampling.
1123 |
1124 | This function is used by ydbt
1125 |
1126 | :param isub:
1127 | :param x:
1128 | :param y:
1129 | :param tr:
1130 | :return:
1131 | """
1132 |
1133 | tsub_res = yuend(x[isub], y[isub], tr = tr)['teststat']
1134 |
1135 | return tsub_res
1136 |
1137 | def tmcppb(x, est, *args, con=None, bhop=False, alpha=.05, nboot=None, seed=False):
1138 |
1139 | """
1140 | Multiple comparisons for J independent groups using trimmed means and
1141 | the percentile bootstrap method. Rom’s method is used to control the
1142 | probability of one or more type I errors. For C > 10 hypotheses,
1143 | or when the goal is to test at some level other than .05 and .01,
1144 | Hochberg’s method is used. Setting the argument `bhop` to `True` uses the
1145 | Benjamini–Hochberg method instead.
1146 |
1147 | Note that arguments up to and including `args` are positional arguments
1148 |
1149 | :param x: Pandas DataFrame
1150 | Each column represents a group of data
1151 |
1152 | :param est: function
1153 | Measure of location (currently only `trim_mean` is supported)
1154 |
1155 | :param args: list/value
1156 | Parameter(s) for measure of location (e.g., .2)
1157 |
1158 | :param con: array
1159 | `con` is a J (number of columns) by d (number of contrasts)
1160 | matrix containing the contrast coefficients of interest.
1161 | All linear contrasts can be created automatically by using the function con1way(J)
1162 | (the result of which can be used for `con`). The default is `None`, in which
1163 | case all linear contrasts are created automatically.
1164 |
1165 | :param bhop: bool
1166 | If `True`, the Benjamini–Hochberg method is used to control FWE
1167 |
1168 | :param alpha: float
1169 | Alpha level. Default is .05.
1170 |
1171 | :param nboot: int
1172 | Number of bootstrap samples. Default is `None`, in which case `nboot` is chosen based on the number of groups
1173 |
1174 | :param seed: bool
1175 | Random seed for reproducible results. Default is `False`.
1176 |
1177 | :return:
1178 | Dictionary of results
1179 |
1180 | con: array
1181 | Contrast matrix
1182 |
1183 | num_sig: int
1184 | Number of statistically significant results
1185 |
1186 | output: DataFrame
1187 | Difference score, p-value, critical value, and CI for each contrast
1188 | """
1189 |
1190 | x=pandas_to_arrays(x)
1191 | x=remove_nans_based_on_design(x, len(x), 'independent_groups')
1192 | J=len(x)
1193 |
1194 | mvec = [est(i, *args) for i in x]
1195 |
1196 | if con is None:
1197 | con=con1way(J)
1198 |
1199 | ncon=con.shape[1]
1200 |
1201 | if not nboot: # smaller J checked last so it takes precedence; an elif after J <= 8 would be unreachable
1202 | nboot = 5000
1203 | if J <= 3:
1204 | nboot = 2000
1205 | elif J <= 8:
1206 | nboot = 4000
1207 |
1208 | if not bhop:
1209 |
1210 | if alpha == .05:
1211 | dvec=[.05,
1212 | .025,
1213 | .0169,
1214 | .0127,
1215 | .0102,
1216 | .00851,
1217 | .0073,
1218 | .00639,
1219 | .00568,
1220 | .00511]
1221 |
1222 | if ncon > 10:
1223 | avec = .05 / np.arange(11,ncon+1)
1224 | dvec = np.append(dvec, avec)
1225 |
1226 | elif alpha == .01:
1227 | dvec =[.01,
1228 | .005,
1229 | .00334,
1230 | .00251,
1231 | .00201,
1232 | .00167,
1233 | .00143,
1234 | .00126,
1235 | .00112,
1236 | .00101]
1237 |
1238 | if ncon > 10:
1239 | avec = .01 / np.arange(11,ncon+1)
1240 | dvec = np.append(dvec, avec)
1241 |
1242 | else: # alpha is neither .05 nor .01
1243 | dvec = alpha / np.arange(1,ncon+1)
1244 |
1245 | else:
1246 | dvec = (ncon - np.arange(1,ncon+1) + 1) * alpha / ncon
1247 |
1248 | if seed:
1249 | np.random.seed(seed)
1250 |
1251 | bvec=np.full([J,nboot], np.nan)
1252 | for i, j in enumerate(x):
1253 | data = np.random.choice(j, size=(nboot, len(j)))
1254 | bvec[i,:]=[est(row, *args) for row in data]
1255 |
1256 | bcon=con.T @ bvec
1257 | tvec=con.T @ mvec
1258 | test=np.full(ncon, np.nan)
1259 | for d in range(ncon):
1260 | tv = np.sum(bcon[d,:] == 0) / nboot
1261 | test[d] = np.sum(bcon[d, :] > 0) / nboot + .5 * tv
1262 | if test[d] > .5:
1263 | test[d] = 1 - test[d]
1264 |
1265 | output=np.full([ncon,6], np.nan)
1266 | test=2*test
1267 | temp2=(-test).argsort()
1268 | zvec = dvec[:ncon]
1269 | output[temp2, 3] = zvec
1270 | icl = int(np.round(dvec[-1] * nboot / 2) + 1) - 1
1271 | icu = nboot - icl - 3
1272 |
1273 | for ic in range(ncon):
1274 | output[ic, 1] = tvec[ic]
1275 | output[ic, 0] = ic
1276 | output[ic, 2] = test[ic]
1277 | temp = np.sort(bcon[ic, :])
1278 | output[ic, 4] = temp[icl]
1279 | output[ic, 5] = temp[icu]
1280 |
1281 |
1282 | num_sig = np.sum(output[:, 2] <= output[:, 3])
1283 | cols=["con_num","psihat", "p_value", "p_crit", "ci_lower", "ci_upper"]
1284 | output=pd.DataFrame(output, columns=cols)
1285 |
1286 | results={'output': output, 'con': con, 'num_sig': num_sig}
1287 |
1288 | return results
1289 |
1290 | def l2drmci(x,y, est, *args, pairwise_drop_na=True, alpha=.05, nboot=2000, seed=False):
1291 |
1292 | """
1293 | Compute a bootstrap confidence interval for a
1294 | measure of location associated with the distribution of x-y.
1295 | That is, compare x and y by looking at all possible difference scores
1296 | in random samples of `x` and `y`. `x` and `y` are possibly dependent.
1297 |
1298 | Note that arguments up to and including `args` are positional arguments
1299 |
1300 | :param x: Pandas Series
1301 | Data for group one
1302 |
1303 | :param y: Pandas Series
1304 | Data for group two
1305 |
1306 | :param est: function
1307 | Measure of location (currently only `trim_mean` is supported)
1308 |
1309 | :param args: list/value
1310 | Parameter(s) for measure of location (e.g., .2)
1311 |
1312 | :param pairwise_drop_na: bool
1313 | If True, treat data as dependent and remove any row with missing data. If False,
1314 | remove missing data for each group separately (cannot handle unequal sample sizes)
1315 |
1316 | :param alpha: float
1317 | Alpha level (default is .05)
1318 |
1319 | :param nboot: int
1320 | Number of bootstrap samples (default is 2000)
1321 |
1322 | :param seed: bool
1323 | Random seed for reproducible results (default is `False`)
1324 |
1325 | :return:
1326 | Dictionary of results
1327 |
1328 | ci: list
1329 |
1330 | Confidence interval
1331 |
1332 | p_value: float
1333 |
1334 | p-value
1335 | """
1336 |
1337 | x, y = pandas_to_arrays([x, y])
1338 |
1339 | if pairwise_drop_na:
1340 | m1 = np.c_[x, y] # cbind
1341 | x = m1[~np.isnan(m1).any(axis=1)]
1342 |
1343 | else:
1344 | x = x[~np.isnan(x)]
1345 | y = y[~np.isnan(y)]
1346 |
1347 | if len(x) != len(y):
1348 | raise Exception("With unequal sample sizes, you might consider wmwpb "
1349 | "(currently not implemented)")
1350 |
1351 | else:
1352 | x = np.c_[x, y] # cbind
1353 |
1354 | if seed:
1355 | np.random.seed(seed)
1356 |
1357 | data = np.random.choice(x.shape[0], size=(nboot, len(x)))
1358 |
1359 | bvec=np.full(nboot, np.nan)
1360 | for i in range(nboot):
1361 | bvec[i] = \
1362 | loc2dif(x[data[i,:], 0], x[data[i,:], 1], est, *args,
1363 | drop_na=pairwise_drop_na)
1364 |
1365 | bvec=np.sort(bvec)
1366 | low = int(np.round((alpha / 2) * nboot) + 1) -1
1367 | up = nboot - low -2
1368 | temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
1369 | sig_level = 2 * (np.min([temp, 1 - temp]))
1370 | ci=[bvec[low], bvec[up]]
1371 |
1372 | results=dict(zip(['ci', 'p_value'], [ci, sig_level]))
1373 |
1374 | return results
1375 |
1376 | def loc2dif(x,y, est, *args, drop_na=True):
1377 |
1378 | """
1379 | Compute a measure of location associated with the
1380 | distribution of x-y, the typical difference between two randomly sampled values.
1381 | The measure of location is indicated by the argument
1382 | est.
1383 |
1384 | x and y are paired data or independent variables having the same length.
1385 | If x and y have different lengths, use the function wmwloc (not currently implemented)
1386 |
387 | An advantage of this estimator is its relatively high efficiency even under normality,
388 | compared with using sample means.
1389 |
1390 | :param x:
1391 | :param y:
1392 | :param est:
1393 | :param args:
1394 | :param drop_na:
1395 | :return:
1396 | """
1397 |
1398 | if drop_na:
1399 | m1 = np.c_[x, y] # cbind
1400 | m1 = m1[~np.isnan(m1).any(axis=1)]
1401 | x, y = [m1[:,0], m1[:,1]]
1402 |
1403 | else:
1404 | x=x[~np.isnan(x)]
1405 | y=y[~np.isnan(y)]
1406 |
1407 | temp=np.subtract.outer(x,y).reshape(len(x)*len(y))
1408 | val=est(temp, *args)
1409 |
1410 | return val
1411 |
1412 |
1413 |
1414 |
1415 |
1416 |
1417 |
1418 |
1419 |
1420 |
1421 |
1422 |
--------------------------------------------------------------------------------
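Editor's note — a usage sketch for the single-factor functions above. These calls
mirror the ones in hypothesize/tests/build_test_data.py (further below);
`create_example_data` and `trim_mean` come from hypothesize.utilities, and .2 is
the trimming proportion used throughout the test suite.

    import numpy as np
    from hypothesize.compare_groups_with_single_factor import (
        pb2gen, ydbt, lindepbt, bootdpci, tmcppb, l2drmci)
    from hypothesize.utilities import create_example_data, trim_mean

    np.random.seed(42)
    df2 = create_example_data(2)  # two dependent columns: cell_1, cell_2
    df3 = create_example_data(3)  # three columns

    pb2gen(df2.cell_1, df2.cell_2, trim_mean, .2)   # two independent groups
    ydbt(df2.cell_1, df2.cell_2)                    # paired trimmed means, bootstrap-t
    lindepbt(df3)                                   # dependent contrasts, Rom's method
    bootdpci(df3, trim_mean, .2)                    # percentile bootstrap, dependent groups
    tmcppb(df3, trim_mean, .2)                      # independent groups, percentile bootstrap
    l2drmci(df2.cell_1, df2.cell_2, trim_mean, .2)  # CI for the distribution of x - y

Each call returns a dictionary of results as documented in the docstrings above.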
/hypothesize/compare_groups_with_two_factors/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from ._compare_groups_with_two_factors import *
3 |
--------------------------------------------------------------------------------
/hypothesize/measuring_associations/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from ._measuring_associations import *
--------------------------------------------------------------------------------
/hypothesize/measuring_associations/_measuring_associations.py:
--------------------------------------------------------------------------------
1 | __all__ = ["wincor", "pbcor", "corb", "pball", "winall"]
2 |
3 | import numpy as np
4 | from scipy.stats.mstats import winsorize
5 | from scipy.stats import t, chi2, trim_mean
6 | from hypothesize.utilities import pandas_to_arrays
7 |
8 | def wincor(x, y, tr=.2):
9 |
10 | """
11 | Compute the winsorized correlation between `x` and `y`.
12 | This function also returns the winsorized covariance.
13 |
14 |
15 | :param x: Pandas Series
16 | Data for group one
17 |
18 | :param y: Pandas Series
19 | Data for group two
20 |
21 | :param tr: float
22 | Proportion to winsorize (default is .2)
23 |
24 | :return:
25 | Dictionary of results
26 |
27 | cor: float
28 | Winsorized correlation
29 |
30 | nval: int
31 | Number of observations
32 |
33 | sig: float
34 | p-value
35 |
36 | wcov: float
37 | Winsorized covariance
38 | """
39 |
40 | if type(x) is not np.ndarray:
41 | x, y=pandas_to_arrays([x, y])
42 |
43 | m1 = np.c_[x, y] # cbind
44 | m1 = m1[~np.isnan(m1).any(axis=1)]
45 | nval = m1.shape[0]
46 | x = m1[:, 0]
47 | y = m1[:, 1]
48 | g = np.floor(tr * len(x))
49 | xvec = winsorize(x, limits=(tr,tr))
50 | yvec = winsorize(y, limits=(tr,tr))
51 | wcor = np.corrcoef(xvec, yvec)[0,1]
52 | wcov = np.cov(xvec, yvec)[0,1]
53 | test = wcor * np.sqrt((len(x) - 2) / (1. - wcor ** 2))
54 | sig = 2 * (1 - t.cdf(abs(test), len(x) - 2 * g - 2))
55 |
56 | res={'cor': wcor, 'wcov': wcov, 'sig': sig, 'nval': nval}
57 |
58 | return res
59 |
60 | def pbcor(x, y, beta=.2):
61 |
62 | """
63 | Compute the percentage bend
64 | correlation between `x` and `y`
65 |
66 |
67 | :param x: Pandas Series
68 | Data for group one
69 |
70 | :param y: Pandas Series
71 | Data for group two
72 |
73 | :param beta: float
74 | `0 < beta < .5`. Beta is analogous to trimming in
75 | other functions and related to the measure of
76 | dispersion used in the percentage bend
77 | calculation.
78 |
79 | :return:
80 | Dictionary of results
81 |
82 | cor: float
83 | Correlation
84 |
85 | nval: int
86 | Number of observations
87 |
88 | p_value
89 | p-value
90 |
91 | test: float
92 | Test statistic
93 |
94 | """
95 |
96 | if type(x) is not np.ndarray:
97 | x, y = pandas_to_arrays([x, y])
98 |
99 | if len(x) != len(y):
100 | raise Exception("The arrays do not have equal lengths")
101 |
102 | m1 = np.c_[x, y] # cbind
103 | m1 = m1[~np.isnan(m1).any(axis=1)]
104 | nval = m1.shape[0]
105 | x = m1[:, 0]
106 | y = m1[:, 1]
107 | temp = np.sort(abs(x - np.median(x)))
108 | omhatx = temp[int(np.floor((1 - beta) * len(x)))-1]
109 | temp = np.sort(abs(y - np.median(y)))
110 | omhaty = temp[int(np.floor((1 - beta) * len(y)))-1]
111 |
112 | a = (x - pbos(x, beta)) / omhatx
113 | b = (y - pbos(y, beta)) / omhaty
114 |
115 | a = np.where(a <= -1, -1, a)
116 | a = np.where(a >= 1, 1, a)
117 | b = np.where(b <= -1, -1, b)
118 | b = np.where(b >= 1, 1, b)
119 |
120 | pbcor_result = sum(a * b) / np.sqrt(sum(a ** 2) * sum(b ** 2))
121 | test = pbcor_result * np.sqrt((len(x) - 2) / (1 - pbcor_result ** 2))
122 | sig = 2 * (1 - t.cdf(abs(test), len(x) - 2))
123 |
124 | res = {'cor': pbcor_result, 'test': test, 'p_value': sig, 'nval': nval}
125 | return res
126 |
127 | def pbos(x, beta=.2):
128 |
129 | """
130 | Compute the one-step percentage bend measure of location
131 |
132 | :param x:
133 | :param beta:
134 | :return:
135 | """
136 |
137 | temp = np.sort(abs(x - np.median(x)))
138 | omhatx = temp[int(np.floor((1 - beta) * len(x)))-1]
139 | psi = (x - np.median(x)) / omhatx
140 | i1 = len(psi[psi < -1])
141 | i2 = len(psi[psi > 1])
142 |
143 | sx = np.where(psi < -1, 0, x)
144 | sx = np.where(psi > 1, 0, sx)
145 |
146 | pbos_result = (sum(sx) + omhatx * (i2 - i1)) / (len(x) - i1 - i2)
147 |
148 | return pbos_result
149 |
150 | def corb(corfun, x, y, alpha, nboot, *args, seed=False):
151 |
152 | """
153 | Compute a 1-alpha confidence interval for a
154 | correlation using percentile bootstrap method
155 | The function `corfun` is any function that returns a
156 | correlation coefficient. The functions pbcor and
157 | wincor follow this convention. When using
158 | Pearson's correlation, and when n<250, use
159 | lsfitci instead (not yet implemented).
160 |
161 | Note that arguments up to and including `args` are positional arguments
162 |
163 | :param corfun: function
164 | corfun is any function that returns a correlation coefficient
165 |
166 | :param x: Pandas Series
167 | Data for group one
168 |
169 | :param y: Pandas Series
170 | Data for group two
171 |
172 | :param alpha: float
173 | Alpha level (no default; the examples in this package use .05)
174 |
175 | :param nboot: int
176 | Number of bootstrap samples
177 |
178 | :param args: list/value
179 | List of arguments to corfun (e.g., .2)
180 |
181 | :param seed: bool
182 | Random seed for reproducible results. Default is `False`.
183 |
184 | :return:
185 | Dictionary of results
186 |
187 | ci: list
188 | Confidence interval
189 |
190 | cor: float
191 | Correlation estimate
192 |
193 | p_value: float
194 | p-value
195 |
196 | """
197 |
198 | x, y=pandas_to_arrays([x, y])
199 |
200 |
201 | m1 = np.c_[x, y] # cbind
202 | m1 = m1[~np.isnan(m1).any(axis=1)]
203 | nval = m1.shape[0]
204 | x = m1[:, 0]
205 | y = m1[:, 1]
206 | est = corfun(x, y, *args)['cor']#[0]
207 |
208 | if seed:
209 | np.random.seed(seed)
210 |
211 | data_inds = np.random.choice(len(x), size=(nboot, len(x)))
212 | bvec = np.array([corbsub(row_inds, x, y, corfun, *args) for row_inds in data_inds])
213 |
214 | ihi = int(np.floor((1 - alpha / 2) * nboot + .5))
215 | ilow = int(np.floor((alpha / 2) * nboot + .5))
216 | bsort = sorted(bvec)
217 | corci = [bsort[ilow], bsort[ihi]]
218 | phat = sum(bvec < 0) / nboot
219 | sig = 2 * min(phat, 1 - phat)
220 |
221 | #return corci, sig, est
222 | return {'ci': corci, 'p_value': sig, 'cor': est}
223 |
224 | def corbsub(isub, x, y, corfun, *args):
225 |
226 | """
227 | Compute correlation for x[isub] and y[isub]
228 | isub is a vector of length n,
229 | a bootstrap sample from the sequence of integers
230 | 0, 1, 2, ..., n - 1
231 |
232 | This function is used by other functions when computing
233 | bootstrap estimates.
234 |
235 | corfun is some correlation function
236 | """
237 |
238 | corbsub_results = corfun(x[isub], y[isub], *args)['cor']#[0]
239 |
240 | return corbsub_results
241 |
242 | def pball(x, beta=.2):
243 |
244 | """
245 | Compute the percentage bend correlation matrix
246 | for all pairs of columns in `x`. This function also
247 | returns the two-sided significance level for all pairs
248 | of variables, plus a test of zero correlation
249 | among all pairs.
250 |
251 |
252 | :param x: Pandas DataFrame
253 | Each column represents a variable to use in the correlations
254 |
255 | :param beta: float
256 | `0 < beta < .5`. Beta is analogous to trimming in
257 | other functions and related to the measure of
258 | dispersion used in the percentage bend
259 | calculation.
260 |
261 | :return:
262 | Dictionary of results
263 |
264 | H: float
265 | The test statistic $H$. Reject the null hypothesis if $H > \chi^2_{1-\alpha}$,
266 | the $1-\alpha$ quantile.
267 |
268 | H_p_value: float
269 | p-value corresponding to the test that all correlations are equal to zero
270 |
271 | p_value: array
272 | p-value matrix corresponding to each pairwise correlation
273 |
274 | pbcorm: array
275 | Correlation matrix
276 |
277 | """
278 |
279 | m=x.values
280 | ncol=m.shape[1]
281 |
282 | pbcorm=np.zeros([ncol, ncol])
283 | temp=np.ones([ncol, ncol])
284 | siglevel=np.full([ncol, ncol], np.nan)
285 | #cmat = np.zeros([ncol, ncol])
286 |
287 | for i in range(ncol):
288 | for j in range(i,ncol):
289 | if i < j:
290 | pbc = pbcor(m[:, i], m[:, j], beta)
291 | pbcorm[i, j] = pbc['cor']
292 | temp[i, j] = pbcorm[i, j]
293 | temp[j, i] = pbcorm[i, j]
294 | siglevel[i, j] = pbc['p_value']
295 | siglevel[j, i] = siglevel[i, j]
296 |
297 |
298 | tstat = pbcorm * np.sqrt((m.shape[0] - 2) / (1 - pbcorm ** 2))
299 | cmat = np.sqrt((m.shape[0] - 2.5) * np.log(1 + tstat ** 2 / (m.shape[0] - 2)))
300 | bv = 48 * (m.shape[0] - 2.5) ** 2
301 | cmat = \
302 | cmat + (cmat ** 3 + 3 * cmat) / bv - (4 * cmat ** 7 + 33 * cmat ** 5 + 240 * cmat ** 3 + 855 * cmat) / \
303 | (10 * bv ** 2 + 8 * bv * cmat ** 4 + 1000 * bv)
304 |
305 | H = np.sum(cmat ** 2)
306 | df = ncol * (ncol - 1) / 2
307 | h_siglevel = 1 - chi2.cdf(H, df)
308 |
309 | results={"pbcorm": temp, "p_value": siglevel,
310 | "H":H, "H_p_value": h_siglevel}
311 |
312 | return results
313 |
314 | def winall(x, tr=.2):
315 |
316 | """
317 | Compute the Winsorized correlation and covariance matrix
318 | for all pairs of columns in `x`. This function also
319 | returns the two-sided significance level for all pairs
320 | of variables, plus a test of zero correlation
321 | among all pairs.
322 |
323 |
324 | :param x: Pandas DataFrame
325 | Each column represents a variable to use in the correlations
326 |
327 | :param tr: float
328 | Proportion to winsorize (default is .2)
329 |
330 | :return:
331 | Dictionary of results
332 |
333 | center: array
334 | Trimmed mean for each group
335 |
336 | p_value: array
337 | p-value array corresponding to the pairwise correlations
338 |
339 | wcor: array
340 | Winsorized correlation matrix
341 |
342 | wcov: array
343 | Winsorized covariance matrix
344 |
345 |
346 | """
347 |
348 | m = x.values
349 | ncol = m.shape[1]
350 |
351 | wcor = np.ones([ncol, ncol])
352 | wcov = np.zeros([ncol, ncol])
353 | siglevel = np.full([ncol, ncol], np.nan)
354 |
355 | for i in range(ncol):
356 | #ip = i
357 | for j in range(i,ncol):
358 | val = wincor(m[:, i], m[:, j], tr)
359 | wcor[i, j] = val['cor']
360 | wcor[j, i] = wcor[i, j]
361 |
362 | if i == j:
363 | wcor[i, j] = 1
364 |
365 | wcov[i, j] = val['wcov']
366 | wcov[j, i] = wcov[i, j]
367 |
368 | if i != j:
369 | siglevel[i, j] = val['sig']
370 | siglevel[j, i] = siglevel[i, j]
371 |
372 | m=m[~np.isnan(m).any(axis=1)]
373 | cent=trim_mean(m, tr)
374 |
375 | return {"wcor": wcor, "wcov": wcov, "center": cent, "p_value": siglevel}
376 |
--------------------------------------------------------------------------------
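Editor's note — a usage sketch for the association measures above, mirroring the
calls in hypothesize/tests/build_test_data.py (alpha=.05, nboot=100, tr=.2, as
set there).

    import numpy as np
    from hypothesize.measuring_associations import wincor, pbcor, corb, pball, winall
    from hypothesize.utilities import create_example_data

    np.random.seed(42)
    df = create_example_data(3)

    wincor(df.cell_1, df.cell_2)                      # winsorized correlation and covariance
    pbcor(df.cell_1, df.cell_2)                       # percentage bend correlation
    corb(wincor, df.cell_1, df.cell_2, .05, 100, .2)  # percentile bootstrap CI for a correlation
    pball(df)                                         # correlation matrix plus the H test
    winall(df)                                        # winsorized correlation/covariance matrices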
/hypothesize/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/__init__.py
--------------------------------------------------------------------------------
/hypothesize/tests/build_test_data.py:
--------------------------------------------------------------------------------
1 | from hypothesize.measuring_associations import *
2 | from hypothesize.compare_groups_with_single_factor import *
3 | from hypothesize.compare_groups_with_two_factors import *
4 | from hypothesize.utilities import create_example_data, trim_mean, con1way, con2way
5 | import numpy as np
6 | import pickle
7 |
8 | alpha=.05
9 | nboot=100
10 | tr=.2
11 | beta=.2
12 |
13 | def pkl_l2drmci():
14 |
15 | np.random.seed(42)
16 | df = create_example_data(2)
17 | results = l2drmci(df.cell_1, df.cell_2, trim_mean, tr)
18 | pickle.dump(results, open("hypothesize/tests/test_data/l2drmci.pkl", "wb"))
19 |
20 | def pkl_linconb():
21 |
22 | np.random.seed(42)
23 | df = create_example_data(3)
24 | results = linconb(df, con1way(3))
25 | pickle.dump(results, open("hypothesize/tests/test_data/linconb.pkl", "wb"))
26 |
27 | def pkl_pb2gen():
28 |
29 | np.random.seed(42)
30 | df = create_example_data(2)
31 | results = pb2gen(df.cell_1, df.cell_2, trim_mean, tr)
32 | pickle.dump(results, open("hypothesize/tests/test_data/pb2gen.pkl", "wb"))
33 |
34 | def pkl_tmcppb():
35 |
36 | np.random.seed(42)
37 | df = create_example_data(3)
38 | results = tmcppb(df, trim_mean, tr)
39 | pickle.dump(results, open("hypothesize/tests/test_data/tmcppb.pkl", "wb"))
40 |
41 | def pkl_yuenbt():
42 |
43 | np.random.seed(42)
44 | df = create_example_data(2)
45 | results = yuenbt(df.cell_1, df.cell_2)
46 | pickle.dump(results, open("hypothesize/tests/test_data/yuenbt.pkl", "wb"))
47 |
48 | def pkl_bootdpci():
49 |
50 | np.random.seed(42)
51 | df = create_example_data(3)
52 | results = bootdpci(df, trim_mean, tr)
53 | pickle.dump(results, open("hypothesize/tests/test_data/bootdpci.pkl", "wb"))
54 |
55 | def pkl_rmmcppb():
56 |
57 | np.random.seed(42)
58 | df = create_example_data(3)
59 | results = rmmcppb(df, trim_mean, tr)
60 | pickle.dump(results, open("hypothesize/tests/test_data/rmmcppb.pkl", "wb"))
61 |
62 | def pkl_lindepbt():
63 |
64 | np.random.seed(42)
65 | df = create_example_data(3)
66 | results = lindepbt(df)
67 | pickle.dump(results, open("hypothesize/tests/test_data/lindepbt.pkl", "wb"))
68 |
69 | def pkl_ydbt():
70 |
71 | np.random.seed(42)
72 | df = create_example_data(2)
73 | results = ydbt(df.cell_1, df.cell_2)
74 | pickle.dump(results, open("hypothesize/tests/test_data/ydbt.pkl", "wb"))
75 |
76 | def pkl_wwmcppb():
77 |
78 | np.random.seed(42)
79 | df = create_example_data(6)
80 | results = wwmcppb(2, 3, df, trim_mean, tr)
81 | pickle.dump(results, open("hypothesize/tests/test_data/wwmcppb.pkl", "wb"))
82 |
83 | def pkl_wwmcpbt():
84 |
85 | np.random.seed(42)
86 | df = create_example_data(6)
87 | results = wwmcpbt(2, 3, df, tr)
88 | pickle.dump(results, open("hypothesize/tests/test_data/wwmcpbt.pkl", "wb"))
89 |
90 | def pkl_bwamcp():
91 |
92 | np.random.seed(42)
93 | df = create_example_data(6)
94 | results = bwamcp(2, 3, df)
95 | pickle.dump(results, open("hypothesize/tests/test_data/bwamcp.pkl", "wb"))
96 |
97 | def pkl_bwbmcp():
98 |
99 | np.random.seed(42)
100 | df = create_example_data(6)
101 | results = bwbmcp(2, 3, df)
102 | pickle.dump(results, open("hypothesize/tests/test_data/bwbmcp.pkl", "wb"))
103 |
104 | def pkl_bwmcp():
105 |
106 | np.random.seed(42)
107 | df = create_example_data(6)
108 | results = bwmcp(2, 3, df)
109 | pickle.dump(results, open("hypothesize/tests/test_data/bwmcp.pkl", "wb"))
110 |
111 | def pkl_bwimcp():
112 |
113 | np.random.seed(42)
114 | df = create_example_data(6)
115 | results = bwimcp(2, 3, df)
116 | pickle.dump(results, open("hypothesize/tests/test_data/bwimcp.pkl", "wb"))
117 |
118 | def pkl_bwmcppb():
119 |
120 | np.random.seed(42)
121 | df = create_example_data(6)
122 | results = bwmcppb(2, 3, df, trim_mean, tr)
123 | pickle.dump(results, open("hypothesize/tests/test_data/bwmcppb.pkl", "wb"))
124 |
125 | def pkl_spmcpa():
126 |
127 | np.random.seed(42)
128 | df = create_example_data(6)
129 | results = spmcpa(2, 3, df, trim_mean, tr)
130 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpa.pkl", "wb"))
131 |
132 | def pkl_spmcpb():
133 |
134 | np.random.seed(42)
135 | df = create_example_data(6)
136 | results = spmcpb(2, 3, df, trim_mean, tr)
137 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpb.pkl", "wb"))
138 |
139 | def pkl_spmcpi():
140 |
141 | np.random.seed(42)
142 | df = create_example_data(6)
143 | results = spmcpi(2, 3, df, trim_mean, tr)
144 | pickle.dump(results, open("hypothesize/tests/test_data/spmcpi.pkl", "wb"))
145 |
146 | def pkl_corb():
147 |
148 | np.random.seed(42)
149 | df = create_example_data(2)
150 | results = corb(wincor, df.cell_1, df.cell_2, alpha, nboot, tr)
151 | pickle.dump(results, open("hypothesize/tests/test_data/corb.pkl", "wb"))
152 |
153 | def pkl_pball():
154 |
155 | np.random.seed(42)
156 | df = create_example_data(3)
157 | results = pball(df)
158 | pickle.dump(results, open("hypothesize/tests/test_data/pball.pkl", "wb"))
159 |
160 | def pkl_pbcor():
161 |
162 | np.random.seed(42)
163 | df = create_example_data(2)
164 | results = pbcor(df.cell_1, df.cell_2)
165 | pickle.dump(results, open("hypothesize/tests/test_data/pbcor.pkl", "wb"))
166 |
167 | def pkl_winall():
168 |
169 | np.random.seed(42)
170 | df = create_example_data(3)
171 | results = winall(df)
172 | pickle.dump(results, open("hypothesize/tests/test_data/winall.pkl", "wb"))
173 |
174 | def pkl_wincor():
175 |
176 | np.random.seed(42)
177 | df = create_example_data(2)
178 | results = wincor(df.cell_1, df.cell_2)
179 | pickle.dump(results, open("hypothesize/tests/test_data/wincor.pkl", "wb"))
180 |
--------------------------------------------------------------------------------
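Editor's note — each pkl_* function above writes one pickled fixture that the
matching test in test_funcs.py later loads and compares against. A sketch for
rebuilding all fixtures in one pass is below; it is equivalent to
run_all_pkl_funcs in test_funcs.py but avoids importing that module (which
changes the working directory at import time). Run it from the repository root,
since the pickle paths in build_test_data.py are relative to it.

    from hypothesize.tests import build_test_data

    # call every pickle-builder defined in the module
    for name in dir(build_test_data):
        func = getattr(build_test_data, name)
        if callable(func) and name.startswith('pkl'):
            func()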
/hypothesize/tests/test_data/bootdpci.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bootdpci.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/bwamcp.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwamcp.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/bwbmcp.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwbmcp.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/bwimcp.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwimcp.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/bwmcp.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwmcp.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/bwmcppb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/bwmcppb.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/corb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/corb.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/l2drmci.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/l2drmci.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/linconb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/linconb.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/lindepbt.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/lindepbt.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/pb2gen.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pb2gen.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/pball.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pball.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/pbcor.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/pbcor.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/rmmcppb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/rmmcppb.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/spmcpa.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpa.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/spmcpb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpb.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/spmcpi.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/spmcpi.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/tmcppb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/tmcppb.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/winall.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/winall.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/wincor.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wincor.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/wwmcpbt.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wwmcpbt.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/wwmcppb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/wwmcppb.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/ydbt.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/ydbt.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_data/yuenbt.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alcampopiano/hypothesize/fd9766b7b66f59ae000b4038926f95d0e2c56f70/hypothesize/tests/test_data/yuenbt.pkl
--------------------------------------------------------------------------------
/hypothesize/tests/test_funcs.py:
--------------------------------------------------------------------------------
1 | from hypothesize.measuring_associations import *
2 | from hypothesize.compare_groups_with_single_factor import *
3 | from hypothesize.compare_groups_with_two_factors import *
4 | from hypothesize.utilities import create_example_data, trim_mean, con1way
5 | import numpy as np
6 | import pandas as pd
7 | from pandas._testing import assert_frame_equal
8 | import pickle
9 | import os
10 |
11 | alpha=.05
12 | nboot=100
13 | tr=.2
14 | beta=.2
15 |
16 | try:
17 | os.chdir('hypothesize/tests')
18 | except OSError:
19 | pass
20 |
21 | def run_all_pkl_funcs():
22 |
23 | from hypothesize.tests import build_test_data
24 |
25 | for i in dir(build_test_data):
26 | item = getattr(build_test_data,i)
27 | if callable(item) and i.startswith('pkl'):
28 | item()
29 |
30 | def build_truth_list(expected_results):
31 |
32 | truth_list=[]
33 |
34 | if type(expected_results) is list:
35 |
36 | for item in expected_results:
37 | nested_truth_list=build_truth_list(item)
38 | truth_list.append(nested_truth_list)
39 |
40 | elif type(expected_results) is dict:
41 |
42 | for k in expected_results:
43 |
44 | if type(expected_results[k]) is dict:
45 | nested_truth_list=[True] * len(expected_results[k])
46 |
47 | truth_list.append(nested_truth_list)
48 | else:
49 | truth_list.append(True)
50 |
51 | return truth_list
52 |
53 | def check_dict_items_equality(expected_results, actual_results):
54 |
55 | actual_truth=[]
56 |
57 | if type(expected_results) is list:
58 | for exp_item, act_item in zip(expected_results, actual_results):
59 | nested_truth = check_dict_items_equality(exp_item, act_item)
60 | actual_truth.append(nested_truth)
61 |
62 | elif type(expected_results) is dict:
63 |
64 | for k in expected_results:
65 |
66 | if type(expected_results[k]) is np.ndarray:
67 |
68 | # truth=True if not np.testing.assert_array_equal(expected_results[k], actual_results[k]) \
69 | # else False
70 |
71 | truth=True if not np.testing.assert_allclose(expected_results[k], actual_results[k]) \
72 | else False
73 |
74 | actual_truth.append(truth)
75 |
76 | elif type(expected_results[k]) is pd.DataFrame:
77 |
78 | # truth=True if not assert_frame_equal(expected_results[k], actual_results[k]) \
79 | # else False
80 |
81 | truth=True if not assert_frame_equal(expected_results[k], actual_results[k], check_less_precise=True) \
82 | else False
83 |
84 | actual_truth.append(truth)
85 |
86 | elif type(expected_results[k]) is dict:
87 | nested_truth=check_dict_items_equality(expected_results[k], actual_results[k])
88 | actual_truth.append(nested_truth)
89 |
90 | else:
91 |
92 | if expected_results[k] is None and actual_results[k] is None:
93 | truth = True
94 | else:
95 | truth=True if not np.testing.assert_almost_equal(expected_results[k], actual_results[k]) \
96 | else False
97 |
98 | actual_truth.append(truth)
99 |
100 | return actual_truth
101 |
102 | def test_l2drmci():
103 |
104 | np.random.seed(42)
105 | df = create_example_data(2)
106 | results = l2drmci(df.cell_1, df.cell_2, trim_mean, tr)
107 | expected = pickle.load(open("test_data/l2drmci.pkl", "rb"))
108 | expected_truth=build_truth_list(expected)
109 | actual_truth = check_dict_items_equality(expected, results)
110 |
111 | #assert results == expected
112 | assert actual_truth == expected_truth
113 |
114 | def test_linconb():
115 |
116 | np.random.seed(42)
117 | df = create_example_data(3)
118 | results = linconb(df, con1way(3))
119 | expected = pickle.load(open("test_data/linconb.pkl", "rb"))
120 | expected_truth=build_truth_list(expected)
121 | actual_truth = check_dict_items_equality(expected, results)
122 |
123 | assert actual_truth == expected_truth
124 |
125 | def test_pb2gen():
126 |
127 | np.random.seed(42)
128 | df = create_example_data(2)
129 | results = pb2gen(df.cell_1, df.cell_2, trim_mean, tr)
130 | expected = pickle.load(open("test_data/pb2gen.pkl", "rb"))
131 | expected_truth=build_truth_list(expected)
132 | actual_truth = check_dict_items_equality(expected, results)
133 |
134 | #assert results == expected
135 | assert actual_truth == expected_truth
136 |
137 | def test_tmcppb():
138 |
139 | np.random.seed(42)
140 | df = create_example_data(3)
141 | results = tmcppb(df, trim_mean, tr)
142 | expected = pickle.load(open("test_data/tmcppb.pkl", "rb"))
143 | expected_truth=build_truth_list(expected)
144 | actual_truth = check_dict_items_equality(expected, results)
145 |
146 | assert actual_truth == expected_truth
147 |
148 | def test_yuenbt():
149 |
150 | np.random.seed(42)
151 | df = create_example_data(2)
152 | results = yuenbt(df.cell_1, df.cell_2)
153 | expected = pickle.load(open("test_data/yuenbt.pkl", "rb"))
154 | expected_truth=build_truth_list(expected)
155 | actual_truth = check_dict_items_equality(expected, results)
156 |
157 | #assert results == expected
158 | assert actual_truth == expected_truth
159 |
160 | def test_bootdpci():
161 |
162 | np.random.seed(42)
163 | df = create_example_data(3)
164 | results = bootdpci(df, trim_mean, tr)
165 | expected = pickle.load(open("test_data/bootdpci.pkl", "rb"))
166 | expected_truth=build_truth_list(expected)
167 | actual_truth = check_dict_items_equality(expected, results)
168 |
169 | assert actual_truth == expected_truth
170 |
171 | def test_rmmcppb():
172 |
173 | np.random.seed(42)
174 | df = create_example_data(3)
175 | results = rmmcppb(df, trim_mean, tr)
176 | expected = pickle.load(open("test_data/rmmcppb.pkl", "rb"))
177 | expected_truth=build_truth_list(expected)
178 | actual_truth = check_dict_items_equality(expected, results)
179 |
180 | assert actual_truth == expected_truth
181 |
182 | def test_lindepbt():
183 |
184 | np.random.seed(42)
185 | df = create_example_data(3)
186 | results = lindepbt(df)
187 | expected = pickle.load(open("test_data/lindepbt.pkl", "rb"))
188 | expected_truth=build_truth_list(expected)
189 | actual_truth = check_dict_items_equality(expected, results)
190 |
191 | assert actual_truth == expected_truth
192 |
193 | def test_ydbt():
194 |
195 | np.random.seed(42)
196 | df = create_example_data(2)
197 | results = ydbt(df.cell_1, df.cell_2)
198 | expected = pickle.load(open("test_data/ydbt.pkl", "rb"))
199 | expected_truth=build_truth_list(expected)
200 | actual_truth = check_dict_items_equality(expected, results)
201 |
202 | #assert results == expected
203 | assert actual_truth == expected_truth
204 |
205 | def test_wwmcppb():
206 |
207 | np.random.seed(42)
208 | df = create_example_data(6)
209 | results = wwmcppb(2, 3, df, trim_mean, tr)
210 | expected = pickle.load(open("test_data/wwmcppb.pkl", "rb"))
211 | expected_truth=build_truth_list(expected)
212 | actual_truth = check_dict_items_equality(expected, results)
213 |
214 | assert actual_truth == expected_truth
215 |
216 | def test_wwmcpbt():
217 |
218 | np.random.seed(42)
219 | df = create_example_data(6)
220 | results = wwmcpbt(2, 3, df, tr)
221 | expected = pickle.load(open("test_data/wwmcpbt.pkl", "rb"))
222 | expected_truth=build_truth_list(expected)
223 | actual_truth = check_dict_items_equality(expected, results)
224 |
225 | assert actual_truth == expected_truth
226 |
227 | def test_bwamcp():
228 |
229 | np.random.seed(42)
230 | df = create_example_data(6)
231 | results = bwamcp(2, 3, df)
232 | expected = pickle.load(open("test_data/bwamcp.pkl", "rb"))
233 | expected_truth=build_truth_list(expected)
234 | actual_truth = check_dict_items_equality(expected, results)
235 |
236 | assert actual_truth == expected_truth
237 |
238 | def test_bwbmcp():
239 |
240 | np.random.seed(42)
241 | df = create_example_data(6)
242 | results = bwbmcp(2, 3, df)
243 | expected = pickle.load(open("test_data/bwbmcp.pkl", "rb"))
244 |
245 | print(results)
246 | print(expected)
247 | expected_truth=build_truth_list(expected)
248 | actual_truth = check_dict_items_equality(expected, results)
249 |
250 | assert actual_truth == expected_truth
251 |
252 | def test_bwmcp():
253 |
254 | np.random.seed(42)
255 | df = create_example_data(6)
256 | results = bwmcp(2, 3, df)
257 | expected = pickle.load(open("test_data/bwmcp.pkl", "rb"))
258 | expected_truth=build_truth_list(expected)
259 | actual_truth = check_dict_items_equality(expected, results)
260 |
261 | assert actual_truth == expected_truth
262 |
263 | def test_bwimcp():
264 |
265 | np.random.seed(42)
266 | df = create_example_data(6)
267 | results = bwimcp(2, 3, df)
268 | expected = pickle.load(open("test_data/bwimcp.pkl", "rb"))
269 | expected_truth=build_truth_list(expected)
270 | actual_truth = check_dict_items_equality(expected, results)
271 |
272 | assert actual_truth == expected_truth
273 |
274 | def test_bwmcppb():
275 |
276 | np.random.seed(42)
277 | df = create_example_data(6)
278 | results = bwmcppb(2, 3, df, trim_mean, tr)
279 | expected = pickle.load(open("test_data/bwmcppb.pkl", "rb"))
280 | expected_truth=build_truth_list(expected)
281 | actual_truth = check_dict_items_equality(expected, results)
282 |
283 | assert actual_truth == expected_truth
284 |
285 | def test_spmcpa():
286 |
287 | np.random.seed(42)
288 | df = create_example_data(6)
289 | results = spmcpa(2, 3, df, trim_mean, tr)
290 | expected = pickle.load(open("test_data/spmcpa.pkl", "rb"))
291 | expected_truth=build_truth_list(expected)
292 | actual_truth = check_dict_items_equality(expected, results)
293 |
294 | assert actual_truth == expected_truth
295 |
296 | def test_spmcpb():
297 |
298 | np.random.seed(42)
299 | df = create_example_data(6)
300 | results = spmcpb(2, 3, df, trim_mean, tr)
301 | expected = pickle.load(open("test_data/spmcpb.pkl", "rb"))
302 | expected_truth=build_truth_list(expected)
303 | actual_truth = check_dict_items_equality(expected, results)
304 |
305 | assert actual_truth == expected_truth
306 |
307 | def test_spmcpi():
308 |
309 | np.random.seed(42)
310 | df = create_example_data(6)
311 | results = spmcpi(2, 3, df, trim_mean, tr)
312 | expected = pickle.load(open("test_data/spmcpi.pkl", "rb"))
313 | expected_truth=build_truth_list(expected)
314 | actual_truth = check_dict_items_equality(expected, results)
315 |
316 | assert actual_truth == expected_truth
317 |
318 | def test_corb():
319 |
320 | np.random.seed(42)
321 | df = create_example_data(2)
322 | results = corb(wincor, df.cell_1, df.cell_2, alpha, nboot, tr)
323 | expected = pickle.load(open("test_data/corb.pkl", "rb"))
324 | expected_truth = build_truth_list(expected)
325 | actual_truth = check_dict_items_equality(expected, results)
326 |
327 | #assert results == expected
328 | assert actual_truth == expected_truth
329 |
330 | def test_pball():
331 |
332 | np.random.seed(42)
333 | df = create_example_data(3)
334 | results = pball(df)
335 | expected = pickle.load(open("test_data/pball.pkl", "rb"))
336 | expected_truth=build_truth_list(expected)
337 | actual_truth = check_dict_items_equality(expected, results)
338 |
339 | assert actual_truth == expected_truth
340 |
341 | def test_pbcor():
342 |
343 | np.random.seed(42)
344 | df = create_example_data(2)
345 | results = pbcor(df.cell_1, df.cell_2)
346 | expected = pickle.load(open("test_data/pbcor.pkl", "rb"))
347 | expected_truth=build_truth_list(expected)
348 | actual_truth = check_dict_items_equality(expected, results)
349 |
350 | #assert results == expected
351 | assert actual_truth == expected_truth
352 |
353 | def test_winall():
354 |
355 | np.random.seed(42)
356 | df = create_example_data(3)
357 | results = winall(df)
358 | expected = pickle.load(open("test_data/winall.pkl", "rb"))
359 | expected_truth=build_truth_list(expected)
360 | actual_truth = check_dict_items_equality(expected, results)
361 |
362 | assert actual_truth == expected_truth
363 |
364 | def test_wincor():
365 |
366 | np.random.seed(42)
367 | df = create_example_data(2)
368 | results = wincor(df.cell_1, df.cell_2)
369 | expected = pickle.load(open("test_data/wincor.pkl", "rb"))
370 | print(results)
371 | print(expected)
372 | expected_truth = build_truth_list(expected)
373 | actual_truth = check_dict_items_equality(expected, results)
374 |
375 | #assert results == expected
376 | assert actual_truth == expected_truth
377 |
378 |
379 |
380 |
--------------------------------------------------------------------------------
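Editor's note — a toy illustration of how build_truth_list and
check_dict_items_equality (defined above) work together, assuming both helpers
are in scope (e.g., run inside test_funcs.py). For results that compare equal,
the actual truth structure reproduces the all-True template built from the
expected results alone.

    import numpy as np

    expected = {'p_value': 0.05, 'ci': np.array([0.1, 0.9])}
    actual = {'p_value': 0.05, 'ci': np.array([0.1, 0.9])}

    # both expressions evaluate to [True, True]
    assert check_dict_items_equality(expected, actual) == build_truth_list(expected)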
/paper/paper.bib:
--------------------------------------------------------------------------------
1 | @article{20000755025,
2 | author="Tukey, J. W.",
3 | title="A survey of sampling from contaminated distributions",
4 | journal="Contributions to Probability and Statistics",
5 | ISSN="",
6 | publisher="Stanford University Press",
7 | year="1960",
8 | month="",
9 | volume="",
10 | number="",
11 | pages="448-485",
12 | URL="https://ci.nii.ac.jp/naid/20000755025/en/",
13 | DOI="",
14 | }
15 |
16 | @article{bradley1993introduction,
17 | title={An introduction to the bootstrap},
18 | author={Efron, Bradley and Tibshirani, Robert J},
19 | journal={Monographs on Statistics and Applied Probability},
20 | volume={57},
21 | year={1993}
22 | }
23 |
24 | @article{wilcox1998many,
25 | title={How many discoveries have been lost by ignoring modern statistical methods?},
26 | author={Wilcox, Rand R},
27 | journal={American Psychologist},
28 | volume={53},
29 | number={3},
30 | pages={300},
31 | year={1998},
32 | publisher={American Psychological Association},
33 | DOI={10.1037/0003-066X.53.3.300}
34 | }
35 |
36 | @book{wilcox2013introduction,
37 | title={Introduction to robust estimation and hypothesis testing},
38 | author={Wilcox, Rand R},
39 | year={2013},
40 | publisher={Academic press},
41 | DOI={10.1016/c2010-0-67044-1}
42 | }
43 |
44 | @inproceedings{seabold2010statsmodels,
45 | title={statsmodels: Econometric and statistical modeling with python},
46 | author={Seabold, Skipper and Perktold, Josef},
47 | booktitle={9th Python in Science Conference},
48 | year={2010},
49 | DOI={10.25080/majora-92bf1922-011}
50 | }
51 |
52 | @article{ho2019moving,
53 | title={Moving beyond P values: Data analysis with estimation graphics},
54 | author={Ho, Joses and Tumkaya, Tayfun and Aryal, Sameer and Choi, Hyungwon and Claridge-Chang, Adam},
55 | journal={Nature Methods},
56 | volume={16},
57 | number={7},
58 | pages={565--566},
59 | year={2019},
60 | publisher={Nature Publishing Group},
61 | DOI={10.1038/s41592-019-0470-3}
62 | }
63 |
64 | @InProceedings{mckinney-proc-scipy-2010,
65 | author = {{W}es {M}c{K}inney},
66 | title = {{D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython},
67 | booktitle = {{P}roceedings of the 9th {P}ython in {S}cience {C}onference},
68 | pages = {56--61},
69 | year = {2010},
70 | editor = {{S}t\'efan van der {W}alt and {J}arrod {M}illman},
71 | doi = {10.25080/Majora-92bf1922-00a}
72 | }
73 |
74 | @article{Vallat2018,
75 | doi = {10.21105/joss.01026},
76 | url = {https://doi.org/10.21105/joss.01026},
77 | year = {2018},
78 | publisher = {The Open Journal},
79 | volume = {3},
80 | number = {31},
81 | pages = {1026},
82 | author = {Raphael Vallat},
83 | title = {Pingouin: Statistics in {P}ython},
84 | journal = {Journal of Open Source Software}
85 | }
86 |
87 | @article{rom1990sequentially,
88 | title={A sequentially rejective test procedure based on a modified {B}onferroni inequality},
89 | author={Rom, Dror M},
90 | journal={Biometrika},
91 | volume={77},
92 | number={3},
93 | pages={663--665},
94 | year={1990},
95 | publisher={Oxford University Press},
96 | DOI={10.1093/biomet/77.3.663}
97 | }
98 |
99 | @article{hochberg1988sharper,
100 | title={A sharper {B}onferroni procedure for multiple tests of significance},
101 | author={Hochberg, Yosef},
102 | journal={Biometrika},
103 | volume={75},
104 | number={4},
105 | pages={800--802},
106 | year={1988},
107 | publisher={Oxford University Press},
108 | DOI={10.1093/biomet/75.4.800}
109 | }
110 |
111 | @article{benjamini1995controlling,
112 | title={Controlling the false discovery rate: A practical and powerful approach to multiple testing},
113 | author={Benjamini, Yoav and Hochberg, Yosef},
114 | journal={Journal of the Royal Statistical Society. Series B (Methodological)},
115 | volume={57},
116 | number={1},
117 | pages={289--300},
118 | year={1995},
119 | DOI={10.1111/j.2517-6161.1995.tb02031.x}
120 | }
--------------------------------------------------------------------------------
/paper/paper.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: 'Hypothesize: Robust Statistics for Python'
3 | tags:
4 | - Python
5 | - R
6 | - statistics
7 | - statistical analysis
8 | - bootstrapping
9 | - trimmed mean
10 | - data analysis
11 | - data science
12 | - social science
13 | - hypothesis testing
14 | authors:
15 | - name: Allan Campopiano
16 | orcid: 0000-0002-3280-4447
17 | affiliation: 1
18 | - name: Rand R. Wilcox
19 | orcid: 0000-0002-2524-2976
20 | affiliation: 2
21 |
22 | affiliations:
23 | - name: Halton Catholic District School Board
24 | index: 1
25 | - name: University of Southern California
26 | index: 2
27 | date: 08 May 2020
28 | bibliography: paper.bib
29 | ---
30 |
31 | # Summary
32 |
33 | Hypothesize is a robust null hypothesis significance testing (NHST) library for Python. In general,
34 | robust hypothesis testing uses techniques that minimize the effects of violating standard statistical
35 | assumptions. In particular, robust methods based on the trimmed mean [@20000755025]
36 | and/or bootstrapping [@bradley1993introduction] routinely outperform traditional statistical
37 | approaches in terms of power and accuracy. This is especially true when dealing with
38 | distributions that produce outliers [@wilcox1998many; @wilcox2013introduction].
39 |
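For readers new to these estimators, the following minimal sketch (plain NumPy/SciPy rather than the Hypothesize API; data invented for illustration) shows a 20% trimmed mean and a percentile bootstrap confidence interval around it:

```python
import numpy as np
from scipy.stats import trim_mean

rng = np.random.default_rng(42)

# a normal sample contaminated with one large outlier
x = np.append(rng.normal(size=29), 15.0)

# the 20% trimmed mean drops the lowest and highest 20% of the
# sorted observations before averaging, blunting the outlier
print(np.mean(x), trim_mean(x, proportiontocut=0.2))

# percentile bootstrap: resample with replacement, recompute the
# estimator, and read the CI off the bootstrap distribution
boot = [trim_mean(rng.choice(x, size=x.size, replace=True), 0.2)
        for _ in range(2000)]
print(np.percentile(boot, [2.5, 97.5]))  # 95% CI
```
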
40 | Hypothesize is based on Rand R. Wilcox's collection of [R functions](https://dornsife.usc.edu/labs/rwilcox/software/),
41 | which contains hundreds of robust methods developed since the 1960s.
42 | Hypothesize brings many of these functions into the Python library ecosystem with the goal
43 | of making robust hypothesis testing easy for researchers, even
44 | if they have not had extensive training in statistics or computer science. It is, however, assumed
45 | that users have a basic understanding of the concepts and terms related to robust hypothesis
46 | testing (e.g., trimmed mean and bootstrapping).
47 |
48 | In contrast to other statistical libraries in Python [@Vallat2018; @seabold2010statsmodels; @ho2019moving],
49 | Hypothesize focuses solely on robust methods for comparing groups and measuring associations. Researchers
50 | who are familiar with traditional NHST and related concepts (e.g., t-test, ANOVA, Pearson's correlation)
51 | will find analogous approaches in Hypothesize, which offers a broad range of choices, both in terms of
52 | supported statistical designs and in terms of fine-grained control over how tests are computed.
53 | For example:
54 |
55 | - Where applicable, many hypothesis tests allow the specification of an estimator. That is, users may
56 | choose when to use the mean, median, trimmed mean, winsorized correlation, percentage bend correlation,
57 | or any other compatible statistical estimator (illustrated in the first sketch following this list).
58 |
59 | - Single- and multi-factor designs are supported, including
60 | independent, dependent, and mixed groups.
61 |
62 | - Family-wise error can be robustly controlled with sequentially rejective
63 | methods [@rom1990sequentially; @hochberg1988sharper; @benjamini1995controlling] (see the second sketch following this list).
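
As a concrete example of the first point, the following minimal sketch assumes that `pb2gen` (from the `compare_groups_with_single_factor` module) accepts two Series plus an estimator callable, and that a `trim_mean` helper is exported by `hypothesize.utilities`; the data are invented for illustration:

```python
import numpy as np
import pandas as pd

# assumed import paths, matching the module layout in this repository
from hypothesize.compare_groups_with_single_factor import pb2gen
from hypothesize.utilities import trim_mean

rng = np.random.default_rng(42)
df = pd.DataFrame({"cell_1": rng.normal(0.0, 1.0, 30),
                   "cell_2": rng.normal(0.5, 1.0, 30)})

# percentile-bootstrap comparison of two groups, with the trimmed
# mean supplied as the estimator of choice
results = pb2gen(df.cell_1, df.cell_2, trim_mean)
print(results)
```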
64 |
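To make the sequentially rejective idea in the last point concrete, here is a small self-contained sketch of Hochberg's step-up procedure [@hochberg1988sharper]; it is illustrative only and is not the library's internal implementation:

```python
import numpy as np

def hochberg_reject(p_values, alpha=0.05):
    """Hochberg's step-up procedure: reject the hypotheses with the
    k smallest p-values, where k is the largest index such that
    p_(k) <= alpha / (m - k + 1)."""
    p = np.asarray(p_values, dtype=float)
    m = p.size
    order = np.argsort(p)              # indices that sort p ascending
    reject = np.zeros(m, dtype=bool)
    for k in range(m, 0, -1):          # step up from the largest p-value
        if p[order[k - 1]] <= alpha / (m - k + 1):
            reject[order[:k]] = True   # reject the k smallest p-values
            break
    return reject

# the smallest p-value is tested at alpha/m, the largest at alpha
print(hochberg_reject([0.011, 0.02, 0.04, 0.30]))  # [ True False False False]
```
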
65 | In terms of learning to use the software, Hypothesize keeps the barrier to entry low for researchers. For example:
66 |
67 | - To easily incorporate Hypothesize with standard data processing tools
68 | [see @mckinney-proc-scipy-2010], all top-level
69 | functions take a Pandas DataFrame/Series as input and return a Python dictionary (see the sketch following this list).
70 |
71 | - The API maps cleanly onto features of the user's statistical design.
72 | This makes it easier to discover the set of appropriate functions for a
73 | given use case.
74 |
75 | - All top-level functions can be run directly in the browser alongside the documentation via
76 | [Google Colab Notebooks](https://colab.research.google.com/notebooks/intro.ipynb)
77 | (no local installation required).
78 |
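As a sketch of the DataFrame-in, dictionary-out convention described in the first point above, the following assumes that `yuenbt` (a bootstrapped test on trimmed means; see the examples directory) is exported by the single-factor module and accepts two Series with default settings; the data are invented:

```python
import numpy as np
import pandas as pd

# assumed import path, matching the module layout in this repository
from hypothesize.compare_groups_with_single_factor import yuenbt

rng = np.random.default_rng(0)
df = pd.DataFrame({"cell_1": rng.normal(0.0, 1.0, 25),
                   "cell_2": rng.normal(0.5, 1.0, 25)})

# two groups in, plain dictionary out
results = yuenbt(df.cell_1, df.cell_2)
print(type(results))    # expected: <class 'dict'>
print(sorted(results.keys()))
```
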
79 | # Acknowledgements
80 |
81 | The authors would like to thank
82 | James Desjardins,
83 | Stefon van Noordt,
84 | Lisa Collimore,
85 | Martina G. Vilas,
86 | Andrew Bennett,
87 | Charlotte Soneson,
88 | Whedon,
89 | the Journal of Open Source Software,
90 | and the Halton Catholic District School Board
91 | for their support of this project.
92 |
93 | # References
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.7.3
2 | pandas==1.4.2
3 | numpy==1.22.0
4 | more-itertools==8.12.0
5 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 | import io
3 | import os
4 |
5 | def get_install_requirements(path):
6 | content = read(path)
7 | return [req for req in content.split("\n") if req != "" and not req.startswith("#")]
8 |
9 | def read(path, encoding="utf-8"):
10 | path = os.path.join(os.path.dirname(__file__), path)
11 | with io.open(path, encoding=encoding) as fp:
12 | return fp.read()
13 |
14 | setup(
15 | name='hypothesize',
16 | version='1.2.2',
17 | description='A Python package for comparing groups and measuring associations using robust statistics.',
18 | author='Allan Campopiano',
19 | author_email="campopianoa@hcdsb.org",
20 | license='BSD 3-clause',
21 | long_description=read('README.md'),
22 | long_description_content_type='text/markdown',
23 | url="https://github.com/Alcampopiano/hypothesize",
24 | packages=find_packages(),
25 | include_package_data=True,
26 | install_requires=get_install_requirements("requirements.txt"),
27 |     python_requires=">=3.8",  # pinned dependencies (numpy 1.22, pandas 1.4) require Python >= 3.8
28 | tests_require=['pytest'],
29 | classifiers=[
30 | "Development Status :: 5 - Production/Stable",
31 | "Environment :: Console",
32 | "Intended Audience :: Science/Research",
33 | "License :: OSI Approved :: BSD License",
34 | "Natural Language :: English",
35 | "Programming Language :: Python :: 3.6",
36 | "Programming Language :: Python :: 3.7",
37 | "Programming Language :: Python :: 3.8",
38 | "Programming Language :: Python :: 3.10",
39 | ],
40 |
41 | )
42 |
--------------------------------------------------------------------------------